From d0aa694b923960351ab79df021de7bbd7728d5cc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 23 Feb 2019 00:01:02 +0000 Subject: drm/i915/pmu: Always sample an active ringbuffer As we no longer have a precise indication of requests queued to an engine, make no presumptions and just sample the ring registers to see if the engine is busy. v2: Report busy while the ring is idling on a semaphore/event. v3: Give the struct a name! v4: Always 0 outside the powerwell; trusting the powerwell is accurate enough for our sampling pmu. v5: Protect against gen7 mmio madness and try to improve grammar Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190223000102.14290-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 710ffb221775..5d45ad4ecca9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -392,7 +392,7 @@ struct intel_engine_cs { bool irq_armed; } breadcrumbs; - struct { + struct intel_engine_pmu { /** * @enable: Bitmask of enable sample events on this engine. * -- cgit v1.2.3 From 89531e7d8ee8602b2723431a581250d5d0ec2913 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Feb 2019 09:49:19 +0000 Subject: drm/i915: Replace global_seqno with a hangcheck heartbeat seqno To determine whether an engine has 'stuck', we simply check whether or not is still on the same seqno for several seconds. To keep this simple mechanism intact over the loss of a global seqno, we can simply add a new global heartbeat seqno instead. As we cannot know the sequence in which requests will then be completed, we use a primitive random number generator instead (with a cycle long enough to not matter over an interval of a few thousand requests between hangcheck samples). The alternative to using a dedicated seqno on every request is to issue a heartbeat request and query its progress through the system. Sadly this requires us to reduce struct_mutex so that we can issue requests without requiring that bkl. v2: And without the extra CS_STALL for the hangcheck seqno -- we don't need strict serialisation with what comes later, we just need to be sure we don't write the hangcheck seqno before our batch is flushed. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190226094922.31617-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 7 ++++--- drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++-- drivers/gpu/drm/i915/intel_hangcheck.c | 6 +++--- drivers/gpu/drm/i915/intel_lrc.c | 15 ++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 36 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 19 ++++++++++++++++- 6 files changed, 77 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 37175414ce89..545091a5180b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1295,7 +1295,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) with_intel_runtime_pm(dev_priv, wakeref) { for_each_engine(engine, dev_priv, id) { acthd[id] = intel_engine_get_active_head(engine); - seqno[id] = intel_engine_get_seqno(engine); + seqno[id] = intel_engine_get_hangcheck_seqno(engine); } intel_engine_get_instdone(dev_priv->engine[RCS], &instdone); @@ -1315,8 +1315,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) for_each_engine(engine, dev_priv, id) { seq_printf(m, "%s:\n", engine->name); seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n", - engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine), + engine->hangcheck.last_seqno, + seqno[id], + engine->hangcheck.next_seqno, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 81b80f8fd9ea..57bc5c4fb3ff 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1497,10 +1497,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_reset_failed(engine->i915)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x/%x [%d ms]\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), - engine->hangcheck.seqno, + engine->hangcheck.last_seqno, + engine->hangcheck.next_seqno, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 9be033b6f4d2..f1d8dfc58049 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -133,21 +133,21 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine, struct hangcheck *hc) { hc->acthd = intel_engine_get_active_head(engine); - hc->seqno = intel_engine_get_seqno(engine); + hc->seqno = intel_engine_get_hangcheck_seqno(engine); } static void hangcheck_store_sample(struct intel_engine_cs *engine, const struct hangcheck *hc) { engine->hangcheck.acthd = hc->acthd; - engine->hangcheck.seqno = hc->seqno; + engine->hangcheck.last_seqno = hc->seqno; } static enum intel_engine_hangcheck_action hangcheck_get_action(struct intel_engine_cs *engine, const struct hangcheck *hc) { - if (engine->hangcheck.seqno != hc->seqno) + if (engine->hangcheck.last_seqno != hc->seqno) return ENGINE_ACTIVE_SEQNO; if (intel_engine_is_idle(engine)) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 34a0866959c5..0516fc6b9652 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -178,6 +178,12 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) I915_GEM_HWS_INDEX_ADDR); } +static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_HANGCHECK_ADDR); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -2206,6 +2212,10 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) request->fence.seqno, request->timeline->hwsp_offset); + cs = gen8_emit_ggtt_write(cs, + intel_engine_next_hangcheck_seqno(request->engine), + intel_hws_hangcheck_address(request->engine)); + cs = gen8_emit_ggtt_write(cs, request->global_seqno, intel_hws_seqno_address(request->engine)); @@ -2230,6 +2240,11 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); + cs = gen8_emit_ggtt_write_rcs(cs, + intel_engine_next_hangcheck_seqno(request->engine), + intel_hws_hangcheck_address(request->engine), + 0); + cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno, intel_hws_seqno_address(request->engine), diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7f841dba87b3..870184bbd169 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -43,6 +43,12 @@ */ #define LEGACY_REQUEST_SIZE 200 +static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_HANGCHECK_ADDR); +} + static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -316,6 +322,11 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; @@ -422,6 +433,11 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = rq->timeline->hwsp_offset; *cs++ = rq->fence.seqno; + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = hws_hangcheck_address(rq->engine); + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = (PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | @@ -447,12 +463,15 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->fence.seqno; + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -472,6 +491,10 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->fence.seqno; + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; @@ -487,6 +510,7 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -930,11 +954,16 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR; *cs++ = rq->fence.seqno; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR; *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -956,6 +985,10 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR; *cs++ = rq->fence.seqno; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; @@ -964,7 +997,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) } *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5d45ad4ecca9..2869aaa9d225 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -6,6 +6,7 @@ #include #include +#include #include #include "i915_gem_batch_pool.h" @@ -119,7 +120,8 @@ struct intel_instdone { struct intel_engine_hangcheck { u64 acthd; - u32 seqno; + u32 last_seqno; + u32 next_seqno; unsigned long action_timestamp; struct intel_instdone instdone; }; @@ -726,6 +728,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) #define I915_GEM_HWS_PREEMPT 0x32 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) +#define I915_GEM_HWS_HANGCHECK 0x34 +#define I915_GEM_HWS_HANGCHECK_ADDR (I915_GEM_HWS_HANGCHECK * sizeof(u32)) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 @@ -1086,4 +1090,17 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) #endif +static inline u32 +intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine) +{ + return engine->hangcheck.next_seqno = + next_pseudo_random32(engine->hangcheck.next_seqno); +} + +static inline u32 +intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine) +{ + return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK); +} + #endif /* _INTEL_RINGBUFFER_H_ */ -- cgit v1.2.3 From 8892f47742ea25fe31eb27c73ab6b6f5f4616c1c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Feb 2019 09:49:20 +0000 Subject: drm/i915: Remove access to global seqno in the HWSP Stop accessing the HWSP to read the global seqno, and stop tracking the mirror in the engine's execution timeline -- it is unused. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190226094922.31617-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gpu_error.c | 4 --- drivers/gpu/drm/i915/i915_gpu_error.h | 3 -- drivers/gpu/drm/i915/i915_request.c | 27 +++++++----------- drivers/gpu/drm/i915/i915_reset.c | 1 - drivers/gpu/drm/i915/intel_engine_cs.c | 14 +--------- drivers/gpu/drm/i915/intel_lrc.c | 21 ++++---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 +---- drivers/gpu/drm/i915/intel_ringbuffer.h | 40 --------------------------- drivers/gpu/drm/i915/selftests/i915_request.c | 3 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 3 -- 10 files changed, 19 insertions(+), 104 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3f6eddb6f6de..061a767e3bed 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -526,8 +526,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, ee->vm_info.pp_dir_base); } } - err_printf(m, " seqno: 0x%08x\n", ee->seqno); - err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", @@ -1216,8 +1214,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error, ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ee->acthd = intel_engine_get_active_head(engine); - ee->seqno = intel_engine_get_seqno(engine); - ee->last_seqno = intel_engine_last_submit(engine); ee->start = I915_READ_START(engine); ee->head = I915_READ_HEAD(engine); ee->tail = I915_READ_TAIL(engine); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 94eaf8ab9051..19ac102afaff 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -94,8 +94,6 @@ struct i915_gpu_state { u32 cpu_ring_head; u32 cpu_ring_tail; - u32 last_seqno; - /* Register state */ u32 start; u32 tail; @@ -108,7 +106,6 @@ struct i915_gpu_state { u32 bbstate; u32 instpm; u32 instps; - u32 seqno; u64 bbaddr; u64 acthd; u32 fault_reg; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 124b3e279c88..596183f35b78 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -179,12 +179,11 @@ static void free_capture_list(struct i915_request *request) static void __retire_engine_request(struct intel_engine_cs *engine, struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n", + GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", __func__, engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, - hwsp_seqno(rq), - intel_engine_get_seqno(engine)); + hwsp_seqno(rq)); GEM_BUG_ON(!i915_request_completed(rq)); @@ -243,12 +242,11 @@ static void i915_request_retire(struct i915_request *request) { struct i915_active_request *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", request->engine->name, request->fence.context, request->fence.seqno, request->global_seqno, - hwsp_seqno(request), - intel_engine_get_seqno(request->engine)); + hwsp_seqno(request)); lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); @@ -305,12 +303,11 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, - hwsp_seqno(rq), - intel_engine_get_seqno(rq->engine)); + hwsp_seqno(rq)); lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!i915_request_completed(rq)); @@ -354,12 +351,11 @@ void __i915_request_submit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", engine->name, request->fence.context, request->fence.seqno, engine->timeline.seqno + 1, - hwsp_seqno(request), - intel_engine_get_seqno(engine)); + hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline.lock); @@ -371,7 +367,6 @@ void __i915_request_submit(struct i915_request *request) seqno = next_global_seqno(&engine->timeline); GEM_BUG_ON(!seqno); - GEM_BUG_ON(intel_engine_signaled(engine, seqno)); /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); @@ -409,12 +404,11 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", engine->name, request->fence.context, request->fence.seqno, request->global_seqno, - hwsp_seqno(request), - intel_engine_get_seqno(engine)); + hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline.lock); @@ -425,7 +419,6 @@ void __i915_request_unsubmit(struct i915_request *request) */ GEM_BUG_ON(!request->global_seqno); GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); - GEM_BUG_ON(intel_engine_has_completed(engine, request->global_seqno)); engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 39a08932a95a..55d6123dbba4 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -788,7 +788,6 @@ static void nop_submit_request(struct i915_request *request) spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_submit(request); i915_request_mark_complete(request); - intel_engine_write_global_seqno(engine, request->global_seqno); spin_unlock_irqrestore(&engine->timeline.lock, flags); intel_engine_queue_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 57bc5c4fb3ff..ec859c7b8c7c 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -455,12 +455,6 @@ cleanup: return err; } -void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) -{ - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); -} - static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) { i915_gem_batch_pool_init(&engine->batch_pool, engine); @@ -1011,10 +1005,6 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (i915_reset_failed(engine->i915)) return true; - /* Any inflight/incomplete requests? */ - if (!intel_engine_signaled(engine, intel_engine_last_submit(engine))) - return false; - /* Waiting to drain ELSP? */ if (READ_ONCE(engine->execlists.active)) { struct tasklet_struct *t = &engine->execlists.tasklet; @@ -1497,9 +1487,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_reset_failed(engine->i915)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x/%x [%d ms]\n", - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine), + drm_printf(m, "\tHangcheck %x:%x [%d ms]\n", engine->hangcheck.last_seqno, engine->hangcheck.next_seqno, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0516fc6b9652..09b56b84e007 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -528,13 +528,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, rq->fence.context, rq->fence.seqno, hwsp_seqno(rq), - intel_engine_get_seqno(engine), rq_prio(rq)); } else { GEM_BUG_ON(!n); @@ -840,13 +839,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n", + GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", rq->engine->name, (unsigned int)(port - execlists->port), rq->global_seqno, rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq), - intel_engine_get_seqno(rq->engine)); + hwsp_seqno(rq)); GEM_BUG_ON(!execlists->active); execlists_context_schedule_out(rq, @@ -902,8 +900,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s current %d\n", - engine->name, intel_engine_get_seqno(engine)); + GEM_TRACE("%s\n", engine->name); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -952,10 +949,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) kmem_cache_free(engine->i915->priorities, p); } - intel_write_status_page(engine, - I915_GEM_HWS_INDEX, - intel_engine_last_submit(engine)); - /* Remaining _unready_ requests will be nop'ed when submitted */ execlists->queue_priority_hint = INT_MIN; @@ -1071,14 +1064,13 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, rq ? hwsp_seqno(rq) : 0, - intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); /* Check the context/desc id for this event matches */ @@ -1946,10 +1938,9 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) /* Following the reset, we need to reload the CSB read/write pointers */ reset_csb_pointers(&engine->execlists); - GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + GEM_TRACE("%s seqno=%d, stalled? %s\n", engine->name, rq ? rq->global_seqno : 0, - intel_engine_get_seqno(engine), yesno(stalled)); if (!rq) goto out_unlock; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 870184bbd169..2d59e2990448 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -782,10 +782,9 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) } } - GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + GEM_TRACE("%s seqno=%d, stalled? %s\n", engine->name, rq ? rq->global_seqno : 0, - intel_engine_get_seqno(engine), yesno(stalled)); /* * The guilty request will get skipped on a hung engine. @@ -924,10 +923,6 @@ static void cancel_requests(struct intel_engine_cs *engine) i915_request_mark_complete(request); } - intel_write_status_page(engine, - I915_GEM_HWS_INDEX, - intel_engine_last_submit(engine)); - /* Remaining _unready_ requests will be nop'ed when submitted */ spin_unlock_irqrestore(&engine->timeline.lock, flags); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2869aaa9d225..551b3daa741c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -848,8 +848,6 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) return (head - tail - CACHELINE_BYTES) & (size - 1); } -void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); - int intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); @@ -867,44 +865,6 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); -static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) -{ - /* - * We are only peeking at the tail of the submit queue (and not the - * queue itself) in order to gain a hint as to the current active - * state of the engine. Callers are not expected to be taking - * engine->timeline->lock, nor are they expected to be concerned - * wtih serialising this hint with anything, so document it as - * a hint and nothing more. - */ - return READ_ONCE(engine->timeline.seqno); -} - -static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) -{ - return intel_read_status_page(engine, I915_GEM_HWS_INDEX); -} - -static inline bool intel_engine_signaled(struct intel_engine_cs *engine, - u32 seqno) -{ - return i915_seqno_passed(intel_engine_get_seqno(engine), seqno); -} - -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, - u32 seqno) -{ - GEM_BUG_ON(!seqno); - return intel_engine_signaled(engine, seqno); -} - -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, - u32 seqno) -{ - GEM_BUG_ON(!seqno); - return intel_engine_signaled(engine, seqno - 1); -} - void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 03cc8ab6a620..7da52e3d67af 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -226,8 +226,7 @@ static int igt_request_rewind(void *arg) mutex_unlock(&i915->drm.struct_mutex); if (i915_request_wait(vip, 0, HZ) == -ETIME) { - pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", - vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); + pr_err("timed out waiting for high priority request\n"); goto err; } diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 08f0cab02e0f..79bf27606ab8 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -86,8 +86,6 @@ static struct mock_request *first_request(struct mock_engine *engine) static void advance(struct mock_request *request) { list_del_init(&request->link); - intel_engine_write_global_seqno(request->base.engine, - request->base.global_seqno); i915_request_mark_complete(&request->base); GEM_BUG_ON(!i915_request_completed(&request->base)); @@ -278,7 +276,6 @@ void mock_engine_flush(struct intel_engine_cs *engine) void mock_engine_reset(struct intel_engine_cs *engine) { - intel_engine_write_global_seqno(engine, 0); } void mock_engine_free(struct intel_engine_cs *engine) -- cgit v1.2.3 From b300fde8965fdd628341c4b602481ebde8ac9cb7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Feb 2019 09:49:21 +0000 Subject: drm/i915: Remove i915_request.global_seqno Having weaned the interrupt handling off using a single global execution queue, we no longer need to emit a global_seqno. Note that we still have a few assumptions about execution order along engine timelines, but this removes the most obvious artefact! Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190226094922.31617-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gpu_error.c | 34 ++-------------- drivers/gpu/drm/i915/i915_gpu_error.h | 2 - drivers/gpu/drm/i915/i915_request.c | 34 +++------------- drivers/gpu/drm/i915/i915_request.h | 32 --------------- drivers/gpu/drm/i915/i915_trace.h | 25 ++++-------- drivers/gpu/drm/i915/intel_engine_cs.c | 5 +-- drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 34 ++-------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 50 +++--------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 - drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 5 +-- drivers/gpu/drm/i915/selftests/mock_engine.c | 1 - 12 files changed, 32 insertions(+), 194 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 061a767e3bed..fa86c60fb56c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -380,19 +380,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_printf(m, "%s [%d]:\n", name, count); while (count--) { - err_printf(m, " %08x_%08x %8u %02x %02x %02x", + err_printf(m, " %08x_%08x %8u %02x %02x", upper_32_bits(err->gtt_offset), lower_32_bits(err->gtt_offset), err->size, err->read_domains, - err->write_domain, - err->wseqno); + err->write_domain); err_puts(m, tiling_flag(err->tiling)); err_puts(m, dirty_flag(err->dirty)); err_puts(m, purgeable_flag(err->purgeable)); err_puts(m, err->userptr ? " userptr" : ""); - err_puts(m, err->engine != -1 ? " " : ""); - err_puts(m, engine_name(m->i915, err->engine)); err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); if (err->name) @@ -1048,27 +1045,6 @@ i915_error_object_create(struct drm_i915_private *i915, return dst; } -/* The error capture is special as tries to run underneath the normal - * locking rules - so we use the raw version of the i915_active_request lookup. - */ -static inline u32 -__active_get_seqno(struct i915_active_request *active) -{ - struct i915_request *request; - - request = __i915_active_request_peek(active); - return request ? request->global_seqno : 0; -} - -static inline int -__active_get_engine_id(struct i915_active_request *active) -{ - struct i915_request *request; - - request = __i915_active_request_peek(active); - return request ? request->engine->id : -1; -} - static void capture_bo(struct drm_i915_error_buffer *err, struct i915_vma *vma) { @@ -1077,9 +1053,6 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->size = obj->base.size; err->name = obj->base.name; - err->wseqno = __active_get_seqno(&obj->frontbuffer_write); - err->engine = __active_get_engine_id(&obj->frontbuffer_write); - err->gtt_offset = vma->node.start; err->read_domains = obj->read_domains; err->write_domain = obj->write_domain; @@ -1284,7 +1257,8 @@ static void record_request(struct i915_request *request, struct i915_gem_context *ctx = request->gem_context; erq->flags = request->fence.flags; - erq->context = ctx->hw_id; + erq->context = request->fence.context; + erq->seqno = request->fence.seqno; erq->sched_attr = request->sched.attr; erq->jiffies = request->emitted_jiffies; erq->start = i915_ggtt_offset(request->ring->vma); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 19ac102afaff..8c1569c1830d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -164,7 +164,6 @@ struct i915_gpu_state { struct drm_i915_error_buffer { u32 size; u32 name; - u32 wseqno; u64 gtt_offset; u32 read_domains; u32 write_domain; @@ -173,7 +172,6 @@ struct i915_gpu_state { u32 dirty:1; u32 purgeable:1; u32 userptr:1; - s32 engine:4; u32 cache_level:3; } *active_bo[I915_NUM_ENGINES], *pinned_bo; u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 596183f35b78..935db5548f80 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -179,10 +179,9 @@ static void free_capture_list(struct i915_request *request) static void __retire_engine_request(struct intel_engine_cs *engine, struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n", __func__, engine->name, rq->fence.context, rq->fence.seqno, - rq->global_seqno, hwsp_seqno(rq)); GEM_BUG_ON(!i915_request_completed(rq)); @@ -242,10 +241,9 @@ static void i915_request_retire(struct i915_request *request) { struct i915_active_request *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", request->engine->name, request->fence.context, request->fence.seqno, - request->global_seqno, hwsp_seqno(request)); lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -303,10 +301,9 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, - rq->global_seqno, hwsp_seqno(rq)); lockdep_assert_held(&rq->i915->drm.struct_mutex); @@ -339,22 +336,13 @@ static void move_to_timeline(struct i915_request *request, spin_unlock(&request->timeline->lock); } -static u32 next_global_seqno(struct i915_timeline *tl) -{ - if (!++tl->seqno) - ++tl->seqno; - return tl->seqno; -} - void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld -> current %d\n", engine->name, request->fence.context, request->fence.seqno, - engine->timeline.seqno + 1, hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); @@ -363,16 +351,10 @@ void __i915_request_submit(struct i915_request *request) if (i915_gem_context_is_banned(request->gem_context)) i915_request_skip(request, -EIO); - GEM_BUG_ON(request->global_seqno); - - seqno = next_global_seqno(&engine->timeline); - GEM_BUG_ON(!seqno); - /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); - request->global_seqno = seqno; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !i915_request_enable_breadcrumb(request)) intel_engine_queue_breadcrumbs(engine); @@ -404,10 +386,9 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", engine->name, request->fence.context, request->fence.seqno, - request->global_seqno, hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); @@ -417,13 +398,9 @@ void __i915_request_unsubmit(struct i915_request *request) * Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ - GEM_BUG_ON(!request->global_seqno); - GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); - engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - request->global_seqno = 0; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) i915_request_cancel_breadcrumb(request); GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); @@ -637,7 +614,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ - rq->global_seqno = 0; rq->file_priv = NULL; rq->batch = NULL; rq->capture_list = NULL; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 40f3e8dcbdd5..1e127c1c53fa 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -147,14 +147,6 @@ struct i915_request { */ const u32 *hwsp_seqno; - /** - * GEM sequence number associated with this request on the - * global execution timeline. It is zero when the request is not - * on the HW queue (i.e. not on the engine timeline list). - * Its value is guarded by the timeline spinlock. - */ - u32 global_seqno; - /** Position in the ring of the start of the request */ u32 head; @@ -247,30 +239,6 @@ i915_request_put(struct i915_request *rq) dma_fence_put(&rq->fence); } -/** - * i915_request_global_seqno - report the current global seqno - * @request - the request - * - * A request is assigned a global seqno only when it is on the hardware - * execution queue. The global seqno can be used to maintain a list of - * requests on the same engine in retirement order, for example for - * constructing a priority queue for waiting. Prior to its execution, or - * if it is subsequently removed in the event of preemption, its global - * seqno is zero. As both insertion and removal from the execution queue - * may operate in IRQ context, it is not guarded by the usual struct_mutex - * BKL. Instead those relying on the global seqno must be prepared for its - * value to change between reads. Only when the request is complete can - * the global seqno be stable (due to the memory barriers on submitting - * the commands to the hardware to write the breadcrumb, if the HWS shows - * that it has passed the global seqno and the global seqno is unchanged - * after the read, it is indeed complete). - */ -static inline u32 -i915_request_global_seqno(const struct i915_request *request) -{ - return READ_ONCE(request->global_seqno); -} - int i915_request_await_object(struct i915_request *to, struct drm_i915_gem_object *obj, bool write); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 0d9cedb892b0..12893304c8f8 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -708,7 +708,6 @@ DECLARE_EVENT_CLASS(i915_request, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global) ), TP_fast_assign( @@ -718,13 +717,11 @@ DECLARE_EVENT_CLASS(i915_request, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global = rq->global_seqno; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global) + __entry->hw_id, __entry->ctx, __entry->seqno) ); DEFINE_EVENT(i915_request, i915_request_add, @@ -754,7 +751,6 @@ TRACE_EVENT(i915_request_in, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global_seqno) __field(u32, port) __field(u32, prio) ), @@ -766,15 +762,14 @@ TRACE_EVENT(i915_request_in, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; __entry->prio = rq->sched.attr.priority; __entry->port = port; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, global=%u, port=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->prio, __entry->global_seqno, __entry->port) + __entry->prio, __entry->port) ); TRACE_EVENT(i915_request_out, @@ -788,7 +783,6 @@ TRACE_EVENT(i915_request_out, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global_seqno) __field(u32, completed) ), @@ -799,14 +793,13 @@ TRACE_EVENT(i915_request_out, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; __entry->completed = i915_request_completed(rq); ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, completed?=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global_seqno, __entry->completed) + __entry->completed) ); #else @@ -849,7 +842,6 @@ TRACE_EVENT(i915_request_wait_begin, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global) __field(unsigned int, flags) ), @@ -866,14 +858,13 @@ TRACE_EVENT(i915_request_wait_begin, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global = rq->global_seqno; __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, blocking=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), + !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) ); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ec859c7b8c7c..b7b626195eda 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1271,15 +1271,14 @@ static void print_request(struct drm_printer *m, x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n", + drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", prefix, - rq->global_seqno, + rq->fence.context, rq->fence.seqno, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? "*" : "", test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags) ? "+" : "", - rq->fence.context, rq->fence.seqno, buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 8bc8aa54aa35..20cbceeabeae 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -535,7 +535,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) spin_lock(&client->wq_lock); guc_wq_item_append(client, engine->guc_id, ctx_desc, - ring_tail, rq->global_seqno); + ring_tail, rq->fence.seqno); guc_ring_doorbell(client); client->submissions[engine->id] += 1; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 09b56b84e007..c4f4966b0f4f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -172,12 +172,6 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_INDEX_ADDR); -} - static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -528,10 +522,9 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", engine->name, n, port[n].context_id, count, - rq->global_seqno, rq->fence.context, rq->fence.seqno, hwsp_seqno(rq), rq_prio(rq)); @@ -839,10 +832,9 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", + GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n", rq->engine->name, (unsigned int)(port - execlists->port), - rq->global_seqno, rq->fence.context, rq->fence.seqno, hwsp_seqno(rq)); @@ -924,8 +916,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->timeline.requests, link) { - GEM_BUG_ON(!rq->global_seqno); - if (!i915_request_signaled(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -1064,10 +1054,9 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", engine->name, port->context_id, count, - rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, rq ? hwsp_seqno(rq) : 0, @@ -1938,10 +1927,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) /* Following the reset, we need to reload the CSB read/write pointers */ reset_csb_pointers(&engine->execlists); - GEM_TRACE("%s seqno=%d, stalled? %s\n", - engine->name, - rq ? rq->global_seqno : 0, - yesno(stalled)); + GEM_TRACE("%s stalled? %s\n", engine->name, yesno(stalled)); if (!rq) goto out_unlock; @@ -2196,9 +2182,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { - /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ - BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - cs = gen8_emit_ggtt_write(cs, request->fence.seqno, request->timeline->hwsp_offset); @@ -2207,10 +2190,6 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) intel_engine_next_hangcheck_seqno(request->engine), intel_hws_hangcheck_address(request->engine)); - cs = gen8_emit_ggtt_write(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine)); - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2236,11 +2215,6 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) intel_hws_hangcheck_address(request->engine), 0); - cs = gen8_emit_ggtt_write_rcs(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine), - PIPE_CONTROL_CS_STALL); - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2d59e2990448..1b96b0960adc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -49,12 +49,6 @@ static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine) I915_GEM_HWS_HANGCHECK_ADDR); } -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_INDEX_ADDR); -} - unsigned int intel_ring_update_space(struct intel_ring *ring) { unsigned int space; @@ -327,11 +321,6 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; - *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; @@ -438,13 +427,6 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = hws_hangcheck_address(rq->engine); *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL); - *cs++ = intel_hws_seqno_address(rq->engine); - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; @@ -467,11 +449,8 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -495,10 +474,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->global_seqno; - for (i = 0; i < GEN7_XCS_WA; i++) { *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR; @@ -510,7 +485,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -782,10 +756,8 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) } } - GEM_TRACE("%s seqno=%d, stalled? %s\n", - engine->name, - rq ? rq->global_seqno : 0, - yesno(stalled)); + GEM_TRACE("%s stalled? %s\n", engine->name, yesno(stalled)); + /* * The guilty request will get skipped on a hung engine. * @@ -915,8 +887,6 @@ static void cancel_requests(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. */ list_for_each_entry(request, &engine->timeline.requests, link) { - GEM_BUG_ON(!request->global_seqno); - if (!i915_request_signaled(request)) dma_fence_set_error(&request->fence, -EIO); @@ -953,12 +923,7 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -976,10 +941,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = MI_FLUSH; - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); @@ -987,11 +948,12 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; } *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 551b3daa741c..de8dba7565b0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -724,8 +724,6 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * * The area from dword 0x30 to 0x3ff is available for driver usage. */ -#define I915_GEM_HWS_INDEX 0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) #define I915_GEM_HWS_PREEMPT 0x32 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) #define I915_GEM_HWS_HANGCHECK 0x34 diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index a77e4bf1ab55..fa02cf9ce0cf 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -571,11 +571,10 @@ static int active_request_put(struct i915_request *rq) return 0; if (i915_request_wait(rq, 0, 5 * HZ) < 0) { - GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld, seqno %d.\n", + GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", rq->engine->name, rq->fence.context, - rq->fence.seqno, - i915_request_global_seqno(rq)); + rq->fence.seqno); GEM_TRACE_DUMP(); i915_gem_set_wedged(rq->i915); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 79bf27606ab8..6f3fb803c747 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -196,7 +196,6 @@ static void mock_submit_request(struct i915_request *request) unsigned long flags; i915_request_submit(request); - GEM_BUG_ON(!request->global_seqno); spin_lock_irqsave(&engine->hw_lock, flags); list_add_tail(&mock->link, &engine->hw_queue); -- cgit v1.2.3 From 2d5eaad007d971b8cd8cd8122f594b04e292b567 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Feb 2019 10:24:00 +0000 Subject: drm/i915: Compute the global scheduler caps Do a pass over all the engines upon starting to determine the global scheduler capability flags (those that are agreed upon by all). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190226102404.29153-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 39 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 6 ----- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 4 files changed, 43 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2b261524cfa4..de8b92da56cf 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4698,6 +4698,8 @@ static int __i915_gem_restart_engines(void *data) } } + intel_engines_set_scheduler_caps(i915); + return 0; } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 4f244019560d..43ce4c5c56c9 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -608,6 +608,45 @@ err_hwsp: return err; } +void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) +{ + static const struct { + u8 engine; + u8 sched; + } map[] = { +#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) } + MAP(PREEMPTION, PREEMPTION), +#undef MAP + }; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 enabled, disabled; + + enabled = 0; + disabled = 0; + for_each_engine(engine, i915, id) { /* all engines must agree! */ + int i; + + if (engine->schedule) + enabled |= (I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY); + else + disabled |= (I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY); + + for (i = 0; i < ARRAY_SIZE(map); i++) { + if (engine->flags & BIT(map[i].engine)) + enabled |= BIT(map[i].sched); + else + disabled |= BIT(map[i].sched); + } + } + + i915->caps.scheduler = enabled & ~disabled; + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED)) + i915->caps.scheduler = 0; +} + static void __intel_context_unpin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c4f4966b0f4f..81d188508b44 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2292,12 +2292,6 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (engine->i915->preempt_context) engine->flags |= I915_ENGINE_HAS_PREEMPTION; - - engine->i915->caps.scheduler = - I915_SCHEDULER_CAP_ENABLED | - I915_SCHEDULER_CAP_PRIORITY; - if (intel_engine_has_preemption(engine)) - engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION; } static void diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index de8dba7565b0..533e2053664e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -593,6 +593,8 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_HAS_PREEMPTION; } +void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); + static inline bool __execlists_need_preempt(int prio, int last) { /* -- cgit v1.2.3 From 32eb6bcfdda9dad240cf6a22fda2b3418b1a1b8e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Feb 2019 10:20:33 +0000 Subject: drm/i915: Make request allocation caches global As kmem_caches share the same properties (size, allocation/free behaviour) for all potential devices, we can use global caches. While this potential has worse fragmentation behaviour (one can argue that different devices would have different activity lifetimes, but you can also argue that activity is temporal across the system) it is the default behaviour of the system at large to amalgamate matching caches. The benefit for us is much reduced pointer dancing along the frequent allocation paths. v2: Defer shrinking until after a global grace period for futureproofing multiple consumers of the slab caches, similar to the current strategy for avoiding shrinking too early. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190228102035.5857-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_active.c | 7 +- drivers/gpu/drm/i915/i915_active.h | 1 + drivers/gpu/drm/i915/i915_drv.h | 3 - drivers/gpu/drm/i915/i915_gem.c | 34 +------ drivers/gpu/drm/i915/i915_globals.c | 113 +++++++++++++++++++++++ drivers/gpu/drm/i915/i915_globals.h | 15 +++ drivers/gpu/drm/i915/i915_pci.c | 8 +- drivers/gpu/drm/i915/i915_request.c | 53 +++++++++-- drivers/gpu/drm/i915/i915_request.h | 10 ++ drivers/gpu/drm/i915/i915_scheduler.c | 66 ++++++++++--- drivers/gpu/drm/i915/i915_scheduler.h | 34 ++++++- drivers/gpu/drm/i915/intel_guc_submission.c | 3 +- drivers/gpu/drm/i915/intel_lrc.c | 6 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 17 ---- drivers/gpu/drm/i915/selftests/mock_engine.c | 45 +++++---- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 26 ------ drivers/gpu/drm/i915/selftests/mock_request.c | 12 +-- drivers/gpu/drm/i915/selftests/mock_request.h | 7 -- 19 files changed, 312 insertions(+), 149 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_globals.c create mode 100644 drivers/gpu/drm/i915/i915_globals.h (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 1787e1299b1b..a1d834068765 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -77,6 +77,7 @@ i915-y += \ i915_gem_tiling.o \ i915_gem_userptr.o \ i915_gemfs.o \ + i915_globals.o \ i915_query.o \ i915_request.o \ i915_scheduler.o \ diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index db7bb5bd5add..d9f6471ac16c 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -294,7 +294,12 @@ int __init i915_global_active_init(void) return 0; } -void __exit i915_global_active_exit(void) +void i915_global_active_shrink(void) +{ + kmem_cache_shrink(global.slab_cache); +} + +void i915_global_active_exit(void) { kmem_cache_destroy(global.slab_cache); } diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 12b5c1d287d1..5fbd9102384b 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -420,6 +420,7 @@ static inline void i915_active_fini(struct i915_active *ref) { } #endif int i915_global_active_init(void); +void i915_global_active_shrink(void); void i915_global_active_exit(void); #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cc09caf3870e..f16016b330b3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1473,9 +1473,6 @@ struct drm_i915_private { struct kmem_cache *objects; struct kmem_cache *vmas; struct kmem_cache *luts; - struct kmem_cache *requests; - struct kmem_cache *dependencies; - struct kmem_cache *priorities; const struct intel_device_info __info; /* Use INTEL_INFO() to access. */ struct intel_runtime_info __runtime; /* Use RUNTIME_INFO() to access. */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index de8b92da56cf..f6fe10fce0ec 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -42,6 +42,7 @@ #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gemfs.h" +#include "i915_globals.h" #include "i915_reset.h" #include "i915_trace.h" #include "i915_vgpu.h" @@ -187,6 +188,8 @@ void i915_gem_unpark(struct drm_i915_private *i915) if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ i915->gt.epoch = 1; + i915_globals_unpark(); + intel_enable_gt_powersave(i915); i915_update_gfx_val(i915); if (INTEL_GEN(i915) >= 6) @@ -2892,12 +2895,11 @@ static void shrink_caches(struct drm_i915_private *i915) * filled slabs to prioritise allocating from the mostly full slabs, * with the aim of reducing fragmentation. */ - kmem_cache_shrink(i915->priorities); - kmem_cache_shrink(i915->dependencies); - kmem_cache_shrink(i915->requests); kmem_cache_shrink(i915->luts); kmem_cache_shrink(i915->vmas); kmem_cache_shrink(i915->objects); + + i915_globals_park(); } struct sleep_rcu_work { @@ -5237,23 +5239,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) if (!dev_priv->luts) goto err_vmas; - dev_priv->requests = KMEM_CACHE(i915_request, - SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT | - SLAB_TYPESAFE_BY_RCU); - if (!dev_priv->requests) - goto err_luts; - - dev_priv->dependencies = KMEM_CACHE(i915_dependency, - SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT); - if (!dev_priv->dependencies) - goto err_requests; - - dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); - if (!dev_priv->priorities) - goto err_dependencies; - INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.closed_vma); @@ -5278,12 +5263,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) return 0; -err_dependencies: - kmem_cache_destroy(dev_priv->dependencies); -err_requests: - kmem_cache_destroy(dev_priv->requests); -err_luts: - kmem_cache_destroy(dev_priv->luts); err_vmas: kmem_cache_destroy(dev_priv->vmas); err_objects: @@ -5301,9 +5280,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); - kmem_cache_destroy(dev_priv->priorities); - kmem_cache_destroy(dev_priv->dependencies); - kmem_cache_destroy(dev_priv->requests); kmem_cache_destroy(dev_priv->luts); kmem_cache_destroy(dev_priv->vmas); kmem_cache_destroy(dev_priv->objects); diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c new file mode 100644 index 000000000000..7fd1b3945a04 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -0,0 +1,113 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include +#include + +#include "i915_active.h" +#include "i915_globals.h" +#include "i915_request.h" +#include "i915_scheduler.h" + +int __init i915_globals_init(void) +{ + int err; + + err = i915_global_active_init(); + if (err) + return err; + + err = i915_global_request_init(); + if (err) + goto err_active; + + err = i915_global_scheduler_init(); + if (err) + goto err_request; + + return 0; + +err_request: + i915_global_request_exit(); +err_active: + i915_global_active_exit(); + return err; +} + +static void i915_globals_shrink(void) +{ + /* + * kmem_cache_shrink() discards empty slabs and reorders partially + * filled slabs to prioritise allocating from the mostly full slabs, + * with the aim of reducing fragmentation. + */ + i915_global_active_shrink(); + i915_global_request_shrink(); + i915_global_scheduler_shrink(); +} + +static atomic_t active; +static atomic_t epoch; +struct park_work { + struct rcu_work work; + int epoch; +}; + +static void __i915_globals_park(struct work_struct *work) +{ + struct park_work *wrk = container_of(work, typeof(*wrk), work.work); + + /* Confirm nothing woke up in the last grace period */ + if (wrk->epoch == atomic_read(&epoch)) + i915_globals_shrink(); + + kfree(wrk); +} + +void i915_globals_park(void) +{ + struct park_work *wrk; + + /* + * Defer shrinking the global slab caches (and other work) until + * after a RCU grace period has completed with no activity. This + * is to try and reduce the latency impact on the consumers caused + * by us shrinking the caches the same time as they are trying to + * allocate, with the assumption being that if we idle long enough + * for an RCU grace period to elapse since the last use, it is likely + * to be longer until we need the caches again. + */ + if (!atomic_dec_and_test(&active)) + return; + + wrk = kmalloc(sizeof(*wrk), GFP_KERNEL); + if (!wrk) + return; + + wrk->epoch = atomic_inc_return(&epoch); + INIT_RCU_WORK(&wrk->work, __i915_globals_park); + queue_rcu_work(system_wq, &wrk->work); +} + +void i915_globals_unpark(void) +{ + atomic_inc(&epoch); + atomic_inc(&active); +} + +void __exit i915_globals_exit(void) +{ + /* Flush any residual park_work */ + rcu_barrier(); + flush_scheduled_work(); + + i915_global_scheduler_exit(); + i915_global_request_exit(); + i915_global_active_exit(); + + /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ + rcu_barrier(); +} diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h new file mode 100644 index 000000000000..e468f0413a73 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_GLOBALS_H_ +#define _I915_GLOBALS_H_ + +int i915_globals_init(void); +void i915_globals_park(void); +void i915_globals_unpark(void); +void i915_globals_exit(void); + +#endif /* _I915_GLOBALS_H_ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index c4d6b8da9b03..a9211c370cd1 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -28,8 +28,8 @@ #include -#include "i915_active.h" #include "i915_drv.h" +#include "i915_globals.h" #include "i915_selftest.h" #define PLATFORM(x) .platform = (x), .platform_mask = BIT(x) @@ -802,7 +802,9 @@ static int __init i915_init(void) bool use_kms = true; int err; - i915_global_active_init(); + err = i915_globals_init(); + if (err) + return err; err = i915_mock_selftests(); if (err) @@ -835,7 +837,7 @@ static void __exit i915_exit(void) return; pci_unregister_driver(&i915_pci_driver); - i915_global_active_exit(); + i915_globals_exit(); } module_init(i915_init); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 935db5548f80..a011bf4be48e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -32,6 +32,11 @@ #include "i915_active.h" #include "i915_reset.h" +static struct i915_global_request { + struct kmem_cache *slab_requests; + struct kmem_cache *slab_dependencies; +} global; + static const char *i915_fence_get_driver_name(struct dma_fence *fence) { return "i915"; @@ -86,7 +91,7 @@ static void i915_fence_release(struct dma_fence *fence) */ i915_sw_fence_fini(&rq->submit); - kmem_cache_free(rq->i915->requests, rq); + kmem_cache_free(global.slab_requests, rq); } const struct dma_fence_ops i915_fence_ops = { @@ -292,7 +297,7 @@ static void i915_request_retire(struct i915_request *request) unreserve_gt(request->i915); - i915_sched_node_fini(request->i915, &request->sched); + i915_sched_node_fini(&request->sched); i915_request_put(request); } @@ -491,7 +496,7 @@ i915_request_alloc_slow(struct intel_context *ce) ring_retire_requests(ring); out: - return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL); + return kmem_cache_alloc(global.slab_requests, GFP_KERNEL); } static int add_timeline_barrier(struct i915_request *rq) @@ -579,7 +584,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * * Do not use kmem_cache_zalloc() here! */ - rq = kmem_cache_alloc(i915->requests, + rq = kmem_cache_alloc(global.slab_requests, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { rq = i915_request_alloc_slow(ce); @@ -666,7 +671,7 @@ err_unwind: GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); - kmem_cache_free(i915->requests, rq); + kmem_cache_free(global.slab_requests, rq); err_unreserve: unreserve_gt(i915); intel_context_unpin(ce); @@ -685,9 +690,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; if (to->engine->schedule) { - ret = i915_sched_node_add_dependency(to->i915, - &to->sched, - &from->sched); + ret = i915_sched_node_add_dependency(&to->sched, &from->sched); if (ret < 0) return ret; } @@ -1175,3 +1178,37 @@ void i915_retire_requests(struct drm_i915_private *i915) #include "selftests/mock_request.c" #include "selftests/i915_request.c" #endif + +int __init i915_global_request_init(void) +{ + global.slab_requests = KMEM_CACHE(i915_request, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_TYPESAFE_BY_RCU); + if (!global.slab_requests) + return -ENOMEM; + + global.slab_dependencies = KMEM_CACHE(i915_dependency, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT); + if (!global.slab_dependencies) + goto err_requests; + + return 0; + +err_requests: + kmem_cache_destroy(global.slab_requests); + return -ENOMEM; +} + +void i915_global_request_shrink(void) +{ + kmem_cache_shrink(global.slab_dependencies); + kmem_cache_shrink(global.slab_requests); +} + +void i915_global_request_exit(void) +{ + kmem_cache_destroy(global.slab_dependencies); + kmem_cache_destroy(global.slab_requests); +} diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 1e127c1c53fa..be3ded6bcf56 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -29,6 +29,7 @@ #include "i915_gem.h" #include "i915_scheduler.h" +#include "i915_selftest.h" #include "i915_sw_fence.h" #include @@ -196,6 +197,11 @@ struct i915_request { struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ struct list_head client_link; + + I915_SELFTEST_DECLARE(struct { + struct list_head link; + unsigned long delay; + } mock;) }; #define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) @@ -371,4 +377,8 @@ static inline void i915_request_mark_complete(struct i915_request *rq) void i915_retire_requests(struct drm_i915_private *i915); +int i915_global_request_init(void); +void i915_global_request_shrink(void); +void i915_global_request_exit(void); + #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 38efefd22dce..0dd720593f9c 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -10,6 +10,11 @@ #include "i915_request.h" #include "i915_scheduler.h" +static struct i915_global_scheduler { + struct kmem_cache *slab_dependencies; + struct kmem_cache *slab_priorities; +} global; + static DEFINE_SPINLOCK(schedule_lock); static const struct i915_request * @@ -37,16 +42,15 @@ void i915_sched_node_init(struct i915_sched_node *node) } static struct i915_dependency * -i915_dependency_alloc(struct drm_i915_private *i915) +i915_dependency_alloc(void) { - return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); + return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL); } static void -i915_dependency_free(struct drm_i915_private *i915, - struct i915_dependency *dep) +i915_dependency_free(struct i915_dependency *dep) { - kmem_cache_free(i915->dependencies, dep); + kmem_cache_free(global.slab_dependencies, dep); } bool __i915_sched_node_add_dependency(struct i915_sched_node *node, @@ -73,25 +77,23 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, return ret; } -int i915_sched_node_add_dependency(struct drm_i915_private *i915, - struct i915_sched_node *node, +int i915_sched_node_add_dependency(struct i915_sched_node *node, struct i915_sched_node *signal) { struct i915_dependency *dep; - dep = i915_dependency_alloc(i915); + dep = i915_dependency_alloc(); if (!dep) return -ENOMEM; if (!__i915_sched_node_add_dependency(node, signal, dep, I915_DEPENDENCY_ALLOC)) - i915_dependency_free(i915, dep); + i915_dependency_free(dep); return 0; } -void i915_sched_node_fini(struct drm_i915_private *i915, - struct i915_sched_node *node) +void i915_sched_node_fini(struct i915_sched_node *node) { struct i915_dependency *dep, *tmp; @@ -111,7 +113,7 @@ void i915_sched_node_fini(struct drm_i915_private *i915, list_del(&dep->wait_link); if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(i915, dep); + i915_dependency_free(dep); } /* Remove ourselves from everyone who depends upon us */ @@ -121,7 +123,7 @@ void i915_sched_node_fini(struct drm_i915_private *i915, list_del(&dep->signal_link); if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(i915, dep); + i915_dependency_free(dep); } spin_unlock(&schedule_lock); @@ -198,7 +200,7 @@ find_priolist: if (prio == I915_PRIORITY_NORMAL) { p = &execlists->default_priolist; } else { - p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); + p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC); /* Convert an allocation failure to a priority bump */ if (unlikely(!p)) { prio = I915_PRIORITY_NORMAL; /* recurses just once */ @@ -424,3 +426,39 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump) spin_unlock_bh(&schedule_lock); } + +void __i915_priolist_free(struct i915_priolist *p) +{ + kmem_cache_free(global.slab_priorities, p); +} + +int __init i915_global_scheduler_init(void) +{ + global.slab_dependencies = KMEM_CACHE(i915_dependency, + SLAB_HWCACHE_ALIGN); + if (!global.slab_dependencies) + return -ENOMEM; + + global.slab_priorities = KMEM_CACHE(i915_priolist, + SLAB_HWCACHE_ALIGN); + if (!global.slab_priorities) + goto err_priorities; + + return 0; + +err_priorities: + kmem_cache_destroy(global.slab_priorities); + return -ENOMEM; +} + +void i915_global_scheduler_shrink(void) +{ + kmem_cache_shrink(global.slab_dependencies); + kmem_cache_shrink(global.slab_priorities); +} + +void i915_global_scheduler_exit(void) +{ + kmem_cache_destroy(global.slab_dependencies); + kmem_cache_destroy(global.slab_priorities); +} diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index dbe9cb7ecd82..4153c6a607b0 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -83,6 +83,23 @@ struct i915_dependency { #define I915_DEPENDENCY_ALLOC BIT(0) }; +struct i915_priolist { + struct list_head requests[I915_PRIORITY_COUNT]; + struct rb_node node; + unsigned long used; + int priority; +}; + +#define priolist_for_each_request(it, plist, idx) \ + for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \ + list_for_each_entry(it, &(plist)->requests[idx], sched.link) + +#define priolist_for_each_request_consume(it, n, plist, idx) \ + for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \ + list_for_each_entry_safe(it, n, \ + &(plist)->requests[idx - 1], \ + sched.link) + void i915_sched_node_init(struct i915_sched_node *node); bool __i915_sched_node_add_dependency(struct i915_sched_node *node, @@ -90,12 +107,10 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, struct i915_dependency *dep, unsigned long flags); -int i915_sched_node_add_dependency(struct drm_i915_private *i915, - struct i915_sched_node *node, +int i915_sched_node_add_dependency(struct i915_sched_node *node, struct i915_sched_node *signal); -void i915_sched_node_fini(struct drm_i915_private *i915, - struct i915_sched_node *node); +void i915_sched_node_fini(struct i915_sched_node *node); void i915_schedule(struct i915_request *request, const struct i915_sched_attr *attr); @@ -105,4 +120,15 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump); struct list_head * i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); +void __i915_priolist_free(struct i915_priolist *p); +static inline void i915_priolist_free(struct i915_priolist *p) +{ + if (p->priority != I915_PRIORITY_NORMAL) + __i915_priolist_free(p); +} + +int i915_global_scheduler_init(void); +void i915_global_scheduler_shrink(void); +void i915_global_scheduler_exit(void); + #endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 20cbceeabeae..4366db7978a8 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -781,8 +781,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) } rb_erase_cached(&p->node, &execlists->queue); - if (p->priority != I915_PRIORITY_NORMAL) - kmem_cache_free(engine->i915->priorities, p); + i915_priolist_free(p); } done: execlists->queue_priority_hint = diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 81d188508b44..661d2f6da84f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -781,8 +781,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } rb_erase_cached(&p->node, &execlists->queue); - if (p->priority != I915_PRIORITY_NORMAL) - kmem_cache_free(engine->i915->priorities, p); + i915_priolist_free(p); } done: @@ -935,8 +934,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) } rb_erase_cached(&p->node, &execlists->queue); - if (p->priority != I915_PRIORITY_NORMAL) - kmem_cache_free(engine->i915->priorities, p); + i915_priolist_free(p); } /* Remaining _unready_ requests will be nop'ed when submitted */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 533e2053664e..b8ec7e40a59b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -187,23 +187,6 @@ enum intel_engine_id { #define _VECS(n) (VECS + (n)) }; -struct i915_priolist { - struct list_head requests[I915_PRIORITY_COUNT]; - struct rb_node node; - unsigned long used; - int priority; -}; - -#define priolist_for_each_request(it, plist, idx) \ - for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \ - list_for_each_entry(it, &(plist)->requests[idx], sched.link) - -#define priolist_for_each_request_consume(it, n, plist, idx) \ - for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \ - list_for_each_entry_safe(it, n, \ - &(plist)->requests[idx - 1], \ - sched.link) - struct st_preempt_hang { struct completion completion; unsigned int count; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 6f3fb803c747..ec1ae948954c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -76,26 +76,26 @@ static void mock_ring_free(struct intel_ring *base) kfree(ring); } -static struct mock_request *first_request(struct mock_engine *engine) +static struct i915_request *first_request(struct mock_engine *engine) { return list_first_entry_or_null(&engine->hw_queue, - struct mock_request, - link); + struct i915_request, + mock.link); } -static void advance(struct mock_request *request) +static void advance(struct i915_request *request) { - list_del_init(&request->link); - i915_request_mark_complete(&request->base); - GEM_BUG_ON(!i915_request_completed(&request->base)); + list_del_init(&request->mock.link); + i915_request_mark_complete(request); + GEM_BUG_ON(!i915_request_completed(request)); - intel_engine_queue_breadcrumbs(request->base.engine); + intel_engine_queue_breadcrumbs(request->engine); } static void hw_delay_complete(struct timer_list *t) { struct mock_engine *engine = from_timer(engine, t, hw_delay); - struct mock_request *request; + struct i915_request *request; unsigned long flags; spin_lock_irqsave(&engine->hw_lock, flags); @@ -110,8 +110,9 @@ static void hw_delay_complete(struct timer_list *t) * requeue the timer for the next delayed request. */ while ((request = first_request(engine))) { - if (request->delay) { - mod_timer(&engine->hw_delay, jiffies + request->delay); + if (request->mock.delay) { + mod_timer(&engine->hw_delay, + jiffies + request->mock.delay); break; } @@ -169,10 +170,8 @@ err: static int mock_request_alloc(struct i915_request *request) { - struct mock_request *mock = container_of(request, typeof(*mock), base); - - INIT_LIST_HEAD(&mock->link); - mock->delay = 0; + INIT_LIST_HEAD(&request->mock.link); + request->mock.delay = 0; return 0; } @@ -190,7 +189,6 @@ static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs) static void mock_submit_request(struct i915_request *request) { - struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_engine *engine = container_of(request->engine, typeof(*engine), base); unsigned long flags; @@ -198,12 +196,13 @@ static void mock_submit_request(struct i915_request *request) i915_request_submit(request); spin_lock_irqsave(&engine->hw_lock, flags); - list_add_tail(&mock->link, &engine->hw_queue); - if (mock->link.prev == &engine->hw_queue) { - if (mock->delay) - mod_timer(&engine->hw_delay, jiffies + mock->delay); + list_add_tail(&request->mock.link, &engine->hw_queue); + if (list_is_first(&request->mock.link, &engine->hw_queue)) { + if (request->mock.delay) + mod_timer(&engine->hw_delay, + jiffies + request->mock.delay); else - advance(mock); + advance(request); } spin_unlock_irqrestore(&engine->hw_lock, flags); } @@ -263,12 +262,12 @@ void mock_engine_flush(struct intel_engine_cs *engine) { struct mock_engine *mock = container_of(engine, typeof(*mock), base); - struct mock_request *request, *rn; + struct i915_request *request, *rn; del_timer_sync(&mock->hw_delay); spin_lock_irq(&mock->hw_lock); - list_for_each_entry_safe(request, rn, &mock->hw_queue, link) + list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link) advance(request); spin_unlock_irq(&mock->hw_lock); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index fc516a2970f4..5a98caba6d69 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -79,9 +79,6 @@ static void mock_device_release(struct drm_device *dev) destroy_workqueue(i915->wq); - kmem_cache_destroy(i915->priorities); - kmem_cache_destroy(i915->dependencies); - kmem_cache_destroy(i915->requests); kmem_cache_destroy(i915->vmas); kmem_cache_destroy(i915->objects); @@ -211,23 +208,6 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->vmas) goto err_objects; - i915->requests = KMEM_CACHE(mock_request, - SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT | - SLAB_TYPESAFE_BY_RCU); - if (!i915->requests) - goto err_vmas; - - i915->dependencies = KMEM_CACHE(i915_dependency, - SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT); - if (!i915->dependencies) - goto err_requests; - - i915->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); - if (!i915->priorities) - goto err_dependencies; - i915_timelines_init(i915); INIT_LIST_HEAD(&i915->gt.active_rings); @@ -257,12 +237,6 @@ err_context: err_unlock: mutex_unlock(&i915->drm.struct_mutex); i915_timelines_fini(i915); - kmem_cache_destroy(i915->priorities); -err_dependencies: - kmem_cache_destroy(i915->dependencies); -err_requests: - kmem_cache_destroy(i915->requests); -err_vmas: kmem_cache_destroy(i915->vmas); err_objects: kmem_cache_destroy(i915->objects); diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 0dc29e242597..d1a7c9608712 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -31,29 +31,25 @@ mock_request(struct intel_engine_cs *engine, unsigned long delay) { struct i915_request *request; - struct mock_request *mock; /* NB the i915->requests slab cache is enlarged to fit mock_request */ request = i915_request_alloc(engine, context); if (IS_ERR(request)) return NULL; - mock = container_of(request, typeof(*mock), base); - mock->delay = delay; - - return &mock->base; + request->mock.delay = delay; + return request; } bool mock_cancel_request(struct i915_request *request) { - struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_engine *engine = container_of(request->engine, typeof(*engine), base); bool was_queued; spin_lock_irq(&engine->hw_lock); - was_queued = !list_empty(&mock->link); - list_del_init(&mock->link); + was_queued = !list_empty(&request->mock.link); + list_del_init(&request->mock.link); spin_unlock_irq(&engine->hw_lock); if (was_queued) diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h index 995fb728380c..4acf0211df20 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.h +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -29,13 +29,6 @@ #include "../i915_request.h" -struct mock_request { - struct i915_request base; - - struct list_head link; - unsigned long delay; -}; - struct i915_request * mock_request(struct intel_engine_cs *engine, struct i915_gem_context *context, -- cgit v1.2.3 From e88619646971168e3baedc850c21243d303e31ca Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Mar 2019 17:09:00 +0000 Subject: drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ Having introduced per-context seqno, we now have a means to identity progress across the system without feel of rollback as befell the global_seqno. That is we can program a MI_SEMAPHORE_WAIT operation in advance of submission safe in the knowledge that our target seqno and address is stable. However, since we are telling the GPU to busy-spin on the target address until it matches the signaling seqno, we only want to do so when we are sure that busy-spin will be completed quickly. To achieve this we only submit the request to HW once the signaler is itself executing (modulo preemption causing us to wait longer), and we only do so for default and above priority requests (so that idle priority tasks never themselves hog the GPU waiting for others). As might be reasonably expected, HW semaphores excel in inter-engine synchronisation microbenchmarks (where the 3x reduced latency / increased throughput more than offset the power cost of spinning on a second ring) and have significant improvement (can be up to ~10%, most see no change) for single clients that utilize multiple engines (typically media players and transcoders), without regressing multiple clients that can saturate the system or changing the power envelope dramatically. v3: Drop the older NEQ branch, now we pin the signaler's HWSP anyway. v4: Tell the world and include it as part of scheduler caps. Testcase: igt/gem_exec_whisper Testcase: igt/benchmarks/gem_wsim Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190301170901.8340-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 138 +++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_request.h | 26 +++++- drivers/gpu/drm/i915/i915_sw_fence.c | 4 +- drivers/gpu/drm/i915/i915_sw_fence.h | 3 + drivers/gpu/drm/i915/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/intel_gpu_commands.h | 9 +- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 7 ++ include/uapi/drm/i915_drm.h | 1 + 10 files changed, 181 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c6354f6cdbdb..c08abdef5eb6 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -351,7 +351,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL); break; case I915_PARAM_HAS_SEMAPHORES: - value = 0; + value = !!(dev_priv->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES); break; case I915_PARAM_HAS_SECURE_BATCHES: value = capable(CAP_SYS_ADMIN); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d354967d6ae8..59e30b8c4ee9 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -22,8 +22,9 @@ * */ -#include #include +#include +#include #include #include #include @@ -32,9 +33,16 @@ #include "i915_active.h" #include "i915_reset.h" +struct execute_cb { + struct list_head link; + struct irq_work work; + struct i915_sw_fence *fence; +}; + static struct i915_global_request { struct kmem_cache *slab_requests; struct kmem_cache *slab_dependencies; + struct kmem_cache *slab_execute_cbs; } global; static const char *i915_fence_get_driver_name(struct dma_fence *fence) @@ -325,6 +333,69 @@ void i915_request_retire_upto(struct i915_request *rq) } while (tmp != rq); } +static void irq_execute_cb(struct irq_work *wrk) +{ + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); + + i915_sw_fence_complete(cb->fence); + kmem_cache_free(global.slab_execute_cbs, cb); +} + +static void __notify_execute_cb(struct i915_request *rq) +{ + struct execute_cb *cb; + + lockdep_assert_held(&rq->lock); + + if (list_empty(&rq->execute_cb)) + return; + + list_for_each_entry(cb, &rq->execute_cb, link) + irq_work_queue(&cb->work); + + /* + * XXX Rollback on __i915_request_unsubmit() + * + * In the future, perhaps when we have an active time-slicing scheduler, + * it will be interesting to unsubmit parallel execution and remove + * busywaits from the GPU until their master is restarted. This is + * quite hairy, we have to carefully rollback the fence and do a + * preempt-to-idle cycle on the target engine, all the while the + * master execute_cb may refire. + */ + INIT_LIST_HEAD(&rq->execute_cb); +} + +static int +i915_request_await_execution(struct i915_request *rq, + struct i915_request *signal, + gfp_t gfp) +{ + struct execute_cb *cb; + + if (i915_request_is_active(signal)) + return 0; + + cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); + if (!cb) + return -ENOMEM; + + cb->fence = &rq->submit; + i915_sw_fence_await(cb->fence); + init_irq_work(&cb->work, irq_execute_cb); + + spin_lock_irq(&signal->lock); + if (i915_request_is_active(signal)) { + i915_sw_fence_complete(cb->fence); + kmem_cache_free(global.slab_execute_cbs, cb); + } else { + list_add_tail(&cb->link, &signal->execute_cb); + } + spin_unlock_irq(&signal->lock); + + return 0; +} + static void move_to_timeline(struct i915_request *request, struct i915_timeline *timeline) { @@ -361,6 +432,8 @@ void __i915_request_submit(struct i915_request *request) !i915_request_enable_breadcrumb(request)) intel_engine_queue_breadcrumbs(engine); + __notify_execute_cb(request); + spin_unlock(&request->lock); engine->emit_fini_breadcrumb(request, @@ -608,6 +681,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) } INIT_LIST_HEAD(&rq->active_list); + INIT_LIST_HEAD(&rq->execute_cb); tl = ce->ring->timeline; ret = i915_timeline_get_seqno(tl, rq, &seqno); @@ -696,6 +770,52 @@ err_unreserve: return ERR_PTR(ret); } +static int +emit_semaphore_wait(struct i915_request *to, + struct i915_request *from, + gfp_t gfp) +{ + u32 hwsp_offset; + u32 *cs; + int err; + + GEM_BUG_ON(!from->timeline->has_initial_breadcrumb); + GEM_BUG_ON(INTEL_GEN(to->i915) < 8); + + /* We need to pin the signaler's HWSP until we are finished reading. */ + err = i915_timeline_read_hwsp(from, to, &hwsp_offset); + if (err) + return err; + + /* Only submit our spinner after the signaler is running! */ + err = i915_request_await_execution(to, from, gfp); + if (err) + return err; + + cs = intel_ring_begin(to, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Using greater-than-or-equal here means we have to worry + * about seqno wraparound. To side step that issue, we swap + * the timeline HWSP upon wrapping, so that everyone listening + * for the old (pre-wrap) values do not see the much smaller + * (post-wrap) values than they were expecting (and so wait + * forever). + */ + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = from->fence.seqno; + *cs++ = hwsp_offset; + *cs++ = 0; + + intel_ring_advance(to, cs); + return 0; +} + static int i915_request_await_request(struct i915_request *to, struct i915_request *from) { @@ -717,6 +837,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, I915_FENCE_GFP); + } else if (intel_engine_has_semaphores(to->engine) && + to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) { + ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); } else { ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, @@ -1208,14 +1331,23 @@ int __init i915_global_request_init(void) if (!global.slab_requests) return -ENOMEM; + global.slab_execute_cbs = KMEM_CACHE(execute_cb, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_TYPESAFE_BY_RCU); + if (!global.slab_execute_cbs) + goto err_requests; + global.slab_dependencies = KMEM_CACHE(i915_dependency, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT); if (!global.slab_dependencies) - goto err_requests; + goto err_execute_cbs; return 0; +err_execute_cbs: + kmem_cache_destroy(global.slab_execute_cbs); err_requests: kmem_cache_destroy(global.slab_requests); return -ENOMEM; @@ -1224,11 +1356,13 @@ err_requests: void i915_global_request_shrink(void) { kmem_cache_shrink(global.slab_dependencies); + kmem_cache_shrink(global.slab_execute_cbs); kmem_cache_shrink(global.slab_requests); } void i915_global_request_exit(void) { kmem_cache_destroy(global.slab_dependencies); + kmem_cache_destroy(global.slab_execute_cbs); kmem_cache_destroy(global.slab_requests); } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 09eaad06d2c6..4dd1dea1d1af 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -129,6 +129,7 @@ struct i915_request { */ struct i915_sw_fence submit; wait_queue_entry_t submitq; + struct list_head execute_cb; /* * A list of everyone we wait upon, and everyone who waits upon us. @@ -343,10 +344,27 @@ static inline bool __i915_request_has_started(const struct i915_request *rq) * i915_request_started - check if the request has begun being executed * @rq: the request * - * Returns true if the request has been submitted to hardware, and the hardware - * has advanced passed the end of the previous request and so should be either - * currently processing the request (though it may be preempted and so - * not necessarily the next request to complete) or have completed the request. + * If the timeline is not using initial breadcrumbs, a request is + * considered started if the previous request on its timeline (i.e. + * context) has been signaled. + * + * If the timeline is using semaphores, it will also be emitting an + * "initial breadcrumb" after the semaphores are complete and just before + * it began executing the user payload. A request can therefore be active + * on the HW and not yet started as it is still busywaiting on its + * dependencies (via HW semaphores). + * + * If the request has started, its dependencies will have been signaled + * (either by fences or by semaphores) and it will have begun processing + * the user payload. + * + * However, even if a request has started, it may have been preempted and + * so no longer active, or it may have already completed. + * + * See also i915_request_is_active(). + * + * Returns true if the request has begun executing the user payload, or + * has completed: */ static inline bool i915_request_started(const struct i915_request *rq) { diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 7c58b049ecb5..8d1400d378d7 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -192,7 +192,7 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence, __i915_sw_fence_notify(fence, FENCE_FREE); } -static void i915_sw_fence_complete(struct i915_sw_fence *fence) +void i915_sw_fence_complete(struct i915_sw_fence *fence) { debug_fence_assert(fence); @@ -202,7 +202,7 @@ static void i915_sw_fence_complete(struct i915_sw_fence *fence) __i915_sw_fence_complete(fence, NULL); } -static void i915_sw_fence_await(struct i915_sw_fence *fence) +void i915_sw_fence_await(struct i915_sw_fence *fence) { debug_fence_assert(fence); WARN_ON(atomic_inc_return(&fence->pending) <= 1); diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index 0e055ea0179f..6dec9e1d1102 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -79,6 +79,9 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, unsigned long timeout, gfp_t gfp); +void i915_sw_fence_await(struct i915_sw_fence *fence); +void i915_sw_fence_complete(struct i915_sw_fence *fence); + static inline bool i915_sw_fence_signaled(const struct i915_sw_fence *fence) { return atomic_read(&fence->pending) <= 0; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index df8f88142f1d..4fc7e2ac6278 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -616,6 +616,7 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) } map[] = { #define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) } MAP(PREEMPTION, PREEMPTION), + MAP(SEMAPHORES, SEMAPHORES), #undef MAP }; struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h index b96a31bc1080..a34ece53a771 100644 --- a/drivers/gpu/drm/i915/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -105,8 +105,13 @@ #define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ -#define MI_SEMAPHORE_POLL (1<<15) -#define MI_SEMAPHORE_SAD_GTE_SDD (1<<12) +#define MI_SEMAPHORE_POLL (1 << 15) +#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) +#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) +#define MI_SEMAPHORE_SAD_LT_SDD (2 << 12) +#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) +#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) +#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3fd0c45a2920..6c9479acb433 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2330,6 +2330,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->park = NULL; engine->unpark = NULL; + engine->flags |= I915_ENGINE_HAS_SEMAPHORES; engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (engine->i915->preempt_context) engine->flags |= I915_ENGINE_HAS_PREEMPTION; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b8ec7e40a59b..2e7264119ec4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -499,6 +499,7 @@ struct intel_engine_cs { #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) +#define I915_ENGINE_HAS_SEMAPHORES BIT(3) unsigned int flags; /* @@ -576,6 +577,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_HAS_PREEMPTION; } +static inline bool +intel_engine_has_semaphores(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_SEMAPHORES; +} + void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); static inline bool __execlists_need_preempt(int prio, int last) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 8304a7f1ec3f..b10eea3f6d24 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -479,6 +479,7 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_ENABLED (1ul << 0) #define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) +#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_PARAM_HUC_STATUS 42 -- cgit v1.2.3 From c8b502422bfe04422261cb2861977a5cd31cc1da Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Mar 2019 16:26:43 +0000 Subject: drm/i915: Remove last traces of exec-id (GEM_BUSY) As we allow per-context engine allows the legacy concept of I915_EXEC_RING no longer applies universally. We are still exposing the unrelated exec-id in GEM_BUSY, so transition this ioctl (once more slightly changing its ABI, but no one cares) over to only reporting the uabi-class (not instance as we can not foreseeably fit those into the small bitmask). The only user of the extended ring information from GEM_BUSY is ddx/sna, which tries to use the non-rcs business information to guide which engine to use for subsequent operations on foreign bo. All that matters for it is the decision between rcs and !rcs, so it is unaffected by the change in higher bits. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190305162643.20243-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 32 +++++++++++++++++--------------- drivers/gpu/drm/i915/intel_engine_cs.c | 10 ---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - include/uapi/drm/i915_drm.h | 32 +++++++++++++++++--------------- 4 files changed, 34 insertions(+), 41 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a1ad5e137a97..69413f99ed04 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3825,20 +3825,17 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, static __always_inline unsigned int __busy_read_flag(unsigned int id) { - /* Note that we could alias engines in the execbuf API, but - * that would be very unwise as it prevents userspace from - * fine control over engine selection. Ahem. - * - * This should be something like EXEC_MAX_ENGINE instead of - * I915_NUM_ENGINES. - */ - BUILD_BUG_ON(I915_NUM_ENGINES > 16); + if (id == I915_ENGINE_CLASS_INVALID) + return 0xffff0000; + + GEM_BUG_ON(id >= 16); return 0x10000 << id; } static __always_inline unsigned int __busy_write_id(unsigned int id) { - /* The uABI guarantees an active writer is also amongst the read + /* + * The uABI guarantees an active writer is also amongst the read * engines. This would be true if we accessed the activity tracking * under the lock, but as we perform the lookup of the object and * its activity locklessly we can not guarantee that the last_write @@ -3846,16 +3843,20 @@ static __always_inline unsigned int __busy_write_id(unsigned int id) * last_read - hence we always set both read and write busy for * last_write. */ - return id | __busy_read_flag(id); + if (id == I915_ENGINE_CLASS_INVALID) + return 0xffffffff; + + return (id + 1) | __busy_read_flag(id); } static __always_inline unsigned int __busy_set_if_active(const struct dma_fence *fence, unsigned int (*flag)(unsigned int id)) { - struct i915_request *rq; + const struct i915_request *rq; - /* We have to check the current hw status of the fence as the uABI + /* + * We have to check the current hw status of the fence as the uABI * guarantees forward progress. We could rely on the idle worker * to eventually flush us, but to minimise latency just ask the * hardware. @@ -3866,11 +3867,11 @@ __busy_set_if_active(const struct dma_fence *fence, return 0; /* opencode to_request() in order to avoid const warnings */ - rq = container_of(fence, struct i915_request, fence); + rq = container_of(fence, const struct i915_request, fence); if (i915_request_completed(rq)) return 0; - return flag(rq->engine->uabi_id); + return flag(rq->engine->uabi_class); } static __always_inline unsigned int @@ -3904,7 +3905,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, if (!obj) goto out; - /* A discrepancy here is that we do not report the status of + /* + * A discrepancy here is that we do not report the status of * non-i915 fences, i.e. even though we may report the object as idle, * a call to set-domain may still stall waiting for foreign rendering. * This also means that wait-ioctl may report an object as busy, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 4fc7e2ac6278..f3f161c5627b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -84,7 +84,6 @@ static const struct engine_class_info intel_engine_classes[] = { #define MAX_MMIO_BASES 3 struct engine_info { unsigned int hw_id; - unsigned int uabi_id; u8 class; u8 instance; /* mmio bases table *must* be sorted in reverse gen order */ @@ -97,7 +96,6 @@ struct engine_info { static const struct engine_info intel_engines[] = { [RCS] = { .hw_id = RCS_HW, - .uabi_id = I915_EXEC_RENDER, .class = RENDER_CLASS, .instance = 0, .mmio_bases = { @@ -106,7 +104,6 @@ static const struct engine_info intel_engines[] = { }, [BCS] = { .hw_id = BCS_HW, - .uabi_id = I915_EXEC_BLT, .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_bases = { @@ -115,7 +112,6 @@ static const struct engine_info intel_engines[] = { }, [VCS] = { .hw_id = VCS_HW, - .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_bases = { @@ -126,7 +122,6 @@ static const struct engine_info intel_engines[] = { }, [VCS2] = { .hw_id = VCS2_HW, - .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 1, .mmio_bases = { @@ -136,7 +131,6 @@ static const struct engine_info intel_engines[] = { }, [VCS3] = { .hw_id = VCS3_HW, - .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 2, .mmio_bases = { @@ -145,7 +139,6 @@ static const struct engine_info intel_engines[] = { }, [VCS4] = { .hw_id = VCS4_HW, - .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 3, .mmio_bases = { @@ -154,7 +147,6 @@ static const struct engine_info intel_engines[] = { }, [VECS] = { .hw_id = VECS_HW, - .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_bases = { @@ -164,7 +156,6 @@ static const struct engine_info intel_engines[] = { }, [VECS2] = { .hw_id = VECS2_HW, - .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, .mmio_bases = { @@ -321,7 +312,6 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->class = info->class; engine->instance = info->instance; - engine->uabi_id = info->uabi_id; engine->uabi_class = intel_engine_classes[info->class].uabi_class; engine->context_size = __intel_engine_context_size(dev_priv, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2e7264119ec4..12e79828dbd5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -335,7 +335,6 @@ struct intel_engine_cs { unsigned int hw_id; unsigned int guc_id; - u8 uabi_id; u8 uabi_class; u8 class; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 1a60642c1d61..aa2d4c73a97d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1127,32 +1127,34 @@ struct drm_i915_gem_busy { * as busy may become idle before the ioctl is completed. * * Furthermore, if the object is busy, which engine is busy is only - * provided as a guide. There are race conditions which prevent the - * report of which engines are busy from being always accurate. - * However, the converse is not true. If the object is idle, the - * result of the ioctl, that all engines are idle, is accurate. + * provided as a guide and only indirectly by reporting its class + * (there may be more than one engine in each class). There are race + * conditions which prevent the report of which engines are busy from + * being always accurate. However, the converse is not true. If the + * object is idle, the result of the ioctl, that all engines are idle, + * is accurate. * * The returned dword is split into two fields to indicate both - * the engines on which the object is being read, and the - * engine on which it is currently being written (if any). + * the engine classess on which the object is being read, and the + * engine class on which it is currently being written (if any). * * The low word (bits 0:15) indicate if the object is being written * to by any engine (there can only be one, as the GEM implicit * synchronisation rules force writes to be serialised). Only the - * engine for the last write is reported. + * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as + * 1 not 0 etc) for the last write is reported. * - * The high word (bits 16:31) are a bitmask of which engines are - * currently reading from the object. Multiple engines may be + * The high word (bits 16:31) are a bitmask of which engines classes + * are currently reading from the object. Multiple engines may be * reading from the object simultaneously. * - * The value of each engine is the same as specified in the - * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc. - * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to - * the I915_EXEC_RENDER engine for execution, and so it is never + * The value of each engine class is the same as specified in the + * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. * reported as active itself. Some hardware may have parallel * execution engines, e.g. multiple media engines, which are - * mapped to the same identifier in the EXECBUFFER2 ioctl and - * so are not separately reported for busyness. + * mapped to the same class identifier and so are not separately + * reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether -- cgit v1.2.3 From 8a68d464366efb5b294fa11ccf23b51306cc2695 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Mar 2019 18:03:30 +0000 Subject: drm/i915: Store the BIT(engine->id) as the engine's mask In the next patch, we are introducing a broad virtual engine to encompass multiple physical engines, losing the 1:1 nature of BIT(engine->id). To reflect the broader set of engines implied by the virtual instance, lets store the full bitmask. v2: Use intel_engine_mask_t (s/ring_mask/engine_mask/) v3: Tvrtko voted for moah churn so teach everyone to not mention ring and use $class$instance throughout. v4: Comment upon the disparity in bspec for using VCS1,VCS2 in gen8 and VCS[0-4] in later gen. We opt to keep the code consistent and use 0-index naming throughout. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190305180332.30900-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 44 ++-- drivers/gpu/drm/i915/gvt/execlist.c | 17 +- drivers/gpu/drm/i915/gvt/handlers.c | 26 +-- drivers/gpu/drm/i915/gvt/interrupt.c | 2 +- drivers/gpu/drm/i915/gvt/mmio_context.c | 228 +++++++++++---------- drivers/gpu/drm/i915/gvt/scheduler.c | 21 +- drivers/gpu/drm/i915/i915_cmd_parser.c | 12 +- drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_drv.c | 8 +- drivers/gpu/drm/i915/i915_drv.h | 22 +- drivers/gpu/drm/i915/i915_gem_context.c | 4 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 11 +- drivers/gpu/drm/i915/i915_irq.c | 65 +++--- drivers/gpu/drm/i915/i915_pci.c | 39 ++-- drivers/gpu/drm/i915/i915_perf.c | 8 +- drivers/gpu/drm/i915/i915_pmu.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 24 +-- drivers/gpu/drm/i915/i915_reset.c | 47 ++--- drivers/gpu/drm/i915/intel_device_info.c | 6 +- drivers/gpu/drm/i915/intel_device_info.h | 6 +- drivers/gpu/drm/i915/intel_engine_cs.c | 59 +++--- drivers/gpu/drm/i915/intel_guc_ads.c | 2 +- drivers/gpu/drm/i915/intel_guc_submission.c | 6 +- drivers/gpu/drm/i915/intel_hangcheck.c | 10 +- drivers/gpu/drm/i915/intel_lrc.c | 12 +- drivers/gpu/drm/i915/intel_mocs.c | 12 +- drivers/gpu/drm/i915/intel_overlay.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 35 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 27 +-- drivers/gpu/drm/i915/intel_workarounds.c | 4 +- drivers/gpu/drm/i915/selftests/huge_pages.c | 4 +- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 8 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 2 +- drivers/gpu/drm/i915/selftests/i915_request.c | 14 +- drivers/gpu/drm/i915/selftests/intel_guc.c | 4 +- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 16 +- drivers/gpu/drm/i915/selftests/intel_lrc.c | 4 +- drivers/gpu/drm/i915/selftests/intel_workarounds.c | 4 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 1 + drivers/gpu/drm/i915/selftests/mock_gem_device.c | 6 +- 45 files changed, 422 insertions(+), 432 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 35b4ec3f7618..cf4a1ecf6853 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -391,12 +391,12 @@ struct cmd_info { #define F_POST_HANDLE (1<<2) u32 flag; -#define R_RCS (1 << RCS) -#define R_VCS1 (1 << VCS) -#define R_VCS2 (1 << VCS2) +#define R_RCS BIT(RCS0) +#define R_VCS1 BIT(VCS0) +#define R_VCS2 BIT(VCS1) #define R_VCS (R_VCS1 | R_VCS2) -#define R_BCS (1 << BCS) -#define R_VECS (1 << VECS) +#define R_BCS BIT(BCS0) +#define R_VECS BIT(VECS0) #define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS) /* rings that support this cmd: BLT/RCS/VCS/VECS */ u16 rings; @@ -558,7 +558,7 @@ static const struct decode_info decode_info_vebox = { }; static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { - [RCS] = { + [RCS0] = { &decode_info_mi, NULL, NULL, @@ -569,7 +569,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [VCS] = { + [VCS0] = { &decode_info_mi, NULL, NULL, @@ -580,7 +580,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [BCS] = { + [BCS0] = { &decode_info_mi, NULL, &decode_info_2d, @@ -591,7 +591,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [VECS] = { + [VECS0] = { &decode_info_mi, NULL, NULL, @@ -602,7 +602,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [VCS2] = { + [VCS1] = { &decode_info_mi, NULL, NULL, @@ -631,8 +631,7 @@ static inline const struct cmd_info *find_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e; hash_for_each_possible(gvt->cmd_table, e, hlist, opcode) { - if ((opcode == e->info->opcode) && - (e->info->rings & (1 << ring_id))) + if (opcode == e->info->opcode && e->info->rings & BIT(ring_id)) return e->info; } return NULL; @@ -943,15 +942,12 @@ static int cmd_handler_lri(struct parser_exec_state *s) struct intel_gvt *gvt = s->vgpu->gvt; for (i = 1; i < cmd_len; i += 2) { - if (IS_BROADWELL(gvt->dev_priv) && - (s->ring_id != RCS)) { - if (s->ring_id == BCS && - cmd_reg(s, i) == - i915_mmio_reg_offset(DERRMR)) + if (IS_BROADWELL(gvt->dev_priv) && s->ring_id != RCS0) { + if (s->ring_id == BCS0 && + cmd_reg(s, i) == i915_mmio_reg_offset(DERRMR)) ret |= 0; else - ret |= (cmd_reg_inhibit(s, i)) ? - -EBADRQC : 0; + ret |= cmd_reg_inhibit(s, i) ? -EBADRQC : 0; } if (ret) break; @@ -1047,27 +1043,27 @@ struct cmd_interrupt_event { }; static struct cmd_interrupt_event cmd_interrupt_events[] = { - [RCS] = { + [RCS0] = { .pipe_control_notify = RCS_PIPE_CONTROL, .mi_flush_dw = INTEL_GVT_EVENT_RESERVED, .mi_user_interrupt = RCS_MI_USER_INTERRUPT, }, - [BCS] = { + [BCS0] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = BCS_MI_FLUSH_DW, .mi_user_interrupt = BCS_MI_USER_INTERRUPT, }, - [VCS] = { + [VCS0] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = VCS_MI_FLUSH_DW, .mi_user_interrupt = VCS_MI_USER_INTERRUPT, }, - [VCS2] = { + [VCS1] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = VCS2_MI_FLUSH_DW, .mi_user_interrupt = VCS2_MI_USER_INTERRUPT, }, - [VECS] = { + [VECS0] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = VECS_MI_FLUSH_DW, .mi_user_interrupt = VECS_MI_USER_INTERRUPT, diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 70494e394d2c..1a93472cb34e 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -47,17 +47,16 @@ ((a)->lrca == (b)->lrca)) static int context_switch_events[] = { - [RCS] = RCS_AS_CONTEXT_SWITCH, - [BCS] = BCS_AS_CONTEXT_SWITCH, - [VCS] = VCS_AS_CONTEXT_SWITCH, - [VCS2] = VCS2_AS_CONTEXT_SWITCH, - [VECS] = VECS_AS_CONTEXT_SWITCH, + [RCS0] = RCS_AS_CONTEXT_SWITCH, + [BCS0] = BCS_AS_CONTEXT_SWITCH, + [VCS0] = VCS_AS_CONTEXT_SWITCH, + [VCS1] = VCS2_AS_CONTEXT_SWITCH, + [VECS0] = VECS_AS_CONTEXT_SWITCH, }; -static int ring_id_to_context_switch_event(int ring_id) +static int ring_id_to_context_switch_event(unsigned int ring_id) { - if (WARN_ON(ring_id < RCS || - ring_id >= ARRAY_SIZE(context_switch_events))) + if (WARN_ON(ring_id >= ARRAY_SIZE(context_switch_events))) return -EINVAL; return context_switch_events[ring_id]; @@ -411,7 +410,7 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) gvt_dbg_el("complete workload %p status %d\n", workload, workload->status); - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) + if (workload->status || (vgpu->resetting_eng & BIT(ring_id))) goto out; if (!list_empty(workload_q_head(vgpu, ring_id))) { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index bc64b810e0d5..b596cb42e24e 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -323,25 +323,25 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, } else { if (data & GEN6_GRDOM_RENDER) { gvt_dbg_mmio("vgpu%d: request RCS reset\n", vgpu->id); - engine_mask |= (1 << RCS); + engine_mask |= BIT(RCS0); } if (data & GEN6_GRDOM_MEDIA) { gvt_dbg_mmio("vgpu%d: request VCS reset\n", vgpu->id); - engine_mask |= (1 << VCS); + engine_mask |= BIT(VCS0); } if (data & GEN6_GRDOM_BLT) { gvt_dbg_mmio("vgpu%d: request BCS Reset\n", vgpu->id); - engine_mask |= (1 << BCS); + engine_mask |= BIT(BCS0); } if (data & GEN6_GRDOM_VECS) { gvt_dbg_mmio("vgpu%d: request VECS Reset\n", vgpu->id); - engine_mask |= (1 << VECS); + engine_mask |= BIT(VECS0); } if (data & GEN8_GRDOM_MEDIA2) { gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id); - if (HAS_BSD2(vgpu->gvt->dev_priv)) - engine_mask |= (1 << VCS2); + engine_mask |= BIT(VCS1); } + engine_mask &= INTEL_INFO(vgpu->gvt->dev_priv)->engine_mask; } /* vgpu_lock already hold by emulate mmio r/w */ @@ -1704,7 +1704,7 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; ret = intel_vgpu_select_submission_ops(vgpu, - ENGINE_MASK(ring_id), + BIT(ring_id), INTEL_VGPU_EXECLIST_SUBMISSION); if (ret) return ret; @@ -1724,19 +1724,19 @@ static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu, switch (offset) { case 0x4260: - id = RCS; + id = RCS0; break; case 0x4264: - id = VCS; + id = VCS0; break; case 0x4268: - id = VCS2; + id = VCS1; break; case 0x426c: - id = BCS; + id = BCS0; break; case 0x4270: - id = VECS; + id = VECS0; break; default: return -EINVAL; @@ -1793,7 +1793,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, MMIO_F(prefix(BLT_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(GEN6_BSD_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(VEBOX_RING_BASE), s, f, am, rm, d, r, w); \ - if (HAS_BSD2(dev_priv)) \ + if (HAS_ENGINE(dev_priv, VCS1)) \ MMIO_F(prefix(GEN8_BSD2_RING_BASE), s, f, am, rm, d, r, w); \ } while (0) diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index 67125c5eec6e..951681813230 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -536,7 +536,7 @@ static void gen8_init_irq( SET_BIT_INFO(irq, 4, VCS_MI_FLUSH_DW, INTEL_GVT_IRQ_INFO_GT1); SET_BIT_INFO(irq, 8, VCS_AS_CONTEXT_SWITCH, INTEL_GVT_IRQ_INFO_GT1); - if (HAS_BSD2(gvt->dev_priv)) { + if (HAS_ENGINE(gvt->dev_priv, VCS1)) { SET_BIT_INFO(irq, 16, VCS2_MI_USER_INTERRUPT, INTEL_GVT_IRQ_INFO_GT1); SET_BIT_INFO(irq, 20, VCS2_MI_FLUSH_DW, diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 7d84cfb9051a..0209d27fcaf0 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -41,102 +41,102 @@ /* Raw offset is appened to each line for convenience. */ static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = { - {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ - {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ - {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ - {RCS, INSTPM, 0xffff, true}, /* 0x20c0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ - {RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ - {RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ - {RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ - {RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ - {RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ - {RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ - - {BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ - {BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ - {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ - {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ - {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ - {RCS, INVALID_MMIO_REG, 0, false } /* Terminated */ + {RCS0, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ + {RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS0, HWSTAM, 0x0, false}, /* 0x2098 */ + {RCS0, INSTPM, 0xffff, true}, /* 0x20c0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ + {RCS0, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ + {RCS0, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ + {RCS0, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ + {RCS0, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ + {RCS0, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ + {RCS0, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ + + {BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ + {BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ + {BCS0, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ + {BCS0, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ + {BCS0, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ + {RCS0, INVALID_MMIO_REG, 0, false } /* Terminated */ }; static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { - {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ - {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ - {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ - {RCS, INSTPM, 0xffff, true}, /* 0x20c0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ - {RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ - {RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ - {RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ - {RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ - {RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ - {RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ - - {RCS, GEN8_PRIVATE_PAT_LO, 0, false}, /* 0x40e0 */ - {RCS, GEN8_PRIVATE_PAT_HI, 0, false}, /* 0x40e4 */ - {RCS, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */ - {RCS, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */ - {RCS, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */ - {RCS, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */ - {RCS, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ - {RCS, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ - {RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ - {RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */ - {RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */ - {RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */ - {RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */ - {RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */ - {RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */ - {RCS, TRINVTILEDETCT, 0, false}, /* 0x4dec */ - {RCS, TRVADR, 0, false}, /* 0x4df0 */ - {RCS, TRTTE, 0, false}, /* 0x4df4 */ - - {BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ - {BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ - {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ - {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ - {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ - - {VCS2, RING_EXCC(GEN8_BSD2_RING_BASE), 0xffff, false}, /* 0x1c028 */ - - {VECS, RING_EXCC(VEBOX_RING_BASE), 0xffff, false}, /* 0x1a028 */ - - {RCS, GEN8_HDC_CHICKEN1, 0xffff, true}, /* 0x7304 */ - {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ - {RCS, GEN7_UCGCTL4, 0x0, false}, /* 0x940c */ - {RCS, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */ - - {RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ - {RCS, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */ - - {RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ - {RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ - {RCS, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */ - {RCS, INVALID_MMIO_REG, 0, false } /* Terminated */ + {RCS0, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ + {RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS0, HWSTAM, 0x0, false}, /* 0x2098 */ + {RCS0, INSTPM, 0xffff, true}, /* 0x20c0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ + {RCS0, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ + {RCS0, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ + {RCS0, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ + {RCS0, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ + {RCS0, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ + {RCS0, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ + + {RCS0, GEN8_PRIVATE_PAT_LO, 0, false}, /* 0x40e0 */ + {RCS0, GEN8_PRIVATE_PAT_HI, 0, false}, /* 0x40e4 */ + {RCS0, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */ + {RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */ + {RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */ + {RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */ + {RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ + {RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ + {RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ + {RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */ + {RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */ + {RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */ + {RCS0, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */ + {RCS0, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */ + {RCS0, TRNULLDETCT, 0, false}, /* 0x4de8 */ + {RCS0, TRINVTILEDETCT, 0, false}, /* 0x4dec */ + {RCS0, TRVADR, 0, false}, /* 0x4df0 */ + {RCS0, TRTTE, 0, false}, /* 0x4df4 */ + + {BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ + {BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ + {BCS0, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ + {BCS0, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ + {BCS0, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ + + {VCS1, RING_EXCC(GEN8_BSD2_RING_BASE), 0xffff, false}, /* 0x1c028 */ + + {VECS0, RING_EXCC(VEBOX_RING_BASE), 0xffff, false}, /* 0x1a028 */ + + {RCS0, GEN8_HDC_CHICKEN1, 0xffff, true}, /* 0x7304 */ + {RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS0, GEN7_UCGCTL4, 0x0, false}, /* 0x940c */ + {RCS0, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */ + + {RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ + {RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */ + + {RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ + {RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ + {RCS0, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */ + {RCS0, INVALID_MMIO_REG, 0, false } /* Terminated */ }; static struct { @@ -149,11 +149,11 @@ static void load_render_mocs(struct drm_i915_private *dev_priv) { i915_reg_t offset; u32 regs[] = { - [RCS] = 0xc800, - [VCS] = 0xc900, - [VCS2] = 0xca00, - [BCS] = 0xcc00, - [VECS] = 0xcb00, + [RCS0] = 0xc800, + [VCS0] = 0xc900, + [VCS1] = 0xca00, + [BCS0] = 0xcc00, + [VECS0] = 0xcb00, }; int ring_id, i; @@ -301,7 +301,7 @@ int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu, goto out; /* no MOCS register in context except render engine */ - if (req->engine->id != RCS) + if (req->engine->id != RCS0) goto out; ret = restore_render_mocs_control_for_inhibit(vgpu, req); @@ -331,11 +331,11 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) enum forcewake_domains fw; i915_reg_t reg; u32 regs[] = { - [RCS] = 0x4260, - [VCS] = 0x4264, - [VCS2] = 0x4268, - [BCS] = 0x426c, - [VECS] = 0x4270, + [RCS0] = 0x4260, + [VCS0] = 0x4264, + [VCS1] = 0x4268, + [BCS0] = 0x426c, + [VECS0] = 0x4270, }; if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) @@ -353,7 +353,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) */ fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ | FW_REG_WRITE); - if (ring_id == RCS && (INTEL_GEN(dev_priv) >= 9)) + if (ring_id == RCS0 && INTEL_GEN(dev_priv) >= 9) fw |= FORCEWAKE_RENDER; intel_uncore_forcewake_get(dev_priv, fw); @@ -378,11 +378,11 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, u32 old_v, new_v; u32 regs[] = { - [RCS] = 0xc800, - [VCS] = 0xc900, - [VCS2] = 0xca00, - [BCS] = 0xcc00, - [VECS] = 0xcb00, + [RCS0] = 0xc800, + [VCS0] = 0xc900, + [VCS1] = 0xca00, + [BCS0] = 0xcc00, + [VECS0] = 0xcb00, }; int i; @@ -390,8 +390,10 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv) - || IS_COFFEELAKE(dev_priv)) && ring_id == RCS) + if (ring_id == RCS0 && + (IS_KABYLAKE(dev_priv) || + IS_BROXTON(dev_priv) || + IS_COFFEELAKE(dev_priv))) return; if (!pre && !gen9_render_mocs.initialized) @@ -414,7 +416,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, offset.reg += 4; } - if (ring_id == RCS) { + if (ring_id == RCS0) { l3_offset.reg = 0xb020; for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) { if (pre) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 1bb8f936fdaa..709bcaaed765 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -93,7 +93,7 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload, i915_mmio_reg_offset(EU_PERF_CNTL6), }; - if (workload->ring_id != RCS) + if (workload->ring_id != RCS0) return; if (save) { @@ -149,7 +149,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) COPY_REG_MASKED(ctx_ctrl); COPY_REG(ctx_timestamp); - if (ring_id == RCS) { + if (ring_id == RCS0) { COPY_REG(bb_per_ctx_ptr); COPY_REG(rcs_indirect_ctx); COPY_REG(rcs_indirect_ctx_offset); @@ -177,7 +177,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) context_page_num = context_page_num >> PAGE_SHIFT; - if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS) + if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS0) context_page_num = 19; i = 2; @@ -440,8 +440,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) if (ret) goto err_unpin; - if ((workload->ring_id == RCS) && - (workload->wa_ctx.indirect_ctx.size != 0)) { + if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) { ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); if (ret) goto err_shadow; @@ -791,7 +790,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) context_page_num = rq->engine->context_size; context_page_num = context_page_num >> PAGE_SHIFT; - if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS) + if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS0) context_page_num = 19; i = 2; @@ -891,8 +890,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->status = 0; } - if (!workload->status && !(vgpu->resetting_eng & - ENGINE_MASK(ring_id))) { + if (!workload->status && + !(vgpu->resetting_eng & BIT(ring_id))) { update_guest_context(workload); for_each_set_bit(event, workload->pending_events, @@ -915,7 +914,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) list_del_init(&workload->list); - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { + if (workload->status || vgpu->resetting_eng & BIT(ring_id)) { /* if workload->status is not successful means HW GPU * has occurred GPU hang or something wrong with i915/GVT, * and GVT won't inject context switch interrupt to guest. @@ -929,7 +928,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) * cleaned up during the resetting process later, so doing * the workload clean up here doesn't have any impact. **/ - intel_vgpu_clean_workloads(vgpu, ENGINE_MASK(ring_id)); + intel_vgpu_clean_workloads(vgpu, BIT(ring_id)); } workload->complete(workload); @@ -1438,7 +1437,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, workload->rb_start = start; workload->rb_ctl = ctl; - if (ring_id == RCS) { + if (ring_id == RCS0) { intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4); intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 33e8eed64423..503d548a55f7 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -868,8 +868,8 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) if (!IS_GEN(engine->i915, 7)) return; - switch (engine->id) { - case RCS: + switch (engine->class) { + case RENDER_CLASS: if (IS_HASWELL(engine->i915)) { cmd_tables = hsw_render_ring_cmds; cmd_table_count = @@ -889,12 +889,12 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; - case VCS: + case VIDEO_DECODE_CLASS: cmd_tables = gen7_video_cmds; cmd_table_count = ARRAY_SIZE(gen7_video_cmds); engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; - case BCS: + case COPY_ENGINE_CLASS: if (IS_HASWELL(engine->i915)) { cmd_tables = hsw_blt_ring_cmds; cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); @@ -913,14 +913,14 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; - case VECS: + case VIDEO_ENHANCEMENT_CLASS: cmd_tables = hsw_vebox_cmds; cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); /* VECS can use the same length_mask function as VCS */ engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; default: - MISSING_CASE(engine->id); + MISSING_CASE(engine->class); return; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 298371aad445..0a6348ad7c98 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1298,7 +1298,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seqno[id] = intel_engine_get_hangcheck_seqno(engine); } - intel_engine_get_instdone(dev_priv->engine[RCS], &instdone); + intel_engine_get_instdone(dev_priv->engine[RCS0], &instdone); } if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) @@ -1325,7 +1325,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) (long long)engine->hangcheck.acthd, (long long)acthd[id]); - if (engine->id == RCS) { + if (engine->id == RCS0) { seq_puts(m, "\tinstdone read =\n"); i915_instdone_info(dev_priv, m, &instdone); @@ -3178,7 +3178,7 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) static int i915_wa_registers(struct seq_file *m, void *unused) { struct drm_i915_private *i915 = node_to_i915(m->private); - const struct i915_wa_list *wal = &i915->engine[RCS]->ctx_wa_list; + const struct i915_wa_list *wal = &i915->engine[RCS0]->ctx_wa_list; struct i915_wa *wa; unsigned int i; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1b2f5a6f8c25..b548c292738c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -330,16 +330,16 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, value = dev_priv->overlay ? 1 : 0; break; case I915_PARAM_HAS_BSD: - value = !!dev_priv->engine[VCS]; + value = !!dev_priv->engine[VCS0]; break; case I915_PARAM_HAS_BLT: - value = !!dev_priv->engine[BCS]; + value = !!dev_priv->engine[BCS0]; break; case I915_PARAM_HAS_VEBOX: - value = !!dev_priv->engine[VECS]; + value = !!dev_priv->engine[VECS0]; break; case I915_PARAM_HAS_BSD2: - value = !!dev_priv->engine[VCS2]; + value = !!dev_priv->engine[VCS1]; break; case I915_PARAM_HAS_LLC: value = HAS_LLC(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8abc5a640bf1..08ead854ac2d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2099,7 +2099,7 @@ static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc) /* Iterator over subset of engines selected by mask */ #define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ - for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->ring_mask; \ + for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->engine_mask; \ (tmp__) ? \ ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \ 0;) @@ -2420,24 +2420,8 @@ static inline unsigned int i915_sg_segment_size(void) #define IS_GEN9_LP(dev_priv) (IS_GEN(dev_priv, 9) && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (IS_GEN(dev_priv, 9) && !IS_LP(dev_priv)) -#define ENGINE_MASK(id) BIT(id) -#define RENDER_RING ENGINE_MASK(RCS) -#define BSD_RING ENGINE_MASK(VCS) -#define BLT_RING ENGINE_MASK(BCS) -#define VEBOX_RING ENGINE_MASK(VECS) -#define BSD2_RING ENGINE_MASK(VCS2) -#define BSD3_RING ENGINE_MASK(VCS3) -#define BSD4_RING ENGINE_MASK(VCS4) -#define VEBOX2_RING ENGINE_MASK(VECS2) -#define ALL_ENGINES (~0) - -#define HAS_ENGINE(dev_priv, id) \ - (!!(INTEL_INFO(dev_priv)->ring_mask & ENGINE_MASK(id))) - -#define HAS_BSD(dev_priv) HAS_ENGINE(dev_priv, VCS) -#define HAS_BSD2(dev_priv) HAS_ENGINE(dev_priv, VCS2) -#define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) -#define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS) +#define ALL_ENGINES (~0u) +#define HAS_ENGINE(dev_priv, id) (INTEL_INFO(dev_priv)->engine_mask & BIT(id)) #define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc) #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d266ba3f7210..1e3211e909f1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -583,7 +583,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) GEM_BUG_ON(dev_priv->kernel_context); GEM_BUG_ON(dev_priv->preempt_context); - intel_engine_init_ctx_wa(dev_priv->engine[RCS]); + intel_engine_init_ctx_wa(dev_priv->engine[RCS0]); init_contexts(dev_priv); /* lowest priority; idle task */ @@ -1089,7 +1089,7 @@ __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, int ret = 0; GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8); - GEM_BUG_ON(engine->id != RCS); + GEM_BUG_ON(engine->id != RCS0); /* Nothing to do if unmodified. */ if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 53d0d70c97fa..943a221acb21 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1957,7 +1957,7 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) u32 *cs; int i; - if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS) { + if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } @@ -2082,11 +2082,11 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, #define I915_USER_RINGS (4) static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { - [I915_EXEC_DEFAULT] = RCS, - [I915_EXEC_RENDER] = RCS, - [I915_EXEC_BLT] = BCS, - [I915_EXEC_BSD] = VCS, - [I915_EXEC_VEBOX] = VECS + [I915_EXEC_DEFAULT] = RCS0, + [I915_EXEC_RENDER] = RCS0, + [I915_EXEC_BLT] = BCS0, + [I915_EXEC_BSD] = VCS0, + [I915_EXEC_VEBOX] = VECS0 }; static struct intel_engine_cs * @@ -2109,7 +2109,7 @@ eb_select_engine(struct drm_i915_private *dev_priv, return NULL; } - if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) { + if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4a681b3332ad..f447c6564418 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -799,7 +799,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm, */ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) { - ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->vm.i915)->ring_mask; + ppgtt->pd_dirty_engines = INTEL_INFO(ppgtt->vm.i915)->engine_mask; } /* Removes entries from a single page table, releasing it if it's empty. diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 86065d75b3ac..a47e11e6fc1b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -390,7 +390,7 @@ struct i915_hw_ppgtt { struct i915_address_space vm; struct kref ref; - unsigned long pd_dirty_rings; + unsigned long pd_dirty_engines; union { struct i915_pml4 pml4; /* GEN8+ & 48b PPGTT */ struct i915_page_directory_pointer pdp; /* GEN8+ */ diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 90baf9086d0a..91196348c68c 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -42,7 +42,7 @@ struct intel_render_state { static const struct intel_renderstate_rodata * render_state_get_rodata(const struct intel_engine_cs *engine) { - if (engine->id != RCS) + if (engine->id != RCS0) return NULL; switch (INTEL_GEN(engine->i915)) { diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index ececbfd8ee82..5f1cdbc9eb5d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -411,7 +411,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone.instdone); - if (ee->engine_id != RCS || INTEL_GEN(m->i915) <= 3) + if (ee->engine_id != RCS0 || INTEL_GEN(m->i915) <= 3) return; err_printf(m, " SC_INSTDONE: 0x%08x\n", @@ -1179,16 +1179,17 @@ static void error_record_engine_registers(struct i915_gpu_state *error, if (IS_GEN(dev_priv, 7)) { switch (engine->id) { default: - case RCS: + MISSING_CASE(engine->id); + case RCS0: mmio = RENDER_HWS_PGA_GEN7; break; - case BCS: + case BCS0: mmio = BLT_HWS_PGA_GEN7; break; - case VCS: + case VCS0: mmio = BSD_HWS_PGA_GEN7; break; - case VECS: + case VECS0: mmio = VEBOX_HWS_PGA_GEN7; break; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 06541f4bd4af..1f4e984ce42f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1427,20 +1427,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS0]); } static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); if (gt_iir & GT_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS0]); if (gt_iir & GT_BLT_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[BCS0]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | @@ -1475,8 +1475,8 @@ static void gen8_gt_irq_ack(struct drm_i915_private *i915, #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \ GEN8_GT_BCS_IRQ | \ + GEN8_GT_VCS0_IRQ | \ GEN8_GT_VCS1_IRQ | \ - GEN8_GT_VCS2_IRQ | \ GEN8_GT_VECS_IRQ | \ GEN8_GT_PM_IRQ | \ GEN8_GT_GUC_IRQ) @@ -1487,7 +1487,7 @@ static void gen8_gt_irq_ack(struct drm_i915_private *i915, raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]); } - if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { + if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1)); if (likely(gt_iir[1])) raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]); @@ -1510,21 +1510,21 @@ static void gen8_gt_irq_handler(struct drm_i915_private *i915, u32 master_ctl, u32 gt_iir[4]) { if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { - gen8_cs_irq_handler(i915->engine[RCS], + gen8_cs_irq_handler(i915->engine[RCS0], gt_iir[0] >> GEN8_RCS_IRQ_SHIFT); - gen8_cs_irq_handler(i915->engine[BCS], + gen8_cs_irq_handler(i915->engine[BCS0], gt_iir[0] >> GEN8_BCS_IRQ_SHIFT); } - if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { - gen8_cs_irq_handler(i915->engine[VCS], + if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { + gen8_cs_irq_handler(i915->engine[VCS0], + gt_iir[1] >> GEN8_VCS0_IRQ_SHIFT); + gen8_cs_irq_handler(i915->engine[VCS1], gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT); - gen8_cs_irq_handler(i915->engine[VCS2], - gt_iir[1] >> GEN8_VCS2_IRQ_SHIFT); } if (master_ctl & GEN8_GT_VECS_IRQ) { - gen8_cs_irq_handler(i915->engine[VECS], + gen8_cs_irq_handler(i915->engine[VECS0], gt_iir[3] >> GEN8_VECS_IRQ_SHIFT); } @@ -1802,7 +1802,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) return; if (pm_iir & PM_VEBOX_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VECS0]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -3780,7 +3780,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) * RPS interrupts will get enabled/disabled on demand when RPS * itself is enabled/disabled. */ - if (HAS_VEBOX(dev_priv)) { + if (HAS_ENGINE(dev_priv, VECS0)) { pm_irqs |= PM_VEBOX_USER_INTERRUPT; dev_priv->pm_ier |= PM_VEBOX_USER_INTERRUPT; } @@ -3892,18 +3892,21 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) { /* These are interrupts we'll toggle with the ring mask register */ u32 gt_interrupts[] = { - GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT, - GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT, + (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT), + + (GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT), + 0, - GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT - }; + + (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT) + }; dev_priv->pm_ier = 0x0; dev_priv->pm_imr = ~dev_priv->pm_ier; @@ -4231,7 +4234,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) I915_WRITE16(IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4339,7 +4342,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4484,10 +4487,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); if (iir & I915_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index a9211c370cd1..c42c5ccf38fe 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -94,7 +94,7 @@ .gpu_reset_clobbers_display = true, \ .hws_needs_physical = 1, \ .unfenced_needs_alignment = 1, \ - .ring_mask = RENDER_RING, \ + .engine_mask = BIT(RCS0), \ .has_snoop = true, \ .has_coherent_ggtt = false, \ GEN_DEFAULT_PIPEOFFSETS, \ @@ -133,7 +133,7 @@ static const struct intel_device_info intel_i865g_info = { .num_pipes = 2, \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ - .ring_mask = RENDER_RING, \ + .engine_mask = BIT(RCS0), \ .has_snoop = true, \ .has_coherent_ggtt = true, \ GEN_DEFAULT_PIPEOFFSETS, \ @@ -210,7 +210,7 @@ static const struct intel_device_info intel_pineview_info = { .display.has_hotplug = 1, \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ - .ring_mask = RENDER_RING, \ + .engine_mask = BIT(RCS0), \ .has_snoop = true, \ .has_coherent_ggtt = true, \ GEN_DEFAULT_PIPEOFFSETS, \ @@ -239,7 +239,7 @@ static const struct intel_device_info intel_i965gm_info = { static const struct intel_device_info intel_g45_info = { GEN4_FEATURES, PLATFORM(INTEL_G45), - .ring_mask = RENDER_RING | BSD_RING, + .engine_mask = BIT(RCS0) | BIT(VCS0), .gpu_reset_clobbers_display = false, }; @@ -249,7 +249,7 @@ static const struct intel_device_info intel_gm45_info = { .is_mobile = 1, .display.has_fbc = 1, .display.supports_tv = 1, - .ring_mask = RENDER_RING | BSD_RING, + .engine_mask = BIT(RCS0) | BIT(VCS0), .gpu_reset_clobbers_display = false, }; @@ -257,7 +257,7 @@ static const struct intel_device_info intel_gm45_info = { GEN(5), \ .num_pipes = 2, \ .display.has_hotplug = 1, \ - .ring_mask = RENDER_RING | BSD_RING, \ + .engine_mask = BIT(RCS0) | BIT(VCS0), \ .has_snoop = true, \ .has_coherent_ggtt = true, \ /* ilk does support rc6, but we do not implement [power] contexts */ \ @@ -283,7 +283,7 @@ static const struct intel_device_info intel_ironlake_m_info = { .num_pipes = 2, \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ - .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ .has_coherent_ggtt = true, \ .has_llc = 1, \ .has_rc6 = 1, \ @@ -328,7 +328,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { .num_pipes = 3, \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ - .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ .has_coherent_ggtt = true, \ .has_llc = 1, \ .has_rc6 = 1, \ @@ -389,7 +389,7 @@ static const struct intel_device_info intel_valleyview_info = { .ppgtt = INTEL_PPGTT_FULL, .has_snoop = true, .has_coherent_ggtt = false, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING, + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), .display_mmio_offset = VLV_DISPLAY_BASE, GEN_DEFAULT_PAGE_SIZES, GEN_DEFAULT_PIPEOFFSETS, @@ -398,7 +398,7 @@ static const struct intel_device_info intel_valleyview_info = { #define G75_FEATURES \ GEN7_FEATURES, \ - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ .display.has_ddi = 1, \ .has_fpga_dbg = 1, \ .display.has_psr = 1, \ @@ -462,7 +462,8 @@ static const struct intel_device_info intel_broadwell_rsvd_info = { static const struct intel_device_info intel_broadwell_gt3_info = { BDW_PLATFORM, .gt = 3, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, + .engine_mask = + BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1), }; static const struct intel_device_info intel_cherryview_info = { @@ -471,7 +472,7 @@ static const struct intel_device_info intel_cherryview_info = { .num_pipes = 3, .display.has_hotplug = 1, .is_lp = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), .has_64bit_reloc = 1, .has_runtime_pm = 1, .has_rc6 = 1, @@ -521,7 +522,8 @@ static const struct intel_device_info intel_skylake_gt2_info = { #define SKL_GT3_PLUS_PLATFORM \ SKL_PLATFORM, \ - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING + .engine_mask = \ + BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1) static const struct intel_device_info intel_skylake_gt3_info = { @@ -538,7 +540,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { GEN(9), \ .is_lp = 1, \ .display.has_hotplug = 1, \ - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ .num_pipes = 3, \ .has_64bit_reloc = 1, \ .display.has_ddi = 1, \ @@ -592,7 +594,8 @@ static const struct intel_device_info intel_kabylake_gt2_info = { static const struct intel_device_info intel_kabylake_gt3_info = { KBL_PLATFORM, .gt = 3, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, + .engine_mask = + BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1), }; #define CFL_PLATFORM \ @@ -612,7 +615,8 @@ static const struct intel_device_info intel_coffeelake_gt2_info = { static const struct intel_device_info intel_coffeelake_gt3_info = { CFL_PLATFORM, .gt = 3, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, + .engine_mask = + BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1), }; #define GEN10_FEATURES \ @@ -655,7 +659,8 @@ static const struct intel_device_info intel_icelake_11_info = { GEN11_FEATURES, PLATFORM(INTEL_ICELAKE), .is_alpha_support = 1, - .ring_mask = RENDER_RING | BLT_RING | VEBOX_RING | BSD_RING | BSD3_RING, + .engine_mask = + BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), }; #undef GEN diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 72a9a35b40e2..e5aea176c1da 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1202,7 +1202,7 @@ static int i915_oa_read(struct i915_perf_stream *stream, static struct intel_context *oa_pin_context(struct drm_i915_private *i915, struct i915_gem_context *ctx) { - struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_engine_cs *engine = i915->engine[RCS0]; struct intel_context *ce; int ret; @@ -1681,7 +1681,7 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, gen8_make_rpcs(dev_priv, &to_intel_context(ctx, - dev_priv->engine[RCS])->sseu)); + dev_priv->engine[RCS0])->sseu)); } /* @@ -1711,7 +1711,7 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { - struct intel_engine_cs *engine = dev_priv->engine[RCS]; + struct intel_engine_cs *engine = dev_priv->engine[RCS0]; unsigned int map_type = i915_coherent_map_type(dev_priv); struct i915_gem_context *ctx; struct i915_request *rq; @@ -2143,7 +2143,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, { struct i915_perf_stream *stream; - if (engine->id != RCS) + if (engine->class != RENDER_CLASS) return; stream = engine->i915->perf.oa.exclusive_stream; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 6560c356f279..084016200a1e 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -101,7 +101,7 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) * * Use RCS as proxy for all engines. */ - else if (intel_engine_supports_stats(i915->engine[RCS])) + else if (intel_engine_supports_stats(i915->engine[RCS0])) enable &= ~BIT(I915_SAMPLE_BUSY); /* diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c9b868347481..16ce9c609c65 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -210,14 +210,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) /* Engine ID */ -#define RCS_HW 0 -#define VCS_HW 1 -#define BCS_HW 2 -#define VECS_HW 3 -#define VCS2_HW 4 -#define VCS3_HW 6 -#define VCS4_HW 7 -#define VECS2_HW 12 +#define RCS0_HW 0 +#define VCS0_HW 1 +#define BCS0_HW 2 +#define VECS0_HW 3 +#define VCS1_HW 4 +#define VCS2_HW 6 +#define VCS3_HW 7 +#define VECS1_HW 12 /* Engine class */ @@ -7250,8 +7250,8 @@ enum { #define GEN8_GT_VECS_IRQ (1 << 6) #define GEN8_GT_GUC_IRQ (1 << 5) #define GEN8_GT_PM_IRQ (1 << 4) -#define GEN8_GT_VCS2_IRQ (1 << 3) -#define GEN8_GT_VCS1_IRQ (1 << 2) +#define GEN8_GT_VCS1_IRQ (1 << 3) /* NB: VCS2 in bspec! */ +#define GEN8_GT_VCS0_IRQ (1 << 2) /* NB: VCS1 in bpsec! */ #define GEN8_GT_BCS_IRQ (1 << 1) #define GEN8_GT_RCS_IRQ (1 << 0) @@ -7272,8 +7272,8 @@ enum { #define GEN8_RCS_IRQ_SHIFT 0 #define GEN8_BCS_IRQ_SHIFT 16 -#define GEN8_VCS1_IRQ_SHIFT 0 -#define GEN8_VCS2_IRQ_SHIFT 16 +#define GEN8_VCS0_IRQ_SHIFT 0 /* NB: VCS1 in bspec! */ +#define GEN8_VCS1_IRQ_SHIFT 16 /* NB: VCS2 in bpsec! */ #define GEN8_VECS_IRQ_SHIFT 0 #define GEN8_WD_IRQ_SHIFT 16 diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 55d6123dbba4..3fbaa72a9eac 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -297,12 +297,12 @@ static int gen6_reset_engines(struct drm_i915_private *i915, unsigned int retry) { struct intel_engine_cs *engine; - const u32 hw_engine_mask[I915_NUM_ENGINES] = { - [RCS] = GEN6_GRDOM_RENDER, - [BCS] = GEN6_GRDOM_BLT, - [VCS] = GEN6_GRDOM_MEDIA, - [VCS2] = GEN8_GRDOM_MEDIA2, - [VECS] = GEN6_GRDOM_VECS, + const u32 hw_engine_mask[] = { + [RCS0] = GEN6_GRDOM_RENDER, + [BCS0] = GEN6_GRDOM_BLT, + [VCS0] = GEN6_GRDOM_MEDIA, + [VCS1] = GEN8_GRDOM_MEDIA2, + [VECS0] = GEN6_GRDOM_VECS, }; u32 hw_mask; @@ -312,8 +312,10 @@ static int gen6_reset_engines(struct drm_i915_private *i915, unsigned int tmp; hw_mask = 0; - for_each_engine_masked(engine, i915, engine_mask, tmp) + for_each_engine_masked(engine, i915, engine_mask, tmp) { + GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; + } } return gen6_hw_domain_reset(i915, hw_mask); @@ -420,28 +422,27 @@ static int gen11_reset_engines(struct drm_i915_private *i915, unsigned int engine_mask, unsigned int retry) { - const u32 hw_engine_mask[I915_NUM_ENGINES] = { - [RCS] = GEN11_GRDOM_RENDER, - [BCS] = GEN11_GRDOM_BLT, - [VCS] = GEN11_GRDOM_MEDIA, - [VCS2] = GEN11_GRDOM_MEDIA2, - [VCS3] = GEN11_GRDOM_MEDIA3, - [VCS4] = GEN11_GRDOM_MEDIA4, - [VECS] = GEN11_GRDOM_VECS, - [VECS2] = GEN11_GRDOM_VECS2, + const u32 hw_engine_mask[] = { + [RCS0] = GEN11_GRDOM_RENDER, + [BCS0] = GEN11_GRDOM_BLT, + [VCS0] = GEN11_GRDOM_MEDIA, + [VCS1] = GEN11_GRDOM_MEDIA2, + [VCS2] = GEN11_GRDOM_MEDIA3, + [VCS3] = GEN11_GRDOM_MEDIA4, + [VECS0] = GEN11_GRDOM_VECS, + [VECS1] = GEN11_GRDOM_VECS2, }; struct intel_engine_cs *engine; unsigned int tmp; u32 hw_mask; int ret; - BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES); - if (engine_mask == ALL_ENGINES) { hw_mask = GEN11_GRDOM_FULL; } else { hw_mask = 0; for_each_engine_masked(engine, i915, engine_mask, tmp) { + GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; hw_mask |= gen11_lock_sfc(i915, engine); } @@ -692,7 +693,7 @@ static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) return err; for_each_engine(engine, i915, id) - intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id)); + intel_engine_reset(engine, stalled_mask & engine->mask); i915_gem_restore_fences(i915); @@ -1057,7 +1058,7 @@ error: static inline int intel_gt_reset_engine(struct drm_i915_private *i915, struct intel_engine_cs *engine) { - return intel_gpu_reset(i915, intel_engine_flag(engine)); + return intel_gpu_reset(i915, engine->mask); } /** @@ -1193,7 +1194,7 @@ void i915_clear_error_registers(struct drm_i915_private *dev_priv) I915_READ(RING_FAULT_REG(engine)) & ~RING_FAULT_VALID); } - POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); + POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS0])); } } @@ -1241,7 +1242,7 @@ void i915_handle_error(struct drm_i915_private *i915, */ wakeref = intel_runtime_pm_get(i915); - engine_mask &= INTEL_INFO(i915)->ring_mask; + engine_mask &= INTEL_INFO(i915)->engine_mask; if (flags & I915_ERROR_CAPTURE) { i915_capture_error_state(i915, engine_mask, msg); @@ -1260,7 +1261,7 @@ void i915_handle_error(struct drm_i915_private *i915, continue; if (i915_reset_engine(engine, msg) == 0) - engine_mask &= ~intel_engine_flag(engine); + engine_mask &= ~engine->mask; clear_bit(I915_RESET_ENGINE + engine->id, &error->flags); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 855a5074ad77..2c1b46cfd6d3 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -738,7 +738,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) runtime->num_scalers[PIPE_C] = 1; } - BUILD_BUG_ON(I915_NUM_ENGINES > BITS_PER_TYPE(intel_ring_mask_t)); + BUILD_BUG_ON(BITS_PER_TYPE(intel_engine_mask_t) < I915_NUM_ENGINES); if (IS_GEN(dev_priv, 11)) for_each_pipe(dev_priv, pipe) @@ -887,7 +887,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) continue; if (!(BIT(i) & RUNTIME_INFO(dev_priv)->vdbox_enable)) { - info->ring_mask &= ~ENGINE_MASK(_VCS(i)); + info->engine_mask &= ~BIT(_VCS(i)); DRM_DEBUG_DRIVER("vcs%u fused off\n", i); continue; } @@ -906,7 +906,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) continue; if (!(BIT(i) & RUNTIME_INFO(dev_priv)->vebox_enable)) { - info->ring_mask &= ~ENGINE_MASK(_VECS(i)); + info->engine_mask &= ~BIT(_VECS(i)); DRM_DEBUG_DRIVER("vecs%u fused off\n", i); } } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index e8b8661df746..047d10bdd455 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -150,14 +150,14 @@ struct sseu_dev_info { u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; -typedef u8 intel_ring_mask_t; +typedef u8 intel_engine_mask_t; struct intel_device_info { u16 gen_mask; u8 gen; u8 gt; /* GT number, 0 if undefined */ - intel_ring_mask_t ring_mask; /* Rings supported by the HW */ + intel_engine_mask_t engine_mask; /* Engines supported by the HW */ enum intel_platform platform; u32 platform_mask; @@ -200,7 +200,7 @@ struct intel_runtime_info { u8 num_sprites[I915_MAX_PIPES]; u8 num_scalers[I915_MAX_PIPES]; - u8 num_rings; + u8 num_engines; /* Slice/subslice/EU info */ struct sseu_dev_info sseu; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f3f161c5627b..62a2bbbbcc64 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -94,24 +94,24 @@ struct engine_info { }; static const struct engine_info intel_engines[] = { - [RCS] = { - .hw_id = RCS_HW, + [RCS0] = { + .hw_id = RCS0_HW, .class = RENDER_CLASS, .instance = 0, .mmio_bases = { { .gen = 1, .base = RENDER_RING_BASE } }, }, - [BCS] = { - .hw_id = BCS_HW, + [BCS0] = { + .hw_id = BCS0_HW, .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_bases = { { .gen = 6, .base = BLT_RING_BASE } }, }, - [VCS] = { - .hw_id = VCS_HW, + [VCS0] = { + .hw_id = VCS0_HW, .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_bases = { @@ -120,8 +120,8 @@ static const struct engine_info intel_engines[] = { { .gen = 4, .base = BSD_RING_BASE } }, }, - [VCS2] = { - .hw_id = VCS2_HW, + [VCS1] = { + .hw_id = VCS1_HW, .class = VIDEO_DECODE_CLASS, .instance = 1, .mmio_bases = { @@ -129,24 +129,24 @@ static const struct engine_info intel_engines[] = { { .gen = 8, .base = GEN8_BSD2_RING_BASE } }, }, - [VCS3] = { - .hw_id = VCS3_HW, + [VCS2] = { + .hw_id = VCS2_HW, .class = VIDEO_DECODE_CLASS, .instance = 2, .mmio_bases = { { .gen = 11, .base = GEN11_BSD3_RING_BASE } }, }, - [VCS4] = { - .hw_id = VCS4_HW, + [VCS3] = { + .hw_id = VCS3_HW, .class = VIDEO_DECODE_CLASS, .instance = 3, .mmio_bases = { { .gen = 11, .base = GEN11_BSD4_RING_BASE } }, }, - [VECS] = { - .hw_id = VECS_HW, + [VECS0] = { + .hw_id = VECS0_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_bases = { @@ -154,8 +154,8 @@ static const struct engine_info intel_engines[] = { { .gen = 7, .base = VEBOX_RING_BASE } }, }, - [VECS2] = { - .hw_id = VECS2_HW, + [VECS1] = { + .hw_id = VECS1_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, .mmio_bases = { @@ -304,7 +304,10 @@ intel_engine_setup(struct drm_i915_private *dev_priv, if (!engine) return -ENOMEM; + BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); + engine->id = id; + engine->mask = BIT(id); engine->i915 = dev_priv; __sprint_engine_name(engine->name, info); engine->hw_id = engine->guc_id = info->hw_id; @@ -345,15 +348,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv, int intel_engines_init_mmio(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); - const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; + const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask; struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int mask = 0; unsigned int i; int err; - WARN_ON(ring_mask == 0); - WARN_ON(ring_mask & + WARN_ON(engine_mask == 0); + WARN_ON(engine_mask & GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES)); if (i915_inject_load_failure()) @@ -367,7 +370,7 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv) if (err) goto cleanup; - mask |= ENGINE_MASK(i); + mask |= BIT(i); } /* @@ -375,16 +378,16 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv) * are added to the driver by a warning and disabling the forgotten * engines. */ - if (WARN_ON(mask != ring_mask)) - device_info->ring_mask = mask; + if (WARN_ON(mask != engine_mask)) + device_info->engine_mask = mask; /* We always presume we have at least RCS available for later probing */ - if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) { + if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) { err = -ENODEV; goto cleanup; } - RUNTIME_INFO(dev_priv)->num_rings = hweight32(mask); + RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask); i915_check_and_clear_faults(dev_priv); @@ -954,7 +957,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, default: instdone->instdone = I915_READ(RING_INSTDONE(mmio_base)); - if (engine->id != RCS) + if (engine->id != RCS0) break; instdone->slice_common = I915_READ(GEN7_SC_INSTDONE); @@ -970,7 +973,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, case 7: instdone->instdone = I915_READ(RING_INSTDONE(mmio_base)); - if (engine->id != RCS) + if (engine->id != RCS0) break; instdone->slice_common = I915_READ(GEN7_SC_INSTDONE); @@ -983,7 +986,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, case 4: instdone->instdone = I915_READ(RING_INSTDONE(mmio_base)); - if (engine->id == RCS) + if (engine->id == RCS0) /* HACK: Using the wrong struct member */ instdone->slice_common = I915_READ(GEN4_INSTDONE1); break; @@ -1355,7 +1358,7 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, &engine->execlists; u64 addr; - if (engine->id == RCS && IS_GEN_RANGE(dev_priv, 4, 7)) + if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7)) drm_printf(m, "\tCCID: 0x%08x\n", I915_READ(CCID)); drm_printf(m, "\tRING_START: 0x%08x\n", I915_READ(RING_START(engine->mmio_base))); diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c index f0db62887f50..c51d558fd431 100644 --- a/drivers/gpu/drm/i915/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -122,7 +122,7 @@ int intel_guc_ads_create(struct intel_guc *guc) * because our GuC shared data is there. */ kernel_ctx_vma = to_intel_context(dev_priv->kernel_context, - dev_priv->engine[RCS])->state; + dev_priv->engine[RCS0])->state; blob->ads.golden_context_lrca = intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset; diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 56ba2fcbabe6..e3afc91f0e7b 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -575,7 +575,7 @@ static void inject_preempt_context(struct work_struct *work) u32 *cs; cs = ce->ring->vaddr; - if (engine->id == RCS) { + if (engine->class == RENDER_CLASS) { cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED, addr, @@ -1030,7 +1030,7 @@ static int guc_clients_create(struct intel_guc *guc) GEM_BUG_ON(guc->preempt_client); client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, + INTEL_INFO(dev_priv)->engine_mask, GUC_CLIENT_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (IS_ERR(client)) { @@ -1041,7 +1041,7 @@ static int guc_clients_create(struct intel_guc *guc) if (dev_priv->preempt_context) { client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, + INTEL_INFO(dev_priv)->engine_mask, GUC_CLIENT_PRIORITY_KMD_HIGH, dev_priv->preempt_context); if (IS_ERR(client)) { diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index f1d8dfc58049..57ed49dc19c4 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -56,7 +56,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine) int slice; int subslice; - if (engine->id != RCS) + if (engine->id != RCS0) return true; intel_engine_get_instdone(engine, &instdone); @@ -120,7 +120,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) */ tmp = I915_READ_CTL(engine); if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, BIT(engine->id), 0, + i915_handle_error(dev_priv, engine->mask, 0, "stuck wait on %s", engine->name); I915_WRITE_CTL(engine, tmp); return ENGINE_WAIT_KICK; @@ -282,13 +282,13 @@ static void i915_hangcheck_elapsed(struct work_struct *work) hangcheck_store_sample(engine, &hc); if (hc.stalled) { - hung |= intel_engine_flag(engine); + hung |= engine->mask; if (hc.action != ENGINE_DEAD) - stuck |= intel_engine_flag(engine); + stuck |= engine->mask; } if (hc.wedged) - wedged |= intel_engine_flag(engine); + wedged |= engine->mask; } if (GEM_SHOW_DEBUG() && (hung | stuck)) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 578c8c98c718..6ec6c4e175a2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1777,7 +1777,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) unsigned int i; int ret; - if (GEM_DEBUG_WARN_ON(engine->id != RCS)) + if (GEM_DEBUG_WARN_ON(engine->id != RCS0)) return -EINVAL; switch (INTEL_GEN(engine->i915)) { @@ -2376,11 +2376,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) if (INTEL_GEN(engine->i915) < 11) { const u8 irq_shifts[] = { - [RCS] = GEN8_RCS_IRQ_SHIFT, - [BCS] = GEN8_BCS_IRQ_SHIFT, - [VCS] = GEN8_VCS1_IRQ_SHIFT, - [VCS2] = GEN8_VCS2_IRQ_SHIFT, - [VECS] = GEN8_VECS_IRQ_SHIFT, + [RCS0] = GEN8_RCS_IRQ_SHIFT, + [BCS0] = GEN8_BCS_IRQ_SHIFT, + [VCS0] = GEN8_VCS0_IRQ_SHIFT, + [VCS1] = GEN8_VCS1_IRQ_SHIFT, + [VECS0] = GEN8_VECS_IRQ_SHIFT, }; shift = irq_shifts[engine->id]; diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 331e7a678fb7..80dcc08222d0 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -288,17 +288,17 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) { switch (engine_id) { - case RCS: + case RCS0: return GEN9_GFX_MOCS(index); - case VCS: + case VCS0: return GEN9_MFX0_MOCS(index); - case BCS: + case BCS0: return GEN9_BLT_MOCS(index); - case VECS: + case VECS0: return GEN9_VEBOX_MOCS(index); - case VCS2: + case VCS1: return GEN9_MFX1_MOCS(index); - case VCS3: + case VCS2: return GEN11_MFX2_MOCS(index); default: MISSING_CASE(engine_id); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index c0df1dbb0069..a882b8d42bd9 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -236,7 +236,7 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay, static struct i915_request *alloc_request(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = dev_priv->engine[RCS]; + struct intel_engine_cs *engine = dev_priv->engine[RCS0]; return i915_request_alloc(engine, dev_priv->kernel_context); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1b96b0960adc..1dd0c99b893f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -552,16 +552,17 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) */ default: GEM_BUG_ON(engine->id); - case RCS: + /* fallthrough */ + case RCS0: hwsp = RENDER_HWS_PGA_GEN7; break; - case BCS: + case BCS0: hwsp = BLT_HWS_PGA_GEN7; break; - case VCS: + case VCS0: hwsp = BSD_HWS_PGA_GEN7; break; - case VECS: + case VECS0: hwsp = VEBOX_HWS_PGA_GEN7; break; } @@ -1692,8 +1693,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct drm_i915_private *i915 = rq->i915; struct intel_engine_cs *engine = rq->engine; enum intel_engine_id id; - const int num_rings = - IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_rings - 1 : 0; + const int num_engines = + IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; bool force_restore = false; int len; u32 *cs; @@ -1707,7 +1708,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) len = 4; if (IS_GEN(i915, 7)) - len += 2 + (num_rings ? 4*num_rings + 6 : 0); + len += 2 + (num_engines ? 4 * num_engines + 6 : 0); if (flags & MI_FORCE_RESTORE) { GEM_BUG_ON(flags & MI_RESTORE_INHIBIT); flags &= ~MI_FORCE_RESTORE; @@ -1722,10 +1723,10 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (IS_GEN(i915, 7)) { *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - if (num_rings) { + if (num_engines) { struct intel_engine_cs *signaller; - *cs++ = MI_LOAD_REGISTER_IMM(num_rings); + *cs++ = MI_LOAD_REGISTER_IMM(num_engines); for_each_engine(signaller, i915, id) { if (signaller == engine) continue; @@ -1768,11 +1769,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; if (IS_GEN(i915, 7)) { - if (num_rings) { + if (num_engines) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ - *cs++ = MI_LOAD_REGISTER_IMM(num_rings); + *cs++ = MI_LOAD_REGISTER_IMM(num_engines); for_each_engine(signaller, i915, id) { if (signaller == engine) continue; @@ -1850,7 +1851,7 @@ static int switch_context(struct i915_request *rq) * explanation. */ loops = 1; - if (engine->id == BCS && IS_VALLEYVIEW(engine->i915)) + if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915)) loops = 32; do { @@ -1859,15 +1860,15 @@ static int switch_context(struct i915_request *rq) goto err; } while (--loops); - if (intel_engine_flag(engine) & ppgtt->pd_dirty_rings) { - unwind_mm = intel_engine_flag(engine); - ppgtt->pd_dirty_rings &= ~unwind_mm; + if (ppgtt->pd_dirty_engines & engine->mask) { + unwind_mm = engine->mask; + ppgtt->pd_dirty_engines &= ~unwind_mm; hw_flags = MI_FORCE_RESTORE; } } if (rq->hw_context->state) { - GEM_BUG_ON(engine->id != RCS); + GEM_BUG_ON(engine->id != RCS0); /* * The kernel context(s) is treated as pure scratch and is not @@ -1927,7 +1928,7 @@ static int switch_context(struct i915_request *rq) err_mm: if (unwind_mm) - ppgtt->pd_dirty_rings |= unwind_mm; + ppgtt->pd_dirty_engines |= unwind_mm; err: return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 12e79828dbd5..18e865ff6637 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -10,12 +10,12 @@ #include #include "i915_gem_batch_pool.h" - -#include "i915_reg.h" #include "i915_pmu.h" +#include "i915_reg.h" #include "i915_request.h" #include "i915_selftest.h" #include "i915_timeline.h" +#include "intel_device_info.h" #include "intel_gpu_commands.h" #include "intel_workarounds.h" @@ -175,16 +175,16 @@ struct i915_request; * Keep instances of the same type engine together. */ enum intel_engine_id { - RCS = 0, - BCS, - VCS, + RCS0 = 0, + BCS0, + VCS0, + VCS1, VCS2, VCS3, - VCS4, -#define _VCS(n) (VCS + (n)) - VECS, - VECS2 -#define _VECS(n) (VECS + (n)) +#define _VCS(n) (VCS0 + (n)) + VECS0, + VECS1 +#define _VECS(n) (VECS0 + (n)) }; struct st_preempt_hang { @@ -334,6 +334,7 @@ struct intel_engine_cs { enum intel_engine_id id; unsigned int hw_id; unsigned int guc_id; + intel_engine_mask_t mask; u8 uabi_class; @@ -667,12 +668,6 @@ execlists_port_complete(struct intel_engine_execlists * const execlists, return port; } -static inline unsigned int -intel_engine_flag(const struct intel_engine_cs *engine) -{ - return BIT(engine->id); -} - static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 89b4007d5200..2ff54950891e 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -1055,7 +1055,7 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; struct i915_wa_list *w = &engine->whitelist; - GEM_BUG_ON(engine->id != RCS); + GEM_BUG_ON(engine->id != RCS0); wa_init_start(w, "whitelist"); @@ -1228,7 +1228,7 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8)) return; - if (engine->id == RCS) + if (engine->id == RCS0) rcs_engine_wa_init(engine, wal); else xcs_engine_wa_init(engine, wal); diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 4b9ded4ca0f5..1e66cff985f8 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1535,7 +1535,7 @@ static int igt_ppgtt_pin_update(void *arg) * land in the now stale 2M page. */ - err = gpu_write(vma, ctx, dev_priv->engine[RCS], 0, 0xdeadbeaf); + err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf); if (err) goto out_unpin; @@ -1653,7 +1653,7 @@ static int igt_shrink_thp(void *arg) if (err) goto out_unpin; - err = gpu_write(vma, ctx, i915->engine[RCS], 0, 0xdeadbeaf); + err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf); if (err) goto out_unpin; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 9c16aff87028..e43630b40fce 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -202,7 +202,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context); + rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -256,7 +256,7 @@ static bool needs_mi_store_dword(struct drm_i915_private *i915) if (i915_terminally_wedged(i915)) return false; - return intel_engine_can_store_dword(i915->engine[RCS]); + return intel_engine_can_store_dword(i915->engine[RCS0]); } static const struct igt_coherency_mode { diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 3a238b9628b3..30111c004eb6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -556,7 +556,7 @@ static int igt_ctx_exec(void *arg) ncontexts++; } pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n", - ncontexts, RUNTIME_INFO(i915)->num_rings, ndwords); + ncontexts, RUNTIME_INFO(i915)->num_engines, ndwords); dw = 0; list_for_each_entry(obj, &objects, st_link) { @@ -923,7 +923,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, unsigned int flags) { struct intel_sseu default_sseu = intel_device_default_sseu(i915); - struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_engine_cs *engine = i915->engine[RCS0]; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; struct intel_sseu pg_sseu; @@ -1126,7 +1126,7 @@ static int igt_ctx_readonly(void *arg) } } pr_info("Submitted %lu dwords (across %u engines)\n", - ndwords, RUNTIME_INFO(i915)->num_rings); + ndwords, RUNTIME_INFO(i915)->num_engines); dw = 0; list_for_each_entry(obj, &objects, st_link) { @@ -1459,7 +1459,7 @@ static int igt_vm_isolation(void *arg) count += this; } pr_info("Checked %lu scratch offsets across %d engines\n", - count, RUNTIME_INFO(i915)->num_rings); + count, RUNTIME_INFO(i915)->num_engines); out_rpm: intel_runtime_pm_put(i915, wakeref); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 784982aed625..971148fbe6f5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -468,7 +468,7 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) if (err) return err; - rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context); + rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 7e1b65b8eb19..3eb6a6b075ab 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -42,7 +42,7 @@ static int igt_add_request(void *arg) /* Basic preliminary test to create a request and let it loose! */ mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], + request = mock_request(i915->engine[RCS0], i915->kernel_context, HZ / 10); if (!request) @@ -66,7 +66,7 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], i915->kernel_context, T); + request = mock_request(i915->engine[RCS0], i915->kernel_context, T); if (!request) { err = -ENOMEM; goto out_unlock; @@ -136,7 +136,7 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], i915->kernel_context, T); + request = mock_request(i915->engine[RCS0], i915->kernel_context, T); if (!request) { err = -ENOMEM; goto out_locked; @@ -193,7 +193,7 @@ static int igt_request_rewind(void *arg) mutex_lock(&i915->drm.struct_mutex); ctx[0] = mock_context(i915, "A"); - request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); + request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ); if (!request) { err = -ENOMEM; goto err_context_0; @@ -203,7 +203,7 @@ static int igt_request_rewind(void *arg) i915_request_add(request); ctx[1] = mock_context(i915, "B"); - vip = mock_request(i915->engine[RCS], ctx[1], 0); + vip = mock_request(i915->engine[RCS0], ctx[1], 0); if (!vip) { err = -ENOMEM; goto err_context_1; @@ -415,7 +415,7 @@ static int mock_breadcrumbs_smoketest(void *arg) { struct drm_i915_private *i915 = arg; struct smoketest t = { - .engine = i915->engine[RCS], + .engine = i915->engine[RCS0], .ncontexts = 1024, .max_batch = 1024, .request_alloc = __mock_request_alloc @@ -1216,7 +1216,7 @@ out_flush: num_fences += atomic_long_read(&t[id].num_fences); } pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", - num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus); + num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); mutex_lock(&i915->drm.struct_mutex); ret = igt_live_test_end(&live) ?: ret; diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index c5e0a0e98fcb..b05a21eaa8f4 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -111,7 +111,7 @@ static int validate_client(struct intel_guc_client *client, dev_priv->preempt_context : dev_priv->kernel_context; if (client->owner != ctx_owner || - client->engines != INTEL_INFO(dev_priv)->ring_mask || + client->engines != INTEL_INFO(dev_priv)->engine_mask || client->priority != client_priority || client->doorbell_id == GUC_DOORBELL_INVALID) return -EINVAL; @@ -261,7 +261,7 @@ static int igt_guc_doorbells(void *arg) for (i = 0; i < ATTEMPTS; i++) { clients[i] = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, + INTEL_INFO(dev_priv)->engine_mask, i % GUC_CLIENT_PRIORITY_NUM, dev_priv->kernel_context); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 12e047328ab8..10658ad05305 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -1126,7 +1126,7 @@ static int igt_reset_wait(void *arg) long timeout; int err; - if (!intel_engine_can_store_dword(i915->engine[RCS])) + if (!intel_engine_can_store_dword(i915->engine[RCS0])) return 0; /* Check that we detect a stuck waiter and issue a reset */ @@ -1138,7 +1138,7 @@ static int igt_reset_wait(void *arg) if (err) goto unlock; - rq = hang_create_request(&h, i915->engine[RCS]); + rq = hang_create_request(&h, i915->engine[RCS0]); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto fini; @@ -1255,7 +1255,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, struct hang h; int err; - if (!intel_engine_can_store_dword(i915->engine[RCS])) + if (!intel_engine_can_store_dword(i915->engine[RCS0])) return 0; /* Check that we can recover an unbind stuck on a hanging request */ @@ -1285,7 +1285,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, goto out_obj; } - rq = hang_create_request(&h, i915->engine[RCS]); + rq = hang_create_request(&h, i915->engine[RCS0]); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_obj; @@ -1358,7 +1358,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, out_reset: igt_global_reset_lock(i915); - fake_hangcheck(rq->i915, intel_engine_flag(rq->engine)); + fake_hangcheck(rq->i915, rq->engine->mask); igt_global_reset_unlock(i915); if (tsk) { @@ -1537,7 +1537,7 @@ static int igt_reset_queue(void *arg) goto fini; } - reset_count = fake_hangcheck(i915, ENGINE_MASK(id)); + reset_count = fake_hangcheck(i915, BIT(id)); if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1596,7 +1596,7 @@ unlock: static int igt_handle_error(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_engine_cs *engine = i915->engine[RCS0]; struct hang h; struct i915_request *rq; struct i915_gpu_state *error; @@ -1643,7 +1643,7 @@ static int igt_handle_error(void *arg) /* Temporarily disable error capture */ error = xchg(&i915->gpu_error.first_error, (void *)-1); - i915_handle_error(i915, ENGINE_MASK(engine->id), 0, NULL); + i915_handle_error(i915, engine->mask, 0, NULL); xchg(&i915->gpu_error.first_error, error); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 0677038a5466..565e949a4722 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -929,7 +929,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_rings, smoke->ncontext); + RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); return 0; } @@ -957,7 +957,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_rings, smoke->ncontext); + RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); return 0; } diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 9f12a0ec804b..f2a2b51a4662 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -222,7 +222,7 @@ out_put: static int do_device_reset(struct intel_engine_cs *engine) { - i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds"); + i915_reset(engine->i915, engine->mask, "live_workarounds"); return 0; } @@ -709,7 +709,7 @@ out_rpm: static int live_reset_whitelist(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_engine_cs *engine = i915->engine[RCS0]; int err = 0; /* If we reset the gpu, we should not lose the RING_NONPRIV */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index ec1ae948954c..c2c954f64226 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -223,6 +223,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.i915 = i915; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; + engine->base.mask = BIT(id); engine->base.status_page.addr = (void *)(engine + 1); engine->base.context_pin = mock_context_pin; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index c27616efc4f8..b2c7808e0595 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -206,13 +206,13 @@ struct drm_i915_private *mock_gem_device(void) mock_init_ggtt(i915, &i915->ggtt); - mkwrite_device_info(i915)->ring_mask = BIT(0); + mkwrite_device_info(i915)->engine_mask = BIT(0); i915->kernel_context = mock_context(i915, NULL); if (!i915->kernel_context) goto err_unlock; - i915->engine[RCS] = mock_engine(i915, "mock", RCS); - if (!i915->engine[RCS]) + i915->engine[RCS0] = mock_engine(i915, "mock", RCS0); + if (!i915->engine[RCS0]) goto err_context; mutex_unlock(&i915->drm.struct_mutex); -- cgit v1.2.3 From cf4331dd3975f89fe4d20c6113620c9139b1c147 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Mar 2019 18:03:32 +0000 Subject: drm/i915: Move find_active_request() to the engine To find the active request, we need only search along the individual engine for the right request. This does not require touching any global GEM state, so move it into the engine compartment. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190305180332.30900-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 3 --- drivers/gpu/drm/i915/i915_gem.c | 45 ------------------------------- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 47 ++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 5 files changed, 50 insertions(+), 50 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 08ead854ac2d..ff039750069d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2996,9 +2996,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); -struct i915_request * -i915_gem_find_active_request(struct intel_engine_cs *engine); - static inline bool __i915_wedged(struct i915_gpu_error *error) { return unlikely(test_bit(I915_WEDGED, &error->flags)); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 69413f99ed04..c67369bd145b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2803,51 +2803,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } -static bool match_ring(struct i915_request *rq) -{ - struct drm_i915_private *dev_priv = rq->i915; - u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); - - return ring == i915_ggtt_offset(rq->ring->vma); -} - -struct i915_request * -i915_gem_find_active_request(struct intel_engine_cs *engine) -{ - struct i915_request *request, *active = NULL; - unsigned long flags; - - /* - * We are called by the error capture, reset and to dump engine - * state at random points in time. In particular, note that neither is - * crucially ordered with an interrupt. After a hang, the GPU is dead - * and we assume that no more writes can happen (we waited long enough - * for all writes that were in transaction to be flushed) - adding an - * extra delay for a recent interrupt is pointless. Hence, we do - * not need an engine->irq_seqno_barrier() before the seqno reads. - * At all other times, we must assume the GPU is still running, but - * we only care about the snapshot of this moment. - */ - spin_lock_irqsave(&engine->timeline.lock, flags); - list_for_each_entry(request, &engine->timeline.requests, link) { - if (i915_request_completed(request)) - continue; - - if (!i915_request_started(request)) - break; - - /* More than one preemptible request may match! */ - if (!match_ring(request)) - break; - - active = request; - break; - } - spin_unlock_irqrestore(&engine->timeline.lock, flags); - - return active; -} - static void i915_gem_retire_work_handler(struct work_struct *work) { diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5f1cdbc9eb5d..3d8020888604 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1411,7 +1411,7 @@ static void gem_record_rings(struct i915_gpu_state *error) error_record_engine_registers(error, engine, ee); error_record_engine_execlists(engine, ee); - request = i915_gem_find_active_request(engine); + request = intel_engine_find_active_request(engine); if (request) { struct i915_gem_context *ctx = request->gem_context; struct intel_ring *ring; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 62a2bbbbcc64..555a4590fa23 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1545,7 +1545,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tlast "); - rq = i915_gem_find_active_request(engine); + rq = intel_engine_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); @@ -1712,6 +1712,51 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine) write_sequnlock_irqrestore(&engine->stats.lock, flags); } +static bool match_ring(struct i915_request *rq) +{ + struct drm_i915_private *dev_priv = rq->i915; + u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); + + return ring == i915_ggtt_offset(rq->ring->vma); +} + +struct i915_request * +intel_engine_find_active_request(struct intel_engine_cs *engine) +{ + struct i915_request *request, *active = NULL; + unsigned long flags; + + /* + * We are called by the error capture, reset and to dump engine + * state at random points in time. In particular, note that neither is + * crucially ordered with an interrupt. After a hang, the GPU is dead + * and we assume that no more writes can happen (we waited long enough + * for all writes that were in transaction to be flushed) - adding an + * extra delay for a recent interrupt is pointless. Hence, we do + * not need an engine->irq_seqno_barrier() before the seqno reads. + * At all other times, we must assume the GPU is still running, but + * we only care about the snapshot of this moment. + */ + spin_lock_irqsave(&engine->timeline.lock, flags); + list_for_each_entry(request, &engine->timeline.requests, link) { + if (i915_request_completed(request)) + continue; + + if (!i915_request_started(request)) + break; + + /* More than one preemptible request may match! */ + if (!match_ring(request)) + break; + + active = request; + break; + } + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + return active; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #include "selftests/intel_engine_cs.c" diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 18e865ff6637..84b7047e2df5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -1014,6 +1014,9 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine); ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); +struct i915_request * +intel_engine_find_active_request(struct intel_engine_cs *engine); + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) -- cgit v1.2.3 From 7d6ce55887a44c15c6df29e883d0ea567c8ac55c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Mar 2019 09:36:57 +0000 Subject: drm/i915: Remove has-kernel-context We can no longer assume execution ordering, and in particular we cannot assume which context will execute last. One side-effect of this is that we cannot determine if the kernel-context is resident on the GPU, so remove the routines that claimed to do so. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190308093657.8640-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_active.h | 13 ------------- drivers/gpu/drm/i915/i915_gem.c | 21 +-------------------- drivers/gpu/drm/i915/i915_gem_evict.c | 16 +++------------- drivers/gpu/drm/i915/intel_engine_cs.c | 31 ------------------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 5 files changed, 4 insertions(+), 78 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 8142a334b37b..7d758719ce39 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -108,19 +108,6 @@ i915_active_request_set_retire_fn(struct i915_active_request *active, active->retire = fn ?: i915_active_retire_noop; } -static inline struct i915_request * -__i915_active_request_peek(const struct i915_active_request *active) -{ - /* - * Inside the error capture (running with the driver in an unknown - * state), we want to bend the rules slightly (a lot). - * - * Work is in progress to make it safer, in the meantime this keeps - * the known issue from spamming the logs. - */ - return rcu_dereference_protected(active->request, 1); -} - /** * i915_active_request_raw - return the active request * @active - the active tracker diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 961237b90b40..1f1849f90606 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2828,23 +2828,6 @@ i915_gem_retire_work_handler(struct work_struct *work) round_jiffies_up_relative(HZ)); } -static void assert_kernel_context_is_current(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (i915_reset_failed(i915)) - return; - - i915_retire_requests(i915); - - for_each_engine(engine, i915, id) { - GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request)); - GEM_BUG_ON(engine->last_retired_context != - to_intel_context(i915->kernel_context, engine)); - } -} - static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, unsigned long mask) { @@ -2864,9 +2847,7 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, I915_GEM_IDLE_TIMEOUT)) result = false; - if (result) { - assert_kernel_context_is_current(i915); - } else { + if (!result) { /* Forcibly cancel outstanding work and leave the gpu quiet. */ dev_err(i915->drm.dev, "Failed to idle engines, declaring wedged!\n"); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 7d8e90dfca84..060f5903544a 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -38,25 +38,15 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl { static bool ggtt_is_idle(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (i915->gt.active_requests) - return false; - - for_each_engine(engine, i915, id) { - if (!intel_engine_has_kernel_context(engine)) - return false; - } - - return true; + return !i915->gt.active_requests; } static int ggtt_flush(struct drm_i915_private *i915) { int err; - /* Not everything in the GGTT is tracked via vma (otherwise we + /* + * Not everything in the GGTT is tracked via vma (otherwise we * could evict as required with minimal stalling) so we are forced * to idle the GPU and explicitly retire outstanding requests in * the hopes that we can then remove contexts and the like only diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 18174f808fd8..8e326556499e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1090,37 +1090,6 @@ bool intel_engines_are_idle(struct drm_i915_private *i915) return true; } -/** - * intel_engine_has_kernel_context: - * @engine: the engine - * - * Returns true if the last context to be executed on this engine, or has been - * executed if the engine is already idle, is the kernel context - * (#i915.kernel_context). - */ -bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) -{ - const struct intel_context *kernel_context = - to_intel_context(engine->i915->kernel_context, engine); - struct i915_request *rq; - - lockdep_assert_held(&engine->i915->drm.struct_mutex); - - if (!engine->context_size) - return true; - - /* - * Check the last context seen by the engine. If active, it will be - * the last request that remains in the timeline. When idle, it is - * the last executed context as tracked by retirement. - */ - rq = __i915_active_request_peek(&engine->timeline.last_request); - if (rq) - return rq->hw_context == kernel_context; - else - return engine->last_retired_context == kernel_context; -} - void intel_engines_reset_default_submission(struct drm_i915_private *i915) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 84b7047e2df5..9ccbe63d46e3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -935,7 +935,6 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force); bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); -bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine); void intel_engine_lost_context(struct intel_engine_cs *engine); void intel_engines_park(struct drm_i915_private *i915); -- cgit v1.2.3 From 39e2f501c1b431bd9291308e1ef02b9a02fffbee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Mar 2019 13:25:17 +0000 Subject: drm/i915: Split struct intel_context definition to its own header This complex struct pulling in half the driver deserves its own isolation in preparation for intel_context becoming an outright complicated class of its own. In order to split this beast into its own header also requests splitting several of its dependent types and their dependencies into their own headers as well. v2: Add standalone compilation tests Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190308132522.21573-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 9 + drivers/gpu/drm/i915/i915_gem_context.h | 238 +--------- drivers/gpu/drm/i915/i915_gem_context_types.h | 181 +++++++ drivers/gpu/drm/i915/i915_timeline.h | 70 +-- drivers/gpu/drm/i915/i915_timeline_types.h | 80 ++++ drivers/gpu/drm/i915/intel_context.h | 47 ++ drivers/gpu/drm/i915/intel_context_types.h | 60 +++ drivers/gpu/drm/i915/intel_engine_types.h | 521 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 502 +------------------- drivers/gpu/drm/i915/intel_workarounds.h | 13 +- drivers/gpu/drm/i915/intel_workarounds_types.h | 27 ++ .../drm/i915/test_i915_active_types_standalone.c | 7 + .../i915/test_i915_gem_context_types_standalone.c | 7 + .../drm/i915/test_i915_timeline_types_standalone.c | 7 + .../drm/i915/test_intel_context_types_standalone.c | 7 + .../drm/i915/test_intel_engine_types_standalone.c | 7 + .../i915/test_intel_workarounds_types_standalone.c | 7 + 18 files changed, 974 insertions(+), 817 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_context_types.h create mode 100644 drivers/gpu/drm/i915/i915_timeline_types.h create mode 100644 drivers/gpu/drm/i915/intel_context.h create mode 100644 drivers/gpu/drm/i915/intel_context_types.h create mode 100644 drivers/gpu/drm/i915/intel_engine_types.h create mode 100644 drivers/gpu/drm/i915/intel_workarounds_types.h create mode 100644 drivers/gpu/drm/i915/test_i915_active_types_standalone.c create mode 100644 drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c create mode 100644 drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c create mode 100644 drivers/gpu/drm/i915/test_intel_context_types_standalone.c create mode 100644 drivers/gpu/drm/i915/test_intel_engine_types_standalone.c create mode 100644 drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index a1d834068765..a230553367de 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -56,6 +56,15 @@ i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o +# Test the headers are compilable as standalone units +i915-$(CONFIG_DRM_I915_WERROR) += \ + test_i915_active_types_standalone.o \ + test_i915_gem_context_types_standalone.o \ + test_i915_timeline_types_standalone.o \ + test_intel_context_types_standalone.o \ + test_intel_engine_types_standalone.o \ + test_intel_workarounds_types_standalone.o + # GEM code i915-y += \ i915_active.o \ diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 124c2a082b99..00698944a0ee 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -25,218 +25,17 @@ #ifndef __I915_GEM_CONTEXT_H__ #define __I915_GEM_CONTEXT_H__ -#include -#include -#include +#include "i915_gem_context_types.h" #include "i915_gem.h" #include "i915_scheduler.h" +#include "intel_context.h" #include "intel_device_info.h" #include "intel_ringbuffer.h" -struct pid; - struct drm_device; struct drm_file; -struct drm_i915_private; -struct drm_i915_file_private; -struct i915_hw_ppgtt; -struct i915_request; -struct i915_vma; -struct intel_ring; - -#define DEFAULT_CONTEXT_HANDLE 0 - -struct intel_context; - -struct intel_context_ops { - void (*unpin)(struct intel_context *ce); - void (*destroy)(struct intel_context *ce); -}; - -/* - * Powergating configuration for a particular (context,engine). - */ -struct intel_sseu { - u8 slice_mask; - u8 subslice_mask; - u8 min_eus_per_subslice; - u8 max_eus_per_subslice; -}; - -/** - * struct i915_gem_context - client state - * - * The struct i915_gem_context represents the combined view of the driver and - * logical hardware state for a particular client. - */ -struct i915_gem_context { - /** i915: i915 device backpointer */ - struct drm_i915_private *i915; - - /** file_priv: owning file descriptor */ - struct drm_i915_file_private *file_priv; - - /** - * @ppgtt: unique address space (GTT) - * - * In full-ppgtt mode, each context has its own address space ensuring - * complete seperation of one client from all others. - * - * In other modes, this is a NULL pointer with the expectation that - * the caller uses the shared global GTT. - */ - struct i915_hw_ppgtt *ppgtt; - - /** - * @pid: process id of creator - * - * Note that who created the context may not be the principle user, - * as the context may be shared across a local socket. However, - * that should only affect the default context, all contexts created - * explicitly by the client are expected to be isolated. - */ - struct pid *pid; - - /** - * @name: arbitrary name - * - * A name is constructed for the context from the creator's process - * name, pid and user handle in order to uniquely identify the - * context in messages. - */ - const char *name; - - /** link: place with &drm_i915_private.context_list */ - struct list_head link; - struct llist_node free_link; - - /** - * @ref: reference count - * - * A reference to a context is held by both the client who created it - * and on each request submitted to the hardware using the request - * (to ensure the hardware has access to the state until it has - * finished all pending writes). See i915_gem_context_get() and - * i915_gem_context_put() for access. - */ - struct kref ref; - - /** - * @rcu: rcu_head for deferred freeing. - */ - struct rcu_head rcu; - - /** - * @user_flags: small set of booleans controlled by the user - */ - unsigned long user_flags; -#define UCONTEXT_NO_ZEROMAP 0 -#define UCONTEXT_NO_ERROR_CAPTURE 1 -#define UCONTEXT_BANNABLE 2 -#define UCONTEXT_RECOVERABLE 3 - - /** - * @flags: small set of booleans - */ - unsigned long flags; -#define CONTEXT_BANNED 0 -#define CONTEXT_CLOSED 1 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 2 - - /** - * @hw_id: - unique identifier for the context - * - * The hardware needs to uniquely identify the context for a few - * functions like fault reporting, PASID, scheduling. The - * &drm_i915_private.context_hw_ida is used to assign a unqiue - * id for the lifetime of the context. - * - * @hw_id_pin_count: - number of times this context had been pinned - * for use (should be, at most, once per engine). - * - * @hw_id_link: - all contexts with an assigned id are tracked - * for possible repossession. - */ - unsigned int hw_id; - atomic_t hw_id_pin_count; - struct list_head hw_id_link; - - struct list_head active_engines; - struct mutex mutex; - - /** - * @user_handle: userspace identifier - * - * A unique per-file identifier is generated from - * &drm_i915_file_private.contexts. - */ - u32 user_handle; - - struct i915_sched_attr sched; - - /** engine: per-engine logical HW state */ - struct intel_context { - struct i915_gem_context *gem_context; - struct intel_engine_cs *engine; - struct intel_engine_cs *active; - struct list_head active_link; - struct list_head signal_link; - struct list_head signals; - struct i915_vma *state; - struct intel_ring *ring; - u32 *lrc_reg_state; - u64 lrc_desc; - int pin_count; - - /** - * active_tracker: Active tracker for the external rq activity - * on this intel_context object. - */ - struct i915_active_request active_tracker; - - const struct intel_context_ops *ops; - - /** sseu: Control eu/slice partitioning */ - struct intel_sseu sseu; - } __engine[I915_NUM_ENGINES]; - - /** ring_size: size for allocating the per-engine ring buffer */ - u32 ring_size; - /** desc_template: invariant fields for the HW context descriptor */ - u32 desc_template; - - /** guilty_count: How many times this context has caused a GPU hang. */ - atomic_t guilty_count; - /** - * @active_count: How many times this context was active during a GPU - * hang, but did not cause it. - */ - atomic_t active_count; - - /** - * @hang_timestamp: The last time(s) this context caused a GPU hang - */ - unsigned long hang_timestamp[2]; -#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ - - /** remap_slice: Bitmask of cache lines that need remapping */ - u8 remap_slice; - - /** handles_vma: rbtree to look up our context specific obj/vma for - * the user handle. (user handles are per fd, but the binding is - * per vm, which may be one per context or shared with the global GTT) - */ - struct radix_tree_root handles_vma; - - /** handles_list: reverse list of all the rbtree entries in use for - * this context, which allows us to free all the allocations on - * context close. - */ - struct list_head handles_list; -}; - static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) { return test_bit(CONTEXT_CLOSED, &ctx->flags); @@ -338,35 +137,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) return !ctx->file_priv; } -static inline struct intel_context * -to_intel_context(struct i915_gem_context *ctx, - const struct intel_engine_cs *engine) -{ - return &ctx->__engine[engine->id]; -} - -static inline struct intel_context * -intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) -{ - return engine->context_pin(engine, ctx); -} - -static inline void __intel_context_pin(struct intel_context *ce) -{ - GEM_BUG_ON(!ce->pin_count); - ce->pin_count++; -} - -static inline void intel_context_unpin(struct intel_context *ce) -{ - GEM_BUG_ON(!ce->pin_count); - if (--ce->pin_count) - return; - - GEM_BUG_ON(!ce->ops); - ce->ops->unpin(ce); -} - /* i915_gem_context.c */ int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); @@ -410,10 +180,6 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } -void intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - struct i915_lut_handle *i915_lut_handle_alloc(void); void i915_lut_handle_free(struct i915_lut_handle *lut); diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h new file mode 100644 index 000000000000..59800d749510 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h @@ -0,0 +1,181 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_CONTEXT_TYPES_H__ +#define __I915_GEM_CONTEXT_TYPES_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "i915_gem.h" /* I915_NUM_ENGINES */ +#include "i915_scheduler.h" +#include "intel_context_types.h" + +struct pid; + +struct drm_i915_private; +struct drm_i915_file_private; +struct i915_hw_ppgtt; +struct i915_timeline; +struct intel_ring; + +/** + * struct i915_gem_context - client state + * + * The struct i915_gem_context represents the combined view of the driver and + * logical hardware state for a particular client. + */ +struct i915_gem_context { + /** i915: i915 device backpointer */ + struct drm_i915_private *i915; + + /** file_priv: owning file descriptor */ + struct drm_i915_file_private *file_priv; + + /** + * @ppgtt: unique address space (GTT) + * + * In full-ppgtt mode, each context has its own address space ensuring + * complete seperation of one client from all others. + * + * In other modes, this is a NULL pointer with the expectation that + * the caller uses the shared global GTT. + */ + struct i915_hw_ppgtt *ppgtt; + + /** + * @pid: process id of creator + * + * Note that who created the context may not be the principle user, + * as the context may be shared across a local socket. However, + * that should only affect the default context, all contexts created + * explicitly by the client are expected to be isolated. + */ + struct pid *pid; + + /** + * @name: arbitrary name + * + * A name is constructed for the context from the creator's process + * name, pid and user handle in order to uniquely identify the + * context in messages. + */ + const char *name; + + /** link: place with &drm_i915_private.context_list */ + struct list_head link; + struct llist_node free_link; + + /** + * @ref: reference count + * + * A reference to a context is held by both the client who created it + * and on each request submitted to the hardware using the request + * (to ensure the hardware has access to the state until it has + * finished all pending writes). See i915_gem_context_get() and + * i915_gem_context_put() for access. + */ + struct kref ref; + + /** + * @rcu: rcu_head for deferred freeing. + */ + struct rcu_head rcu; + + /** + * @user_flags: small set of booleans controlled by the user + */ + unsigned long user_flags; +#define UCONTEXT_NO_ZEROMAP 0 +#define UCONTEXT_NO_ERROR_CAPTURE 1 +#define UCONTEXT_BANNABLE 2 +#define UCONTEXT_RECOVERABLE 3 + + /** + * @flags: small set of booleans + */ + unsigned long flags; +#define CONTEXT_BANNED 0 +#define CONTEXT_CLOSED 1 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 2 + + /** + * @hw_id: - unique identifier for the context + * + * The hardware needs to uniquely identify the context for a few + * functions like fault reporting, PASID, scheduling. The + * &drm_i915_private.context_hw_ida is used to assign a unqiue + * id for the lifetime of the context. + * + * @hw_id_pin_count: - number of times this context had been pinned + * for use (should be, at most, once per engine). + * + * @hw_id_link: - all contexts with an assigned id are tracked + * for possible repossession. + */ + unsigned int hw_id; + atomic_t hw_id_pin_count; + struct list_head hw_id_link; + + struct list_head active_engines; + struct mutex mutex; + + /** + * @user_handle: userspace identifier + * + * A unique per-file identifier is generated from + * &drm_i915_file_private.contexts. + */ + u32 user_handle; +#define DEFAULT_CONTEXT_HANDLE 0 + + struct i915_sched_attr sched; + + /** engine: per-engine logical HW state */ + struct intel_context __engine[I915_NUM_ENGINES]; + + /** ring_size: size for allocating the per-engine ring buffer */ + u32 ring_size; + /** desc_template: invariant fields for the HW context descriptor */ + u32 desc_template; + + /** guilty_count: How many times this context has caused a GPU hang. */ + atomic_t guilty_count; + /** + * @active_count: How many times this context was active during a GPU + * hang, but did not cause it. + */ + atomic_t active_count; + + /** + * @hang_timestamp: The last time(s) this context caused a GPU hang + */ + unsigned long hang_timestamp[2]; +#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ + + /** remap_slice: Bitmask of cache lines that need remapping */ + u8 remap_slice; + + /** handles_vma: rbtree to look up our context specific obj/vma for + * the user handle. (user handles are per fd, but the binding is + * per vm, which may be one per context or shared with the global GTT) + */ + struct radix_tree_root handles_vma; + + /** handles_list: reverse list of all the rbtree entries in use for + * this context, which allows us to free all the allocations on + * context close. + */ + struct list_head handles_list; +}; + +#endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 60b1dfad93ed..9126c8206490 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -25,76 +25,10 @@ #ifndef I915_TIMELINE_H #define I915_TIMELINE_H -#include -#include +#include -#include "i915_active.h" -#include "i915_request.h" #include "i915_syncmap.h" -#include "i915_utils.h" - -struct i915_vma; -struct i915_timeline_cacheline; - -struct i915_timeline { - u64 fence_context; - u32 seqno; - - spinlock_t lock; -#define TIMELINE_CLIENT 0 /* default subclass */ -#define TIMELINE_ENGINE 1 - - struct mutex mutex; /* protects the flow of requests */ - - unsigned int pin_count; - const u32 *hwsp_seqno; - struct i915_vma *hwsp_ggtt; - u32 hwsp_offset; - - struct i915_timeline_cacheline *hwsp_cacheline; - - bool has_initial_breadcrumb; - - /** - * List of breadcrumbs associated with GPU requests currently - * outstanding. - */ - struct list_head requests; - - /* Contains an RCU guarded pointer to the last request. No reference is - * held to the request, users must carefully acquire a reference to - * the request using i915_active_request_get_request_rcu(), or hold the - * struct_mutex. - */ - struct i915_active_request last_request; - - /** - * We track the most recent seqno that we wait on in every context so - * that we only have to emit a new await and dependency on a more - * recent sync point. As the contexts may be executed out-of-order, we - * have to track each individually and can not rely on an absolute - * global_seqno. When we know that all tracked fences are completed - * (i.e. when the driver is idle), we know that the syncmap is - * redundant and we can discard it without loss of generality. - */ - struct i915_syncmap *sync; - - /** - * Barrier provides the ability to serialize ordering between different - * timelines. - * - * Users can call i915_timeline_set_barrier which will make all - * subsequent submissions to this timeline be executed only after the - * barrier has been completed. - */ - struct i915_active_request barrier; - - struct list_head link; - const char *name; - struct drm_i915_private *i915; - - struct kref kref; -}; +#include "i915_timeline_types.h" int i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline *tl, diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h new file mode 100644 index 000000000000..8ff146dc05ba --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timeline_types.h @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __I915_TIMELINE_TYPES_H__ +#define __I915_TIMELINE_TYPES_H__ + +#include +#include +#include + +#include "i915_active.h" + +struct drm_i915_private; +struct i915_vma; +struct i915_timeline_cacheline; +struct i915_syncmap; + +struct i915_timeline { + u64 fence_context; + u32 seqno; + + spinlock_t lock; +#define TIMELINE_CLIENT 0 /* default subclass */ +#define TIMELINE_ENGINE 1 + struct mutex mutex; /* protects the flow of requests */ + + unsigned int pin_count; + const u32 *hwsp_seqno; + struct i915_vma *hwsp_ggtt; + u32 hwsp_offset; + + struct i915_timeline_cacheline *hwsp_cacheline; + + bool has_initial_breadcrumb; + + /** + * List of breadcrumbs associated with GPU requests currently + * outstanding. + */ + struct list_head requests; + + /* Contains an RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_active_request_get_request_rcu(), or hold the + * struct_mutex. + */ + struct i915_active_request last_request; + + /** + * We track the most recent seqno that we wait on in every context so + * that we only have to emit a new await and dependency on a more + * recent sync point. As the contexts may be executed out-of-order, we + * have to track each individually and can not rely on an absolute + * global_seqno. When we know that all tracked fences are completed + * (i.e. when the driver is idle), we know that the syncmap is + * redundant and we can discard it without loss of generality. + */ + struct i915_syncmap *sync; + + /** + * Barrier provides the ability to serialize ordering between different + * timelines. + * + * Users can call i915_timeline_set_barrier which will make all + * subsequent submissions to this timeline be executed only after the + * barrier has been completed. + */ + struct i915_active_request barrier; + + struct list_head link; + const char *name; + struct drm_i915_private *i915; + + struct kref kref; +}; + +#endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h new file mode 100644 index 000000000000..dd947692bb0b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_context.h @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_CONTEXT_H__ +#define __INTEL_CONTEXT_H__ + +#include "i915_gem_context_types.h" +#include "intel_context_types.h" +#include "intel_engine_types.h" + +void intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine); + +static inline struct intel_context * +to_intel_context(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + return &ctx->__engine[engine->id]; +} + +static inline struct intel_context * +intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + return engine->context_pin(engine, ctx); +} + +static inline void __intel_context_pin(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->pin_count); + ce->pin_count++; +} + +static inline void intel_context_unpin(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->pin_count); + if (--ce->pin_count) + return; + + GEM_BUG_ON(!ce->ops); + ce->ops->unpin(ce); +} + +#endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h new file mode 100644 index 000000000000..16e1306e9595 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_context_types.h @@ -0,0 +1,60 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_CONTEXT_TYPES__ +#define __INTEL_CONTEXT_TYPES__ + +#include +#include + +#include "i915_active_types.h" + +struct i915_gem_context; +struct i915_vma; +struct intel_context; +struct intel_ring; + +struct intel_context_ops { + void (*unpin)(struct intel_context *ce); + void (*destroy)(struct intel_context *ce); +}; + +/* + * Powergating configuration for a particular (context,engine). + */ +struct intel_sseu { + u8 slice_mask; + u8 subslice_mask; + u8 min_eus_per_subslice; + u8 max_eus_per_subslice; +}; + +struct intel_context { + struct i915_gem_context *gem_context; + struct intel_engine_cs *engine; + struct intel_engine_cs *active; + struct list_head active_link; + struct list_head signal_link; + struct list_head signals; + struct i915_vma *state; + struct intel_ring *ring; + u32 *lrc_reg_state; + u64 lrc_desc; + int pin_count; + + /** + * active_tracker: Active tracker for the external rq activity + * on this intel_context object. + */ + struct i915_active_request active_tracker; + + const struct intel_context_ops *ops; + + /** sseu: Control eu/slice partitioning */ + struct intel_sseu sseu; +}; + +#endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h new file mode 100644 index 000000000000..f7ceda334169 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_engine_types.h @@ -0,0 +1,521 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_ENGINE_TYPES__ +#define __INTEL_ENGINE_TYPES__ + +#include +#include +#include +#include + +#include "i915_timeline_types.h" +#include "intel_device_info.h" +#include "intel_workarounds_types.h" + +#include "i915_gem_batch_pool.h" +#include "i915_pmu.h" + +#define I915_MAX_SLICES 3 +#define I915_MAX_SUBSLICES 8 + +#define I915_CMD_HASH_ORDER 9 + +struct drm_i915_reg_table; +struct i915_gem_context; +struct i915_request; +struct i915_sched_attr; + +struct intel_hw_status_page { + struct i915_vma *vma; + u32 *addr; +}; + +struct intel_instdone { + u32 instdone; + /* The following exist only in the RCS engine */ + u32 slice_common; + u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; + u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; +}; + +struct intel_engine_hangcheck { + u64 acthd; + u32 last_seqno; + u32 next_seqno; + unsigned long action_timestamp; + struct intel_instdone instdone; +}; + +struct intel_ring { + struct i915_vma *vma; + void *vaddr; + + struct i915_timeline *timeline; + struct list_head request_list; + struct list_head active_link; + + u32 head; + u32 tail; + u32 emit; + + u32 space; + u32 size; + u32 effective_size; +}; + +/* + * we use a single page to load ctx workarounds so all of these + * values are referred in terms of dwords + * + * struct i915_wa_ctx_bb: + * offset: specifies batch starting position, also helpful in case + * if we want to have multiple batches at different offsets based on + * some criteria. It is not a requirement at the moment but provides + * an option for future use. + * size: size of the batch in DWORDS + */ +struct i915_ctx_workarounds { + struct i915_wa_ctx_bb { + u32 offset; + u32 size; + } indirect_ctx, per_ctx; + struct i915_vma *vma; +}; + +#define I915_MAX_VCS 4 +#define I915_MAX_VECS 2 + +/* + * Engine IDs definitions. + * Keep instances of the same type engine together. + */ +enum intel_engine_id { + RCS0 = 0, + BCS0, + VCS0, + VCS1, + VCS2, + VCS3, +#define _VCS(n) (VCS0 + (n)) + VECS0, + VECS1 +#define _VECS(n) (VECS0 + (n)) +}; + +struct st_preempt_hang { + struct completion completion; + unsigned int count; + bool inject_hang; +}; + +/** + * struct intel_engine_execlists - execlist submission queue and port state + * + * The struct intel_engine_execlists represents the combined logical state of + * driver and the hardware state for execlist mode of submission. + */ +struct intel_engine_execlists { + /** + * @tasklet: softirq tasklet for bottom handler + */ + struct tasklet_struct tasklet; + + /** + * @default_priolist: priority list for I915_PRIORITY_NORMAL + */ + struct i915_priolist default_priolist; + + /** + * @no_priolist: priority lists disabled + */ + bool no_priolist; + + /** + * @submit_reg: gen-specific execlist submission register + * set to the ExecList Submission Port (elsp) register pre-Gen11 and to + * the ExecList Submission Queue Contents register array for Gen11+ + */ + u32 __iomem *submit_reg; + + /** + * @ctrl_reg: the enhanced execlists control register, used to load the + * submit queue on the HW and to request preemptions to idle + */ + u32 __iomem *ctrl_reg; + + /** + * @port: execlist port states + * + * For each hardware ELSP (ExecList Submission Port) we keep + * track of the last request and the number of times we submitted + * that port to hw. We then count the number of times the hw reports + * a context completion or preemption. As only one context can + * be active on hw, we limit resubmission of context to port[0]. This + * is called Lite Restore, of the context. + */ + struct execlist_port { + /** + * @request_count: combined request and submission count + */ + struct i915_request *request_count; +#define EXECLIST_COUNT_BITS 2 +#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) +#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) +#define port_set(p, packed) ((p)->request_count = (packed)) +#define port_isset(p) ((p)->request_count) +#define port_index(p, execlists) ((p) - (execlists)->port) + + /** + * @context_id: context ID for port + */ + GEM_DEBUG_DECL(u32 context_id); + +#define EXECLIST_MAX_PORTS 2 + } port[EXECLIST_MAX_PORTS]; + + /** + * @active: is the HW active? We consider the HW as active after + * submitting any context for execution and until we have seen the + * last context completion event. After that, we do not expect any + * more events until we submit, and so can park the HW. + * + * As we have a small number of different sources from which we feed + * the HW, we track the state of each inside a single bitfield. + */ + unsigned int active; +#define EXECLISTS_ACTIVE_USER 0 +#define EXECLISTS_ACTIVE_PREEMPT 1 +#define EXECLISTS_ACTIVE_HWACK 2 + + /** + * @port_mask: number of execlist ports - 1 + */ + unsigned int port_mask; + + /** + * @queue_priority_hint: Highest pending priority. + * + * When we add requests into the queue, or adjust the priority of + * executing requests, we compute the maximum priority of those + * pending requests. We can then use this value to determine if + * we need to preempt the executing requests to service the queue. + * However, since the we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer may match the highest + * available request priority. + */ + int queue_priority_hint; + + /** + * @queue: queue of requests, in priority lists + */ + struct rb_root_cached queue; + + /** + * @csb_write: control register for Context Switch buffer + * + * Note this register may be either mmio or HWSP shadow. + */ + u32 *csb_write; + + /** + * @csb_status: status array for Context Switch buffer + * + * Note these register may be either mmio or HWSP shadow. + */ + u32 *csb_status; + + /** + * @preempt_complete_status: expected CSB upon completing preemption + */ + u32 preempt_complete_status; + + /** + * @csb_head: context status buffer head + */ + u8 csb_head; + + I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) +}; + +#define INTEL_ENGINE_CS_MAX_NAME 8 + +struct intel_engine_cs { + struct drm_i915_private *i915; + char name[INTEL_ENGINE_CS_MAX_NAME]; + + enum intel_engine_id id; + unsigned int hw_id; + unsigned int guc_id; + intel_engine_mask_t mask; + + u8 uabi_class; + + u8 class; + u8 instance; + u32 context_size; + u32 mmio_base; + + struct intel_ring *buffer; + + struct i915_timeline timeline; + + struct drm_i915_gem_object *default_state; + void *pinned_default_state; + + /* Rather than have every client wait upon all user interrupts, + * with the herd waking after every interrupt and each doing the + * heavyweight seqno dance, we delegate the task (of being the + * bottom-half of the user interrupt) to the first client. After + * every interrupt, we wake up one client, who does the heavyweight + * coherent seqno read and either goes back to sleep (if incomplete), + * or wakes up all the completed clients in parallel, before then + * transferring the bottom-half status to the next client in the queue. + * + * Compared to walking the entire list of waiters in a single dedicated + * bottom-half, we reduce the latency of the first waiter by avoiding + * a context switch, but incur additional coherent seqno reads when + * following the chain of request breadcrumbs. Since it is most likely + * that we have a single client waiting on each seqno, then reducing + * the overhead of waking that client is much preferred. + */ + struct intel_breadcrumbs { + spinlock_t irq_lock; + struct list_head signalers; + + struct irq_work irq_work; /* for use from inside irq_lock */ + + unsigned int irq_enabled; + + bool irq_armed; + } breadcrumbs; + + struct intel_engine_pmu { + /** + * @enable: Bitmask of enable sample events on this engine. + * + * Bits correspond to sample event types, for instance + * I915_SAMPLE_QUEUED is bit 0 etc. + */ + u32 enable; + /** + * @enable_count: Reference count for the enabled samplers. + * + * Index number corresponds to @enum drm_i915_pmu_engine_sample. + */ + unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; + /** + * @sample: Counter values for sampling events. + * + * Our internal timer stores the current counters in this field. + * + * Index number corresponds to @enum drm_i915_pmu_engine_sample. + */ + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; + } pmu; + + /* + * A pool of objects to use as shadow copies of client batch buffers + * when the command parser is enabled. Prevents the client from + * modifying the batch contents after software parsing. + */ + struct i915_gem_batch_pool batch_pool; + + struct intel_hw_status_page status_page; + struct i915_ctx_workarounds wa_ctx; + struct i915_wa_list ctx_wa_list; + struct i915_wa_list wa_list; + struct i915_wa_list whitelist; + + u32 irq_keep_mask; /* always keep these interrupts */ + u32 irq_enable_mask; /* bitmask to enable ring interrupt */ + void (*irq_enable)(struct intel_engine_cs *engine); + void (*irq_disable)(struct intel_engine_cs *engine); + + int (*init_hw)(struct intel_engine_cs *engine); + + struct { + void (*prepare)(struct intel_engine_cs *engine); + void (*reset)(struct intel_engine_cs *engine, bool stalled); + void (*finish)(struct intel_engine_cs *engine); + } reset; + + void (*park)(struct intel_engine_cs *engine); + void (*unpark)(struct intel_engine_cs *engine); + + void (*set_default_submission)(struct intel_engine_cs *engine); + + struct intel_context *(*context_pin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); + + int (*request_alloc)(struct i915_request *rq); + int (*init_context)(struct i915_request *rq); + + int (*emit_flush)(struct i915_request *request, u32 mode); +#define EMIT_INVALIDATE BIT(0) +#define EMIT_FLUSH BIT(1) +#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) + int (*emit_bb_start)(struct i915_request *rq, + u64 offset, u32 length, + unsigned int dispatch_flags); +#define I915_DISPATCH_SECURE BIT(0) +#define I915_DISPATCH_PINNED BIT(1) + int (*emit_init_breadcrumb)(struct i915_request *rq); + u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, + u32 *cs); + unsigned int emit_fini_breadcrumb_dw; + + /* Pass the request to the hardware queue (e.g. directly into + * the legacy ringbuffer or to the end of an execlist). + * + * This is called from an atomic context with irqs disabled; must + * be irq safe. + */ + void (*submit_request)(struct i915_request *rq); + + /* + * Call when the priority on a request has changed and it and its + * dependencies may need rescheduling. Note the request itself may + * not be ready to run! + */ + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); + + /* + * Cancel all requests on the hardware, or queued for execution. + * This should only cancel the ready requests that have been + * submitted to the engine (via the engine->submit_request callback). + * This is called when marking the device as wedged. + */ + void (*cancel_requests)(struct intel_engine_cs *engine); + + void (*cleanup)(struct intel_engine_cs *engine); + + struct intel_engine_execlists execlists; + + /* Contexts are pinned whilst they are active on the GPU. The last + * context executed remains active whilst the GPU is idle - the + * switch away and write to the context object only occurs on the + * next execution. Contexts are only unpinned on retirement of the + * following request ensuring that we can always write to the object + * on the context switch even after idling. Across suspend, we switch + * to the kernel context and trash it as the save may not happen + * before the hardware is powered down. + */ + struct intel_context *last_retired_context; + + /* status_notifier: list of callbacks for context-switch changes */ + struct atomic_notifier_head context_status_notifier; + + struct intel_engine_hangcheck hangcheck; + +#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_SUPPORTS_STATS BIT(1) +#define I915_ENGINE_HAS_PREEMPTION BIT(2) +#define I915_ENGINE_HAS_SEMAPHORES BIT(3) + unsigned int flags; + + /* + * Table of commands the command parser needs to know about + * for this engine. + */ + DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); + + /* + * Table of registers allowed in commands that read/write registers. + */ + const struct drm_i915_reg_table *reg_tables; + int reg_table_count; + + /* + * Returns the bitmask for the length field of the specified command. + * Return 0 for an unrecognized/invalid command. + * + * If the command parser finds an entry for a command in the engine's + * cmd_tables, it gets the command's length based on the table entry. + * If not, it calls this function to determine the per-engine length + * field encoding for the command (i.e. different opcode ranges use + * certain bits to encode the command length in the header). + */ + u32 (*get_cmd_length_mask)(u32 cmd_header); + + struct { + /** + * @lock: Lock protecting the below fields. + */ + seqlock_t lock; + /** + * @enabled: Reference count indicating number of listeners. + */ + unsigned int enabled; + /** + * @active: Number of contexts currently scheduled in. + */ + unsigned int active; + /** + * @enabled_at: Timestamp when busy stats were enabled. + */ + ktime_t enabled_at; + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. + */ + ktime_t start; + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). + */ + ktime_t total; + } stats; +}; + +static inline bool +intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; +} + +static inline bool +intel_engine_supports_stats(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_SUPPORTS_STATS; +} + +static inline bool +intel_engine_has_preemption(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_PREEMPTION; +} + +static inline bool +intel_engine_has_semaphores(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_SEMAPHORES; +} + +#define instdone_slice_mask(dev_priv__) \ + (IS_GEN(dev_priv__, 7) ? \ + 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) + +#define instdone_subslice_mask(dev_priv__) \ + (IS_GEN(dev_priv__, 7) ? \ + 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) + +#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ + for ((slice__) = 0, (subslice__) = 0; \ + (slice__) < I915_MAX_SLICES; \ + (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \ + (slice__) += ((subslice__) == 0)) \ + for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ + (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) + +#endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 744220296653..77ec1bd4df5a 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -32,6 +32,7 @@ #include "intel_guc_log.h" #include "intel_guc_reg.h" #include "intel_uc_fw.h" +#include "i915_utils.h" #include "i915_vma.h" struct guc_preempt_work { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9ccbe63d46e3..e612bdca9fd9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -15,14 +15,11 @@ #include "i915_request.h" #include "i915_selftest.h" #include "i915_timeline.h" -#include "intel_device_info.h" +#include "intel_engine_types.h" #include "intel_gpu_commands.h" #include "intel_workarounds.h" struct drm_printer; -struct i915_sched_attr; - -#define I915_CMD_HASH_ORDER 9 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, * but keeps the logic simple. Indeed, the whole purpose of this macro is just @@ -32,11 +29,6 @@ struct i915_sched_attr; #define CACHELINE_BYTES 64 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) -struct intel_hw_status_page { - struct i915_vma *vma; - u32 *addr; -}; - #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) #define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val) @@ -91,498 +83,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) return "unknown"; } -#define I915_MAX_SLICES 3 -#define I915_MAX_SUBSLICES 8 - -#define instdone_slice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) - -#define instdone_subslice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) - -#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ - for ((slice__) = 0, (subslice__) = 0; \ - (slice__) < I915_MAX_SLICES; \ - (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \ - (slice__) += ((subslice__) == 0)) \ - for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ - (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) - -struct intel_instdone { - u32 instdone; - /* The following exist only in the RCS engine */ - u32 slice_common; - u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; - u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; -}; - -struct intel_engine_hangcheck { - u64 acthd; - u32 last_seqno; - u32 next_seqno; - unsigned long action_timestamp; - struct intel_instdone instdone; -}; - -struct intel_ring { - struct i915_vma *vma; - void *vaddr; - - struct i915_timeline *timeline; - struct list_head request_list; - struct list_head active_link; - - u32 head; - u32 tail; - u32 emit; - - u32 space; - u32 size; - u32 effective_size; -}; - -struct i915_gem_context; -struct drm_i915_reg_table; - -/* - * we use a single page to load ctx workarounds so all of these - * values are referred in terms of dwords - * - * struct i915_wa_ctx_bb: - * offset: specifies batch starting position, also helpful in case - * if we want to have multiple batches at different offsets based on - * some criteria. It is not a requirement at the moment but provides - * an option for future use. - * size: size of the batch in DWORDS - */ -struct i915_ctx_workarounds { - struct i915_wa_ctx_bb { - u32 offset; - u32 size; - } indirect_ctx, per_ctx; - struct i915_vma *vma; -}; - -struct i915_request; - -#define I915_MAX_VCS 4 -#define I915_MAX_VECS 2 - -/* - * Engine IDs definitions. - * Keep instances of the same type engine together. - */ -enum intel_engine_id { - RCS0 = 0, - BCS0, - VCS0, - VCS1, - VCS2, - VCS3, -#define _VCS(n) (VCS0 + (n)) - VECS0, - VECS1 -#define _VECS(n) (VECS0 + (n)) -}; - -struct st_preempt_hang { - struct completion completion; - unsigned int count; - bool inject_hang; -}; - -/** - * struct intel_engine_execlists - execlist submission queue and port state - * - * The struct intel_engine_execlists represents the combined logical state of - * driver and the hardware state for execlist mode of submission. - */ -struct intel_engine_execlists { - /** - * @tasklet: softirq tasklet for bottom handler - */ - struct tasklet_struct tasklet; - - /** - * @default_priolist: priority list for I915_PRIORITY_NORMAL - */ - struct i915_priolist default_priolist; - - /** - * @no_priolist: priority lists disabled - */ - bool no_priolist; - - /** - * @submit_reg: gen-specific execlist submission register - * set to the ExecList Submission Port (elsp) register pre-Gen11 and to - * the ExecList Submission Queue Contents register array for Gen11+ - */ - u32 __iomem *submit_reg; - - /** - * @ctrl_reg: the enhanced execlists control register, used to load the - * submit queue on the HW and to request preemptions to idle - */ - u32 __iomem *ctrl_reg; - - /** - * @port: execlist port states - * - * For each hardware ELSP (ExecList Submission Port) we keep - * track of the last request and the number of times we submitted - * that port to hw. We then count the number of times the hw reports - * a context completion or preemption. As only one context can - * be active on hw, we limit resubmission of context to port[0]. This - * is called Lite Restore, of the context. - */ - struct execlist_port { - /** - * @request_count: combined request and submission count - */ - struct i915_request *request_count; -#define EXECLIST_COUNT_BITS 2 -#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) -#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) -#define port_set(p, packed) ((p)->request_count = (packed)) -#define port_isset(p) ((p)->request_count) -#define port_index(p, execlists) ((p) - (execlists)->port) - - /** - * @context_id: context ID for port - */ - GEM_DEBUG_DECL(u32 context_id); - -#define EXECLIST_MAX_PORTS 2 - } port[EXECLIST_MAX_PORTS]; - - /** - * @active: is the HW active? We consider the HW as active after - * submitting any context for execution and until we have seen the - * last context completion event. After that, we do not expect any - * more events until we submit, and so can park the HW. - * - * As we have a small number of different sources from which we feed - * the HW, we track the state of each inside a single bitfield. - */ - unsigned int active; -#define EXECLISTS_ACTIVE_USER 0 -#define EXECLISTS_ACTIVE_PREEMPT 1 -#define EXECLISTS_ACTIVE_HWACK 2 - - /** - * @port_mask: number of execlist ports - 1 - */ - unsigned int port_mask; - - /** - * @queue_priority_hint: Highest pending priority. - * - * When we add requests into the queue, or adjust the priority of - * executing requests, we compute the maximum priority of those - * pending requests. We can then use this value to determine if - * we need to preempt the executing requests to service the queue. - * However, since the we may have recorded the priority of an inflight - * request we wanted to preempt but since completed, at the time of - * dequeuing the priority hint may no longer may match the highest - * available request priority. - */ - int queue_priority_hint; - - /** - * @queue: queue of requests, in priority lists - */ - struct rb_root_cached queue; - - /** - * @csb_write: control register for Context Switch buffer - * - * Note this register may be either mmio or HWSP shadow. - */ - u32 *csb_write; - - /** - * @csb_status: status array for Context Switch buffer - * - * Note these register may be either mmio or HWSP shadow. - */ - u32 *csb_status; - - /** - * @preempt_complete_status: expected CSB upon completing preemption - */ - u32 preempt_complete_status; - - /** - * @csb_head: context status buffer head - */ - u8 csb_head; - - I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) -}; - -#define INTEL_ENGINE_CS_MAX_NAME 8 - -struct intel_engine_cs { - struct drm_i915_private *i915; - char name[INTEL_ENGINE_CS_MAX_NAME]; - - enum intel_engine_id id; - unsigned int hw_id; - unsigned int guc_id; - intel_engine_mask_t mask; - - u8 uabi_class; - - u8 class; - u8 instance; - u32 context_size; - u32 mmio_base; - - struct intel_ring *buffer; - - struct i915_timeline timeline; - - struct drm_i915_gem_object *default_state; - void *pinned_default_state; - - /* Rather than have every client wait upon all user interrupts, - * with the herd waking after every interrupt and each doing the - * heavyweight seqno dance, we delegate the task (of being the - * bottom-half of the user interrupt) to the first client. After - * every interrupt, we wake up one client, who does the heavyweight - * coherent seqno read and either goes back to sleep (if incomplete), - * or wakes up all the completed clients in parallel, before then - * transferring the bottom-half status to the next client in the queue. - * - * Compared to walking the entire list of waiters in a single dedicated - * bottom-half, we reduce the latency of the first waiter by avoiding - * a context switch, but incur additional coherent seqno reads when - * following the chain of request breadcrumbs. Since it is most likely - * that we have a single client waiting on each seqno, then reducing - * the overhead of waking that client is much preferred. - */ - struct intel_breadcrumbs { - spinlock_t irq_lock; - struct list_head signalers; - - struct irq_work irq_work; /* for use from inside irq_lock */ - - unsigned int irq_enabled; - - bool irq_armed; - } breadcrumbs; - - struct intel_engine_pmu { - /** - * @enable: Bitmask of enable sample events on this engine. - * - * Bits correspond to sample event types, for instance - * I915_SAMPLE_QUEUED is bit 0 etc. - */ - u32 enable; - /** - * @enable_count: Reference count for the enabled samplers. - * - * Index number corresponds to @enum drm_i915_pmu_engine_sample. - */ - unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; - /** - * @sample: Counter values for sampling events. - * - * Our internal timer stores the current counters in this field. - * - * Index number corresponds to @enum drm_i915_pmu_engine_sample. - */ - struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; - } pmu; - - /* - * A pool of objects to use as shadow copies of client batch buffers - * when the command parser is enabled. Prevents the client from - * modifying the batch contents after software parsing. - */ - struct i915_gem_batch_pool batch_pool; - - struct intel_hw_status_page status_page; - struct i915_ctx_workarounds wa_ctx; - struct i915_wa_list ctx_wa_list; - struct i915_wa_list wa_list; - struct i915_wa_list whitelist; - - u32 irq_keep_mask; /* always keep these interrupts */ - u32 irq_enable_mask; /* bitmask to enable ring interrupt */ - void (*irq_enable)(struct intel_engine_cs *engine); - void (*irq_disable)(struct intel_engine_cs *engine); - - int (*init_hw)(struct intel_engine_cs *engine); - - struct { - void (*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, bool stalled); - void (*finish)(struct intel_engine_cs *engine); - } reset; - - void (*park)(struct intel_engine_cs *engine); - void (*unpark)(struct intel_engine_cs *engine); - - void (*set_default_submission)(struct intel_engine_cs *engine); - - struct intel_context *(*context_pin)(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); - - int (*request_alloc)(struct i915_request *rq); - int (*init_context)(struct i915_request *rq); - - int (*emit_flush)(struct i915_request *request, u32 mode); -#define EMIT_INVALIDATE BIT(0) -#define EMIT_FLUSH BIT(1) -#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) - int (*emit_bb_start)(struct i915_request *rq, - u64 offset, u32 length, - unsigned int dispatch_flags); -#define I915_DISPATCH_SECURE BIT(0) -#define I915_DISPATCH_PINNED BIT(1) - int (*emit_init_breadcrumb)(struct i915_request *rq); - u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, - u32 *cs); - unsigned int emit_fini_breadcrumb_dw; - - /* Pass the request to the hardware queue (e.g. directly into - * the legacy ringbuffer or to the end of an execlist). - * - * This is called from an atomic context with irqs disabled; must - * be irq safe. - */ - void (*submit_request)(struct i915_request *rq); - - /* - * Call when the priority on a request has changed and it and its - * dependencies may need rescheduling. Note the request itself may - * not be ready to run! - */ - void (*schedule)(struct i915_request *request, - const struct i915_sched_attr *attr); - - /* - * Cancel all requests on the hardware, or queued for execution. - * This should only cancel the ready requests that have been - * submitted to the engine (via the engine->submit_request callback). - * This is called when marking the device as wedged. - */ - void (*cancel_requests)(struct intel_engine_cs *engine); - - void (*cleanup)(struct intel_engine_cs *engine); - - struct intel_engine_execlists execlists; - - /* Contexts are pinned whilst they are active on the GPU. The last - * context executed remains active whilst the GPU is idle - the - * switch away and write to the context object only occurs on the - * next execution. Contexts are only unpinned on retirement of the - * following request ensuring that we can always write to the object - * on the context switch even after idling. Across suspend, we switch - * to the kernel context and trash it as the save may not happen - * before the hardware is powered down. - */ - struct intel_context *last_retired_context; - - /* status_notifier: list of callbacks for context-switch changes */ - struct atomic_notifier_head context_status_notifier; - - struct intel_engine_hangcheck hangcheck; - -#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) -#define I915_ENGINE_SUPPORTS_STATS BIT(1) -#define I915_ENGINE_HAS_PREEMPTION BIT(2) -#define I915_ENGINE_HAS_SEMAPHORES BIT(3) - unsigned int flags; - - /* - * Table of commands the command parser needs to know about - * for this engine. - */ - DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); - - /* - * Table of registers allowed in commands that read/write registers. - */ - const struct drm_i915_reg_table *reg_tables; - int reg_table_count; - - /* - * Returns the bitmask for the length field of the specified command. - * Return 0 for an unrecognized/invalid command. - * - * If the command parser finds an entry for a command in the engine's - * cmd_tables, it gets the command's length based on the table entry. - * If not, it calls this function to determine the per-engine length - * field encoding for the command (i.e. different opcode ranges use - * certain bits to encode the command length in the header). - */ - u32 (*get_cmd_length_mask)(u32 cmd_header); - - struct { - /** - * @lock: Lock protecting the below fields. - */ - seqlock_t lock; - /** - * @enabled: Reference count indicating number of listeners. - */ - unsigned int enabled; - /** - * @active: Number of contexts currently scheduled in. - */ - unsigned int active; - /** - * @enabled_at: Timestamp when busy stats were enabled. - */ - ktime_t enabled_at; - /** - * @start: Timestamp of the last idle to active transition. - * - * Idle is defined as active == 0, active is active > 0. - */ - ktime_t start; - /** - * @total: Total time this engine was busy. - * - * Accumulated time not counting the most recent block in cases - * where engine is currently busy (active > 0). - */ - ktime_t total; - } stats; -}; - -static inline bool -intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; -} - -static inline bool -intel_engine_supports_stats(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_SUPPORTS_STATS; -} - -static inline bool -intel_engine_has_preemption(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_HAS_PREEMPTION; -} - -static inline bool -intel_engine_has_semaphores(const struct intel_engine_cs *engine) -{ - return engine->flags & I915_ENGINE_HAS_SEMAPHORES; -} - void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); static inline bool __execlists_need_preempt(int prio, int last) diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h index 7c734714b05e..a1bf51c611a9 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -9,18 +9,7 @@ #include -struct i915_wa { - i915_reg_t reg; - u32 mask; - u32 val; -}; - -struct i915_wa_list { - const char *name; - struct i915_wa *list; - unsigned int count; - unsigned int wa_count; -}; +#include "intel_workarounds_types.h" static inline void intel_wa_list_free(struct i915_wa_list *wal) { diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/intel_workarounds_types.h new file mode 100644 index 000000000000..30918da180ff --- /dev/null +++ b/drivers/gpu/drm/i915/intel_workarounds_types.h @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef __INTEL_WORKAROUNDS_TYPES_H__ +#define __INTEL_WORKAROUNDS_TYPES_H__ + +#include + +#include "i915_reg.h" + +struct i915_wa { + i915_reg_t reg; + u32 mask; + u32 val; +}; + +struct i915_wa_list { + const char *name; + struct i915_wa *list; + unsigned int count; + unsigned int wa_count; +}; + +#endif /* __INTEL_WORKAROUNDS_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/test_i915_active_types_standalone.c b/drivers/gpu/drm/i915/test_i915_active_types_standalone.c new file mode 100644 index 000000000000..144ebd153e57 --- /dev/null +++ b/drivers/gpu/drm/i915/test_i915_active_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_active_types.h" diff --git a/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c b/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c new file mode 100644 index 000000000000..4e4da4860bc2 --- /dev/null +++ b/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_gem_context_types.h" diff --git a/drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c b/drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c new file mode 100644 index 000000000000..f58e148e8946 --- /dev/null +++ b/drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_timeline_types.h" diff --git a/drivers/gpu/drm/i915/test_intel_context_types_standalone.c b/drivers/gpu/drm/i915/test_intel_context_types_standalone.c new file mode 100644 index 000000000000..b39e3c4e6551 --- /dev/null +++ b/drivers/gpu/drm/i915/test_intel_context_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_context_types.h" diff --git a/drivers/gpu/drm/i915/test_intel_engine_types_standalone.c b/drivers/gpu/drm/i915/test_intel_engine_types_standalone.c new file mode 100644 index 000000000000..d05e4cdcbcf9 --- /dev/null +++ b/drivers/gpu/drm/i915/test_intel_engine_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_engine_types.h" diff --git a/drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c b/drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c new file mode 100644 index 000000000000..4f658bb00825 --- /dev/null +++ b/drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c @@ -0,0 +1,7 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_workarounds_types.h" -- cgit v1.2.3 From 54939ea0bd85e128bdd5bca579508dd4701c5ce9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Mar 2019 09:51:51 +0000 Subject: drm/i915: Switch to use HWS indices rather than addresses If we use the STORE_DATA_INDEX function we can use a fixed offset and avoid having to lookup up the engine HWS address. A step closer to being able to emit the final breadcrumb during request_add rather than later in the submission interrupt handler. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190318095204.9913-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 3 ++- drivers/gpu/drm/i915/intel_lrc.c | 17 +++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 16 ++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- 4 files changed, 17 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 4a5727233419..c4ad73980988 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -583,7 +583,8 @@ static void inject_preempt_context(struct work_struct *work) } else { cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED, - addr); + addr, + 0); *cs++ = MI_NOOP; *cs++ = MI_NOOP; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e54e0064b2d6..29042060b42c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -173,12 +173,6 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); -static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_HANGCHECK_ADDR); -} - static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -2214,11 +2208,14 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write(cs, request->fence.seqno, - request->timeline->hwsp_offset); + request->timeline->hwsp_offset, + 0); cs = gen8_emit_ggtt_write(cs, intel_engine_next_hangcheck_seqno(request->engine), - intel_hws_hangcheck_address(request->engine)); + I915_GEM_HWS_HANGCHECK_ADDR, + MI_FLUSH_DW_STORE_INDEX); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2242,8 +2239,8 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) cs = gen8_emit_ggtt_write_rcs(cs, intel_engine_next_hangcheck_seqno(request->engine), - intel_hws_hangcheck_address(request->engine), - 0); + I915_GEM_HWS_HANGCHECK_ADDR, + PIPE_CONTROL_STORE_DATA_INDEX); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f26f5cc1584c..366be3d67e15 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -43,12 +43,6 @@ */ #define LEGACY_REQUEST_SIZE 200 -static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_HANGCHECK_ADDR); -} - unsigned int intel_ring_update_space(struct intel_ring *ring) { unsigned int space; @@ -317,8 +311,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = rq->fence.seqno; *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE; - *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); *cs++ = MI_USER_INTERRUPT; @@ -423,8 +417,10 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = rq->fence.seqno; *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; - *cs++ = hws_hangcheck_address(rq->engine); + *cs++ = (PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_GLOBAL_GTT_IVB); + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); *cs++ = MI_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index e612bdca9fd9..f9593e2e070d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -408,14 +408,14 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) } static inline u32 * -gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) +gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ GEM_BUG_ON(gtt_offset & (1 << 5)); /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags; *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; *cs++ = 0; *cs++ = value; -- cgit v1.2.3 From 65baf0ef046b0297a1214932d48a6b71d3d79b4c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Mar 2019 09:51:46 +0000 Subject: drm/i915: Hold a ref to the ring while retiring As the final request on a ring may hold the reference to this ring (via retiring the last pinned context), we may find ourselves chasing a dangling pointer on completion of the list. A quick solution is to hold a reference to the ring itself as we retire along it so that we only free it after we stop dereferencing it. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190318095204.9913-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 6 +++++- drivers/gpu/drm/i915/intel_engine_types.h | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +++++---- drivers/gpu/drm/i915/intel_ringbuffer.h | 13 ++++++++++++- drivers/gpu/drm/i915/selftests/mock_engine.c | 1 + 6 files changed, 27 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9533a85cb0b3..0a3d94517d0a 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1332,8 +1332,12 @@ void i915_retire_requests(struct drm_i915_private *i915) if (!i915->gt.active_requests) return; - list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) + list_for_each_entry_safe(ring, tmp, + &i915->gt.active_rings, active_link) { + intel_ring_get(ring); /* last rq holds reference! */ ring_retire_requests(ring); + intel_ring_put(ring); + } } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h index b0aa1f0d4e47..88ed7ba8886f 100644 --- a/drivers/gpu/drm/i915/intel_engine_types.h +++ b/drivers/gpu/drm/i915/intel_engine_types.h @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -51,6 +52,7 @@ struct intel_engine_hangcheck { }; struct intel_ring { + struct kref ref; struct i915_vma *vma; void *vaddr; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 29042060b42c..8d1cb81ba0f5 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1230,7 +1230,7 @@ static void execlists_submit_request(struct i915_request *request) static void __execlists_context_fini(struct intel_context *ce) { - intel_ring_free(ce->ring); + intel_ring_put(ce->ring); GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); i915_gem_object_put(ce->state->obj); @@ -2867,7 +2867,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce, return 0; error_ring_free: - intel_ring_free(ring); + intel_ring_put(ring); error_deref_obj: i915_gem_object_put(ctx_obj); return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 366be3d67e15..5137f0140664 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1307,6 +1307,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, if (!ring) return ERR_PTR(-ENOMEM); + kref_init(&ring->ref); INIT_LIST_HEAD(&ring->request_list); ring->timeline = i915_timeline_get(timeline); @@ -1331,9 +1332,9 @@ intel_engine_create_ring(struct intel_engine_cs *engine, return ring; } -void -intel_ring_free(struct intel_ring *ring) +void intel_ring_free(struct kref *ref) { + struct intel_ring *ring = container_of(ref, typeof(*ring), ref); struct drm_i915_gem_object *obj = ring->vma->obj; i915_vma_close(ring->vma); @@ -1587,7 +1588,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) err_unpin: intel_ring_unpin(ring); err_ring: - intel_ring_free(ring); + intel_ring_put(ring); err: intel_engine_cleanup_common(engine); return err; @@ -1601,7 +1602,7 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) (I915_READ_MODE(engine) & MODE_IDLE) == 0); intel_ring_unpin(engine->buffer); - intel_ring_free(engine->buffer); + intel_ring_put(engine->buffer); if (engine->cleanup) engine->cleanup(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f9593e2e070d..a02c92dac5da 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -231,7 +231,18 @@ int intel_ring_pin(struct intel_ring *ring); void intel_ring_reset(struct intel_ring *ring, u32 tail); unsigned int intel_ring_update_space(struct intel_ring *ring); void intel_ring_unpin(struct intel_ring *ring); -void intel_ring_free(struct intel_ring *ring); +void intel_ring_free(struct kref *ref); + +static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) +{ + kref_get(&ring->ref); + return ring; +} + +static inline void intel_ring_put(struct intel_ring *ring) +{ + kref_put(&ring->ref, intel_ring_free); +} void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index f6d120e05ee4..881450c694e9 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -57,6 +57,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) return NULL; } + kref_init(&ring->base.ref); ring->base.size = sz; ring->base.effective_size = sz; ring->base.vaddr = (void *)(ring + 1); -- cgit v1.2.3