From ed06fddc2203aefb4edbcd179a4e3237e19e6197 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Feb 2019 13:10:04 +0000 Subject: drm/i915: Include the current timeline seqno for debugging execlists While this is mainly only useful for ELSP[0], it is definitely useful to know the current timeline seqno wrt to the queued set of requests for that port, as this carries additional information above and beyond the near-defunct global_seqno and global HWSP. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190211131004.11634-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 49fa43ff02ba..2547e2e51db8 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1425,10 +1425,11 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ", + "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", idx, count, i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset); + rq->timeline->hwsp_offset, + hwsp_seqno(rq)); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); -- cgit v1.2.3 From c41166f9a145f1c4ce2961b338f9b57495ace4b5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Feb 2019 14:56:37 +0000 Subject: drm/i915: Beware temporary wedging when determining -EIO At a few points in our uABI, we check to see if the driver is wedged and report -EIO back to the user in that case. However, as we perform the check and reset asynchronously (where once before they were both serialised by the struct_mutex), we may instead see the temporary wedging used to cancel inflight rendering to avoid a deadlock during reset (caused by either us timing out in our reset handler, i915_wedge_on_timeout or with malice aforethought in intel_reset_prepare for a stuck modeset). If we suspect this is the case, that is we see a wedged driver *and* reset in progress, then wait until the reset is resolved before reporting upon the wedged status. v2: might_sleep() (Mika) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109580 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190220145637.23503-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++++---- drivers/gpu/drm/i915/i915_drv.h | 7 +++++- drivers/gpu/drm/i915/i915_gem.c | 25 ++++++++++--------- drivers/gpu/drm/i915/i915_request.c | 5 ++-- drivers/gpu/drm/i915/i915_reset.c | 29 ++++++++++++++++++++-- drivers/gpu/drm/i915/i915_reset.h | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 8 +++--- drivers/gpu/drm/i915/intel_hangcheck.c | 2 +- drivers/gpu/drm/i915/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/selftests/i915_active.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 4 +-- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 2 +- drivers/gpu/drm/i915/selftests/i915_request.c | 2 +- drivers/gpu/drm/i915/selftests/igt_flush_test.c | 2 +- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 24 +++++++++--------- drivers/gpu/drm/i915/selftests/intel_lrc.c | 2 +- drivers/gpu/drm/i915/selftests/intel_workarounds.c | 4 +-- 19 files changed, 91 insertions(+), 52 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2aeea977283f..37175414ce89 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3833,11 +3833,18 @@ static const struct file_operations i915_cur_wm_latency_fops = { static int i915_wedged_get(void *data, u64 *val) { - struct drm_i915_private *dev_priv = data; - - *val = i915_terminally_wedged(&dev_priv->gpu_error); + int ret = i915_terminally_wedged(data); - return 0; + switch (ret) { + case -EIO: + *val = 1; + return 0; + case 0: + *val = 0; + return 0; + default: + return ret; + } } static int @@ -3918,7 +3925,7 @@ i915_drop_caches_set(void *data, u64 val) mutex_unlock(&i915->drm.struct_mutex); } - if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error)) + if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(i915)) i915_handle_error(i915, ALL_ENGINES, 0, NULL); fs_reclaim_acquire(GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 58c9058da4b4..2a78fb3e6eee 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3020,11 +3020,16 @@ int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine); -static inline bool i915_terminally_wedged(struct i915_gpu_error *error) +static inline bool __i915_wedged(struct i915_gpu_error *error) { return unlikely(test_bit(I915_WEDGED, &error->flags)); } +static inline bool i915_reset_failed(struct drm_i915_private *i915) +{ + return __i915_wedged(&i915->gpu_error); +} + static inline u32 i915_reset_count(struct i915_gpu_error *error) { return READ_ONCE(error->reset_count); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b421bc7a2e26..cc88e7974422 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1928,7 +1928,7 @@ err: * fail). But any other -EIO isn't ours (e.g. swap in failure) * and so needs to be reported. */ - if (!i915_terminally_wedged(&dev_priv->gpu_error)) + if (!i915_terminally_wedged(dev_priv)) return VM_FAULT_SIGBUS; /* else: fall through */ case -EAGAIN: @@ -2958,7 +2958,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_reset_failed(i915)) return; GEM_BUG_ON(i915->gt.active_requests); @@ -3806,8 +3806,9 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) long ret; /* ABI: return -EIO if already wedged */ - if (i915_terminally_wedged(&dev_priv->gpu_error)) - return -EIO; + ret = i915_terminally_wedged(dev_priv); + if (ret) + return ret; spin_lock(&file_priv->mm.lock); list_for_each_entry(request, &file_priv->mm.request_list, client_link) { @@ -4460,7 +4461,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * back to defaults, recovering from whatever wedged state we left it * in and so worth trying to use the device once more. */ - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_terminally_wedged(i915)) i915_gem_unset_wedged(i915); /* @@ -4504,7 +4505,7 @@ int i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - if (!i915_terminally_wedged(&i915->gpu_error)) { + if (!i915_reset_failed(i915)) { ret = i915_gem_switch_to_kernel_context(i915); if (ret) goto err_unlock; @@ -4625,8 +4626,9 @@ out_unlock: return; err_wedged: - if (!i915_terminally_wedged(&i915->gpu_error)) { - DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); + if (!i915_reset_failed(i915)) { + dev_err(i915->drm.dev, + "Failed to re-initialize GPU, declaring it wedged!\n"); i915_gem_set_wedged(i915); } goto out_unlock; @@ -4731,10 +4733,9 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) init_unused_rings(dev_priv); BUG_ON(!dev_priv->kernel_context); - if (i915_terminally_wedged(&dev_priv->gpu_error)) { - ret = -EIO; + ret = i915_terminally_wedged(dev_priv); + if (ret) goto out; - } ret = i915_ppgtt_init_hw(dev_priv); if (ret) { @@ -5107,7 +5108,7 @@ err_uc_misc: * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. */ - if (!i915_terminally_wedged(&dev_priv->gpu_error)) { + if (!i915_reset_failed(dev_priv)) { i915_load_error(dev_priv, "Failed to initialize GPU, declaring it wedged!\n"); i915_gem_set_wedged(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a61e3a4fc9dc..124b3e279c88 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -559,8 +559,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. */ - if (i915_terminally_wedged(&i915->gpu_error)) - return ERR_PTR(-EIO); + ret = i915_terminally_wedged(i915); + if (ret) + return ERR_PTR(ret); /* * Pinning the contexts may generate requests in order to acquire diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index c234feb5fdf5..19ad56ba22a7 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -1032,7 +1032,7 @@ void i915_reset(struct drm_i915_private *i915, finish: reset_finish(i915); - if (!i915_terminally_wedged(error)) + if (!__i915_wedged(error)) reset_restart(i915); return; @@ -1253,7 +1253,7 @@ void i915_handle_error(struct drm_i915_private *i915, * Try engine reset when available. We fall back to full reset if * single reset fails. */ - if (intel_has_reset_engine(i915) && !i915_terminally_wedged(error)) { + if (intel_has_reset_engine(i915) && !__i915_wedged(error)) { for_each_engine_masked(engine, i915, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, @@ -1339,6 +1339,31 @@ __releases(&i915->gpu_error.reset_backoff_srcu) srcu_read_unlock(&error->reset_backoff_srcu, tag); } +int i915_terminally_wedged(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + + might_sleep(); + + if (!__i915_wedged(error)) + return 0; + + /* Reset still in progress? Maybe we will recover? */ + if (!test_bit(I915_RESET_BACKOFF, &error->flags)) + return -EIO; + + /* XXX intel_reset_finish() still takes struct_mutex!!! */ + if (mutex_is_locked(&i915->drm.struct_mutex)) + return -EAGAIN; + + if (wait_event_interruptible(error->reset_queue, + !test_bit(I915_RESET_BACKOFF, + &error->flags))) + return -EINTR; + + return __i915_wedged(error) ? -EIO : 0; +} + bool i915_reset_flush(struct drm_i915_private *i915) { int err; diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h index 893c5d1c2eb8..16f2389f656f 100644 --- a/drivers/gpu/drm/i915/i915_reset.h +++ b/drivers/gpu/drm/i915/i915_reset.h @@ -36,6 +36,8 @@ bool i915_reset_flush(struct drm_i915_private *i915); int __must_check i915_reset_trylock(struct drm_i915_private *i915); void i915_reset_unlock(struct drm_i915_private *i915, int tag); +int i915_terminally_wedged(struct drm_i915_private *i915); + bool intel_has_gpu_reset(struct drm_i915_private *i915); bool intel_has_reset_engine(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2547e2e51db8..81b80f8fd9ea 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1007,10 +1007,8 @@ static bool ring_is_idle(struct intel_engine_cs *engine) */ bool intel_engine_is_idle(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - /* More white lies, if wedged, hw state is inconsistent */ - if (i915_terminally_wedged(&dev_priv->gpu_error)) + if (i915_reset_failed(engine->i915)) return true; /* Any inflight/incomplete requests? */ @@ -1054,7 +1052,7 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) * If the driver is wedged, HW state may be very inconsistent and * report that it is still busy, even though we have stopped using it. */ - if (i915_terminally_wedged(&dev_priv->gpu_error)) + if (i915_reset_failed(dev_priv)) return true; for_each_engine(engine, dev_priv, id) { @@ -1496,7 +1494,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, va_end(ap); } - if (i915_terminally_wedged(&engine->i915->gpu_error)) + if (i915_reset_failed(engine->i915)) drm_printf(m, "*** WEDGED ***\n"); drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index a219c796e56d..9be033b6f4d2 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -263,7 +263,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (!READ_ONCE(dev_priv->gt.awake)) return; - if (i915_terminally_wedged(&dev_priv->gpu_error)) + if (i915_terminally_wedged(dev_priv)) return; /* As enabling the GPU requires fairly extensive mmio access, diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index a9a2fa35876f..40607ba7dda6 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1764,7 +1764,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) return 0; } - if (i915_terminally_wedged(&dev_priv->gpu_error)) + if (i915_terminally_wedged(dev_priv)) return 0; file = mock_file(dev_priv); diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 337b1f98b923..27d8f853111b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -150,7 +150,7 @@ int i915_active_live_selftests(struct drm_i915_private *i915) SUBTEST(live_active_retire), }; - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_terminally_wedged(i915)) return 0; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index fd89a5a33c1a..9c16aff87028 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -248,12 +248,12 @@ static bool always_valid(struct drm_i915_private *i915) static bool needs_fence_registers(struct drm_i915_private *i915) { - return !i915_terminally_wedged(&i915->gpu_error); + return !i915_terminally_wedged(i915); } static bool needs_mi_store_dword(struct drm_i915_private *i915) { - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_terminally_wedged(i915)) return false; return intel_engine_can_store_dword(i915->engine[RCS]); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index b7b97c57ad05..3a238b9628b3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1632,7 +1632,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) SUBTEST(igt_vm_isolation), }; - if (i915_terminally_wedged(&dev_priv->gpu_error)) + if (i915_terminally_wedged(dev_priv)) return 0; return i915_subtests(tests, dev_priv); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 32dce7176f63..b270eab1cad1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -547,7 +547,7 @@ int i915_gem_evict_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_evict_contexts), }; - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_terminally_wedged(i915)) return 0; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 395ae878e0f7..784982aed625 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -583,7 +583,7 @@ static int igt_mmap_offset_exhaustion(void *arg) for (loop = 0; loop < 3; loop++) { intel_wakeref_t wakeref; - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_terminally_wedged(i915)) break; obj = i915_gem_object_create_internal(i915, PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 6733dc5b6b4c..03cc8ab6a620 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -1246,7 +1246,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_breadcrumbs_smoketest), }; - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_terminally_wedged(i915)) return 0; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index af66e3d4e23a..e0d3122fd35a 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -29,5 +29,5 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) i915_gem_set_wedged(i915); } - return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; + return i915_terminally_wedged(i915); } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index c32bc31192ae..a77e4bf1ab55 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -342,7 +342,7 @@ static int igt_hang_sanitycheck(void *arg) timeout = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_reset_failed(i915)) timeout = -EIO; i915_request_put(rq); @@ -383,7 +383,7 @@ static int igt_global_reset(void *arg) igt_global_reset_unlock(i915); - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_reset_failed(i915)) err = -EIO; return err; @@ -401,13 +401,13 @@ static int igt_wedged_reset(void *arg) i915_gem_set_wedged(i915); - GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); + GEM_BUG_ON(!i915_reset_failed(i915)); i915_reset(i915, ALL_ENGINES, NULL); intel_runtime_pm_put(i915, wakeref); igt_global_reset_unlock(i915); - return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; + return i915_reset_failed(i915) ? -EIO : 0; } static bool wait_for_idle(struct intel_engine_cs *engine) @@ -529,7 +529,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_reset_failed(i915)) err = -EIO; if (active) { @@ -843,7 +843,7 @@ unwind: break; } - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_reset_failed(i915)) err = -EIO; if (flags & TEST_ACTIVE) { @@ -969,7 +969,7 @@ unlock: mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_reset_failed(i915)) return -EIO; return err; @@ -1166,7 +1166,7 @@ fini: unlock: mutex_unlock(&i915->drm.struct_mutex); - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_reset_failed(i915)) return -EIO; return err; @@ -1372,7 +1372,7 @@ unlock: mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_reset_failed(i915)) return -EIO; return err; @@ -1552,7 +1552,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_reset_failed(i915)) err = -EIO; } @@ -1591,7 +1591,7 @@ static int igt_atomic_reset(void *arg) /* Flush any requests before we get started and check basics */ force_reset(i915); - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_reset_failed(i915)) goto unlock; if (intel_has_gpu_reset(i915)) { @@ -1665,7 +1665,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) if (!intel_has_gpu_reset(i915)) return 0; - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_terminally_wedged(i915)) return -EIO; /* we're long past hope of a successful reset */ wakeref = intel_runtime_pm_get(i915); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 58144e024751..0f7a5bf69646 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -895,7 +895,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) if (!HAS_EXECLISTS(i915)) return 0; - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_terminally_wedged(i915)) return 0; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index fb479a2c04fb..e6ffc8ac22dc 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -181,7 +181,7 @@ static int check_whitelist(struct i915_gem_context *ctx, err = 0; igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); - if (i915_terminally_wedged(&ctx->i915->gpu_error)) + if (i915_terminally_wedged(ctx->i915)) err = -EIO; if (err) goto out_put; @@ -510,7 +510,7 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) }; int err; - if (i915_terminally_wedged(&i915->gpu_error)) + if (i915_terminally_wedged(i915)) return 0; mutex_lock(&i915->drm.struct_mutex); -- cgit v1.2.3 From 89531e7d8ee8602b2723431a581250d5d0ec2913 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Feb 2019 09:49:19 +0000 Subject: drm/i915: Replace global_seqno with a hangcheck heartbeat seqno To determine whether an engine has 'stuck', we simply check whether or not is still on the same seqno for several seconds. To keep this simple mechanism intact over the loss of a global seqno, we can simply add a new global heartbeat seqno instead. As we cannot know the sequence in which requests will then be completed, we use a primitive random number generator instead (with a cycle long enough to not matter over an interval of a few thousand requests between hangcheck samples). The alternative to using a dedicated seqno on every request is to issue a heartbeat request and query its progress through the system. Sadly this requires us to reduce struct_mutex so that we can issue requests without requiring that bkl. v2: And without the extra CS_STALL for the hangcheck seqno -- we don't need strict serialisation with what comes later, we just need to be sure we don't write the hangcheck seqno before our batch is flushed. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190226094922.31617-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 7 ++++--- drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++-- drivers/gpu/drm/i915/intel_hangcheck.c | 6 +++--- drivers/gpu/drm/i915/intel_lrc.c | 15 ++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 36 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 19 ++++++++++++++++- 6 files changed, 77 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 37175414ce89..545091a5180b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1295,7 +1295,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) with_intel_runtime_pm(dev_priv, wakeref) { for_each_engine(engine, dev_priv, id) { acthd[id] = intel_engine_get_active_head(engine); - seqno[id] = intel_engine_get_seqno(engine); + seqno[id] = intel_engine_get_hangcheck_seqno(engine); } intel_engine_get_instdone(dev_priv->engine[RCS], &instdone); @@ -1315,8 +1315,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) for_each_engine(engine, dev_priv, id) { seq_printf(m, "%s:\n", engine->name); seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n", - engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine), + engine->hangcheck.last_seqno, + seqno[id], + engine->hangcheck.next_seqno, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 81b80f8fd9ea..57bc5c4fb3ff 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1497,10 +1497,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_reset_failed(engine->i915)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x/%x [%d ms]\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), - engine->hangcheck.seqno, + engine->hangcheck.last_seqno, + engine->hangcheck.next_seqno, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 9be033b6f4d2..f1d8dfc58049 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -133,21 +133,21 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine, struct hangcheck *hc) { hc->acthd = intel_engine_get_active_head(engine); - hc->seqno = intel_engine_get_seqno(engine); + hc->seqno = intel_engine_get_hangcheck_seqno(engine); } static void hangcheck_store_sample(struct intel_engine_cs *engine, const struct hangcheck *hc) { engine->hangcheck.acthd = hc->acthd; - engine->hangcheck.seqno = hc->seqno; + engine->hangcheck.last_seqno = hc->seqno; } static enum intel_engine_hangcheck_action hangcheck_get_action(struct intel_engine_cs *engine, const struct hangcheck *hc) { - if (engine->hangcheck.seqno != hc->seqno) + if (engine->hangcheck.last_seqno != hc->seqno) return ENGINE_ACTIVE_SEQNO; if (intel_engine_is_idle(engine)) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 34a0866959c5..0516fc6b9652 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -178,6 +178,12 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) I915_GEM_HWS_INDEX_ADDR); } +static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_HANGCHECK_ADDR); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -2206,6 +2212,10 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) request->fence.seqno, request->timeline->hwsp_offset); + cs = gen8_emit_ggtt_write(cs, + intel_engine_next_hangcheck_seqno(request->engine), + intel_hws_hangcheck_address(request->engine)); + cs = gen8_emit_ggtt_write(cs, request->global_seqno, intel_hws_seqno_address(request->engine)); @@ -2230,6 +2240,11 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); + cs = gen8_emit_ggtt_write_rcs(cs, + intel_engine_next_hangcheck_seqno(request->engine), + intel_hws_hangcheck_address(request->engine), + 0); + cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno, intel_hws_seqno_address(request->engine), diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7f841dba87b3..870184bbd169 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -43,6 +43,12 @@ */ #define LEGACY_REQUEST_SIZE 200 +static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_HANGCHECK_ADDR); +} + static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -316,6 +322,11 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; @@ -422,6 +433,11 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = rq->timeline->hwsp_offset; *cs++ = rq->fence.seqno; + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = hws_hangcheck_address(rq->engine); + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = (PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | @@ -447,12 +463,15 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->fence.seqno; + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -472,6 +491,10 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->fence.seqno; + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; @@ -487,6 +510,7 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -930,11 +954,16 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR; *cs++ = rq->fence.seqno; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR; *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -956,6 +985,10 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR; *cs++ = rq->fence.seqno; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; + *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); + BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; @@ -964,7 +997,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) } *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5d45ad4ecca9..2869aaa9d225 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -6,6 +6,7 @@ #include #include +#include #include #include "i915_gem_batch_pool.h" @@ -119,7 +120,8 @@ struct intel_instdone { struct intel_engine_hangcheck { u64 acthd; - u32 seqno; + u32 last_seqno; + u32 next_seqno; unsigned long action_timestamp; struct intel_instdone instdone; }; @@ -726,6 +728,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) #define I915_GEM_HWS_PREEMPT 0x32 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) +#define I915_GEM_HWS_HANGCHECK 0x34 +#define I915_GEM_HWS_HANGCHECK_ADDR (I915_GEM_HWS_HANGCHECK * sizeof(u32)) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 @@ -1086,4 +1090,17 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) #endif +static inline u32 +intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine) +{ + return engine->hangcheck.next_seqno = + next_pseudo_random32(engine->hangcheck.next_seqno); +} + +static inline u32 +intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine) +{ + return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK); +} + #endif /* _INTEL_RINGBUFFER_H_ */ -- cgit v1.2.3 From 8892f47742ea25fe31eb27c73ab6b6f5f4616c1c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Feb 2019 09:49:20 +0000 Subject: drm/i915: Remove access to global seqno in the HWSP Stop accessing the HWSP to read the global seqno, and stop tracking the mirror in the engine's execution timeline -- it is unused. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190226094922.31617-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gpu_error.c | 4 --- drivers/gpu/drm/i915/i915_gpu_error.h | 3 -- drivers/gpu/drm/i915/i915_request.c | 27 +++++++----------- drivers/gpu/drm/i915/i915_reset.c | 1 - drivers/gpu/drm/i915/intel_engine_cs.c | 14 +--------- drivers/gpu/drm/i915/intel_lrc.c | 21 ++++---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 +---- drivers/gpu/drm/i915/intel_ringbuffer.h | 40 --------------------------- drivers/gpu/drm/i915/selftests/i915_request.c | 3 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 3 -- 10 files changed, 19 insertions(+), 104 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3f6eddb6f6de..061a767e3bed 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -526,8 +526,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, ee->vm_info.pp_dir_base); } } - err_printf(m, " seqno: 0x%08x\n", ee->seqno); - err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", @@ -1216,8 +1214,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error, ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ee->acthd = intel_engine_get_active_head(engine); - ee->seqno = intel_engine_get_seqno(engine); - ee->last_seqno = intel_engine_last_submit(engine); ee->start = I915_READ_START(engine); ee->head = I915_READ_HEAD(engine); ee->tail = I915_READ_TAIL(engine); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 94eaf8ab9051..19ac102afaff 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -94,8 +94,6 @@ struct i915_gpu_state { u32 cpu_ring_head; u32 cpu_ring_tail; - u32 last_seqno; - /* Register state */ u32 start; u32 tail; @@ -108,7 +106,6 @@ struct i915_gpu_state { u32 bbstate; u32 instpm; u32 instps; - u32 seqno; u64 bbaddr; u64 acthd; u32 fault_reg; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 124b3e279c88..596183f35b78 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -179,12 +179,11 @@ static void free_capture_list(struct i915_request *request) static void __retire_engine_request(struct intel_engine_cs *engine, struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n", + GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", __func__, engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, - hwsp_seqno(rq), - intel_engine_get_seqno(engine)); + hwsp_seqno(rq)); GEM_BUG_ON(!i915_request_completed(rq)); @@ -243,12 +242,11 @@ static void i915_request_retire(struct i915_request *request) { struct i915_active_request *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", request->engine->name, request->fence.context, request->fence.seqno, request->global_seqno, - hwsp_seqno(request), - intel_engine_get_seqno(request->engine)); + hwsp_seqno(request)); lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); @@ -305,12 +303,11 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, - hwsp_seqno(rq), - intel_engine_get_seqno(rq->engine)); + hwsp_seqno(rq)); lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!i915_request_completed(rq)); @@ -354,12 +351,11 @@ void __i915_request_submit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", engine->name, request->fence.context, request->fence.seqno, engine->timeline.seqno + 1, - hwsp_seqno(request), - intel_engine_get_seqno(engine)); + hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline.lock); @@ -371,7 +367,6 @@ void __i915_request_submit(struct i915_request *request) seqno = next_global_seqno(&engine->timeline); GEM_BUG_ON(!seqno); - GEM_BUG_ON(intel_engine_signaled(engine, seqno)); /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); @@ -409,12 +404,11 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", engine->name, request->fence.context, request->fence.seqno, request->global_seqno, - hwsp_seqno(request), - intel_engine_get_seqno(engine)); + hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline.lock); @@ -425,7 +419,6 @@ void __i915_request_unsubmit(struct i915_request *request) */ GEM_BUG_ON(!request->global_seqno); GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); - GEM_BUG_ON(intel_engine_has_completed(engine, request->global_seqno)); engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 39a08932a95a..55d6123dbba4 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -788,7 +788,6 @@ static void nop_submit_request(struct i915_request *request) spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_submit(request); i915_request_mark_complete(request); - intel_engine_write_global_seqno(engine, request->global_seqno); spin_unlock_irqrestore(&engine->timeline.lock, flags); intel_engine_queue_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 57bc5c4fb3ff..ec859c7b8c7c 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -455,12 +455,6 @@ cleanup: return err; } -void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) -{ - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); -} - static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) { i915_gem_batch_pool_init(&engine->batch_pool, engine); @@ -1011,10 +1005,6 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (i915_reset_failed(engine->i915)) return true; - /* Any inflight/incomplete requests? */ - if (!intel_engine_signaled(engine, intel_engine_last_submit(engine))) - return false; - /* Waiting to drain ELSP? */ if (READ_ONCE(engine->execlists.active)) { struct tasklet_struct *t = &engine->execlists.tasklet; @@ -1497,9 +1487,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_reset_failed(engine->i915)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x/%x [%d ms]\n", - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine), + drm_printf(m, "\tHangcheck %x:%x [%d ms]\n", engine->hangcheck.last_seqno, engine->hangcheck.next_seqno, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0516fc6b9652..09b56b84e007 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -528,13 +528,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, rq->fence.context, rq->fence.seqno, hwsp_seqno(rq), - intel_engine_get_seqno(engine), rq_prio(rq)); } else { GEM_BUG_ON(!n); @@ -840,13 +839,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n", + GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", rq->engine->name, (unsigned int)(port - execlists->port), rq->global_seqno, rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq), - intel_engine_get_seqno(rq->engine)); + hwsp_seqno(rq)); GEM_BUG_ON(!execlists->active); execlists_context_schedule_out(rq, @@ -902,8 +900,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s current %d\n", - engine->name, intel_engine_get_seqno(engine)); + GEM_TRACE("%s\n", engine->name); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -952,10 +949,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) kmem_cache_free(engine->i915->priorities, p); } - intel_write_status_page(engine, - I915_GEM_HWS_INDEX, - intel_engine_last_submit(engine)); - /* Remaining _unready_ requests will be nop'ed when submitted */ execlists->queue_priority_hint = INT_MIN; @@ -1071,14 +1064,13 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, rq ? hwsp_seqno(rq) : 0, - intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); /* Check the context/desc id for this event matches */ @@ -1946,10 +1938,9 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) /* Following the reset, we need to reload the CSB read/write pointers */ reset_csb_pointers(&engine->execlists); - GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + GEM_TRACE("%s seqno=%d, stalled? %s\n", engine->name, rq ? rq->global_seqno : 0, - intel_engine_get_seqno(engine), yesno(stalled)); if (!rq) goto out_unlock; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 870184bbd169..2d59e2990448 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -782,10 +782,9 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) } } - GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + GEM_TRACE("%s seqno=%d, stalled? %s\n", engine->name, rq ? rq->global_seqno : 0, - intel_engine_get_seqno(engine), yesno(stalled)); /* * The guilty request will get skipped on a hung engine. @@ -924,10 +923,6 @@ static void cancel_requests(struct intel_engine_cs *engine) i915_request_mark_complete(request); } - intel_write_status_page(engine, - I915_GEM_HWS_INDEX, - intel_engine_last_submit(engine)); - /* Remaining _unready_ requests will be nop'ed when submitted */ spin_unlock_irqrestore(&engine->timeline.lock, flags); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2869aaa9d225..551b3daa741c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -848,8 +848,6 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) return (head - tail - CACHELINE_BYTES) & (size - 1); } -void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); - int intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); @@ -867,44 +865,6 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); -static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) -{ - /* - * We are only peeking at the tail of the submit queue (and not the - * queue itself) in order to gain a hint as to the current active - * state of the engine. Callers are not expected to be taking - * engine->timeline->lock, nor are they expected to be concerned - * wtih serialising this hint with anything, so document it as - * a hint and nothing more. - */ - return READ_ONCE(engine->timeline.seqno); -} - -static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) -{ - return intel_read_status_page(engine, I915_GEM_HWS_INDEX); -} - -static inline bool intel_engine_signaled(struct intel_engine_cs *engine, - u32 seqno) -{ - return i915_seqno_passed(intel_engine_get_seqno(engine), seqno); -} - -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, - u32 seqno) -{ - GEM_BUG_ON(!seqno); - return intel_engine_signaled(engine, seqno); -} - -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, - u32 seqno) -{ - GEM_BUG_ON(!seqno); - return intel_engine_signaled(engine, seqno - 1); -} - void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 03cc8ab6a620..7da52e3d67af 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -226,8 +226,7 @@ static int igt_request_rewind(void *arg) mutex_unlock(&i915->drm.struct_mutex); if (i915_request_wait(vip, 0, HZ) == -ETIME) { - pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", - vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); + pr_err("timed out waiting for high priority request\n"); goto err; } diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 08f0cab02e0f..79bf27606ab8 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -86,8 +86,6 @@ static struct mock_request *first_request(struct mock_engine *engine) static void advance(struct mock_request *request) { list_del_init(&request->link); - intel_engine_write_global_seqno(request->base.engine, - request->base.global_seqno); i915_request_mark_complete(&request->base); GEM_BUG_ON(!i915_request_completed(&request->base)); @@ -278,7 +276,6 @@ void mock_engine_flush(struct intel_engine_cs *engine) void mock_engine_reset(struct intel_engine_cs *engine) { - intel_engine_write_global_seqno(engine, 0); } void mock_engine_free(struct intel_engine_cs *engine) -- cgit v1.2.3 From b300fde8965fdd628341c4b602481ebde8ac9cb7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Feb 2019 09:49:21 +0000 Subject: drm/i915: Remove i915_request.global_seqno Having weaned the interrupt handling off using a single global execution queue, we no longer need to emit a global_seqno. Note that we still have a few assumptions about execution order along engine timelines, but this removes the most obvious artefact! Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190226094922.31617-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gpu_error.c | 34 ++-------------- drivers/gpu/drm/i915/i915_gpu_error.h | 2 - drivers/gpu/drm/i915/i915_request.c | 34 +++------------- drivers/gpu/drm/i915/i915_request.h | 32 --------------- drivers/gpu/drm/i915/i915_trace.h | 25 ++++-------- drivers/gpu/drm/i915/intel_engine_cs.c | 5 +-- drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 34 ++-------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 50 +++--------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 - drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 5 +-- drivers/gpu/drm/i915/selftests/mock_engine.c | 1 - 12 files changed, 32 insertions(+), 194 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 061a767e3bed..fa86c60fb56c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -380,19 +380,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_printf(m, "%s [%d]:\n", name, count); while (count--) { - err_printf(m, " %08x_%08x %8u %02x %02x %02x", + err_printf(m, " %08x_%08x %8u %02x %02x", upper_32_bits(err->gtt_offset), lower_32_bits(err->gtt_offset), err->size, err->read_domains, - err->write_domain, - err->wseqno); + err->write_domain); err_puts(m, tiling_flag(err->tiling)); err_puts(m, dirty_flag(err->dirty)); err_puts(m, purgeable_flag(err->purgeable)); err_puts(m, err->userptr ? " userptr" : ""); - err_puts(m, err->engine != -1 ? " " : ""); - err_puts(m, engine_name(m->i915, err->engine)); err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); if (err->name) @@ -1048,27 +1045,6 @@ i915_error_object_create(struct drm_i915_private *i915, return dst; } -/* The error capture is special as tries to run underneath the normal - * locking rules - so we use the raw version of the i915_active_request lookup. - */ -static inline u32 -__active_get_seqno(struct i915_active_request *active) -{ - struct i915_request *request; - - request = __i915_active_request_peek(active); - return request ? request->global_seqno : 0; -} - -static inline int -__active_get_engine_id(struct i915_active_request *active) -{ - struct i915_request *request; - - request = __i915_active_request_peek(active); - return request ? request->engine->id : -1; -} - static void capture_bo(struct drm_i915_error_buffer *err, struct i915_vma *vma) { @@ -1077,9 +1053,6 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->size = obj->base.size; err->name = obj->base.name; - err->wseqno = __active_get_seqno(&obj->frontbuffer_write); - err->engine = __active_get_engine_id(&obj->frontbuffer_write); - err->gtt_offset = vma->node.start; err->read_domains = obj->read_domains; err->write_domain = obj->write_domain; @@ -1284,7 +1257,8 @@ static void record_request(struct i915_request *request, struct i915_gem_context *ctx = request->gem_context; erq->flags = request->fence.flags; - erq->context = ctx->hw_id; + erq->context = request->fence.context; + erq->seqno = request->fence.seqno; erq->sched_attr = request->sched.attr; erq->jiffies = request->emitted_jiffies; erq->start = i915_ggtt_offset(request->ring->vma); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 19ac102afaff..8c1569c1830d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -164,7 +164,6 @@ struct i915_gpu_state { struct drm_i915_error_buffer { u32 size; u32 name; - u32 wseqno; u64 gtt_offset; u32 read_domains; u32 write_domain; @@ -173,7 +172,6 @@ struct i915_gpu_state { u32 dirty:1; u32 purgeable:1; u32 userptr:1; - s32 engine:4; u32 cache_level:3; } *active_bo[I915_NUM_ENGINES], *pinned_bo; u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 596183f35b78..935db5548f80 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -179,10 +179,9 @@ static void free_capture_list(struct i915_request *request) static void __retire_engine_request(struct intel_engine_cs *engine, struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n", __func__, engine->name, rq->fence.context, rq->fence.seqno, - rq->global_seqno, hwsp_seqno(rq)); GEM_BUG_ON(!i915_request_completed(rq)); @@ -242,10 +241,9 @@ static void i915_request_retire(struct i915_request *request) { struct i915_active_request *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", request->engine->name, request->fence.context, request->fence.seqno, - request->global_seqno, hwsp_seqno(request)); lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -303,10 +301,9 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, - rq->global_seqno, hwsp_seqno(rq)); lockdep_assert_held(&rq->i915->drm.struct_mutex); @@ -339,22 +336,13 @@ static void move_to_timeline(struct i915_request *request, spin_unlock(&request->timeline->lock); } -static u32 next_global_seqno(struct i915_timeline *tl) -{ - if (!++tl->seqno) - ++tl->seqno; - return tl->seqno; -} - void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld -> current %d\n", engine->name, request->fence.context, request->fence.seqno, - engine->timeline.seqno + 1, hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); @@ -363,16 +351,10 @@ void __i915_request_submit(struct i915_request *request) if (i915_gem_context_is_banned(request->gem_context)) i915_request_skip(request, -EIO); - GEM_BUG_ON(request->global_seqno); - - seqno = next_global_seqno(&engine->timeline); - GEM_BUG_ON(!seqno); - /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); - request->global_seqno = seqno; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !i915_request_enable_breadcrumb(request)) intel_engine_queue_breadcrumbs(engine); @@ -404,10 +386,9 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", engine->name, request->fence.context, request->fence.seqno, - request->global_seqno, hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); @@ -417,13 +398,9 @@ void __i915_request_unsubmit(struct i915_request *request) * Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ - GEM_BUG_ON(!request->global_seqno); - GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); - engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - request->global_seqno = 0; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) i915_request_cancel_breadcrumb(request); GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); @@ -637,7 +614,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ - rq->global_seqno = 0; rq->file_priv = NULL; rq->batch = NULL; rq->capture_list = NULL; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 40f3e8dcbdd5..1e127c1c53fa 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -147,14 +147,6 @@ struct i915_request { */ const u32 *hwsp_seqno; - /** - * GEM sequence number associated with this request on the - * global execution timeline. It is zero when the request is not - * on the HW queue (i.e. not on the engine timeline list). - * Its value is guarded by the timeline spinlock. - */ - u32 global_seqno; - /** Position in the ring of the start of the request */ u32 head; @@ -247,30 +239,6 @@ i915_request_put(struct i915_request *rq) dma_fence_put(&rq->fence); } -/** - * i915_request_global_seqno - report the current global seqno - * @request - the request - * - * A request is assigned a global seqno only when it is on the hardware - * execution queue. The global seqno can be used to maintain a list of - * requests on the same engine in retirement order, for example for - * constructing a priority queue for waiting. Prior to its execution, or - * if it is subsequently removed in the event of preemption, its global - * seqno is zero. As both insertion and removal from the execution queue - * may operate in IRQ context, it is not guarded by the usual struct_mutex - * BKL. Instead those relying on the global seqno must be prepared for its - * value to change between reads. Only when the request is complete can - * the global seqno be stable (due to the memory barriers on submitting - * the commands to the hardware to write the breadcrumb, if the HWS shows - * that it has passed the global seqno and the global seqno is unchanged - * after the read, it is indeed complete). - */ -static inline u32 -i915_request_global_seqno(const struct i915_request *request) -{ - return READ_ONCE(request->global_seqno); -} - int i915_request_await_object(struct i915_request *to, struct drm_i915_gem_object *obj, bool write); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 0d9cedb892b0..12893304c8f8 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -708,7 +708,6 @@ DECLARE_EVENT_CLASS(i915_request, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global) ), TP_fast_assign( @@ -718,13 +717,11 @@ DECLARE_EVENT_CLASS(i915_request, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global = rq->global_seqno; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global) + __entry->hw_id, __entry->ctx, __entry->seqno) ); DEFINE_EVENT(i915_request, i915_request_add, @@ -754,7 +751,6 @@ TRACE_EVENT(i915_request_in, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global_seqno) __field(u32, port) __field(u32, prio) ), @@ -766,15 +762,14 @@ TRACE_EVENT(i915_request_in, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; __entry->prio = rq->sched.attr.priority; __entry->port = port; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, global=%u, port=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->prio, __entry->global_seqno, __entry->port) + __entry->prio, __entry->port) ); TRACE_EVENT(i915_request_out, @@ -788,7 +783,6 @@ TRACE_EVENT(i915_request_out, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global_seqno) __field(u32, completed) ), @@ -799,14 +793,13 @@ TRACE_EVENT(i915_request_out, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; __entry->completed = i915_request_completed(rq); ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, completed?=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global_seqno, __entry->completed) + __entry->completed) ); #else @@ -849,7 +842,6 @@ TRACE_EVENT(i915_request_wait_begin, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global) __field(unsigned int, flags) ), @@ -866,14 +858,13 @@ TRACE_EVENT(i915_request_wait_begin, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global = rq->global_seqno; __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, blocking=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), + !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) ); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ec859c7b8c7c..b7b626195eda 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1271,15 +1271,14 @@ static void print_request(struct drm_printer *m, x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n", + drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", prefix, - rq->global_seqno, + rq->fence.context, rq->fence.seqno, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? "*" : "", test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags) ? "+" : "", - rq->fence.context, rq->fence.seqno, buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 8bc8aa54aa35..20cbceeabeae 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -535,7 +535,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) spin_lock(&client->wq_lock); guc_wq_item_append(client, engine->guc_id, ctx_desc, - ring_tail, rq->global_seqno); + ring_tail, rq->fence.seqno); guc_ring_doorbell(client); client->submissions[engine->id] += 1; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 09b56b84e007..c4f4966b0f4f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -172,12 +172,6 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_INDEX_ADDR); -} - static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -528,10 +522,9 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", engine->name, n, port[n].context_id, count, - rq->global_seqno, rq->fence.context, rq->fence.seqno, hwsp_seqno(rq), rq_prio(rq)); @@ -839,10 +832,9 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", + GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n", rq->engine->name, (unsigned int)(port - execlists->port), - rq->global_seqno, rq->fence.context, rq->fence.seqno, hwsp_seqno(rq)); @@ -924,8 +916,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->timeline.requests, link) { - GEM_BUG_ON(!rq->global_seqno); - if (!i915_request_signaled(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -1064,10 +1054,9 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", engine->name, port->context_id, count, - rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, rq ? hwsp_seqno(rq) : 0, @@ -1938,10 +1927,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) /* Following the reset, we need to reload the CSB read/write pointers */ reset_csb_pointers(&engine->execlists); - GEM_TRACE("%s seqno=%d, stalled? %s\n", - engine->name, - rq ? rq->global_seqno : 0, - yesno(stalled)); + GEM_TRACE("%s stalled? %s\n", engine->name, yesno(stalled)); if (!rq) goto out_unlock; @@ -2196,9 +2182,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { - /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ - BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - cs = gen8_emit_ggtt_write(cs, request->fence.seqno, request->timeline->hwsp_offset); @@ -2207,10 +2190,6 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) intel_engine_next_hangcheck_seqno(request->engine), intel_hws_hangcheck_address(request->engine)); - cs = gen8_emit_ggtt_write(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine)); - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2236,11 +2215,6 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) intel_hws_hangcheck_address(request->engine), 0); - cs = gen8_emit_ggtt_write_rcs(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine), - PIPE_CONTROL_CS_STALL); - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2d59e2990448..1b96b0960adc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -49,12 +49,6 @@ static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine) I915_GEM_HWS_HANGCHECK_ADDR); } -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_INDEX_ADDR); -} - unsigned int intel_ring_update_space(struct intel_ring *ring) { unsigned int space; @@ -327,11 +321,6 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; - *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; @@ -438,13 +427,6 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = hws_hangcheck_address(rq->engine); *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL); - *cs++ = intel_hws_seqno_address(rq->engine); - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; @@ -467,11 +449,8 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -495,10 +474,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->global_seqno; - for (i = 0; i < GEN7_XCS_WA; i++) { *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR; @@ -510,7 +485,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -782,10 +756,8 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) } } - GEM_TRACE("%s seqno=%d, stalled? %s\n", - engine->name, - rq ? rq->global_seqno : 0, - yesno(stalled)); + GEM_TRACE("%s stalled? %s\n", engine->name, yesno(stalled)); + /* * The guilty request will get skipped on a hung engine. * @@ -915,8 +887,6 @@ static void cancel_requests(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. */ list_for_each_entry(request, &engine->timeline.requests, link) { - GEM_BUG_ON(!request->global_seqno); - if (!i915_request_signaled(request)) dma_fence_set_error(&request->fence, -EIO); @@ -953,12 +923,7 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -976,10 +941,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = MI_FLUSH; - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); @@ -987,11 +948,12 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; } *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 551b3daa741c..de8dba7565b0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -724,8 +724,6 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * * The area from dword 0x30 to 0x3ff is available for driver usage. */ -#define I915_GEM_HWS_INDEX 0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) #define I915_GEM_HWS_PREEMPT 0x32 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) #define I915_GEM_HWS_HANGCHECK 0x34 diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index a77e4bf1ab55..fa02cf9ce0cf 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -571,11 +571,10 @@ static int active_request_put(struct i915_request *rq) return 0; if (i915_request_wait(rq, 0, 5 * HZ) < 0) { - GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld, seqno %d.\n", + GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", rq->engine->name, rq->fence.context, - rq->fence.seqno, - i915_request_global_seqno(rq)); + rq->fence.seqno); GEM_TRACE_DUMP(); i915_gem_set_wedged(rq->i915); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 79bf27606ab8..6f3fb803c747 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -196,7 +196,6 @@ static void mock_submit_request(struct i915_request *request) unsigned long flags; i915_request_submit(request); - GEM_BUG_ON(!request->global_seqno); spin_lock_irqsave(&engine->hw_lock, flags); list_add_tail(&mock->link, &engine->hw_queue); -- cgit v1.2.3 From 0b702dca26580e3bbfbbaf22dfc29280b6263414 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Feb 2019 11:45:50 +0000 Subject: drm/i915: Avoid waking the engines just to check if they are idle Exploit that reads of the ring registers return 0 from the engine when it is idle and we do not apply forcewake to know that if the engine is idle then both reads will be identical (and so we interpret the ring as idle). The ulterior motive is to try and reduce the number of spurious wakeups to avoid untimely death, such as: <3> [85.046836] [drm:fw_domains_get [i915]] *ERROR* render: timed out waiting for forcewake ack request. <4> [85.051916] ------------[ cut here ]------------ <4> [85.051917] GT thread status wait timed out <4> [85.051963] WARNING: CPU: 2 PID: 2195 at drivers/gpu/drm/i915/intel_uncore.c:303 __gen6_gt_wait_for_thread_c0+0x6e/0xa0 [i915] <4> [85.051964] Modules linked in: snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic i915 x86_pkg_temp_thermal coretemp mei_hdcp crct10dif_pclmul crc32_pclmul snd_hda_intel ghash_clmulni_intel snd_hda_codec broadcom bcm_phy_lib i2c_i801 snd_hwdep snd_hda_core tg3 snd_pcm ptp pps_core mei_me mei prime_numbers lpc_ich <4> [85.051980] CPU: 2 PID: 2195 Comm: drm_read Tainted: G U 5.0.0-rc8-CI-CI_DRM_5662+ #1 <4> [85.051981] Hardware name: Dell Inc. XPS 8300 /0Y2MRG, BIOS A06 10/17/2011 <4> [85.052012] RIP: 0010:__gen6_gt_wait_for_thread_c0+0x6e/0xa0 [i915] <4> [85.052015] Code: 8b 92 5c 80 13 00 83 e2 07 75 d5 5b 5d c3 80 3d 5b 6a 1a 00 00 75 f4 48 c7 c7 38 21 31 a0 c6 05 4b 6a 1a 00 01 e8 e2 84 ea e0 <0f> 0b eb dd 80 3d 3a 6a 1a 00 00 75 98 48 c7 c6 08 21 31 a0 48 c7 <4> [85.052016] RSP: 0018:ffffc9000043bd00 EFLAGS: 00010086 <4> [85.052019] RAX: 0000000000000000 RBX: ffff888217c50000 RCX: 0000000000000000 <4> [85.052020] RDX: 0000000000000007 RSI: ffffffff820cb141 RDI: 00000000ffffffff <4> [85.052022] RBP: 00000013cd30f2fb R08: 0000000000000000 R09: 0000000000000001 <4> [85.052024] R10: ffffc9000043bce0 R11: 0000000000000000 R12: ffff888217c50ee0 <4> [85.052025] R13: 0000000000000001 R14: 00000000ffffffff R15: ffff888218076530 <4> [85.052028] FS: 00007fc79d049980(0000) GS:ffff888227a80000(0000) knlGS:0000000000000000 <4> [85.052029] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [85.052031] CR2: 00007f782e2940f8 CR3: 000000022458e006 CR4: 00000000000606e0 <4> [85.052033] Call Trace: <4> [85.052064] gen6_read32+0x14e/0x250 [i915] <4> [85.052096] intel_engine_is_idle+0x7d/0x180 [i915] <4> [85.052126] intel_engines_are_idle+0x29/0x50 [i915] <4> [85.052153] i915_drop_caches_set+0x21c/0x290 [i915] <4> [85.052160] simple_attr_write+0xb0/0xd0 <4> [85.052165] full_proxy_write+0x51/0x80 <4> [85.052170] __vfs_write+0x31/0x190 <4> [85.052176] ? rcu_read_lock_sched_held+0x6f/0x80 <4> [85.052178] ? rcu_sync_lockdep_assert+0x29/0x50 <4> [85.052181] ? __sb_start_write+0x152/0x1f0 <4> [85.052183] ? __sb_start_write+0x163/0x1f0 <4> [85.052187] vfs_write+0xbd/0x1b0 <4> [85.052191] ksys_write+0x50/0xc0 <4> [85.052196] do_syscall_64+0x55/0x190 <4> [85.052200] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4> [85.052202] RIP: 0033:0x7fc79c9d3281 <4> [85.052204] Code: c3 0f 1f 84 00 00 00 00 00 48 8b 05 59 8d 20 00 c3 0f 1f 84 00 00 00 00 00 8b 05 8a d1 20 00 85 c0 75 16 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53 <4> [85.052206] RSP: 002b:00007fffa4a0a7f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 <4> [85.052208] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fc79c9d3281 <4> [85.052210] RDX: 0000000000000005 RSI: 00007fffa4a0a880 RDI: 0000000000000008 <4> [85.052212] RBP: 00007fffa4a0a820 R08: 0000000000000000 R09: 0000000000000000 <4> [85.052213] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc79c9bc718 <4> [85.052215] R13: 0000000000000003 R14: 00007fc79c9c1628 R15: 00007fc79c9bdd80 <4> [85.052223] irq event stamp: 71630 <4> [85.052226] hardirqs last enabled at (71629): [] _raw_spin_unlock_irqrestore+0x4c/0x60 <4> [85.052228] hardirqs last disabled at (71630): [] _raw_spin_lock_irqsave+0xd/0x50 <4> [85.052231] softirqs last enabled at (70444): [] __do_softirq+0x33a/0x4b9 <4> [85.052234] softirqs last disabled at (70433): [] irq_exit+0xd1/0xe0 <4> [85.052264] WARNING: CPU: 2 PID: 2195 at drivers/gpu/drm/i915/intel_uncore.c:303 __gen6_gt_wait_for_thread_c0+0x6e/0xa0 [i915] Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190227114958.32438-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index b7b626195eda..4f244019560d 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -968,6 +968,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; intel_wakeref_t wakeref; + unsigned long flags; bool idle = true; if (I915_SELFTEST_ONLY(!engine->mmio_base)) @@ -978,15 +979,19 @@ static bool ring_is_idle(struct intel_engine_cs *engine) if (!wakeref) return true; - /* First check that no commands are left in the ring */ - if ((I915_READ_HEAD(engine) & HEAD_ADDR) != - (I915_READ_TAIL(engine) & TAIL_ADDR)) - idle = false; + spin_lock_irqsave(&dev_priv->uncore.lock, flags); - /* No bit for gen2, so assume the CS parser is idle */ - if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) + /* + * Check that no commands are left in the ring. + * + * If the engine is not awake, both reads return 0 as we do so without + * forcewake. + */ + if ((I915_READ_FW(RING_HEAD(engine->mmio_base)) & HEAD_ADDR) != + (I915_READ_FW(RING_TAIL(engine->mmio_base)) & TAIL_ADDR)) idle = false; + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); intel_runtime_pm_put(dev_priv, wakeref); return idle; -- cgit v1.2.3 From 2d5eaad007d971b8cd8cd8122f594b04e292b567 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Feb 2019 10:24:00 +0000 Subject: drm/i915: Compute the global scheduler caps Do a pass over all the engines upon starting to determine the global scheduler capability flags (those that are agreed upon by all). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190226102404.29153-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 39 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 6 ----- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 4 files changed, 43 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2b261524cfa4..de8b92da56cf 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4698,6 +4698,8 @@ static int __i915_gem_restart_engines(void *data) } } + intel_engines_set_scheduler_caps(i915); + return 0; } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 4f244019560d..43ce4c5c56c9 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -608,6 +608,45 @@ err_hwsp: return err; } +void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) +{ + static const struct { + u8 engine; + u8 sched; + } map[] = { +#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) } + MAP(PREEMPTION, PREEMPTION), +#undef MAP + }; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 enabled, disabled; + + enabled = 0; + disabled = 0; + for_each_engine(engine, i915, id) { /* all engines must agree! */ + int i; + + if (engine->schedule) + enabled |= (I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY); + else + disabled |= (I915_SCHEDULER_CAP_ENABLED | + I915_SCHEDULER_CAP_PRIORITY); + + for (i = 0; i < ARRAY_SIZE(map); i++) { + if (engine->flags & BIT(map[i].engine)) + enabled |= BIT(map[i].sched); + else + disabled |= BIT(map[i].sched); + } + } + + i915->caps.scheduler = enabled & ~disabled; + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED)) + i915->caps.scheduler = 0; +} + static void __intel_context_unpin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c4f4966b0f4f..81d188508b44 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2292,12 +2292,6 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (engine->i915->preempt_context) engine->flags |= I915_ENGINE_HAS_PREEMPTION; - - engine->i915->caps.scheduler = - I915_SCHEDULER_CAP_ENABLED | - I915_SCHEDULER_CAP_PRIORITY; - if (intel_engine_has_preemption(engine)) - engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION; } static void diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index de8dba7565b0..533e2053664e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -593,6 +593,8 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_HAS_PREEMPTION; } +void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); + static inline bool __execlists_need_preempt(int prio, int last) { /* -- cgit v1.2.3 From 44f8b8022d4cde821819397e9f9f57eb30c51f93 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Feb 2019 20:46:53 +0000 Subject: Revert "drm/i915: Avoid waking the engines just to check if they are idle" This reverts commit 0b702dca26580e3bbfbbaf22dfc29280b6263414. CI reports that this is not as reliable as it first appears, with failures starting to sporadically occur in selftests. Fixes: 0b702dca2658 ("drm/i915: Avoid waking the engines just to check if they are idle") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Jani Nikula Cc: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190227204654.14907-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 43ce4c5c56c9..ef49b1b0537b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1007,7 +1007,6 @@ static bool ring_is_idle(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; intel_wakeref_t wakeref; - unsigned long flags; bool idle = true; if (I915_SELFTEST_ONLY(!engine->mmio_base)) @@ -1018,19 +1017,15 @@ static bool ring_is_idle(struct intel_engine_cs *engine) if (!wakeref) return true; - spin_lock_irqsave(&dev_priv->uncore.lock, flags); + /* First check that no commands are left in the ring */ + if ((I915_READ_HEAD(engine) & HEAD_ADDR) != + (I915_READ_TAIL(engine) & TAIL_ADDR)) + idle = false; - /* - * Check that no commands are left in the ring. - * - * If the engine is not awake, both reads return 0 as we do so without - * forcewake. - */ - if ((I915_READ_FW(RING_HEAD(engine->mmio_base)) & HEAD_ADDR) != - (I915_READ_FW(RING_TAIL(engine->mmio_base)) & TAIL_ADDR)) + /* No bit for gen2, so assume the CS parser is idle */ + if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) idle = false; - spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); intel_runtime_pm_put(dev_priv, wakeref); return idle; -- cgit v1.2.3 From bd2be1418659abd7b1cdecc7d23d86314b0e3496 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Feb 2019 21:41:59 +0000 Subject: drm/i915: Report engines are idle if already parked If we have parked, then we must have passed an idleness test and still be idle. We chose not to use this shortcut in the past so that we could use the idleness test at any time and inspect HW. However, some HW like Sandybridge, doesn't like being woken up frivolously, so avoid doing so. References: 0b702dca2658 ("drm/i915: Avoid waking the engines just to check if they are idle") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190227214159.7946-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ef49b1b0537b..df8f88142f1d 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1072,7 +1072,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return ring_is_idle(engine); } -bool intel_engines_are_idle(struct drm_i915_private *dev_priv) +bool intel_engines_are_idle(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1081,10 +1081,14 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) * If the driver is wedged, HW state may be very inconsistent and * report that it is still busy, even though we have stopped using it. */ - if (i915_reset_failed(dev_priv)) + if (i915_reset_failed(i915)) return true; - for_each_engine(engine, dev_priv, id) { + /* Already parked (and passed an idleness test); must still be idle */ + if (!READ_ONCE(i915->gt.awake)) + return true; + + for_each_engine(engine, i915, id) { if (!intel_engine_is_idle(engine)) return false; } -- cgit v1.2.3 From e88619646971168e3baedc850c21243d303e31ca Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Mar 2019 17:09:00 +0000 Subject: drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ Having introduced per-context seqno, we now have a means to identity progress across the system without feel of rollback as befell the global_seqno. That is we can program a MI_SEMAPHORE_WAIT operation in advance of submission safe in the knowledge that our target seqno and address is stable. However, since we are telling the GPU to busy-spin on the target address until it matches the signaling seqno, we only want to do so when we are sure that busy-spin will be completed quickly. To achieve this we only submit the request to HW once the signaler is itself executing (modulo preemption causing us to wait longer), and we only do so for default and above priority requests (so that idle priority tasks never themselves hog the GPU waiting for others). As might be reasonably expected, HW semaphores excel in inter-engine synchronisation microbenchmarks (where the 3x reduced latency / increased throughput more than offset the power cost of spinning on a second ring) and have significant improvement (can be up to ~10%, most see no change) for single clients that utilize multiple engines (typically media players and transcoders), without regressing multiple clients that can saturate the system or changing the power envelope dramatically. v3: Drop the older NEQ branch, now we pin the signaler's HWSP anyway. v4: Tell the world and include it as part of scheduler caps. Testcase: igt/gem_exec_whisper Testcase: igt/benchmarks/gem_wsim Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190301170901.8340-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 138 +++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_request.h | 26 +++++- drivers/gpu/drm/i915/i915_sw_fence.c | 4 +- drivers/gpu/drm/i915/i915_sw_fence.h | 3 + drivers/gpu/drm/i915/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/intel_gpu_commands.h | 9 +- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 7 ++ include/uapi/drm/i915_drm.h | 1 + 10 files changed, 181 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c6354f6cdbdb..c08abdef5eb6 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -351,7 +351,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL); break; case I915_PARAM_HAS_SEMAPHORES: - value = 0; + value = !!(dev_priv->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES); break; case I915_PARAM_HAS_SECURE_BATCHES: value = capable(CAP_SYS_ADMIN); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d354967d6ae8..59e30b8c4ee9 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -22,8 +22,9 @@ * */ -#include #include +#include +#include #include #include #include @@ -32,9 +33,16 @@ #include "i915_active.h" #include "i915_reset.h" +struct execute_cb { + struct list_head link; + struct irq_work work; + struct i915_sw_fence *fence; +}; + static struct i915_global_request { struct kmem_cache *slab_requests; struct kmem_cache *slab_dependencies; + struct kmem_cache *slab_execute_cbs; } global; static const char *i915_fence_get_driver_name(struct dma_fence *fence) @@ -325,6 +333,69 @@ void i915_request_retire_upto(struct i915_request *rq) } while (tmp != rq); } +static void irq_execute_cb(struct irq_work *wrk) +{ + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); + + i915_sw_fence_complete(cb->fence); + kmem_cache_free(global.slab_execute_cbs, cb); +} + +static void __notify_execute_cb(struct i915_request *rq) +{ + struct execute_cb *cb; + + lockdep_assert_held(&rq->lock); + + if (list_empty(&rq->execute_cb)) + return; + + list_for_each_entry(cb, &rq->execute_cb, link) + irq_work_queue(&cb->work); + + /* + * XXX Rollback on __i915_request_unsubmit() + * + * In the future, perhaps when we have an active time-slicing scheduler, + * it will be interesting to unsubmit parallel execution and remove + * busywaits from the GPU until their master is restarted. This is + * quite hairy, we have to carefully rollback the fence and do a + * preempt-to-idle cycle on the target engine, all the while the + * master execute_cb may refire. + */ + INIT_LIST_HEAD(&rq->execute_cb); +} + +static int +i915_request_await_execution(struct i915_request *rq, + struct i915_request *signal, + gfp_t gfp) +{ + struct execute_cb *cb; + + if (i915_request_is_active(signal)) + return 0; + + cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); + if (!cb) + return -ENOMEM; + + cb->fence = &rq->submit; + i915_sw_fence_await(cb->fence); + init_irq_work(&cb->work, irq_execute_cb); + + spin_lock_irq(&signal->lock); + if (i915_request_is_active(signal)) { + i915_sw_fence_complete(cb->fence); + kmem_cache_free(global.slab_execute_cbs, cb); + } else { + list_add_tail(&cb->link, &signal->execute_cb); + } + spin_unlock_irq(&signal->lock); + + return 0; +} + static void move_to_timeline(struct i915_request *request, struct i915_timeline *timeline) { @@ -361,6 +432,8 @@ void __i915_request_submit(struct i915_request *request) !i915_request_enable_breadcrumb(request)) intel_engine_queue_breadcrumbs(engine); + __notify_execute_cb(request); + spin_unlock(&request->lock); engine->emit_fini_breadcrumb(request, @@ -608,6 +681,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) } INIT_LIST_HEAD(&rq->active_list); + INIT_LIST_HEAD(&rq->execute_cb); tl = ce->ring->timeline; ret = i915_timeline_get_seqno(tl, rq, &seqno); @@ -696,6 +770,52 @@ err_unreserve: return ERR_PTR(ret); } +static int +emit_semaphore_wait(struct i915_request *to, + struct i915_request *from, + gfp_t gfp) +{ + u32 hwsp_offset; + u32 *cs; + int err; + + GEM_BUG_ON(!from->timeline->has_initial_breadcrumb); + GEM_BUG_ON(INTEL_GEN(to->i915) < 8); + + /* We need to pin the signaler's HWSP until we are finished reading. */ + err = i915_timeline_read_hwsp(from, to, &hwsp_offset); + if (err) + return err; + + /* Only submit our spinner after the signaler is running! */ + err = i915_request_await_execution(to, from, gfp); + if (err) + return err; + + cs = intel_ring_begin(to, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Using greater-than-or-equal here means we have to worry + * about seqno wraparound. To side step that issue, we swap + * the timeline HWSP upon wrapping, so that everyone listening + * for the old (pre-wrap) values do not see the much smaller + * (post-wrap) values than they were expecting (and so wait + * forever). + */ + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = from->fence.seqno; + *cs++ = hwsp_offset; + *cs++ = 0; + + intel_ring_advance(to, cs); + return 0; +} + static int i915_request_await_request(struct i915_request *to, struct i915_request *from) { @@ -717,6 +837,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, I915_FENCE_GFP); + } else if (intel_engine_has_semaphores(to->engine) && + to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) { + ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); } else { ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, @@ -1208,14 +1331,23 @@ int __init i915_global_request_init(void) if (!global.slab_requests) return -ENOMEM; + global.slab_execute_cbs = KMEM_CACHE(execute_cb, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_TYPESAFE_BY_RCU); + if (!global.slab_execute_cbs) + goto err_requests; + global.slab_dependencies = KMEM_CACHE(i915_dependency, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT); if (!global.slab_dependencies) - goto err_requests; + goto err_execute_cbs; return 0; +err_execute_cbs: + kmem_cache_destroy(global.slab_execute_cbs); err_requests: kmem_cache_destroy(global.slab_requests); return -ENOMEM; @@ -1224,11 +1356,13 @@ err_requests: void i915_global_request_shrink(void) { kmem_cache_shrink(global.slab_dependencies); + kmem_cache_shrink(global.slab_execute_cbs); kmem_cache_shrink(global.slab_requests); } void i915_global_request_exit(void) { kmem_cache_destroy(global.slab_dependencies); + kmem_cache_destroy(global.slab_execute_cbs); kmem_cache_destroy(global.slab_requests); } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 09eaad06d2c6..4dd1dea1d1af 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -129,6 +129,7 @@ struct i915_request { */ struct i915_sw_fence submit; wait_queue_entry_t submitq; + struct list_head execute_cb; /* * A list of everyone we wait upon, and everyone who waits upon us. @@ -343,10 +344,27 @@ static inline bool __i915_request_has_started(const struct i915_request *rq) * i915_request_started - check if the request has begun being executed * @rq: the request * - * Returns true if the request has been submitted to hardware, and the hardware - * has advanced passed the end of the previous request and so should be either - * currently processing the request (though it may be preempted and so - * not necessarily the next request to complete) or have completed the request. + * If the timeline is not using initial breadcrumbs, a request is + * considered started if the previous request on its timeline (i.e. + * context) has been signaled. + * + * If the timeline is using semaphores, it will also be emitting an + * "initial breadcrumb" after the semaphores are complete and just before + * it began executing the user payload. A request can therefore be active + * on the HW and not yet started as it is still busywaiting on its + * dependencies (via HW semaphores). + * + * If the request has started, its dependencies will have been signaled + * (either by fences or by semaphores) and it will have begun processing + * the user payload. + * + * However, even if a request has started, it may have been preempted and + * so no longer active, or it may have already completed. + * + * See also i915_request_is_active(). + * + * Returns true if the request has begun executing the user payload, or + * has completed: */ static inline bool i915_request_started(const struct i915_request *rq) { diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 7c58b049ecb5..8d1400d378d7 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -192,7 +192,7 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence, __i915_sw_fence_notify(fence, FENCE_FREE); } -static void i915_sw_fence_complete(struct i915_sw_fence *fence) +void i915_sw_fence_complete(struct i915_sw_fence *fence) { debug_fence_assert(fence); @@ -202,7 +202,7 @@ static void i915_sw_fence_complete(struct i915_sw_fence *fence) __i915_sw_fence_complete(fence, NULL); } -static void i915_sw_fence_await(struct i915_sw_fence *fence) +void i915_sw_fence_await(struct i915_sw_fence *fence) { debug_fence_assert(fence); WARN_ON(atomic_inc_return(&fence->pending) <= 1); diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index 0e055ea0179f..6dec9e1d1102 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -79,6 +79,9 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, unsigned long timeout, gfp_t gfp); +void i915_sw_fence_await(struct i915_sw_fence *fence); +void i915_sw_fence_complete(struct i915_sw_fence *fence); + static inline bool i915_sw_fence_signaled(const struct i915_sw_fence *fence) { return atomic_read(&fence->pending) <= 0; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index df8f88142f1d..4fc7e2ac6278 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -616,6 +616,7 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) } map[] = { #define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) } MAP(PREEMPTION, PREEMPTION), + MAP(SEMAPHORES, SEMAPHORES), #undef MAP }; struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h index b96a31bc1080..a34ece53a771 100644 --- a/drivers/gpu/drm/i915/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -105,8 +105,13 @@ #define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ -#define MI_SEMAPHORE_POLL (1<<15) -#define MI_SEMAPHORE_SAD_GTE_SDD (1<<12) +#define MI_SEMAPHORE_POLL (1 << 15) +#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) +#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) +#define MI_SEMAPHORE_SAD_LT_SDD (2 << 12) +#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) +#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) +#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3fd0c45a2920..6c9479acb433 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2330,6 +2330,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->park = NULL; engine->unpark = NULL; + engine->flags |= I915_ENGINE_HAS_SEMAPHORES; engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (engine->i915->preempt_context) engine->flags |= I915_ENGINE_HAS_PREEMPTION; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b8ec7e40a59b..2e7264119ec4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -499,6 +499,7 @@ struct intel_engine_cs { #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) +#define I915_ENGINE_HAS_SEMAPHORES BIT(3) unsigned int flags; /* @@ -576,6 +577,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_HAS_PREEMPTION; } +static inline bool +intel_engine_has_semaphores(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_SEMAPHORES; +} + void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); static inline bool __execlists_need_preempt(int prio, int last) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 8304a7f1ec3f..b10eea3f6d24 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -479,6 +479,7 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_ENABLED (1ul << 0) #define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) +#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_PARAM_HUC_STATUS 42 -- cgit v1.2.3 From c8b502422bfe04422261cb2861977a5cd31cc1da Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Mar 2019 16:26:43 +0000 Subject: drm/i915: Remove last traces of exec-id (GEM_BUSY) As we allow per-context engine allows the legacy concept of I915_EXEC_RING no longer applies universally. We are still exposing the unrelated exec-id in GEM_BUSY, so transition this ioctl (once more slightly changing its ABI, but no one cares) over to only reporting the uabi-class (not instance as we can not foreseeably fit those into the small bitmask). The only user of the extended ring information from GEM_BUSY is ddx/sna, which tries to use the non-rcs business information to guide which engine to use for subsequent operations on foreign bo. All that matters for it is the decision between rcs and !rcs, so it is unaffected by the change in higher bits. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190305162643.20243-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 32 +++++++++++++++++--------------- drivers/gpu/drm/i915/intel_engine_cs.c | 10 ---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - include/uapi/drm/i915_drm.h | 32 +++++++++++++++++--------------- 4 files changed, 34 insertions(+), 41 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a1ad5e137a97..69413f99ed04 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3825,20 +3825,17 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, static __always_inline unsigned int __busy_read_flag(unsigned int id) { - /* Note that we could alias engines in the execbuf API, but - * that would be very unwise as it prevents userspace from - * fine control over engine selection. Ahem. - * - * This should be something like EXEC_MAX_ENGINE instead of - * I915_NUM_ENGINES. - */ - BUILD_BUG_ON(I915_NUM_ENGINES > 16); + if (id == I915_ENGINE_CLASS_INVALID) + return 0xffff0000; + + GEM_BUG_ON(id >= 16); return 0x10000 << id; } static __always_inline unsigned int __busy_write_id(unsigned int id) { - /* The uABI guarantees an active writer is also amongst the read + /* + * The uABI guarantees an active writer is also amongst the read * engines. This would be true if we accessed the activity tracking * under the lock, but as we perform the lookup of the object and * its activity locklessly we can not guarantee that the last_write @@ -3846,16 +3843,20 @@ static __always_inline unsigned int __busy_write_id(unsigned int id) * last_read - hence we always set both read and write busy for * last_write. */ - return id | __busy_read_flag(id); + if (id == I915_ENGINE_CLASS_INVALID) + return 0xffffffff; + + return (id + 1) | __busy_read_flag(id); } static __always_inline unsigned int __busy_set_if_active(const struct dma_fence *fence, unsigned int (*flag)(unsigned int id)) { - struct i915_request *rq; + const struct i915_request *rq; - /* We have to check the current hw status of the fence as the uABI + /* + * We have to check the current hw status of the fence as the uABI * guarantees forward progress. We could rely on the idle worker * to eventually flush us, but to minimise latency just ask the * hardware. @@ -3866,11 +3867,11 @@ __busy_set_if_active(const struct dma_fence *fence, return 0; /* opencode to_request() in order to avoid const warnings */ - rq = container_of(fence, struct i915_request, fence); + rq = container_of(fence, const struct i915_request, fence); if (i915_request_completed(rq)) return 0; - return flag(rq->engine->uabi_id); + return flag(rq->engine->uabi_class); } static __always_inline unsigned int @@ -3904,7 +3905,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, if (!obj) goto out; - /* A discrepancy here is that we do not report the status of + /* + * A discrepancy here is that we do not report the status of * non-i915 fences, i.e. even though we may report the object as idle, * a call to set-domain may still stall waiting for foreign rendering. * This also means that wait-ioctl may report an object as busy, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 4fc7e2ac6278..f3f161c5627b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -84,7 +84,6 @@ static const struct engine_class_info intel_engine_classes[] = { #define MAX_MMIO_BASES 3 struct engine_info { unsigned int hw_id; - unsigned int uabi_id; u8 class; u8 instance; /* mmio bases table *must* be sorted in reverse gen order */ @@ -97,7 +96,6 @@ struct engine_info { static const struct engine_info intel_engines[] = { [RCS] = { .hw_id = RCS_HW, - .uabi_id = I915_EXEC_RENDER, .class = RENDER_CLASS, .instance = 0, .mmio_bases = { @@ -106,7 +104,6 @@ static const struct engine_info intel_engines[] = { }, [BCS] = { .hw_id = BCS_HW, - .uabi_id = I915_EXEC_BLT, .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_bases = { @@ -115,7 +112,6 @@ static const struct engine_info intel_engines[] = { }, [VCS] = { .hw_id = VCS_HW, - .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_bases = { @@ -126,7 +122,6 @@ static const struct engine_info intel_engines[] = { }, [VCS2] = { .hw_id = VCS2_HW, - .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 1, .mmio_bases = { @@ -136,7 +131,6 @@ static const struct engine_info intel_engines[] = { }, [VCS3] = { .hw_id = VCS3_HW, - .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 2, .mmio_bases = { @@ -145,7 +139,6 @@ static const struct engine_info intel_engines[] = { }, [VCS4] = { .hw_id = VCS4_HW, - .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 3, .mmio_bases = { @@ -154,7 +147,6 @@ static const struct engine_info intel_engines[] = { }, [VECS] = { .hw_id = VECS_HW, - .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_bases = { @@ -164,7 +156,6 @@ static const struct engine_info intel_engines[] = { }, [VECS2] = { .hw_id = VECS2_HW, - .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, .mmio_bases = { @@ -321,7 +312,6 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->class = info->class; engine->instance = info->instance; - engine->uabi_id = info->uabi_id; engine->uabi_class = intel_engine_classes[info->class].uabi_class; engine->context_size = __intel_engine_context_size(dev_priv, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2e7264119ec4..12e79828dbd5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -335,7 +335,6 @@ struct intel_engine_cs { unsigned int hw_id; unsigned int guc_id; - u8 uabi_id; u8 uabi_class; u8 class; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 1a60642c1d61..aa2d4c73a97d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1127,32 +1127,34 @@ struct drm_i915_gem_busy { * as busy may become idle before the ioctl is completed. * * Furthermore, if the object is busy, which engine is busy is only - * provided as a guide. There are race conditions which prevent the - * report of which engines are busy from being always accurate. - * However, the converse is not true. If the object is idle, the - * result of the ioctl, that all engines are idle, is accurate. + * provided as a guide and only indirectly by reporting its class + * (there may be more than one engine in each class). There are race + * conditions which prevent the report of which engines are busy from + * being always accurate. However, the converse is not true. If the + * object is idle, the result of the ioctl, that all engines are idle, + * is accurate. * * The returned dword is split into two fields to indicate both - * the engines on which the object is being read, and the - * engine on which it is currently being written (if any). + * the engine classess on which the object is being read, and the + * engine class on which it is currently being written (if any). * * The low word (bits 0:15) indicate if the object is being written * to by any engine (there can only be one, as the GEM implicit * synchronisation rules force writes to be serialised). Only the - * engine for the last write is reported. + * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as + * 1 not 0 etc) for the last write is reported. * - * The high word (bits 16:31) are a bitmask of which engines are - * currently reading from the object. Multiple engines may be + * The high word (bits 16:31) are a bitmask of which engines classes + * are currently reading from the object. Multiple engines may be * reading from the object simultaneously. * - * The value of each engine is the same as specified in the - * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc. - * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to - * the I915_EXEC_RENDER engine for execution, and so it is never + * The value of each engine class is the same as specified in the + * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. * reported as active itself. Some hardware may have parallel * execution engines, e.g. multiple media engines, which are - * mapped to the same identifier in the EXECBUFFER2 ioctl and - * so are not separately reported for busyness. + * mapped to the same class identifier and so are not separately + * reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether -- cgit v1.2.3 From 8a68d464366efb5b294fa11ccf23b51306cc2695 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Mar 2019 18:03:30 +0000 Subject: drm/i915: Store the BIT(engine->id) as the engine's mask In the next patch, we are introducing a broad virtual engine to encompass multiple physical engines, losing the 1:1 nature of BIT(engine->id). To reflect the broader set of engines implied by the virtual instance, lets store the full bitmask. v2: Use intel_engine_mask_t (s/ring_mask/engine_mask/) v3: Tvrtko voted for moah churn so teach everyone to not mention ring and use $class$instance throughout. v4: Comment upon the disparity in bspec for using VCS1,VCS2 in gen8 and VCS[0-4] in later gen. We opt to keep the code consistent and use 0-index naming throughout. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190305180332.30900-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 44 ++-- drivers/gpu/drm/i915/gvt/execlist.c | 17 +- drivers/gpu/drm/i915/gvt/handlers.c | 26 +-- drivers/gpu/drm/i915/gvt/interrupt.c | 2 +- drivers/gpu/drm/i915/gvt/mmio_context.c | 228 +++++++++++---------- drivers/gpu/drm/i915/gvt/scheduler.c | 21 +- drivers/gpu/drm/i915/i915_cmd_parser.c | 12 +- drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_drv.c | 8 +- drivers/gpu/drm/i915/i915_drv.h | 22 +- drivers/gpu/drm/i915/i915_gem_context.c | 4 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 11 +- drivers/gpu/drm/i915/i915_irq.c | 65 +++--- drivers/gpu/drm/i915/i915_pci.c | 39 ++-- drivers/gpu/drm/i915/i915_perf.c | 8 +- drivers/gpu/drm/i915/i915_pmu.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 24 +-- drivers/gpu/drm/i915/i915_reset.c | 47 ++--- drivers/gpu/drm/i915/intel_device_info.c | 6 +- drivers/gpu/drm/i915/intel_device_info.h | 6 +- drivers/gpu/drm/i915/intel_engine_cs.c | 59 +++--- drivers/gpu/drm/i915/intel_guc_ads.c | 2 +- drivers/gpu/drm/i915/intel_guc_submission.c | 6 +- drivers/gpu/drm/i915/intel_hangcheck.c | 10 +- drivers/gpu/drm/i915/intel_lrc.c | 12 +- drivers/gpu/drm/i915/intel_mocs.c | 12 +- drivers/gpu/drm/i915/intel_overlay.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 35 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 27 +-- drivers/gpu/drm/i915/intel_workarounds.c | 4 +- drivers/gpu/drm/i915/selftests/huge_pages.c | 4 +- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 8 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 2 +- drivers/gpu/drm/i915/selftests/i915_request.c | 14 +- drivers/gpu/drm/i915/selftests/intel_guc.c | 4 +- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 16 +- drivers/gpu/drm/i915/selftests/intel_lrc.c | 4 +- drivers/gpu/drm/i915/selftests/intel_workarounds.c | 4 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 1 + drivers/gpu/drm/i915/selftests/mock_gem_device.c | 6 +- 45 files changed, 422 insertions(+), 432 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 35b4ec3f7618..cf4a1ecf6853 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -391,12 +391,12 @@ struct cmd_info { #define F_POST_HANDLE (1<<2) u32 flag; -#define R_RCS (1 << RCS) -#define R_VCS1 (1 << VCS) -#define R_VCS2 (1 << VCS2) +#define R_RCS BIT(RCS0) +#define R_VCS1 BIT(VCS0) +#define R_VCS2 BIT(VCS1) #define R_VCS (R_VCS1 | R_VCS2) -#define R_BCS (1 << BCS) -#define R_VECS (1 << VECS) +#define R_BCS BIT(BCS0) +#define R_VECS BIT(VECS0) #define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS) /* rings that support this cmd: BLT/RCS/VCS/VECS */ u16 rings; @@ -558,7 +558,7 @@ static const struct decode_info decode_info_vebox = { }; static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { - [RCS] = { + [RCS0] = { &decode_info_mi, NULL, NULL, @@ -569,7 +569,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [VCS] = { + [VCS0] = { &decode_info_mi, NULL, NULL, @@ -580,7 +580,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [BCS] = { + [BCS0] = { &decode_info_mi, NULL, &decode_info_2d, @@ -591,7 +591,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [VECS] = { + [VECS0] = { &decode_info_mi, NULL, NULL, @@ -602,7 +602,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { NULL, }, - [VCS2] = { + [VCS1] = { &decode_info_mi, NULL, NULL, @@ -631,8 +631,7 @@ static inline const struct cmd_info *find_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e; hash_for_each_possible(gvt->cmd_table, e, hlist, opcode) { - if ((opcode == e->info->opcode) && - (e->info->rings & (1 << ring_id))) + if (opcode == e->info->opcode && e->info->rings & BIT(ring_id)) return e->info; } return NULL; @@ -943,15 +942,12 @@ static int cmd_handler_lri(struct parser_exec_state *s) struct intel_gvt *gvt = s->vgpu->gvt; for (i = 1; i < cmd_len; i += 2) { - if (IS_BROADWELL(gvt->dev_priv) && - (s->ring_id != RCS)) { - if (s->ring_id == BCS && - cmd_reg(s, i) == - i915_mmio_reg_offset(DERRMR)) + if (IS_BROADWELL(gvt->dev_priv) && s->ring_id != RCS0) { + if (s->ring_id == BCS0 && + cmd_reg(s, i) == i915_mmio_reg_offset(DERRMR)) ret |= 0; else - ret |= (cmd_reg_inhibit(s, i)) ? - -EBADRQC : 0; + ret |= cmd_reg_inhibit(s, i) ? -EBADRQC : 0; } if (ret) break; @@ -1047,27 +1043,27 @@ struct cmd_interrupt_event { }; static struct cmd_interrupt_event cmd_interrupt_events[] = { - [RCS] = { + [RCS0] = { .pipe_control_notify = RCS_PIPE_CONTROL, .mi_flush_dw = INTEL_GVT_EVENT_RESERVED, .mi_user_interrupt = RCS_MI_USER_INTERRUPT, }, - [BCS] = { + [BCS0] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = BCS_MI_FLUSH_DW, .mi_user_interrupt = BCS_MI_USER_INTERRUPT, }, - [VCS] = { + [VCS0] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = VCS_MI_FLUSH_DW, .mi_user_interrupt = VCS_MI_USER_INTERRUPT, }, - [VCS2] = { + [VCS1] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = VCS2_MI_FLUSH_DW, .mi_user_interrupt = VCS2_MI_USER_INTERRUPT, }, - [VECS] = { + [VECS0] = { .pipe_control_notify = INTEL_GVT_EVENT_RESERVED, .mi_flush_dw = VECS_MI_FLUSH_DW, .mi_user_interrupt = VECS_MI_USER_INTERRUPT, diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 70494e394d2c..1a93472cb34e 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -47,17 +47,16 @@ ((a)->lrca == (b)->lrca)) static int context_switch_events[] = { - [RCS] = RCS_AS_CONTEXT_SWITCH, - [BCS] = BCS_AS_CONTEXT_SWITCH, - [VCS] = VCS_AS_CONTEXT_SWITCH, - [VCS2] = VCS2_AS_CONTEXT_SWITCH, - [VECS] = VECS_AS_CONTEXT_SWITCH, + [RCS0] = RCS_AS_CONTEXT_SWITCH, + [BCS0] = BCS_AS_CONTEXT_SWITCH, + [VCS0] = VCS_AS_CONTEXT_SWITCH, + [VCS1] = VCS2_AS_CONTEXT_SWITCH, + [VECS0] = VECS_AS_CONTEXT_SWITCH, }; -static int ring_id_to_context_switch_event(int ring_id) +static int ring_id_to_context_switch_event(unsigned int ring_id) { - if (WARN_ON(ring_id < RCS || - ring_id >= ARRAY_SIZE(context_switch_events))) + if (WARN_ON(ring_id >= ARRAY_SIZE(context_switch_events))) return -EINVAL; return context_switch_events[ring_id]; @@ -411,7 +410,7 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) gvt_dbg_el("complete workload %p status %d\n", workload, workload->status); - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) + if (workload->status || (vgpu->resetting_eng & BIT(ring_id))) goto out; if (!list_empty(workload_q_head(vgpu, ring_id))) { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index bc64b810e0d5..b596cb42e24e 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -323,25 +323,25 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, } else { if (data & GEN6_GRDOM_RENDER) { gvt_dbg_mmio("vgpu%d: request RCS reset\n", vgpu->id); - engine_mask |= (1 << RCS); + engine_mask |= BIT(RCS0); } if (data & GEN6_GRDOM_MEDIA) { gvt_dbg_mmio("vgpu%d: request VCS reset\n", vgpu->id); - engine_mask |= (1 << VCS); + engine_mask |= BIT(VCS0); } if (data & GEN6_GRDOM_BLT) { gvt_dbg_mmio("vgpu%d: request BCS Reset\n", vgpu->id); - engine_mask |= (1 << BCS); + engine_mask |= BIT(BCS0); } if (data & GEN6_GRDOM_VECS) { gvt_dbg_mmio("vgpu%d: request VECS Reset\n", vgpu->id); - engine_mask |= (1 << VECS); + engine_mask |= BIT(VECS0); } if (data & GEN8_GRDOM_MEDIA2) { gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id); - if (HAS_BSD2(vgpu->gvt->dev_priv)) - engine_mask |= (1 << VCS2); + engine_mask |= BIT(VCS1); } + engine_mask &= INTEL_INFO(vgpu->gvt->dev_priv)->engine_mask; } /* vgpu_lock already hold by emulate mmio r/w */ @@ -1704,7 +1704,7 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; ret = intel_vgpu_select_submission_ops(vgpu, - ENGINE_MASK(ring_id), + BIT(ring_id), INTEL_VGPU_EXECLIST_SUBMISSION); if (ret) return ret; @@ -1724,19 +1724,19 @@ static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu, switch (offset) { case 0x4260: - id = RCS; + id = RCS0; break; case 0x4264: - id = VCS; + id = VCS0; break; case 0x4268: - id = VCS2; + id = VCS1; break; case 0x426c: - id = BCS; + id = BCS0; break; case 0x4270: - id = VECS; + id = VECS0; break; default: return -EINVAL; @@ -1793,7 +1793,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, MMIO_F(prefix(BLT_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(GEN6_BSD_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(VEBOX_RING_BASE), s, f, am, rm, d, r, w); \ - if (HAS_BSD2(dev_priv)) \ + if (HAS_ENGINE(dev_priv, VCS1)) \ MMIO_F(prefix(GEN8_BSD2_RING_BASE), s, f, am, rm, d, r, w); \ } while (0) diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index 67125c5eec6e..951681813230 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -536,7 +536,7 @@ static void gen8_init_irq( SET_BIT_INFO(irq, 4, VCS_MI_FLUSH_DW, INTEL_GVT_IRQ_INFO_GT1); SET_BIT_INFO(irq, 8, VCS_AS_CONTEXT_SWITCH, INTEL_GVT_IRQ_INFO_GT1); - if (HAS_BSD2(gvt->dev_priv)) { + if (HAS_ENGINE(gvt->dev_priv, VCS1)) { SET_BIT_INFO(irq, 16, VCS2_MI_USER_INTERRUPT, INTEL_GVT_IRQ_INFO_GT1); SET_BIT_INFO(irq, 20, VCS2_MI_FLUSH_DW, diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 7d84cfb9051a..0209d27fcaf0 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -41,102 +41,102 @@ /* Raw offset is appened to each line for convenience. */ static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = { - {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ - {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ - {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ - {RCS, INSTPM, 0xffff, true}, /* 0x20c0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ - {RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ - {RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ - {RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ - {RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ - {RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ - {RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ - - {BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ - {BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ - {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ - {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ - {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ - {RCS, INVALID_MMIO_REG, 0, false } /* Terminated */ + {RCS0, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ + {RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS0, HWSTAM, 0x0, false}, /* 0x2098 */ + {RCS0, INSTPM, 0xffff, true}, /* 0x20c0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ + {RCS0, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ + {RCS0, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ + {RCS0, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ + {RCS0, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ + {RCS0, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ + {RCS0, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ + + {BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ + {BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ + {BCS0, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ + {BCS0, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ + {BCS0, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ + {RCS0, INVALID_MMIO_REG, 0, false } /* Terminated */ }; static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { - {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ - {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ - {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ - {RCS, INSTPM, 0xffff, true}, /* 0x20c0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ - {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ - {RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ - {RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ - {RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ - {RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ - {RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ - {RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ - - {RCS, GEN8_PRIVATE_PAT_LO, 0, false}, /* 0x40e0 */ - {RCS, GEN8_PRIVATE_PAT_HI, 0, false}, /* 0x40e4 */ - {RCS, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */ - {RCS, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */ - {RCS, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */ - {RCS, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */ - {RCS, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ - {RCS, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ - {RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ - {RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */ - {RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */ - {RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */ - {RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */ - {RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */ - {RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */ - {RCS, TRINVTILEDETCT, 0, false}, /* 0x4dec */ - {RCS, TRVADR, 0, false}, /* 0x4df0 */ - {RCS, TRTTE, 0, false}, /* 0x4df4 */ - - {BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ - {BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ - {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ - {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ - {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ - - {VCS2, RING_EXCC(GEN8_BSD2_RING_BASE), 0xffff, false}, /* 0x1c028 */ - - {VECS, RING_EXCC(VEBOX_RING_BASE), 0xffff, false}, /* 0x1a028 */ - - {RCS, GEN8_HDC_CHICKEN1, 0xffff, true}, /* 0x7304 */ - {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ - {RCS, GEN7_UCGCTL4, 0x0, false}, /* 0x940c */ - {RCS, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */ - - {RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ - {RCS, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */ - - {RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ - {RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ - {RCS, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */ - {RCS, INVALID_MMIO_REG, 0, false } /* Terminated */ + {RCS0, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ + {RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS0, HWSTAM, 0x0, false}, /* 0x2098 */ + {RCS0, INSTPM, 0xffff, true}, /* 0x20c0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ + {RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ + {RCS0, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ + {RCS0, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ + {RCS0, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ + {RCS0, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ + {RCS0, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ + {RCS0, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ + + {RCS0, GEN8_PRIVATE_PAT_LO, 0, false}, /* 0x40e0 */ + {RCS0, GEN8_PRIVATE_PAT_HI, 0, false}, /* 0x40e4 */ + {RCS0, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */ + {RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */ + {RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */ + {RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */ + {RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ + {RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ + {RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ + {RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */ + {RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */ + {RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */ + {RCS0, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */ + {RCS0, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */ + {RCS0, TRNULLDETCT, 0, false}, /* 0x4de8 */ + {RCS0, TRINVTILEDETCT, 0, false}, /* 0x4dec */ + {RCS0, TRVADR, 0, false}, /* 0x4df0 */ + {RCS0, TRTTE, 0, false}, /* 0x4df4 */ + + {BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ + {BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ + {BCS0, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ + {BCS0, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ + {BCS0, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ + + {VCS1, RING_EXCC(GEN8_BSD2_RING_BASE), 0xffff, false}, /* 0x1c028 */ + + {VECS0, RING_EXCC(VEBOX_RING_BASE), 0xffff, false}, /* 0x1a028 */ + + {RCS0, GEN8_HDC_CHICKEN1, 0xffff, true}, /* 0x7304 */ + {RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS0, GEN7_UCGCTL4, 0x0, false}, /* 0x940c */ + {RCS0, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */ + + {RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ + {RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */ + + {RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ + {RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ + {RCS0, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */ + {RCS0, INVALID_MMIO_REG, 0, false } /* Terminated */ }; static struct { @@ -149,11 +149,11 @@ static void load_render_mocs(struct drm_i915_private *dev_priv) { i915_reg_t offset; u32 regs[] = { - [RCS] = 0xc800, - [VCS] = 0xc900, - [VCS2] = 0xca00, - [BCS] = 0xcc00, - [VECS] = 0xcb00, + [RCS0] = 0xc800, + [VCS0] = 0xc900, + [VCS1] = 0xca00, + [BCS0] = 0xcc00, + [VECS0] = 0xcb00, }; int ring_id, i; @@ -301,7 +301,7 @@ int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu, goto out; /* no MOCS register in context except render engine */ - if (req->engine->id != RCS) + if (req->engine->id != RCS0) goto out; ret = restore_render_mocs_control_for_inhibit(vgpu, req); @@ -331,11 +331,11 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) enum forcewake_domains fw; i915_reg_t reg; u32 regs[] = { - [RCS] = 0x4260, - [VCS] = 0x4264, - [VCS2] = 0x4268, - [BCS] = 0x426c, - [VECS] = 0x4270, + [RCS0] = 0x4260, + [VCS0] = 0x4264, + [VCS1] = 0x4268, + [BCS0] = 0x426c, + [VECS0] = 0x4270, }; if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) @@ -353,7 +353,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) */ fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ | FW_REG_WRITE); - if (ring_id == RCS && (INTEL_GEN(dev_priv) >= 9)) + if (ring_id == RCS0 && INTEL_GEN(dev_priv) >= 9) fw |= FORCEWAKE_RENDER; intel_uncore_forcewake_get(dev_priv, fw); @@ -378,11 +378,11 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, u32 old_v, new_v; u32 regs[] = { - [RCS] = 0xc800, - [VCS] = 0xc900, - [VCS2] = 0xca00, - [BCS] = 0xcc00, - [VECS] = 0xcb00, + [RCS0] = 0xc800, + [VCS0] = 0xc900, + [VCS1] = 0xca00, + [BCS0] = 0xcc00, + [VECS0] = 0xcb00, }; int i; @@ -390,8 +390,10 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv) - || IS_COFFEELAKE(dev_priv)) && ring_id == RCS) + if (ring_id == RCS0 && + (IS_KABYLAKE(dev_priv) || + IS_BROXTON(dev_priv) || + IS_COFFEELAKE(dev_priv))) return; if (!pre && !gen9_render_mocs.initialized) @@ -414,7 +416,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, offset.reg += 4; } - if (ring_id == RCS) { + if (ring_id == RCS0) { l3_offset.reg = 0xb020; for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) { if (pre) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 1bb8f936fdaa..709bcaaed765 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -93,7 +93,7 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload, i915_mmio_reg_offset(EU_PERF_CNTL6), }; - if (workload->ring_id != RCS) + if (workload->ring_id != RCS0) return; if (save) { @@ -149,7 +149,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) COPY_REG_MASKED(ctx_ctrl); COPY_REG(ctx_timestamp); - if (ring_id == RCS) { + if (ring_id == RCS0) { COPY_REG(bb_per_ctx_ptr); COPY_REG(rcs_indirect_ctx); COPY_REG(rcs_indirect_ctx_offset); @@ -177,7 +177,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) context_page_num = context_page_num >> PAGE_SHIFT; - if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS) + if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS0) context_page_num = 19; i = 2; @@ -440,8 +440,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) if (ret) goto err_unpin; - if ((workload->ring_id == RCS) && - (workload->wa_ctx.indirect_ctx.size != 0)) { + if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) { ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); if (ret) goto err_shadow; @@ -791,7 +790,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) context_page_num = rq->engine->context_size; context_page_num = context_page_num >> PAGE_SHIFT; - if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS) + if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS0) context_page_num = 19; i = 2; @@ -891,8 +890,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->status = 0; } - if (!workload->status && !(vgpu->resetting_eng & - ENGINE_MASK(ring_id))) { + if (!workload->status && + !(vgpu->resetting_eng & BIT(ring_id))) { update_guest_context(workload); for_each_set_bit(event, workload->pending_events, @@ -915,7 +914,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) list_del_init(&workload->list); - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { + if (workload->status || vgpu->resetting_eng & BIT(ring_id)) { /* if workload->status is not successful means HW GPU * has occurred GPU hang or something wrong with i915/GVT, * and GVT won't inject context switch interrupt to guest. @@ -929,7 +928,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) * cleaned up during the resetting process later, so doing * the workload clean up here doesn't have any impact. **/ - intel_vgpu_clean_workloads(vgpu, ENGINE_MASK(ring_id)); + intel_vgpu_clean_workloads(vgpu, BIT(ring_id)); } workload->complete(workload); @@ -1438,7 +1437,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, workload->rb_start = start; workload->rb_ctl = ctl; - if (ring_id == RCS) { + if (ring_id == RCS0) { intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4); intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 33e8eed64423..503d548a55f7 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -868,8 +868,8 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) if (!IS_GEN(engine->i915, 7)) return; - switch (engine->id) { - case RCS: + switch (engine->class) { + case RENDER_CLASS: if (IS_HASWELL(engine->i915)) { cmd_tables = hsw_render_ring_cmds; cmd_table_count = @@ -889,12 +889,12 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; - case VCS: + case VIDEO_DECODE_CLASS: cmd_tables = gen7_video_cmds; cmd_table_count = ARRAY_SIZE(gen7_video_cmds); engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; - case BCS: + case COPY_ENGINE_CLASS: if (IS_HASWELL(engine->i915)) { cmd_tables = hsw_blt_ring_cmds; cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); @@ -913,14 +913,14 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; - case VECS: + case VIDEO_ENHANCEMENT_CLASS: cmd_tables = hsw_vebox_cmds; cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); /* VECS can use the same length_mask function as VCS */ engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; default: - MISSING_CASE(engine->id); + MISSING_CASE(engine->class); return; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 298371aad445..0a6348ad7c98 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1298,7 +1298,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seqno[id] = intel_engine_get_hangcheck_seqno(engine); } - intel_engine_get_instdone(dev_priv->engine[RCS], &instdone); + intel_engine_get_instdone(dev_priv->engine[RCS0], &instdone); } if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) @@ -1325,7 +1325,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) (long long)engine->hangcheck.acthd, (long long)acthd[id]); - if (engine->id == RCS) { + if (engine->id == RCS0) { seq_puts(m, "\tinstdone read =\n"); i915_instdone_info(dev_priv, m, &instdone); @@ -3178,7 +3178,7 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) static int i915_wa_registers(struct seq_file *m, void *unused) { struct drm_i915_private *i915 = node_to_i915(m->private); - const struct i915_wa_list *wal = &i915->engine[RCS]->ctx_wa_list; + const struct i915_wa_list *wal = &i915->engine[RCS0]->ctx_wa_list; struct i915_wa *wa; unsigned int i; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1b2f5a6f8c25..b548c292738c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -330,16 +330,16 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, value = dev_priv->overlay ? 1 : 0; break; case I915_PARAM_HAS_BSD: - value = !!dev_priv->engine[VCS]; + value = !!dev_priv->engine[VCS0]; break; case I915_PARAM_HAS_BLT: - value = !!dev_priv->engine[BCS]; + value = !!dev_priv->engine[BCS0]; break; case I915_PARAM_HAS_VEBOX: - value = !!dev_priv->engine[VECS]; + value = !!dev_priv->engine[VECS0]; break; case I915_PARAM_HAS_BSD2: - value = !!dev_priv->engine[VCS2]; + value = !!dev_priv->engine[VCS1]; break; case I915_PARAM_HAS_LLC: value = HAS_LLC(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8abc5a640bf1..08ead854ac2d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2099,7 +2099,7 @@ static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc) /* Iterator over subset of engines selected by mask */ #define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ - for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->ring_mask; \ + for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->engine_mask; \ (tmp__) ? \ ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \ 0;) @@ -2420,24 +2420,8 @@ static inline unsigned int i915_sg_segment_size(void) #define IS_GEN9_LP(dev_priv) (IS_GEN(dev_priv, 9) && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (IS_GEN(dev_priv, 9) && !IS_LP(dev_priv)) -#define ENGINE_MASK(id) BIT(id) -#define RENDER_RING ENGINE_MASK(RCS) -#define BSD_RING ENGINE_MASK(VCS) -#define BLT_RING ENGINE_MASK(BCS) -#define VEBOX_RING ENGINE_MASK(VECS) -#define BSD2_RING ENGINE_MASK(VCS2) -#define BSD3_RING ENGINE_MASK(VCS3) -#define BSD4_RING ENGINE_MASK(VCS4) -#define VEBOX2_RING ENGINE_MASK(VECS2) -#define ALL_ENGINES (~0) - -#define HAS_ENGINE(dev_priv, id) \ - (!!(INTEL_INFO(dev_priv)->ring_mask & ENGINE_MASK(id))) - -#define HAS_BSD(dev_priv) HAS_ENGINE(dev_priv, VCS) -#define HAS_BSD2(dev_priv) HAS_ENGINE(dev_priv, VCS2) -#define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) -#define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS) +#define ALL_ENGINES (~0u) +#define HAS_ENGINE(dev_priv, id) (INTEL_INFO(dev_priv)->engine_mask & BIT(id)) #define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc) #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d266ba3f7210..1e3211e909f1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -583,7 +583,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) GEM_BUG_ON(dev_priv->kernel_context); GEM_BUG_ON(dev_priv->preempt_context); - intel_engine_init_ctx_wa(dev_priv->engine[RCS]); + intel_engine_init_ctx_wa(dev_priv->engine[RCS0]); init_contexts(dev_priv); /* lowest priority; idle task */ @@ -1089,7 +1089,7 @@ __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, int ret = 0; GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8); - GEM_BUG_ON(engine->id != RCS); + GEM_BUG_ON(engine->id != RCS0); /* Nothing to do if unmodified. */ if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 53d0d70c97fa..943a221acb21 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1957,7 +1957,7 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) u32 *cs; int i; - if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS) { + if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } @@ -2082,11 +2082,11 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, #define I915_USER_RINGS (4) static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { - [I915_EXEC_DEFAULT] = RCS, - [I915_EXEC_RENDER] = RCS, - [I915_EXEC_BLT] = BCS, - [I915_EXEC_BSD] = VCS, - [I915_EXEC_VEBOX] = VECS + [I915_EXEC_DEFAULT] = RCS0, + [I915_EXEC_RENDER] = RCS0, + [I915_EXEC_BLT] = BCS0, + [I915_EXEC_BSD] = VCS0, + [I915_EXEC_VEBOX] = VECS0 }; static struct intel_engine_cs * @@ -2109,7 +2109,7 @@ eb_select_engine(struct drm_i915_private *dev_priv, return NULL; } - if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) { + if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4a681b3332ad..f447c6564418 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -799,7 +799,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm, */ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) { - ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->vm.i915)->ring_mask; + ppgtt->pd_dirty_engines = INTEL_INFO(ppgtt->vm.i915)->engine_mask; } /* Removes entries from a single page table, releasing it if it's empty. diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 86065d75b3ac..a47e11e6fc1b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -390,7 +390,7 @@ struct i915_hw_ppgtt { struct i915_address_space vm; struct kref ref; - unsigned long pd_dirty_rings; + unsigned long pd_dirty_engines; union { struct i915_pml4 pml4; /* GEN8+ & 48b PPGTT */ struct i915_page_directory_pointer pdp; /* GEN8+ */ diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 90baf9086d0a..91196348c68c 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -42,7 +42,7 @@ struct intel_render_state { static const struct intel_renderstate_rodata * render_state_get_rodata(const struct intel_engine_cs *engine) { - if (engine->id != RCS) + if (engine->id != RCS0) return NULL; switch (INTEL_GEN(engine->i915)) { diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index ececbfd8ee82..5f1cdbc9eb5d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -411,7 +411,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone.instdone); - if (ee->engine_id != RCS || INTEL_GEN(m->i915) <= 3) + if (ee->engine_id != RCS0 || INTEL_GEN(m->i915) <= 3) return; err_printf(m, " SC_INSTDONE: 0x%08x\n", @@ -1179,16 +1179,17 @@ static void error_record_engine_registers(struct i915_gpu_state *error, if (IS_GEN(dev_priv, 7)) { switch (engine->id) { default: - case RCS: + MISSING_CASE(engine->id); + case RCS0: mmio = RENDER_HWS_PGA_GEN7; break; - case BCS: + case BCS0: mmio = BLT_HWS_PGA_GEN7; break; - case VCS: + case VCS0: mmio = BSD_HWS_PGA_GEN7; break; - case VECS: + case VECS0: mmio = VEBOX_HWS_PGA_GEN7; break; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 06541f4bd4af..1f4e984ce42f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1427,20 +1427,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS0]); } static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); if (gt_iir & GT_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS0]); if (gt_iir & GT_BLT_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[BCS0]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | @@ -1475,8 +1475,8 @@ static void gen8_gt_irq_ack(struct drm_i915_private *i915, #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \ GEN8_GT_BCS_IRQ | \ + GEN8_GT_VCS0_IRQ | \ GEN8_GT_VCS1_IRQ | \ - GEN8_GT_VCS2_IRQ | \ GEN8_GT_VECS_IRQ | \ GEN8_GT_PM_IRQ | \ GEN8_GT_GUC_IRQ) @@ -1487,7 +1487,7 @@ static void gen8_gt_irq_ack(struct drm_i915_private *i915, raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]); } - if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { + if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1)); if (likely(gt_iir[1])) raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]); @@ -1510,21 +1510,21 @@ static void gen8_gt_irq_handler(struct drm_i915_private *i915, u32 master_ctl, u32 gt_iir[4]) { if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { - gen8_cs_irq_handler(i915->engine[RCS], + gen8_cs_irq_handler(i915->engine[RCS0], gt_iir[0] >> GEN8_RCS_IRQ_SHIFT); - gen8_cs_irq_handler(i915->engine[BCS], + gen8_cs_irq_handler(i915->engine[BCS0], gt_iir[0] >> GEN8_BCS_IRQ_SHIFT); } - if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { - gen8_cs_irq_handler(i915->engine[VCS], + if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { + gen8_cs_irq_handler(i915->engine[VCS0], + gt_iir[1] >> GEN8_VCS0_IRQ_SHIFT); + gen8_cs_irq_handler(i915->engine[VCS1], gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT); - gen8_cs_irq_handler(i915->engine[VCS2], - gt_iir[1] >> GEN8_VCS2_IRQ_SHIFT); } if (master_ctl & GEN8_GT_VECS_IRQ) { - gen8_cs_irq_handler(i915->engine[VECS], + gen8_cs_irq_handler(i915->engine[VECS0], gt_iir[3] >> GEN8_VECS_IRQ_SHIFT); } @@ -1802,7 +1802,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) return; if (pm_iir & PM_VEBOX_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VECS0]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -3780,7 +3780,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) * RPS interrupts will get enabled/disabled on demand when RPS * itself is enabled/disabled. */ - if (HAS_VEBOX(dev_priv)) { + if (HAS_ENGINE(dev_priv, VECS0)) { pm_irqs |= PM_VEBOX_USER_INTERRUPT; dev_priv->pm_ier |= PM_VEBOX_USER_INTERRUPT; } @@ -3892,18 +3892,21 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) { /* These are interrupts we'll toggle with the ring mask register */ u32 gt_interrupts[] = { - GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT, - GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT, + (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT), + + (GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT), + 0, - GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT - }; + + (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT) + }; dev_priv->pm_ier = 0x0; dev_priv->pm_imr = ~dev_priv->pm_ier; @@ -4231,7 +4234,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) I915_WRITE16(IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4339,7 +4342,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4484,10 +4487,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]); if (iir & I915_BSD_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index a9211c370cd1..c42c5ccf38fe 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -94,7 +94,7 @@ .gpu_reset_clobbers_display = true, \ .hws_needs_physical = 1, \ .unfenced_needs_alignment = 1, \ - .ring_mask = RENDER_RING, \ + .engine_mask = BIT(RCS0), \ .has_snoop = true, \ .has_coherent_ggtt = false, \ GEN_DEFAULT_PIPEOFFSETS, \ @@ -133,7 +133,7 @@ static const struct intel_device_info intel_i865g_info = { .num_pipes = 2, \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ - .ring_mask = RENDER_RING, \ + .engine_mask = BIT(RCS0), \ .has_snoop = true, \ .has_coherent_ggtt = true, \ GEN_DEFAULT_PIPEOFFSETS, \ @@ -210,7 +210,7 @@ static const struct intel_device_info intel_pineview_info = { .display.has_hotplug = 1, \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ - .ring_mask = RENDER_RING, \ + .engine_mask = BIT(RCS0), \ .has_snoop = true, \ .has_coherent_ggtt = true, \ GEN_DEFAULT_PIPEOFFSETS, \ @@ -239,7 +239,7 @@ static const struct intel_device_info intel_i965gm_info = { static const struct intel_device_info intel_g45_info = { GEN4_FEATURES, PLATFORM(INTEL_G45), - .ring_mask = RENDER_RING | BSD_RING, + .engine_mask = BIT(RCS0) | BIT(VCS0), .gpu_reset_clobbers_display = false, }; @@ -249,7 +249,7 @@ static const struct intel_device_info intel_gm45_info = { .is_mobile = 1, .display.has_fbc = 1, .display.supports_tv = 1, - .ring_mask = RENDER_RING | BSD_RING, + .engine_mask = BIT(RCS0) | BIT(VCS0), .gpu_reset_clobbers_display = false, }; @@ -257,7 +257,7 @@ static const struct intel_device_info intel_gm45_info = { GEN(5), \ .num_pipes = 2, \ .display.has_hotplug = 1, \ - .ring_mask = RENDER_RING | BSD_RING, \ + .engine_mask = BIT(RCS0) | BIT(VCS0), \ .has_snoop = true, \ .has_coherent_ggtt = true, \ /* ilk does support rc6, but we do not implement [power] contexts */ \ @@ -283,7 +283,7 @@ static const struct intel_device_info intel_ironlake_m_info = { .num_pipes = 2, \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ - .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ .has_coherent_ggtt = true, \ .has_llc = 1, \ .has_rc6 = 1, \ @@ -328,7 +328,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { .num_pipes = 3, \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ - .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ .has_coherent_ggtt = true, \ .has_llc = 1, \ .has_rc6 = 1, \ @@ -389,7 +389,7 @@ static const struct intel_device_info intel_valleyview_info = { .ppgtt = INTEL_PPGTT_FULL, .has_snoop = true, .has_coherent_ggtt = false, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING, + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), .display_mmio_offset = VLV_DISPLAY_BASE, GEN_DEFAULT_PAGE_SIZES, GEN_DEFAULT_PIPEOFFSETS, @@ -398,7 +398,7 @@ static const struct intel_device_info intel_valleyview_info = { #define G75_FEATURES \ GEN7_FEATURES, \ - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ .display.has_ddi = 1, \ .has_fpga_dbg = 1, \ .display.has_psr = 1, \ @@ -462,7 +462,8 @@ static const struct intel_device_info intel_broadwell_rsvd_info = { static const struct intel_device_info intel_broadwell_gt3_info = { BDW_PLATFORM, .gt = 3, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, + .engine_mask = + BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1), }; static const struct intel_device_info intel_cherryview_info = { @@ -471,7 +472,7 @@ static const struct intel_device_info intel_cherryview_info = { .num_pipes = 3, .display.has_hotplug = 1, .is_lp = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), .has_64bit_reloc = 1, .has_runtime_pm = 1, .has_rc6 = 1, @@ -521,7 +522,8 @@ static const struct intel_device_info intel_skylake_gt2_info = { #define SKL_GT3_PLUS_PLATFORM \ SKL_PLATFORM, \ - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING + .engine_mask = \ + BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1) static const struct intel_device_info intel_skylake_gt3_info = { @@ -538,7 +540,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { GEN(9), \ .is_lp = 1, \ .display.has_hotplug = 1, \ - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ + .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ .num_pipes = 3, \ .has_64bit_reloc = 1, \ .display.has_ddi = 1, \ @@ -592,7 +594,8 @@ static const struct intel_device_info intel_kabylake_gt2_info = { static const struct intel_device_info intel_kabylake_gt3_info = { KBL_PLATFORM, .gt = 3, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, + .engine_mask = + BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1), }; #define CFL_PLATFORM \ @@ -612,7 +615,8 @@ static const struct intel_device_info intel_coffeelake_gt2_info = { static const struct intel_device_info intel_coffeelake_gt3_info = { CFL_PLATFORM, .gt = 3, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, + .engine_mask = + BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1), }; #define GEN10_FEATURES \ @@ -655,7 +659,8 @@ static const struct intel_device_info intel_icelake_11_info = { GEN11_FEATURES, PLATFORM(INTEL_ICELAKE), .is_alpha_support = 1, - .ring_mask = RENDER_RING | BLT_RING | VEBOX_RING | BSD_RING | BSD3_RING, + .engine_mask = + BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), }; #undef GEN diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 72a9a35b40e2..e5aea176c1da 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1202,7 +1202,7 @@ static int i915_oa_read(struct i915_perf_stream *stream, static struct intel_context *oa_pin_context(struct drm_i915_private *i915, struct i915_gem_context *ctx) { - struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_engine_cs *engine = i915->engine[RCS0]; struct intel_context *ce; int ret; @@ -1681,7 +1681,7 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, gen8_make_rpcs(dev_priv, &to_intel_context(ctx, - dev_priv->engine[RCS])->sseu)); + dev_priv->engine[RCS0])->sseu)); } /* @@ -1711,7 +1711,7 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { - struct intel_engine_cs *engine = dev_priv->engine[RCS]; + struct intel_engine_cs *engine = dev_priv->engine[RCS0]; unsigned int map_type = i915_coherent_map_type(dev_priv); struct i915_gem_context *ctx; struct i915_request *rq; @@ -2143,7 +2143,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, { struct i915_perf_stream *stream; - if (engine->id != RCS) + if (engine->class != RENDER_CLASS) return; stream = engine->i915->perf.oa.exclusive_stream; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 6560c356f279..084016200a1e 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -101,7 +101,7 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) * * Use RCS as proxy for all engines. */ - else if (intel_engine_supports_stats(i915->engine[RCS])) + else if (intel_engine_supports_stats(i915->engine[RCS0])) enable &= ~BIT(I915_SAMPLE_BUSY); /* diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c9b868347481..16ce9c609c65 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -210,14 +210,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) /* Engine ID */ -#define RCS_HW 0 -#define VCS_HW 1 -#define BCS_HW 2 -#define VECS_HW 3 -#define VCS2_HW 4 -#define VCS3_HW 6 -#define VCS4_HW 7 -#define VECS2_HW 12 +#define RCS0_HW 0 +#define VCS0_HW 1 +#define BCS0_HW 2 +#define VECS0_HW 3 +#define VCS1_HW 4 +#define VCS2_HW 6 +#define VCS3_HW 7 +#define VECS1_HW 12 /* Engine class */ @@ -7250,8 +7250,8 @@ enum { #define GEN8_GT_VECS_IRQ (1 << 6) #define GEN8_GT_GUC_IRQ (1 << 5) #define GEN8_GT_PM_IRQ (1 << 4) -#define GEN8_GT_VCS2_IRQ (1 << 3) -#define GEN8_GT_VCS1_IRQ (1 << 2) +#define GEN8_GT_VCS1_IRQ (1 << 3) /* NB: VCS2 in bspec! */ +#define GEN8_GT_VCS0_IRQ (1 << 2) /* NB: VCS1 in bpsec! */ #define GEN8_GT_BCS_IRQ (1 << 1) #define GEN8_GT_RCS_IRQ (1 << 0) @@ -7272,8 +7272,8 @@ enum { #define GEN8_RCS_IRQ_SHIFT 0 #define GEN8_BCS_IRQ_SHIFT 16 -#define GEN8_VCS1_IRQ_SHIFT 0 -#define GEN8_VCS2_IRQ_SHIFT 16 +#define GEN8_VCS0_IRQ_SHIFT 0 /* NB: VCS1 in bspec! */ +#define GEN8_VCS1_IRQ_SHIFT 16 /* NB: VCS2 in bpsec! */ #define GEN8_VECS_IRQ_SHIFT 0 #define GEN8_WD_IRQ_SHIFT 16 diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 55d6123dbba4..3fbaa72a9eac 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -297,12 +297,12 @@ static int gen6_reset_engines(struct drm_i915_private *i915, unsigned int retry) { struct intel_engine_cs *engine; - const u32 hw_engine_mask[I915_NUM_ENGINES] = { - [RCS] = GEN6_GRDOM_RENDER, - [BCS] = GEN6_GRDOM_BLT, - [VCS] = GEN6_GRDOM_MEDIA, - [VCS2] = GEN8_GRDOM_MEDIA2, - [VECS] = GEN6_GRDOM_VECS, + const u32 hw_engine_mask[] = { + [RCS0] = GEN6_GRDOM_RENDER, + [BCS0] = GEN6_GRDOM_BLT, + [VCS0] = GEN6_GRDOM_MEDIA, + [VCS1] = GEN8_GRDOM_MEDIA2, + [VECS0] = GEN6_GRDOM_VECS, }; u32 hw_mask; @@ -312,8 +312,10 @@ static int gen6_reset_engines(struct drm_i915_private *i915, unsigned int tmp; hw_mask = 0; - for_each_engine_masked(engine, i915, engine_mask, tmp) + for_each_engine_masked(engine, i915, engine_mask, tmp) { + GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; + } } return gen6_hw_domain_reset(i915, hw_mask); @@ -420,28 +422,27 @@ static int gen11_reset_engines(struct drm_i915_private *i915, unsigned int engine_mask, unsigned int retry) { - const u32 hw_engine_mask[I915_NUM_ENGINES] = { - [RCS] = GEN11_GRDOM_RENDER, - [BCS] = GEN11_GRDOM_BLT, - [VCS] = GEN11_GRDOM_MEDIA, - [VCS2] = GEN11_GRDOM_MEDIA2, - [VCS3] = GEN11_GRDOM_MEDIA3, - [VCS4] = GEN11_GRDOM_MEDIA4, - [VECS] = GEN11_GRDOM_VECS, - [VECS2] = GEN11_GRDOM_VECS2, + const u32 hw_engine_mask[] = { + [RCS0] = GEN11_GRDOM_RENDER, + [BCS0] = GEN11_GRDOM_BLT, + [VCS0] = GEN11_GRDOM_MEDIA, + [VCS1] = GEN11_GRDOM_MEDIA2, + [VCS2] = GEN11_GRDOM_MEDIA3, + [VCS3] = GEN11_GRDOM_MEDIA4, + [VECS0] = GEN11_GRDOM_VECS, + [VECS1] = GEN11_GRDOM_VECS2, }; struct intel_engine_cs *engine; unsigned int tmp; u32 hw_mask; int ret; - BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES); - if (engine_mask == ALL_ENGINES) { hw_mask = GEN11_GRDOM_FULL; } else { hw_mask = 0; for_each_engine_masked(engine, i915, engine_mask, tmp) { + GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; hw_mask |= gen11_lock_sfc(i915, engine); } @@ -692,7 +693,7 @@ static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) return err; for_each_engine(engine, i915, id) - intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id)); + intel_engine_reset(engine, stalled_mask & engine->mask); i915_gem_restore_fences(i915); @@ -1057,7 +1058,7 @@ error: static inline int intel_gt_reset_engine(struct drm_i915_private *i915, struct intel_engine_cs *engine) { - return intel_gpu_reset(i915, intel_engine_flag(engine)); + return intel_gpu_reset(i915, engine->mask); } /** @@ -1193,7 +1194,7 @@ void i915_clear_error_registers(struct drm_i915_private *dev_priv) I915_READ(RING_FAULT_REG(engine)) & ~RING_FAULT_VALID); } - POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); + POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS0])); } } @@ -1241,7 +1242,7 @@ void i915_handle_error(struct drm_i915_private *i915, */ wakeref = intel_runtime_pm_get(i915); - engine_mask &= INTEL_INFO(i915)->ring_mask; + engine_mask &= INTEL_INFO(i915)->engine_mask; if (flags & I915_ERROR_CAPTURE) { i915_capture_error_state(i915, engine_mask, msg); @@ -1260,7 +1261,7 @@ void i915_handle_error(struct drm_i915_private *i915, continue; if (i915_reset_engine(engine, msg) == 0) - engine_mask &= ~intel_engine_flag(engine); + engine_mask &= ~engine->mask; clear_bit(I915_RESET_ENGINE + engine->id, &error->flags); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 855a5074ad77..2c1b46cfd6d3 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -738,7 +738,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) runtime->num_scalers[PIPE_C] = 1; } - BUILD_BUG_ON(I915_NUM_ENGINES > BITS_PER_TYPE(intel_ring_mask_t)); + BUILD_BUG_ON(BITS_PER_TYPE(intel_engine_mask_t) < I915_NUM_ENGINES); if (IS_GEN(dev_priv, 11)) for_each_pipe(dev_priv, pipe) @@ -887,7 +887,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) continue; if (!(BIT(i) & RUNTIME_INFO(dev_priv)->vdbox_enable)) { - info->ring_mask &= ~ENGINE_MASK(_VCS(i)); + info->engine_mask &= ~BIT(_VCS(i)); DRM_DEBUG_DRIVER("vcs%u fused off\n", i); continue; } @@ -906,7 +906,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) continue; if (!(BIT(i) & RUNTIME_INFO(dev_priv)->vebox_enable)) { - info->ring_mask &= ~ENGINE_MASK(_VECS(i)); + info->engine_mask &= ~BIT(_VECS(i)); DRM_DEBUG_DRIVER("vecs%u fused off\n", i); } } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index e8b8661df746..047d10bdd455 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -150,14 +150,14 @@ struct sseu_dev_info { u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; -typedef u8 intel_ring_mask_t; +typedef u8 intel_engine_mask_t; struct intel_device_info { u16 gen_mask; u8 gen; u8 gt; /* GT number, 0 if undefined */ - intel_ring_mask_t ring_mask; /* Rings supported by the HW */ + intel_engine_mask_t engine_mask; /* Engines supported by the HW */ enum intel_platform platform; u32 platform_mask; @@ -200,7 +200,7 @@ struct intel_runtime_info { u8 num_sprites[I915_MAX_PIPES]; u8 num_scalers[I915_MAX_PIPES]; - u8 num_rings; + u8 num_engines; /* Slice/subslice/EU info */ struct sseu_dev_info sseu; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f3f161c5627b..62a2bbbbcc64 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -94,24 +94,24 @@ struct engine_info { }; static const struct engine_info intel_engines[] = { - [RCS] = { - .hw_id = RCS_HW, + [RCS0] = { + .hw_id = RCS0_HW, .class = RENDER_CLASS, .instance = 0, .mmio_bases = { { .gen = 1, .base = RENDER_RING_BASE } }, }, - [BCS] = { - .hw_id = BCS_HW, + [BCS0] = { + .hw_id = BCS0_HW, .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_bases = { { .gen = 6, .base = BLT_RING_BASE } }, }, - [VCS] = { - .hw_id = VCS_HW, + [VCS0] = { + .hw_id = VCS0_HW, .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_bases = { @@ -120,8 +120,8 @@ static const struct engine_info intel_engines[] = { { .gen = 4, .base = BSD_RING_BASE } }, }, - [VCS2] = { - .hw_id = VCS2_HW, + [VCS1] = { + .hw_id = VCS1_HW, .class = VIDEO_DECODE_CLASS, .instance = 1, .mmio_bases = { @@ -129,24 +129,24 @@ static const struct engine_info intel_engines[] = { { .gen = 8, .base = GEN8_BSD2_RING_BASE } }, }, - [VCS3] = { - .hw_id = VCS3_HW, + [VCS2] = { + .hw_id = VCS2_HW, .class = VIDEO_DECODE_CLASS, .instance = 2, .mmio_bases = { { .gen = 11, .base = GEN11_BSD3_RING_BASE } }, }, - [VCS4] = { - .hw_id = VCS4_HW, + [VCS3] = { + .hw_id = VCS3_HW, .class = VIDEO_DECODE_CLASS, .instance = 3, .mmio_bases = { { .gen = 11, .base = GEN11_BSD4_RING_BASE } }, }, - [VECS] = { - .hw_id = VECS_HW, + [VECS0] = { + .hw_id = VECS0_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_bases = { @@ -154,8 +154,8 @@ static const struct engine_info intel_engines[] = { { .gen = 7, .base = VEBOX_RING_BASE } }, }, - [VECS2] = { - .hw_id = VECS2_HW, + [VECS1] = { + .hw_id = VECS1_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, .mmio_bases = { @@ -304,7 +304,10 @@ intel_engine_setup(struct drm_i915_private *dev_priv, if (!engine) return -ENOMEM; + BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); + engine->id = id; + engine->mask = BIT(id); engine->i915 = dev_priv; __sprint_engine_name(engine->name, info); engine->hw_id = engine->guc_id = info->hw_id; @@ -345,15 +348,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv, int intel_engines_init_mmio(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); - const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; + const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask; struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int mask = 0; unsigned int i; int err; - WARN_ON(ring_mask == 0); - WARN_ON(ring_mask & + WARN_ON(engine_mask == 0); + WARN_ON(engine_mask & GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES)); if (i915_inject_load_failure()) @@ -367,7 +370,7 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv) if (err) goto cleanup; - mask |= ENGINE_MASK(i); + mask |= BIT(i); } /* @@ -375,16 +378,16 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv) * are added to the driver by a warning and disabling the forgotten * engines. */ - if (WARN_ON(mask != ring_mask)) - device_info->ring_mask = mask; + if (WARN_ON(mask != engine_mask)) + device_info->engine_mask = mask; /* We always presume we have at least RCS available for later probing */ - if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) { + if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) { err = -ENODEV; goto cleanup; } - RUNTIME_INFO(dev_priv)->num_rings = hweight32(mask); + RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask); i915_check_and_clear_faults(dev_priv); @@ -954,7 +957,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, default: instdone->instdone = I915_READ(RING_INSTDONE(mmio_base)); - if (engine->id != RCS) + if (engine->id != RCS0) break; instdone->slice_common = I915_READ(GEN7_SC_INSTDONE); @@ -970,7 +973,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, case 7: instdone->instdone = I915_READ(RING_INSTDONE(mmio_base)); - if (engine->id != RCS) + if (engine->id != RCS0) break; instdone->slice_common = I915_READ(GEN7_SC_INSTDONE); @@ -983,7 +986,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, case 4: instdone->instdone = I915_READ(RING_INSTDONE(mmio_base)); - if (engine->id == RCS) + if (engine->id == RCS0) /* HACK: Using the wrong struct member */ instdone->slice_common = I915_READ(GEN4_INSTDONE1); break; @@ -1355,7 +1358,7 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, &engine->execlists; u64 addr; - if (engine->id == RCS && IS_GEN_RANGE(dev_priv, 4, 7)) + if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7)) drm_printf(m, "\tCCID: 0x%08x\n", I915_READ(CCID)); drm_printf(m, "\tRING_START: 0x%08x\n", I915_READ(RING_START(engine->mmio_base))); diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c index f0db62887f50..c51d558fd431 100644 --- a/drivers/gpu/drm/i915/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -122,7 +122,7 @@ int intel_guc_ads_create(struct intel_guc *guc) * because our GuC shared data is there. */ kernel_ctx_vma = to_intel_context(dev_priv->kernel_context, - dev_priv->engine[RCS])->state; + dev_priv->engine[RCS0])->state; blob->ads.golden_context_lrca = intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset; diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 56ba2fcbabe6..e3afc91f0e7b 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -575,7 +575,7 @@ static void inject_preempt_context(struct work_struct *work) u32 *cs; cs = ce->ring->vaddr; - if (engine->id == RCS) { + if (engine->class == RENDER_CLASS) { cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED, addr, @@ -1030,7 +1030,7 @@ static int guc_clients_create(struct intel_guc *guc) GEM_BUG_ON(guc->preempt_client); client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, + INTEL_INFO(dev_priv)->engine_mask, GUC_CLIENT_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (IS_ERR(client)) { @@ -1041,7 +1041,7 @@ static int guc_clients_create(struct intel_guc *guc) if (dev_priv->preempt_context) { client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, + INTEL_INFO(dev_priv)->engine_mask, GUC_CLIENT_PRIORITY_KMD_HIGH, dev_priv->preempt_context); if (IS_ERR(client)) { diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index f1d8dfc58049..57ed49dc19c4 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -56,7 +56,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine) int slice; int subslice; - if (engine->id != RCS) + if (engine->id != RCS0) return true; intel_engine_get_instdone(engine, &instdone); @@ -120,7 +120,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) */ tmp = I915_READ_CTL(engine); if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, BIT(engine->id), 0, + i915_handle_error(dev_priv, engine->mask, 0, "stuck wait on %s", engine->name); I915_WRITE_CTL(engine, tmp); return ENGINE_WAIT_KICK; @@ -282,13 +282,13 @@ static void i915_hangcheck_elapsed(struct work_struct *work) hangcheck_store_sample(engine, &hc); if (hc.stalled) { - hung |= intel_engine_flag(engine); + hung |= engine->mask; if (hc.action != ENGINE_DEAD) - stuck |= intel_engine_flag(engine); + stuck |= engine->mask; } if (hc.wedged) - wedged |= intel_engine_flag(engine); + wedged |= engine->mask; } if (GEM_SHOW_DEBUG() && (hung | stuck)) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 578c8c98c718..6ec6c4e175a2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1777,7 +1777,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) unsigned int i; int ret; - if (GEM_DEBUG_WARN_ON(engine->id != RCS)) + if (GEM_DEBUG_WARN_ON(engine->id != RCS0)) return -EINVAL; switch (INTEL_GEN(engine->i915)) { @@ -2376,11 +2376,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) if (INTEL_GEN(engine->i915) < 11) { const u8 irq_shifts[] = { - [RCS] = GEN8_RCS_IRQ_SHIFT, - [BCS] = GEN8_BCS_IRQ_SHIFT, - [VCS] = GEN8_VCS1_IRQ_SHIFT, - [VCS2] = GEN8_VCS2_IRQ_SHIFT, - [VECS] = GEN8_VECS_IRQ_SHIFT, + [RCS0] = GEN8_RCS_IRQ_SHIFT, + [BCS0] = GEN8_BCS_IRQ_SHIFT, + [VCS0] = GEN8_VCS0_IRQ_SHIFT, + [VCS1] = GEN8_VCS1_IRQ_SHIFT, + [VECS0] = GEN8_VECS_IRQ_SHIFT, }; shift = irq_shifts[engine->id]; diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 331e7a678fb7..80dcc08222d0 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -288,17 +288,17 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) { switch (engine_id) { - case RCS: + case RCS0: return GEN9_GFX_MOCS(index); - case VCS: + case VCS0: return GEN9_MFX0_MOCS(index); - case BCS: + case BCS0: return GEN9_BLT_MOCS(index); - case VECS: + case VECS0: return GEN9_VEBOX_MOCS(index); - case VCS2: + case VCS1: return GEN9_MFX1_MOCS(index); - case VCS3: + case VCS2: return GEN11_MFX2_MOCS(index); default: MISSING_CASE(engine_id); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index c0df1dbb0069..a882b8d42bd9 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -236,7 +236,7 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay, static struct i915_request *alloc_request(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = dev_priv->engine[RCS]; + struct intel_engine_cs *engine = dev_priv->engine[RCS0]; return i915_request_alloc(engine, dev_priv->kernel_context); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1b96b0960adc..1dd0c99b893f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -552,16 +552,17 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) */ default: GEM_BUG_ON(engine->id); - case RCS: + /* fallthrough */ + case RCS0: hwsp = RENDER_HWS_PGA_GEN7; break; - case BCS: + case BCS0: hwsp = BLT_HWS_PGA_GEN7; break; - case VCS: + case VCS0: hwsp = BSD_HWS_PGA_GEN7; break; - case VECS: + case VECS0: hwsp = VEBOX_HWS_PGA_GEN7; break; } @@ -1692,8 +1693,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct drm_i915_private *i915 = rq->i915; struct intel_engine_cs *engine = rq->engine; enum intel_engine_id id; - const int num_rings = - IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_rings - 1 : 0; + const int num_engines = + IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; bool force_restore = false; int len; u32 *cs; @@ -1707,7 +1708,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) len = 4; if (IS_GEN(i915, 7)) - len += 2 + (num_rings ? 4*num_rings + 6 : 0); + len += 2 + (num_engines ? 4 * num_engines + 6 : 0); if (flags & MI_FORCE_RESTORE) { GEM_BUG_ON(flags & MI_RESTORE_INHIBIT); flags &= ~MI_FORCE_RESTORE; @@ -1722,10 +1723,10 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (IS_GEN(i915, 7)) { *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - if (num_rings) { + if (num_engines) { struct intel_engine_cs *signaller; - *cs++ = MI_LOAD_REGISTER_IMM(num_rings); + *cs++ = MI_LOAD_REGISTER_IMM(num_engines); for_each_engine(signaller, i915, id) { if (signaller == engine) continue; @@ -1768,11 +1769,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; if (IS_GEN(i915, 7)) { - if (num_rings) { + if (num_engines) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ - *cs++ = MI_LOAD_REGISTER_IMM(num_rings); + *cs++ = MI_LOAD_REGISTER_IMM(num_engines); for_each_engine(signaller, i915, id) { if (signaller == engine) continue; @@ -1850,7 +1851,7 @@ static int switch_context(struct i915_request *rq) * explanation. */ loops = 1; - if (engine->id == BCS && IS_VALLEYVIEW(engine->i915)) + if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915)) loops = 32; do { @@ -1859,15 +1860,15 @@ static int switch_context(struct i915_request *rq) goto err; } while (--loops); - if (intel_engine_flag(engine) & ppgtt->pd_dirty_rings) { - unwind_mm = intel_engine_flag(engine); - ppgtt->pd_dirty_rings &= ~unwind_mm; + if (ppgtt->pd_dirty_engines & engine->mask) { + unwind_mm = engine->mask; + ppgtt->pd_dirty_engines &= ~unwind_mm; hw_flags = MI_FORCE_RESTORE; } } if (rq->hw_context->state) { - GEM_BUG_ON(engine->id != RCS); + GEM_BUG_ON(engine->id != RCS0); /* * The kernel context(s) is treated as pure scratch and is not @@ -1927,7 +1928,7 @@ static int switch_context(struct i915_request *rq) err_mm: if (unwind_mm) - ppgtt->pd_dirty_rings |= unwind_mm; + ppgtt->pd_dirty_engines |= unwind_mm; err: return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 12e79828dbd5..18e865ff6637 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -10,12 +10,12 @@ #include #include "i915_gem_batch_pool.h" - -#include "i915_reg.h" #include "i915_pmu.h" +#include "i915_reg.h" #include "i915_request.h" #include "i915_selftest.h" #include "i915_timeline.h" +#include "intel_device_info.h" #include "intel_gpu_commands.h" #include "intel_workarounds.h" @@ -175,16 +175,16 @@ struct i915_request; * Keep instances of the same type engine together. */ enum intel_engine_id { - RCS = 0, - BCS, - VCS, + RCS0 = 0, + BCS0, + VCS0, + VCS1, VCS2, VCS3, - VCS4, -#define _VCS(n) (VCS + (n)) - VECS, - VECS2 -#define _VECS(n) (VECS + (n)) +#define _VCS(n) (VCS0 + (n)) + VECS0, + VECS1 +#define _VECS(n) (VECS0 + (n)) }; struct st_preempt_hang { @@ -334,6 +334,7 @@ struct intel_engine_cs { enum intel_engine_id id; unsigned int hw_id; unsigned int guc_id; + intel_engine_mask_t mask; u8 uabi_class; @@ -667,12 +668,6 @@ execlists_port_complete(struct intel_engine_execlists * const execlists, return port; } -static inline unsigned int -intel_engine_flag(const struct intel_engine_cs *engine) -{ - return BIT(engine->id); -} - static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 89b4007d5200..2ff54950891e 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -1055,7 +1055,7 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; struct i915_wa_list *w = &engine->whitelist; - GEM_BUG_ON(engine->id != RCS); + GEM_BUG_ON(engine->id != RCS0); wa_init_start(w, "whitelist"); @@ -1228,7 +1228,7 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8)) return; - if (engine->id == RCS) + if (engine->id == RCS0) rcs_engine_wa_init(engine, wal); else xcs_engine_wa_init(engine, wal); diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 4b9ded4ca0f5..1e66cff985f8 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1535,7 +1535,7 @@ static int igt_ppgtt_pin_update(void *arg) * land in the now stale 2M page. */ - err = gpu_write(vma, ctx, dev_priv->engine[RCS], 0, 0xdeadbeaf); + err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf); if (err) goto out_unpin; @@ -1653,7 +1653,7 @@ static int igt_shrink_thp(void *arg) if (err) goto out_unpin; - err = gpu_write(vma, ctx, i915->engine[RCS], 0, 0xdeadbeaf); + err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf); if (err) goto out_unpin; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 9c16aff87028..e43630b40fce 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -202,7 +202,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context); + rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -256,7 +256,7 @@ static bool needs_mi_store_dword(struct drm_i915_private *i915) if (i915_terminally_wedged(i915)) return false; - return intel_engine_can_store_dword(i915->engine[RCS]); + return intel_engine_can_store_dword(i915->engine[RCS0]); } static const struct igt_coherency_mode { diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 3a238b9628b3..30111c004eb6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -556,7 +556,7 @@ static int igt_ctx_exec(void *arg) ncontexts++; } pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n", - ncontexts, RUNTIME_INFO(i915)->num_rings, ndwords); + ncontexts, RUNTIME_INFO(i915)->num_engines, ndwords); dw = 0; list_for_each_entry(obj, &objects, st_link) { @@ -923,7 +923,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, unsigned int flags) { struct intel_sseu default_sseu = intel_device_default_sseu(i915); - struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_engine_cs *engine = i915->engine[RCS0]; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; struct intel_sseu pg_sseu; @@ -1126,7 +1126,7 @@ static int igt_ctx_readonly(void *arg) } } pr_info("Submitted %lu dwords (across %u engines)\n", - ndwords, RUNTIME_INFO(i915)->num_rings); + ndwords, RUNTIME_INFO(i915)->num_engines); dw = 0; list_for_each_entry(obj, &objects, st_link) { @@ -1459,7 +1459,7 @@ static int igt_vm_isolation(void *arg) count += this; } pr_info("Checked %lu scratch offsets across %d engines\n", - count, RUNTIME_INFO(i915)->num_rings); + count, RUNTIME_INFO(i915)->num_engines); out_rpm: intel_runtime_pm_put(i915, wakeref); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 784982aed625..971148fbe6f5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -468,7 +468,7 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) if (err) return err; - rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context); + rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 7e1b65b8eb19..3eb6a6b075ab 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -42,7 +42,7 @@ static int igt_add_request(void *arg) /* Basic preliminary test to create a request and let it loose! */ mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], + request = mock_request(i915->engine[RCS0], i915->kernel_context, HZ / 10); if (!request) @@ -66,7 +66,7 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], i915->kernel_context, T); + request = mock_request(i915->engine[RCS0], i915->kernel_context, T); if (!request) { err = -ENOMEM; goto out_unlock; @@ -136,7 +136,7 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], i915->kernel_context, T); + request = mock_request(i915->engine[RCS0], i915->kernel_context, T); if (!request) { err = -ENOMEM; goto out_locked; @@ -193,7 +193,7 @@ static int igt_request_rewind(void *arg) mutex_lock(&i915->drm.struct_mutex); ctx[0] = mock_context(i915, "A"); - request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); + request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ); if (!request) { err = -ENOMEM; goto err_context_0; @@ -203,7 +203,7 @@ static int igt_request_rewind(void *arg) i915_request_add(request); ctx[1] = mock_context(i915, "B"); - vip = mock_request(i915->engine[RCS], ctx[1], 0); + vip = mock_request(i915->engine[RCS0], ctx[1], 0); if (!vip) { err = -ENOMEM; goto err_context_1; @@ -415,7 +415,7 @@ static int mock_breadcrumbs_smoketest(void *arg) { struct drm_i915_private *i915 = arg; struct smoketest t = { - .engine = i915->engine[RCS], + .engine = i915->engine[RCS0], .ncontexts = 1024, .max_batch = 1024, .request_alloc = __mock_request_alloc @@ -1216,7 +1216,7 @@ out_flush: num_fences += atomic_long_read(&t[id].num_fences); } pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", - num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus); + num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); mutex_lock(&i915->drm.struct_mutex); ret = igt_live_test_end(&live) ?: ret; diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index c5e0a0e98fcb..b05a21eaa8f4 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -111,7 +111,7 @@ static int validate_client(struct intel_guc_client *client, dev_priv->preempt_context : dev_priv->kernel_context; if (client->owner != ctx_owner || - client->engines != INTEL_INFO(dev_priv)->ring_mask || + client->engines != INTEL_INFO(dev_priv)->engine_mask || client->priority != client_priority || client->doorbell_id == GUC_DOORBELL_INVALID) return -EINVAL; @@ -261,7 +261,7 @@ static int igt_guc_doorbells(void *arg) for (i = 0; i < ATTEMPTS; i++) { clients[i] = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, + INTEL_INFO(dev_priv)->engine_mask, i % GUC_CLIENT_PRIORITY_NUM, dev_priv->kernel_context); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 12e047328ab8..10658ad05305 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -1126,7 +1126,7 @@ static int igt_reset_wait(void *arg) long timeout; int err; - if (!intel_engine_can_store_dword(i915->engine[RCS])) + if (!intel_engine_can_store_dword(i915->engine[RCS0])) return 0; /* Check that we detect a stuck waiter and issue a reset */ @@ -1138,7 +1138,7 @@ static int igt_reset_wait(void *arg) if (err) goto unlock; - rq = hang_create_request(&h, i915->engine[RCS]); + rq = hang_create_request(&h, i915->engine[RCS0]); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto fini; @@ -1255,7 +1255,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, struct hang h; int err; - if (!intel_engine_can_store_dword(i915->engine[RCS])) + if (!intel_engine_can_store_dword(i915->engine[RCS0])) return 0; /* Check that we can recover an unbind stuck on a hanging request */ @@ -1285,7 +1285,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, goto out_obj; } - rq = hang_create_request(&h, i915->engine[RCS]); + rq = hang_create_request(&h, i915->engine[RCS0]); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_obj; @@ -1358,7 +1358,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, out_reset: igt_global_reset_lock(i915); - fake_hangcheck(rq->i915, intel_engine_flag(rq->engine)); + fake_hangcheck(rq->i915, rq->engine->mask); igt_global_reset_unlock(i915); if (tsk) { @@ -1537,7 +1537,7 @@ static int igt_reset_queue(void *arg) goto fini; } - reset_count = fake_hangcheck(i915, ENGINE_MASK(id)); + reset_count = fake_hangcheck(i915, BIT(id)); if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1596,7 +1596,7 @@ unlock: static int igt_handle_error(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_engine_cs *engine = i915->engine[RCS0]; struct hang h; struct i915_request *rq; struct i915_gpu_state *error; @@ -1643,7 +1643,7 @@ static int igt_handle_error(void *arg) /* Temporarily disable error capture */ error = xchg(&i915->gpu_error.first_error, (void *)-1); - i915_handle_error(i915, ENGINE_MASK(engine->id), 0, NULL); + i915_handle_error(i915, engine->mask, 0, NULL); xchg(&i915->gpu_error.first_error, error); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 0677038a5466..565e949a4722 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -929,7 +929,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_rings, smoke->ncontext); + RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); return 0; } @@ -957,7 +957,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_rings, smoke->ncontext); + RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); return 0; } diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 9f12a0ec804b..f2a2b51a4662 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -222,7 +222,7 @@ out_put: static int do_device_reset(struct intel_engine_cs *engine) { - i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds"); + i915_reset(engine->i915, engine->mask, "live_workarounds"); return 0; } @@ -709,7 +709,7 @@ out_rpm: static int live_reset_whitelist(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS]; + struct intel_engine_cs *engine = i915->engine[RCS0]; int err = 0; /* If we reset the gpu, we should not lose the RING_NONPRIV */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index ec1ae948954c..c2c954f64226 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -223,6 +223,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.i915 = i915; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; + engine->base.mask = BIT(id); engine->base.status_page.addr = (void *)(engine + 1); engine->base.context_pin = mock_context_pin; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index c27616efc4f8..b2c7808e0595 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -206,13 +206,13 @@ struct drm_i915_private *mock_gem_device(void) mock_init_ggtt(i915, &i915->ggtt); - mkwrite_device_info(i915)->ring_mask = BIT(0); + mkwrite_device_info(i915)->engine_mask = BIT(0); i915->kernel_context = mock_context(i915, NULL); if (!i915->kernel_context) goto err_unlock; - i915->engine[RCS] = mock_engine(i915, "mock", RCS); - if (!i915->engine[RCS]) + i915->engine[RCS0] = mock_engine(i915, "mock", RCS0); + if (!i915->engine[RCS0]) goto err_context; mutex_unlock(&i915->drm.struct_mutex); -- cgit v1.2.3 From cf4331dd3975f89fe4d20c6113620c9139b1c147 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Mar 2019 18:03:32 +0000 Subject: drm/i915: Move find_active_request() to the engine To find the active request, we need only search along the individual engine for the right request. This does not require touching any global GEM state, so move it into the engine compartment. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190305180332.30900-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 3 --- drivers/gpu/drm/i915/i915_gem.c | 45 ------------------------------- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 47 ++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 5 files changed, 50 insertions(+), 50 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 08ead854ac2d..ff039750069d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2996,9 +2996,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); -struct i915_request * -i915_gem_find_active_request(struct intel_engine_cs *engine); - static inline bool __i915_wedged(struct i915_gpu_error *error) { return unlikely(test_bit(I915_WEDGED, &error->flags)); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 69413f99ed04..c67369bd145b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2803,51 +2803,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } -static bool match_ring(struct i915_request *rq) -{ - struct drm_i915_private *dev_priv = rq->i915; - u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); - - return ring == i915_ggtt_offset(rq->ring->vma); -} - -struct i915_request * -i915_gem_find_active_request(struct intel_engine_cs *engine) -{ - struct i915_request *request, *active = NULL; - unsigned long flags; - - /* - * We are called by the error capture, reset and to dump engine - * state at random points in time. In particular, note that neither is - * crucially ordered with an interrupt. After a hang, the GPU is dead - * and we assume that no more writes can happen (we waited long enough - * for all writes that were in transaction to be flushed) - adding an - * extra delay for a recent interrupt is pointless. Hence, we do - * not need an engine->irq_seqno_barrier() before the seqno reads. - * At all other times, we must assume the GPU is still running, but - * we only care about the snapshot of this moment. - */ - spin_lock_irqsave(&engine->timeline.lock, flags); - list_for_each_entry(request, &engine->timeline.requests, link) { - if (i915_request_completed(request)) - continue; - - if (!i915_request_started(request)) - break; - - /* More than one preemptible request may match! */ - if (!match_ring(request)) - break; - - active = request; - break; - } - spin_unlock_irqrestore(&engine->timeline.lock, flags); - - return active; -} - static void i915_gem_retire_work_handler(struct work_struct *work) { diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5f1cdbc9eb5d..3d8020888604 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1411,7 +1411,7 @@ static void gem_record_rings(struct i915_gpu_state *error) error_record_engine_registers(error, engine, ee); error_record_engine_execlists(engine, ee); - request = i915_gem_find_active_request(engine); + request = intel_engine_find_active_request(engine); if (request) { struct i915_gem_context *ctx = request->gem_context; struct intel_ring *ring; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 62a2bbbbcc64..555a4590fa23 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1545,7 +1545,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tlast "); - rq = i915_gem_find_active_request(engine); + rq = intel_engine_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); @@ -1712,6 +1712,51 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine) write_sequnlock_irqrestore(&engine->stats.lock, flags); } +static bool match_ring(struct i915_request *rq) +{ + struct drm_i915_private *dev_priv = rq->i915; + u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); + + return ring == i915_ggtt_offset(rq->ring->vma); +} + +struct i915_request * +intel_engine_find_active_request(struct intel_engine_cs *engine) +{ + struct i915_request *request, *active = NULL; + unsigned long flags; + + /* + * We are called by the error capture, reset and to dump engine + * state at random points in time. In particular, note that neither is + * crucially ordered with an interrupt. After a hang, the GPU is dead + * and we assume that no more writes can happen (we waited long enough + * for all writes that were in transaction to be flushed) - adding an + * extra delay for a recent interrupt is pointless. Hence, we do + * not need an engine->irq_seqno_barrier() before the seqno reads. + * At all other times, we must assume the GPU is still running, but + * we only care about the snapshot of this moment. + */ + spin_lock_irqsave(&engine->timeline.lock, flags); + list_for_each_entry(request, &engine->timeline.requests, link) { + if (i915_request_completed(request)) + continue; + + if (!i915_request_started(request)) + break; + + /* More than one preemptible request may match! */ + if (!match_ring(request)) + break; + + active = request; + break; + } + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + return active; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #include "selftests/intel_engine_cs.c" diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 18e865ff6637..84b7047e2df5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -1014,6 +1014,9 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine); ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); +struct i915_request * +intel_engine_find_active_request(struct intel_engine_cs *engine); + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) -- cgit v1.2.3 From c6eeb4797eb94ad14bb34adfccbc6addad2cfd48 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Mar 2019 09:36:56 +0000 Subject: drm/i915: Reduce presumption of request ordering for barriers Currently we assume that we know the order in which requests run and so can determine if we need to reissue a switch-to-kernel-context prior to idling. That assumption does not hold for the future, so instead of tracking which barriers have been used, simply determine if we have ever switched away from the kernel context by using the engine and before idling ensure that all engines that have been used since the last idle are synchronously switched back to the kernel context for safety (and else of shrinking memory while idle). v2: Use intel_engine_mask_t and ALL_ENGINES Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190308093657.8640-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 +++-- drivers/gpu/drm/i915/i915_gem_context.c | 66 ++--------------------- drivers/gpu/drm/i915/i915_gem_context.h | 3 +- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem_context.c | 3 +- drivers/gpu/drm/i915/selftests/igt_flush_test.c | 2 +- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 4 ++ 10 files changed, 27 insertions(+), 72 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b8a5281d8adf..c4ffe19ec698 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1995,6 +1995,7 @@ struct drm_i915_private { struct list_head hwsp_free_list; } timelines; + intel_engine_mask_t active_engines; struct list_head active_rings; struct list_head closed_vma; u32 active_requests; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 539ee78f6d9a..961237b90b40 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2845,7 +2845,8 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) } } -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, + unsigned long mask) { bool result = true; @@ -2854,7 +2855,7 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) * to save itself before we report the failure. Yes, this may be a * false positive due to e.g. ENOMEM, caveat emptor! */ - if (i915_gem_switch_to_kernel_context(i915)) + if (i915_gem_switch_to_kernel_context(i915, mask)) result = false; if (i915_gem_wait_for_idle(i915, @@ -2879,7 +2880,8 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) static bool load_power_context(struct drm_i915_private *i915) { - if (!switch_to_kernel_context_sync(i915)) + /* Force loading the kernel context on all engines */ + if (!switch_to_kernel_context_sync(i915, ALL_ENGINES)) return false; /* @@ -2927,7 +2929,7 @@ i915_gem_idle_work_handler(struct work_struct *work) !i915->gt.active_requests) { ++i915->gt.active_requests; /* don't requeue idle */ - switch_to_kernel_context_sync(i915); + switch_to_kernel_context_sync(i915, i915->gt.active_engines); if (!--i915->gt.active_requests) { __i915_gem_park(i915); @@ -4380,7 +4382,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - switch_to_kernel_context_sync(i915); + switch_to_kernel_context_sync(i915, i915->gt.active_engines); mutex_unlock(&i915->drm.struct_mutex); i915_reset_flush(i915); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9a3eb4f66d85..486203e9d205 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -704,63 +704,10 @@ last_request_on_engine(struct i915_timeline *timeline, return NULL; } -static bool engine_has_kernel_context_barrier(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - const struct intel_context * const ce = - to_intel_context(i915->kernel_context, engine); - struct i915_timeline *barrier = ce->ring->timeline; - struct intel_ring *ring; - bool any_active = false; - - lockdep_assert_held(&i915->drm.struct_mutex); - list_for_each_entry(ring, &i915->gt.active_rings, active_link) { - struct i915_request *rq; - - rq = last_request_on_engine(ring->timeline, engine); - if (!rq) - continue; - - any_active = true; - - if (rq->hw_context == ce) - continue; - - /* - * Was this request submitted after the previous - * switch-to-kernel-context? - */ - if (!i915_timeline_sync_is_later(barrier, &rq->fence)) { - GEM_TRACE("%s needs barrier for %llx:%lld\n", - ring->timeline->name, - rq->fence.context, - rq->fence.seqno); - return false; - } - - GEM_TRACE("%s has barrier after %llx:%lld\n", - ring->timeline->name, - rq->fence.context, - rq->fence.seqno); - } - - /* - * If any other timeline was still active and behind the last barrier, - * then our last switch-to-kernel-context must still be queued and - * will run last (leaving the engine in the kernel context when it - * eventually idles). - */ - if (any_active) - return true; - - /* The engine is idle; check that it is idling in the kernel context. */ - return engine->last_retired_context == ce; -} - -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915) +int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, + unsigned long mask) { struct intel_engine_cs *engine; - enum intel_engine_id id; GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake)); @@ -771,17 +718,11 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915) if (i915_terminally_wedged(i915)) return 0; - i915_retire_requests(i915); - - for_each_engine(engine, i915, id) { + for_each_engine_masked(engine, i915, mask, mask) { struct intel_ring *ring; struct i915_request *rq; GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine)); - if (engine_has_kernel_context_barrier(engine)) - continue; - - GEM_TRACE("emit barrier on %s\n", engine->name); rq = i915_request_alloc(engine, i915->kernel_context); if (IS_ERR(rq)) @@ -805,7 +746,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915) i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, I915_FENCE_GFP); - i915_timeline_sync_set(rq->timeline, &prev->fence); } i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 2f9ef333acaa..e1188d77a23d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -372,7 +372,8 @@ int i915_gem_context_open(struct drm_i915_private *i915, void i915_gem_context_close(struct drm_file *file); int i915_switch_context(struct i915_request *rq); -int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); +int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, + unsigned long engine_mask); void i915_gem_context_release(struct kref *ctx_ref); struct i915_gem_context * diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 68d74c50ac39..7d8e90dfca84 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -62,7 +62,7 @@ static int ggtt_flush(struct drm_i915_private *i915) * the hopes that we can then remove contexts and the like only * bound by their active reference. */ - err = i915_gem_switch_to_kernel_context(i915); + err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines); if (err) return err; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f8a63495114c..9533a85cb0b3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1068,6 +1068,7 @@ void i915_request_add(struct i915_request *request) GEM_TRACE("marking %s as active\n", ring->timeline->name); list_add(&ring->active_link, &request->i915->gt.active_rings); } + request->i915->gt.active_engines |= request->engine->mask; request->emitted_jiffies = jiffies; /* diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 555a4590fa23..18174f808fd8 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1106,6 +1106,9 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) lockdep_assert_held(&engine->i915->drm.struct_mutex); + if (!engine->context_size) + return true; + /* * Check the last context seen by the engine. If active, it will be * the last request that remains in the timeline. When idle, it is @@ -1205,6 +1208,8 @@ void intel_engines_park(struct drm_i915_private *i915) i915_gem_batch_pool_fini(&engine->batch_pool); engine->execlists.no_priolist = false; } + + i915->gt.active_engines = 0; } /** diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 755c4a7304b2..5b8614b2fbe4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1512,7 +1512,8 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915, } } - err = i915_gem_switch_to_kernel_context(i915); + err = i915_gem_switch_to_kernel_context(i915, + i915->gt.active_engines); if (err) return err; diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index e0d3122fd35a..94aee4071a66 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -14,7 +14,7 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) cond_resched(); if (flags & I915_WAIT_LOCKED && - i915_gem_switch_to_kernel_context(i915)) { + i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines)) { pr_err("Failed to switch back to kernel context; declaring wedged\n"); i915_gem_set_wedged(i915); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index b2c7808e0595..54cfb611c0aa 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -109,6 +109,10 @@ static void mock_retire_work_handler(struct work_struct *work) static void mock_idle_work_handler(struct work_struct *work) { + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gt.idle_work.work); + + i915->gt.active_engines = 0; } static int pm_domain_resume(struct device *dev) -- cgit v1.2.3 From 7d6ce55887a44c15c6df29e883d0ea567c8ac55c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Mar 2019 09:36:57 +0000 Subject: drm/i915: Remove has-kernel-context We can no longer assume execution ordering, and in particular we cannot assume which context will execute last. One side-effect of this is that we cannot determine if the kernel-context is resident on the GPU, so remove the routines that claimed to do so. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190308093657.8640-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_active.h | 13 ------------- drivers/gpu/drm/i915/i915_gem.c | 21 +-------------------- drivers/gpu/drm/i915/i915_gem_evict.c | 16 +++------------- drivers/gpu/drm/i915/intel_engine_cs.c | 31 ------------------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 5 files changed, 4 insertions(+), 78 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 8142a334b37b..7d758719ce39 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -108,19 +108,6 @@ i915_active_request_set_retire_fn(struct i915_active_request *active, active->retire = fn ?: i915_active_retire_noop; } -static inline struct i915_request * -__i915_active_request_peek(const struct i915_active_request *active) -{ - /* - * Inside the error capture (running with the driver in an unknown - * state), we want to bend the rules slightly (a lot). - * - * Work is in progress to make it safer, in the meantime this keeps - * the known issue from spamming the logs. - */ - return rcu_dereference_protected(active->request, 1); -} - /** * i915_active_request_raw - return the active request * @active - the active tracker diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 961237b90b40..1f1849f90606 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2828,23 +2828,6 @@ i915_gem_retire_work_handler(struct work_struct *work) round_jiffies_up_relative(HZ)); } -static void assert_kernel_context_is_current(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (i915_reset_failed(i915)) - return; - - i915_retire_requests(i915); - - for_each_engine(engine, i915, id) { - GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request)); - GEM_BUG_ON(engine->last_retired_context != - to_intel_context(i915->kernel_context, engine)); - } -} - static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, unsigned long mask) { @@ -2864,9 +2847,7 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, I915_GEM_IDLE_TIMEOUT)) result = false; - if (result) { - assert_kernel_context_is_current(i915); - } else { + if (!result) { /* Forcibly cancel outstanding work and leave the gpu quiet. */ dev_err(i915->drm.dev, "Failed to idle engines, declaring wedged!\n"); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 7d8e90dfca84..060f5903544a 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -38,25 +38,15 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl { static bool ggtt_is_idle(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (i915->gt.active_requests) - return false; - - for_each_engine(engine, i915, id) { - if (!intel_engine_has_kernel_context(engine)) - return false; - } - - return true; + return !i915->gt.active_requests; } static int ggtt_flush(struct drm_i915_private *i915) { int err; - /* Not everything in the GGTT is tracked via vma (otherwise we + /* + * Not everything in the GGTT is tracked via vma (otherwise we * could evict as required with minimal stalling) so we are forced * to idle the GPU and explicitly retire outstanding requests in * the hopes that we can then remove contexts and the like only diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 18174f808fd8..8e326556499e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1090,37 +1090,6 @@ bool intel_engines_are_idle(struct drm_i915_private *i915) return true; } -/** - * intel_engine_has_kernel_context: - * @engine: the engine - * - * Returns true if the last context to be executed on this engine, or has been - * executed if the engine is already idle, is the kernel context - * (#i915.kernel_context). - */ -bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) -{ - const struct intel_context *kernel_context = - to_intel_context(engine->i915->kernel_context, engine); - struct i915_request *rq; - - lockdep_assert_held(&engine->i915->drm.struct_mutex); - - if (!engine->context_size) - return true; - - /* - * Check the last context seen by the engine. If active, it will be - * the last request that remains in the timeline. When idle, it is - * the last executed context as tracked by retirement. - */ - rq = __i915_active_request_peek(&engine->timeline.last_request); - if (rq) - return rq->hw_context == kernel_context; - else - return engine->last_retired_context == kernel_context; -} - void intel_engines_reset_default_submission(struct drm_i915_private *i915) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 84b7047e2df5..9ccbe63d46e3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -935,7 +935,6 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force); bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); -bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine); void intel_engine_lost_context(struct intel_engine_cs *engine); void intel_engines_park(struct drm_i915_private *i915); -- cgit v1.2.3 From c4d52feb2c46ddcdde4058cf03f8b9eb996bb09b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Mar 2019 13:25:19 +0000 Subject: drm/i915: Move over to intel_context_lookup() In preparation for an ever growing number of engines and so ever increasing static array of HW contexts within the GEM context, move the array over to an rbtree, allocated upon first use. Unfortunately, this imposes an rbtree lookup at a few frequent callsites, but we should be able to mitigate those by moving over to using the HW context as our primary type and so only incur the lookup on the boundary with the user GEM context and engines. v2: Check for no HW context in guc_stage_desc_init Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190308132522.21573-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gvt/mmio_context.c | 3 +- drivers/gpu/drm/i915/i915_gem.c | 9 +- drivers/gpu/drm/i915/i915_gem_context.c | 71 +++------- drivers/gpu/drm/i915/i915_gem_context.h | 1 - drivers/gpu/drm/i915/i915_gem_context_types.h | 7 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/i915_globals.c | 1 + drivers/gpu/drm/i915/i915_globals.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 4 +- drivers/gpu/drm/i915/intel_context.c | 180 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_context.h | 40 +++++- drivers/gpu/drm/i915/intel_context_types.h | 2 + drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_engine_types.h | 5 + drivers/gpu/drm/i915/intel_guc_ads.c | 4 +- drivers/gpu/drm/i915/intel_guc_submission.c | 6 +- drivers/gpu/drm/i915/intel_lrc.c | 29 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++- drivers/gpu/drm/i915/selftests/mock_context.c | 7 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 6 +- 21 files changed, 310 insertions(+), 96 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_context.c (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index a230553367de..68fecf355471 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -94,6 +94,7 @@ i915-y += \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ + intel_context.o \ intel_engine_cs.o \ intel_hangcheck.o \ intel_lrc.o \ diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 0209d27fcaf0..f64c76dd11d4 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -494,7 +494,8 @@ static void switch_mmio(struct intel_vgpu *pre, * itself. */ if (mmio->in_context && - !is_inhibit_context(&s->shadow_ctx->__engine[ring_id])) + !is_inhibit_context(intel_context_lookup(s->shadow_ctx, + dev_priv->engine[ring_id]))) continue; if (mmio->mask) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f1849f90606..0f32ec12896c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4655,15 +4655,20 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) } for_each_engine(engine, i915, id) { + struct intel_context *ce; struct i915_vma *state; void *vaddr; - GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count); + ce = intel_context_lookup(ctx, engine); + if (!ce) + continue; - state = to_intel_context(ctx, engine)->state; + state = ce->state; if (!state) continue; + GEM_BUG_ON(ce->pin_count); + /* * As we will hold a reference to the logical state, it will * not be torn down with the context, and importantly the diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0bb2bdd0a55f..995bc28d53d6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -95,7 +95,7 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -static struct i915_global_context { +static struct i915_global_gem_context { struct i915_global base; struct kmem_cache *slab_luts; } global; @@ -222,7 +222,7 @@ static void release_hw_id(struct i915_gem_context *ctx) static void i915_gem_context_free(struct i915_gem_context *ctx) { - unsigned int n; + struct intel_context *it, *n; lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -231,12 +231,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) release_hw_id(ctx); i915_ppgtt_put(ctx->ppgtt); - for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { - struct intel_context *ce = &ctx->__engine[n]; - - if (ce->ops) - ce->ops->destroy(ce); - } + rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node) + it->ops->destroy(it); kfree(ctx->name); put_pid(ctx->pid); @@ -340,40 +336,11 @@ static u32 default_desc_template(const struct drm_i915_private *i915, return desc; } -static void intel_context_retire(struct i915_active_request *active, - struct i915_request *rq) -{ - struct intel_context *ce = - container_of(active, typeof(*ce), active_tracker); - - intel_context_unpin(ce); -} - -void -intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - ce->gem_context = ctx; - ce->engine = engine; - ce->ops = engine->cops; - - INIT_LIST_HEAD(&ce->signal_link); - INIT_LIST_HEAD(&ce->signals); - - /* Use the whole device by default */ - ce->sseu = intel_device_default_sseu(ctx->i915); - - i915_active_request_init(&ce->active_tracker, - NULL, intel_context_retire); -} - static struct i915_gem_context * __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) { struct i915_gem_context *ctx; - unsigned int n; int ret; int i; @@ -388,8 +355,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, INIT_LIST_HEAD(&ctx->active_engines); mutex_init(&ctx->mutex); - for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) - intel_context_init(&ctx->__engine[n], ctx, dev_priv->engine[n]); + ctx->hw_contexts = RB_ROOT; + spin_lock_init(&ctx->hw_contexts_lock); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -728,8 +695,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, struct intel_ring *ring; struct i915_request *rq; - GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine)); - rq = i915_request_alloc(engine, i915->kernel_context); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -865,13 +830,15 @@ static int get_sseu(struct i915_gem_context *ctx, if (!engine) return -EINVAL; + ce = intel_context_instance(ctx, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + /* Only use for mutex here is to serialize get_param and set_param. */ ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); if (ret) return ret; - ce = to_intel_context(ctx, engine); - user_sseu.slice_mask = ce->sseu.slice_mask; user_sseu.subslice_mask = ce->sseu.subslice_mask; user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice; @@ -1037,12 +1004,16 @@ __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, struct intel_engine_cs *engine, struct intel_sseu sseu) { - struct intel_context *ce = to_intel_context(ctx, engine); + struct intel_context *ce; int ret = 0; GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8); GEM_BUG_ON(engine->id != RCS0); + ce = intel_context_instance(ctx, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + /* Nothing to do if unmodified. */ if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) return 0; @@ -1375,22 +1346,22 @@ out_unlock: #include "selftests/i915_gem_context.c" #endif -static void i915_global_context_shrink(void) +static void i915_global_gem_context_shrink(void) { kmem_cache_shrink(global.slab_luts); } -static void i915_global_context_exit(void) +static void i915_global_gem_context_exit(void) { kmem_cache_destroy(global.slab_luts); } -static struct i915_global_context global = { { - .shrink = i915_global_context_shrink, - .exit = i915_global_context_exit, +static struct i915_global_gem_context global = { { + .shrink = i915_global_gem_context_shrink, + .exit = i915_global_gem_context_exit, } }; -int __init i915_global_context_init(void) +int __init i915_global_gem_context_init(void) { global.slab_luts = KMEM_CACHE(i915_lut_handle, 0); if (!global.slab_luts) diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 00698944a0ee..5a32c4b4816f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -31,7 +31,6 @@ #include "i915_scheduler.h" #include "intel_context.h" #include "intel_device_info.h" -#include "intel_ringbuffer.h" struct drm_device; struct drm_file; diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h index 59800d749510..2bf19730eaa9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h @@ -13,10 +13,10 @@ #include #include #include +#include #include #include -#include "i915_gem.h" /* I915_NUM_ENGINES */ #include "i915_scheduler.h" #include "intel_context_types.h" @@ -140,8 +140,9 @@ struct i915_gem_context { struct i915_sched_attr sched; - /** engine: per-engine logical HW state */ - struct intel_context __engine[I915_NUM_ENGINES]; + /** hw_contexts: per-engine logical HW state */ + struct rb_root hw_contexts; + spinlock_t hw_contexts_lock; /** ring_size: size for allocating the per-engine ring buffer */ u32 ring_size; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 943a221acb21..ee6d301a9627 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -794,8 +794,8 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb) * keeping all of their resources pinned. */ - ce = to_intel_context(eb->ctx, eb->engine); - if (!ce->ring) /* first use, assume empty! */ + ce = intel_context_lookup(eb->ctx, eb->engine); + if (!ce || !ce->ring) /* first use, assume empty! */ return 0; rq = __eb_wait_for_ring(ce->ring); diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 1cf4e8bc8ec6..2f5c72e2a9d1 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -36,6 +36,7 @@ static void __i915_globals_cleanup(void) static __initconst int (* const initfn[])(void) = { i915_global_active_init, i915_global_context_init, + i915_global_gem_context_init, i915_global_objects_init, i915_global_request_init, i915_global_scheduler_init, diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h index a45529022a42..04c1ce107fc0 100644 --- a/drivers/gpu/drm/i915/i915_globals.h +++ b/drivers/gpu/drm/i915/i915_globals.h @@ -26,6 +26,7 @@ void i915_globals_exit(void); /* constructors */ int i915_global_active_init(void); int i915_global_context_init(void); +int i915_global_gem_context_init(void); int i915_global_objects_init(void); int i915_global_request_init(void); int i915_global_scheduler_init(void); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a0d145f976ec..e19a89e4df64 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1740,11 +1740,11 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, /* Update all contexts now that we've stalled the submission. */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct intel_context *ce = to_intel_context(ctx, engine); + struct intel_context *ce = intel_context_lookup(ctx, engine); u32 *regs; /* OA settings will be set upon first use */ - if (!ce->state) + if (!ce || !ce->state) continue; regs = i915_gem_object_pin_map(ce->state->obj, map_type); diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c new file mode 100644 index 000000000000..d04a1f51a90c --- /dev/null +++ b/drivers/gpu/drm/i915/intel_context.c @@ -0,0 +1,180 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_gem_context.h" +#include "i915_globals.h" +#include "intel_context.h" +#include "intel_ringbuffer.h" + +static struct i915_global_context { + struct i915_global base; + struct kmem_cache *slab_ce; +} global; + +struct intel_context *intel_context_alloc(void) +{ + return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); +} + +void intel_context_free(struct intel_context *ce) +{ + kmem_cache_free(global.slab_ce, ce); +} + +struct intel_context * +intel_context_lookup(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct intel_context *ce = NULL; + struct rb_node *p; + + spin_lock(&ctx->hw_contexts_lock); + p = ctx->hw_contexts.rb_node; + while (p) { + struct intel_context *this = + rb_entry(p, struct intel_context, node); + + if (this->engine == engine) { + GEM_BUG_ON(this->gem_context != ctx); + ce = this; + break; + } + + if (this->engine < engine) + p = p->rb_right; + else + p = p->rb_left; + } + spin_unlock(&ctx->hw_contexts_lock); + + return ce; +} + +struct intel_context * +__intel_context_insert(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_context *ce) +{ + struct rb_node **p, *parent; + int err = 0; + + spin_lock(&ctx->hw_contexts_lock); + + parent = NULL; + p = &ctx->hw_contexts.rb_node; + while (*p) { + struct intel_context *this; + + parent = *p; + this = rb_entry(parent, struct intel_context, node); + + if (this->engine == engine) { + err = -EEXIST; + ce = this; + break; + } + + if (this->engine < engine) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + if (!err) { + rb_link_node(&ce->node, parent, p); + rb_insert_color(&ce->node, &ctx->hw_contexts); + } + + spin_unlock(&ctx->hw_contexts_lock); + + return ce; +} + +void __intel_context_remove(struct intel_context *ce) +{ + struct i915_gem_context *ctx = ce->gem_context; + + spin_lock(&ctx->hw_contexts_lock); + rb_erase(&ce->node, &ctx->hw_contexts); + spin_unlock(&ctx->hw_contexts_lock); +} + +struct intel_context * +intel_context_instance(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct intel_context *ce, *pos; + + ce = intel_context_lookup(ctx, engine); + if (likely(ce)) + return ce; + + ce = intel_context_alloc(); + if (!ce) + return ERR_PTR(-ENOMEM); + + intel_context_init(ce, ctx, engine); + + pos = __intel_context_insert(ctx, engine, ce); + if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */ + intel_context_free(ce); + + GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos); + return pos; +} + +static void intel_context_retire(struct i915_active_request *active, + struct i915_request *rq) +{ + struct intel_context *ce = + container_of(active, typeof(*ce), active_tracker); + + intel_context_unpin(ce); +} + +void +intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + ce->gem_context = ctx; + ce->engine = engine; + ce->ops = engine->cops; + + INIT_LIST_HEAD(&ce->signal_link); + INIT_LIST_HEAD(&ce->signals); + + /* Use the whole device by default */ + ce->sseu = intel_device_default_sseu(ctx->i915); + + i915_active_request_init(&ce->active_tracker, + NULL, intel_context_retire); +} + +static void i915_global_context_shrink(void) +{ + kmem_cache_shrink(global.slab_ce); +} + +static void i915_global_context_exit(void) +{ + kmem_cache_destroy(global.slab_ce); +} + +static struct i915_global_context global = { { + .shrink = i915_global_context_shrink, + .exit = i915_global_context_exit, +} }; + +int __init i915_global_context_init(void) +{ + global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); + if (!global.slab_ce) + return -ENOMEM; + + i915_global_register(&global.base); + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h index dd947692bb0b..2c910628acaf 100644 --- a/drivers/gpu/drm/i915/intel_context.h +++ b/drivers/gpu/drm/i915/intel_context.h @@ -7,20 +7,46 @@ #ifndef __INTEL_CONTEXT_H__ #define __INTEL_CONTEXT_H__ -#include "i915_gem_context_types.h" #include "intel_context_types.h" #include "intel_engine_types.h" +struct intel_context *intel_context_alloc(void); +void intel_context_free(struct intel_context *ce); + void intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine); -static inline struct intel_context * -to_intel_context(struct i915_gem_context *ctx, - const struct intel_engine_cs *engine) -{ - return &ctx->__engine[engine->id]; -} +/** + * intel_context_lookup - Find the matching HW context for this (ctx, engine) + * @ctx - the parent GEM context + * @engine - the target HW engine + * + * May return NULL if the HW context hasn't been instantiated (i.e. unused). + */ +struct intel_context * +intel_context_lookup(struct i915_gem_context *ctx, + struct intel_engine_cs *engine); + +/** + * intel_context_instance - Lookup or allocate the HW context for (ctx, engine) + * @ctx - the parent GEM context + * @engine - the target HW engine + * + * Returns the existing HW context for this pair of (GEM context, engine), or + * allocates and initialises a fresh context. Once allocated, the HW context + * remains resident until the GEM context is destroyed. + */ +struct intel_context * +intel_context_instance(struct i915_gem_context *ctx, + struct intel_engine_cs *engine); + +struct intel_context * +__intel_context_insert(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_context *ce); +void +__intel_context_remove(struct intel_context *ce); static inline struct intel_context * intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h index 16e1306e9595..857f5c335324 100644 --- a/drivers/gpu/drm/i915/intel_context_types.h +++ b/drivers/gpu/drm/i915/intel_context_types.h @@ -8,6 +8,7 @@ #define __INTEL_CONTEXT_TYPES__ #include +#include #include #include "i915_active_types.h" @@ -52,6 +53,7 @@ struct intel_context { struct i915_active_request active_tracker; const struct intel_context_ops *ops; + struct rb_node node; /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 8e326556499e..1937128ea267 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -644,7 +644,7 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) static void __intel_context_unpin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - intel_context_unpin(to_intel_context(ctx, engine)); + intel_context_unpin(intel_context_lookup(ctx, engine)); } struct measure_breadcrumb { diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h index 4d2c0ba58e6e..6a570e4e51c3 100644 --- a/drivers/gpu/drm/i915/intel_engine_types.h +++ b/drivers/gpu/drm/i915/intel_engine_types.h @@ -231,6 +231,11 @@ struct intel_engine_execlists { */ u32 *csb_status; + /** + * @preempt_context: the HW context for injecting preempt-to-idle + */ + struct intel_context *preempt_context; + /** * @preempt_complete_status: expected CSB upon completing preemption */ diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c index c51d558fd431..b4ff0045a605 100644 --- a/drivers/gpu/drm/i915/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -121,8 +121,8 @@ int intel_guc_ads_create(struct intel_guc *guc) * to find it. Note that we have to skip our header (1 page), * because our GuC shared data is there. */ - kernel_ctx_vma = to_intel_context(dev_priv->kernel_context, - dev_priv->engine[RCS0])->state; + kernel_ctx_vma = intel_context_lookup(dev_priv->kernel_context, + dev_priv->engine[RCS0])->state; blob->ads.golden_context_lrca = intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset; diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index e3afc91f0e7b..4a5727233419 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -382,7 +382,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client) desc->db_id = client->doorbell_id; for_each_engine_masked(engine, dev_priv, client->engines, tmp) { - struct intel_context *ce = to_intel_context(ctx, engine); + struct intel_context *ce = intel_context_lookup(ctx, engine); u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; @@ -393,7 +393,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client) * for now who owns a GuC client. But for future owner of GuC * client, need to make sure lrc is pinned prior to enter here. */ - if (!ce->state) + if (!ce || !ce->state) break; /* XXX: continue? */ /* @@ -567,7 +567,7 @@ static void inject_preempt_context(struct work_struct *work) preempt_work[engine->id]); struct intel_guc_client *client = guc->preempt_client; struct guc_stage_desc *stage_desc = __get_stage_desc(client); - struct intel_context *ce = to_intel_context(client->owner, engine); + struct intel_context *ce = intel_context_lookup(client->owner, engine); u32 data[7]; if (!ce->ring->emit) { /* recreate upon load/resume */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b210a2764613..7aeff600f12e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -628,8 +628,7 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) static void inject_preempt_context(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; - struct intel_context *ce = - to_intel_context(engine->i915->preempt_context, engine); + struct intel_context *ce = execlists->preempt_context; unsigned int n; GEM_BUG_ON(execlists->preempt_complete_status != @@ -1237,19 +1236,24 @@ static void execlists_submit_request(struct i915_request *request) spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static void execlists_context_destroy(struct intel_context *ce) +static void __execlists_context_fini(struct intel_context *ce) { - GEM_BUG_ON(ce->pin_count); - - if (!ce->state) - return; - intel_ring_free(ce->ring); GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); i915_gem_object_put(ce->state->obj); } +static void execlists_context_destroy(struct intel_context *ce) +{ + GEM_BUG_ON(ce->pin_count); + + if (ce->state) + __execlists_context_fini(ce); + + intel_context_free(ce); +} + static void execlists_context_unpin(struct intel_context *ce) { struct intel_engine_cs *engine; @@ -1390,7 +1394,11 @@ static struct intel_context * execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = to_intel_context(ctx, engine); + struct intel_context *ce; + + ce = intel_context_instance(ctx, engine); + if (IS_ERR(ce)) + return ce; lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!ctx->ppgtt); @@ -2441,8 +2449,9 @@ static int logical_ring_init(struct intel_engine_cs *engine) execlists->preempt_complete_status = ~0u; if (i915->preempt_context) { struct intel_context *ce = - to_intel_context(i915->preempt_context, engine); + intel_context_lookup(i915->preempt_context, engine); + execlists->preempt_context = ce; execlists->preempt_complete_status = upper_32_bits(ce->lrc_desc); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 78e3c03aab4e..602babbd737f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1349,15 +1349,20 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } +static void __ring_context_fini(struct intel_context *ce) +{ + GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); + i915_gem_object_put(ce->state->obj); +} + static void ring_context_destroy(struct intel_context *ce) { GEM_BUG_ON(ce->pin_count); - if (!ce->state) - return; + if (ce->state) + __ring_context_fini(ce); - GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); - i915_gem_object_put(ce->state->obj); + intel_context_free(ce); } static int __context_pin_ppgtt(struct i915_gem_context *ctx) @@ -1552,7 +1557,11 @@ err: static struct intel_context * ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = to_intel_context(ctx, engine); + struct intel_context *ce; + + ce = intel_context_instance(ctx, engine); + if (IS_ERR(ce)) + return ce; lockdep_assert_held(&ctx->i915->drm.struct_mutex); @@ -1754,8 +1763,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) * placeholder we use to flush other contexts. */ *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(to_intel_context(i915->kernel_context, - engine)->state) | + *cs++ = i915_ggtt_offset(intel_context_lookup(i915->kernel_context, + engine)->state) | MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT; } diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 353b37b9f78e..8efa6892c6cd 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -30,7 +30,6 @@ mock_context(struct drm_i915_private *i915, const char *name) { struct i915_gem_context *ctx; - unsigned int n; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -41,15 +40,15 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + ctx->hw_contexts = RB_ROOT; + spin_lock_init(&ctx->hw_contexts_lock); + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); INIT_LIST_HEAD(&ctx->hw_id_link); INIT_LIST_HEAD(&ctx->active_engines); mutex_init(&ctx->mutex); - for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) - intel_context_init(&ctx->__engine[n], ctx, i915->engine[n]); - ret = i915_gem_context_pin_hw_id(ctx); if (ret < 0) goto err_handles; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 856ec2706f3e..d9bbb102677f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -141,9 +141,13 @@ static struct intel_context * mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = to_intel_context(ctx, engine); + struct intel_context *ce; int err = -ENOMEM; + ce = intel_context_instance(ctx, engine); + if (IS_ERR(ce)) + return ce; + if (ce->pin_count++) return ce; -- cgit v1.2.3 From 9dbfea98d70ba83c3a824b470447b8d452ae2540 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Mar 2019 13:25:21 +0000 Subject: drm/i915: Track the pinned kernel contexts on each engine Each engine acquires a pin on the kernel contexts (normal and preempt) so that the logical state is always available on demand. Keep track of each engines pin by storing the returned pointer on the engine for quick access. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190308132522.21573-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 65 ++++++++++++++-------------- drivers/gpu/drm/i915/intel_engine_types.h | 8 ++-- drivers/gpu/drm/i915/intel_guc_ads.c | 3 +- drivers/gpu/drm/i915/intel_lrc.c | 13 ++---- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 5 ++- 6 files changed, 45 insertions(+), 52 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 1937128ea267..652c1b3ba190 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -641,12 +641,6 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) i915->caps.scheduler = 0; } -static void __intel_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - intel_context_unpin(intel_context_lookup(ctx, engine)); -} - struct measure_breadcrumb { struct i915_request rq; struct i915_timeline timeline; @@ -697,6 +691,20 @@ out_frame: return dw; } +static int pin_context(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_context **out) +{ + struct intel_context *ce; + + ce = intel_context_pin(ctx, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + *out = ce; + return 0; +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -711,11 +719,8 @@ out_frame: int intel_engine_init_common(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - struct intel_context *ce; int ret; - engine->set_default_submission(engine); - /* We may need to do things with the shrinker which * require us to immediately switch back to the default * context. This can cause a problem as pinning the @@ -723,36 +728,34 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. */ - ce = intel_context_pin(i915->kernel_context, engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); + ret = pin_context(i915->kernel_context, engine, + &engine->kernel_context); + if (ret) + return ret; /* * Similarly the preempt context must always be available so that - * we can interrupt the engine at any time. + * we can interrupt the engine at any time. However, as preemption + * is optional, we allow it to fail. */ - if (i915->preempt_context) { - ce = intel_context_pin(i915->preempt_context, engine); - if (IS_ERR(ce)) { - ret = PTR_ERR(ce); - goto err_unpin_kernel; - } - } + if (i915->preempt_context) + pin_context(i915->preempt_context, engine, + &engine->preempt_context); ret = measure_breadcrumb_dw(engine); if (ret < 0) - goto err_unpin_preempt; + goto err_unpin; engine->emit_fini_breadcrumb_dw = ret; - return 0; + engine->set_default_submission(engine); -err_unpin_preempt: - if (i915->preempt_context) - __intel_context_unpin(i915->preempt_context, engine); + return 0; -err_unpin_kernel: - __intel_context_unpin(i915->kernel_context, engine); +err_unpin: + if (engine->preempt_context) + intel_context_unpin(engine->preempt_context); + intel_context_unpin(engine->kernel_context); return ret; } @@ -765,8 +768,6 @@ err_unpin_kernel: */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = engine->i915; - cleanup_status_page(engine); intel_engine_fini_breadcrumbs(engine); @@ -776,9 +777,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->default_state) i915_gem_object_put(engine->default_state); - if (i915->preempt_context) - __intel_context_unpin(i915->preempt_context, engine); - __intel_context_unpin(i915->kernel_context, engine); + if (engine->preempt_context) + intel_context_unpin(engine->preempt_context); + intel_context_unpin(engine->kernel_context); i915_timeline_fini(&engine->timeline); diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h index 21f3275cc00c..b0aa1f0d4e47 100644 --- a/drivers/gpu/drm/i915/intel_engine_types.h +++ b/drivers/gpu/drm/i915/intel_engine_types.h @@ -231,11 +231,6 @@ struct intel_engine_execlists { */ u32 *csb_status; - /** - * @preempt_context: the HW context for injecting preempt-to-idle - */ - struct intel_context *preempt_context; - /** * @preempt_complete_status: expected CSB upon completing preemption */ @@ -271,6 +266,9 @@ struct intel_engine_cs { struct i915_timeline timeline; + struct intel_context *kernel_context; /* pinned */ + struct intel_context *preempt_context; /* pinned; optional */ + struct drm_i915_gem_object *default_state; void *pinned_default_state; diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c index b4ff0045a605..bec62f34b15a 100644 --- a/drivers/gpu/drm/i915/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -121,8 +121,7 @@ int intel_guc_ads_create(struct intel_guc *guc) * to find it. Note that we have to skip our header (1 page), * because our GuC shared data is there. */ - kernel_ctx_vma = intel_context_lookup(dev_priv->kernel_context, - dev_priv->engine[RCS0])->state; + kernel_ctx_vma = dev_priv->engine[RCS0]->kernel_context->state; blob->ads.golden_context_lrca = intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d9bb744c3174..44148433d237 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -626,7 +626,7 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) static void inject_preempt_context(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; - struct intel_context *ce = execlists->preempt_context; + struct intel_context *ce = engine->preempt_context; unsigned int n; GEM_BUG_ON(execlists->preempt_complete_status != @@ -2321,7 +2321,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_SEMAPHORES; engine->flags |= I915_ENGINE_SUPPORTS_STATS; - if (engine->i915->preempt_context) + if (engine->preempt_context) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } @@ -2423,14 +2423,9 @@ static int logical_ring_init(struct intel_engine_cs *engine) } execlists->preempt_complete_status = ~0u; - if (i915->preempt_context) { - struct intel_context *ce = - intel_context_lookup(i915->preempt_context, engine); - - execlists->preempt_context = ce; + if (engine->preempt_context) execlists->preempt_complete_status = - upper_32_bits(ce->lrc_desc); - } + upper_32_bits(engine->preempt_context->lrc_desc); execlists->csb_status = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d06908c9584c..03656021d04c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1735,8 +1735,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) * placeholder we use to flush other contexts. */ *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(intel_context_lookup(i915->kernel_context, - engine)->state) | + *cs++ = i915_ggtt_offset(engine->kernel_context->state) | MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT; } diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index e483329fd533..99d7463218d0 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -233,7 +233,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); - if (IS_ERR(intel_context_pin(i915->kernel_context, &engine->base))) + if (pin_context(i915->kernel_context, &engine->base, + &engine->base.kernel_context)) goto err_breadcrumbs; return &engine->base; @@ -276,7 +277,7 @@ void mock_engine_free(struct intel_engine_cs *engine) if (ce) intel_context_unpin(ce); - __intel_context_unpin(engine->i915->kernel_context, engine); + intel_context_unpin(engine->kernel_context); intel_engine_fini_breadcrumbs(engine); i915_timeline_fini(&engine->timeline); -- cgit v1.2.3