From 69c2fc891343cb5217c866d10709343cff190bdc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 Jul 2012 12:41:03 +0100 Subject: drm/i915: Remove the per-ring write list This is now handled by a global flag to ensure we emit a flush before the next serialisation point (if we failed to queue one previously). Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1d3c81fdad92..7986f3001cf0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -100,15 +100,6 @@ struct intel_ring_buffer { */ struct list_head request_list; - /** - * List of objects currently pending a GPU write flush. - * - * All elements on this list will belong to either the - * active_list or flushing_list, last_rendering_seqno can - * be used to differentiate between the two elements. - */ - struct list_head gpu_write_list; - /** * Do we have some not yet emitted requests outstanding? */ -- cgit v1.2.3 From a7b9761d0a2ded58170ffb4d423ff3d7228103f4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 Jul 2012 12:41:08 +0100 Subject: drm/i915: Split i915_gem_flush_ring() into seperate invalidate/flush funcs By moving the function to intel_ringbuffer and currying the appropriate parameter, hopefully we make the callsites easier to read and understand. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 --- drivers/gpu/drm/i915/i915_gem.c | 29 +++-------------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +------ drivers/gpu/drm/i915/intel_ringbuffer.c | 38 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 5 files changed, 44 insertions(+), 37 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 59e3199da162..700dc838c57f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1256,9 +1256,6 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void i915_gem_load(struct drm_device *dev); int i915_gem_init_object(struct drm_gem_object *obj); -int __must_check i915_gem_flush_ring(struct intel_ring_buffer *ring, - uint32_t invalidate_domains, - uint32_t flush_domains); struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, size_t size); void i915_gem_free_object(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3659d47a9f6e..f26e2b201bad 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1549,13 +1549,9 @@ i915_add_request(struct intel_ring_buffer *ring, * is that the flush _must_ happen before the next request, no matter * what. */ - if (ring->gpu_caches_dirty) { - ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; - - ring->gpu_caches_dirty = false; - } + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; if (request == NULL) { request = kmalloc(sizeof(*request), GFP_KERNEL); @@ -2254,25 +2250,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) return ret; } -int -i915_gem_flush_ring(struct intel_ring_buffer *ring, - uint32_t invalidate_domains, - uint32_t flush_domains) -{ - int ret; - - if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) - return 0; - - trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); - - ret = ring->flush(ring, invalidate_domains, flush_domains); - if (ret) - return ret; - - return 0; -} - static int i915_ring_idle(struct intel_ring_buffer *ring) { if (list_empty(&ring->active_list)) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 55a94c1a1f59..6be1a8920a84 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -707,14 +707,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. */ - ret = i915_gem_flush_ring(ring, - I915_GEM_GPU_DOMAINS, - ring->gpu_caches_dirty ? I915_GEM_GPU_DOMAINS : 0); - if (ret) - return ret; - - ring->gpu_caches_dirty = false; - return 0; + return intel_ring_invalidate_all_caches(ring); } static bool diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8f221d9a7bdb..8b7085e4cf84 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1564,3 +1564,41 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) return intel_init_ring_buffer(dev, ring); } + +int +intel_ring_flush_all_caches(struct intel_ring_buffer *ring) +{ + int ret; + + if (!ring->gpu_caches_dirty) + return 0; + + ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + + trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS); + + ring->gpu_caches_dirty = false; + return 0; +} + +int +intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring) +{ + uint32_t flush_domains; + int ret; + + flush_domains = 0; + if (ring->gpu_caches_dirty) + flush_domains = I915_GEM_GPU_DOMAINS; + + ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); + if (ret) + return ret; + + trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); + + ring->gpu_caches_dirty = false; + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 7986f3001cf0..8b2b92e00e9d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -195,6 +195,8 @@ static inline void intel_ring_emit(struct intel_ring_buffer *ring, void intel_ring_advance(struct intel_ring_buffer *ring); u32 intel_ring_get_seqno(struct intel_ring_buffer *ring); +int intel_ring_flush_all_caches(struct intel_ring_buffer *ring); +int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring); int intel_init_render_ring_buffer(struct drm_device *dev); int intel_init_bsd_ring_buffer(struct drm_device *dev); -- cgit v1.2.3 From b2eadbc85b2c26df3fd2fe5c53c2a47cfd307249 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Aug 2012 10:58:30 +0100 Subject: drm/i915: Lazily apply the SNB+ seqno w/a Avoid the forcewake overhead when simply retiring requests, as often the last seen seqno is good enough to satisfy the retirment process and will be promptly re-run in any case. Only ensure that we force the coherent seqno read when we are explicitly waiting upon a completion event to be sure that none go missing, and also for when we are reporting seqno values in case of error or debugging. This greatly reduces the load for userspace using the busy-ioctl to track active buffers, for instance halving the CPU used by X in pushing the pixels from a software render (flash). The effect will be even more magnified with userptr and so providing a zero-copy upload path in that instance, or in similar instances where X is simply compositing DRI buffers. v2: Reverse the polarity of the tachyon stream. Daniel suggested that 'force' was too generic for the parameter name and that 'lazy_coherency' better encapsulated the semantics of it being an optimization and its purpose. Also notice that gen6_get_seqno() is only used by gen6/7 chipsets and so the test for IS_GEN6 || IS_GEN7 is redundant in that function. Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 6 +++--- drivers/gpu/drm/i915/i915_irq.c | 9 +++++---- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++------ drivers/gpu/drm/i915/intel_ringbuffer.h | 9 ++++++++- 5 files changed, 21 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ed4bc98095b1..0e8f14d04cda 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -391,7 +391,7 @@ static void i915_ring_seqno_info(struct seq_file *m, { if (ring->get_seqno) { seq_printf(m, "Current sequence (%s): %d\n", - ring->name, ring->get_seqno(ring)); + ring->name, ring->get_seqno(ring, false)); } } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c540321b42ba..051459324826 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1716,7 +1716,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) WARN_ON(i915_verify_lists(ring->dev)); - seqno = ring->get_seqno(ring); + seqno = ring->get_seqno(ring, true); for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) if (seqno >= ring->sync_seqno[i]) @@ -1888,7 +1888,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, bool wait_forever = true; int ret; - if (i915_seqno_passed(ring->get_seqno(ring), seqno)) + if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) return 0; trace_i915_gem_request_wait_begin(ring, seqno); @@ -1907,7 +1907,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, getrawmonotonic(&before); #define EXIT_COND \ - (i915_seqno_passed(ring->get_seqno(ring), seqno) || \ + (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ atomic_read(&dev_priv->mm.wedged)) do { if (interruptible) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d15ea50f5854..0c37101934f8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -347,7 +347,7 @@ static void notify_ring(struct drm_device *dev, if (ring->obj == NULL) return; - trace_i915_gem_request_complete(ring, ring->get_seqno(ring)); + trace_i915_gem_request_complete(ring, ring->get_seqno(ring, false)); wake_up_all(&ring->irq_queue); if (i915_enable_hangcheck) { @@ -1051,7 +1051,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, if (!ring->get_seqno) return NULL; - seqno = ring->get_seqno(ring); + seqno = ring->get_seqno(ring, false); list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { if (obj->ring != ring) continue; @@ -1105,7 +1105,7 @@ static void i915_record_ring_state(struct drm_device *dev, error->waiting[ring->id] = waitqueue_active(&ring->irq_queue); error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base)); - error->seqno[ring->id] = ring->get_seqno(ring); + error->seqno[ring->id] = ring->get_seqno(ring, false); error->acthd[ring->id] = intel_ring_get_active_head(ring); error->head[ring->id] = I915_READ_HEAD(ring); error->tail[ring->id] = I915_READ_TAIL(ring); @@ -1602,7 +1602,8 @@ ring_last_seqno(struct intel_ring_buffer *ring) static bool i915_hangcheck_ring_idle(struct intel_ring_buffer *ring, bool *err) { if (list_empty(&ring->request_list) || - i915_seqno_passed(ring->get_seqno(ring), ring_last_seqno(ring))) { + i915_seqno_passed(ring->get_seqno(ring, false), + ring_last_seqno(ring))) { /* Issue a wake-up to catch stuck h/w. */ if (waitqueue_active(&ring->irq_queue)) { DRM_ERROR("Hangcheck timer elapsed... %s idle\n", diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8733da529edf..e278675cdff9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -625,26 +625,24 @@ pc_render_add_request(struct intel_ring_buffer *ring, } static u32 -gen6_ring_get_seqno(struct intel_ring_buffer *ring) +gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) { - struct drm_device *dev = ring->dev; - /* Workaround to force correct ordering between irq and seqno writes on * ivb (and maybe also on snb) by reading from a CS register (like * ACTHD) before reading the status page. */ - if (IS_GEN6(dev) || IS_GEN7(dev)) + if (!lazy_coherency) intel_ring_get_active_head(ring); return intel_read_status_page(ring, I915_GEM_HWS_INDEX); } static u32 -ring_get_seqno(struct intel_ring_buffer *ring) +ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) { return intel_read_status_page(ring, I915_GEM_HWS_INDEX); } static u32 -pc_render_get_seqno(struct intel_ring_buffer *ring) +pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) { struct pipe_control *pc = ring->private; return pc->cpu_page[0]; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8b2b92e00e9d..2ea7a311a1f0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -72,7 +72,14 @@ struct intel_ring_buffer { u32 flush_domains); int (*add_request)(struct intel_ring_buffer *ring, u32 *seqno); - u32 (*get_seqno)(struct intel_ring_buffer *ring); + /* Some chipsets are not quite as coherent as advertised and need + * an expensive kick to force a true read of the up-to-date seqno. + * However, the up-to-date seqno is not always required and the last + * seen value is good enough. Note that the seqno will always be + * monotonic, even if not coherent. + */ + u32 (*get_seqno)(struct intel_ring_buffer *ring, + bool lazy_coherency); int (*dispatch_execbuffer)(struct intel_ring_buffer *ring, u32 offset, u32 length); void (*cleanup)(struct intel_ring_buffer *ring); -- cgit v1.2.3