From dbd6ef29a74e6db22e037fc3d481c1dbca596f80 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Tue, 9 Aug 2016 17:47:52 +0100
Subject: drm/i915: Use RCU to annotate and enforce protection for breadcrumb's bh

The bottom-half we use for processing the breadcrumb interrupt is a
task, which is an RCU protected struct. When accessing this struct, we
need to be holding the RCU read lock to prevent it disappearing beneath
us. We can use the RCU annotation to mark our irq_seqno_bh pointer as
being under RCU guard and then use the RCU accessors to provide correct
ordering of access through the pointer. Most notably, this fixes the
access from hard irq context to use the RCU read lock, which both
Daniel and Tvrtko complained about.

Signed-off-by: Chris Wilson
Cc: Daniel Vetter
Cc: Tvrtko Ursulin
Reviewed-by: Daniel Vetter
Link: http://patchwork.freedesktop.org/patch/msgid/1470761272-1245-3-git-send-email-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e08a1e1b04e4..16b726fe33eb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2410,9 +2410,7 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
 	/* After manually advancing the seqno, fake the interrupt in case
 	 * there are any waiters for that seqno.
 	 */
-	rcu_read_lock();
 	intel_engine_wakeup(engine);
-	rcu_read_unlock();
 }
 
 static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
--
cgit v1.2.3

From 737aac2465e9f44f8ab4f3b75f3220b524ae2a1b Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Wed, 10 Aug 2016 13:41:45 +0100
Subject: drm/i915: Mark unmappable GGTT entries as PIN_HIGH

We allocate a few objects into the GGTT that we never need to access
via the mappable aperture (such as contexts, status pages). We can
request that these are bound high in the VM to increase the amount of
mappable aperture available. However, for anything that may be
frequently pinned (such as logical contexts), we want to use the fast
search & insert.
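
For illustration only (the object here is hypothetical; the function
and flag are the ones used in the hunks below), a caller opts in to the
top-down placement like so:

	/* Keep this object out of the mappable aperture by asking the
	 * allocator to search top-down.
	 */
	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, PIN_HIGH);
	if (ret)
		return ret;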
Signed-off-by: Chris Wilson
Reviewed-by: Joonas Lahtinen
Link: http://patchwork.freedesktop.org/patch/msgid/1470832906-13972-1-git-send-email-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/intel_lrc.c        | 2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 309c5d9b1c57..c7f4b64b16f6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1182,7 +1182,7 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
 	}
 
 	ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL,
-				       0, PAGE_SIZE, 0);
+				       0, PAGE_SIZE, PIN_HIGH);
 	if (ret) {
 		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
 				 ret);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 16b726fe33eb..09f01c641c14 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2093,7 +2093,7 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
 
 	if (ce->state) {
 		ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
-					       ctx->ggtt_alignment, 0);
+					       ctx->ggtt_alignment, PIN_HIGH);
 		if (ret)
 			goto error;
 	}
@@ -2629,7 +2629,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
 			i915.semaphores = 0;
 		} else {
 			i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-			ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+			ret = i915_gem_object_ggtt_pin(obj, NULL,
+						       0, 0, PIN_HIGH);
 			if (ret != 0) {
 				i915_gem_object_put(obj);
 				DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
--
cgit v1.2.3

From c1bb11451ed93aae53efcf461f936a54675dcbea Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Wed, 10 Aug 2016 16:22:10 +0100
Subject: drm/i915: Store number of active engines in device info

Until now, the code called hweight32() to compute the number of engines
from device_info->ring_mask at runtime. Instead, we can cache it at
engine init time and use it directly.
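
A self-contained sketch of the pattern (the names below are
illustrative, not the driver's; popcount32() stands in for the kernel's
hweight32()):

	#include <stdint.h>

	struct device_info {
		uint32_t ring_mask;	/* one bit per engine */
		uint8_t num_rings;	/* cached popcount of ring_mask */
	};

	/* Kernighan popcount: clears the lowest set bit each pass. */
	static unsigned int popcount32(uint32_t v)
	{
		unsigned int n = 0;

		for (; v; v &= v - 1)
			n++;
		return n;
	}

	/* Pay the cost once at init time... */
	static void device_info_init(struct device_info *info)
	{
		info->num_rings = popcount32(info->ring_mask);
	}

	/* ...so the hot paths just read info->num_rings. */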
Signed-off-by: Tvrtko Ursulin Reviewed-by: Dave Gordon Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1470842530-35854-1-git-send-email-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 10 +++++----- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++--- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ade2446d924a..2a3d6d2817d8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3226,7 +3226,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused) struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine; - int num_rings = hweight32(INTEL_INFO(dev)->ring_mask); + int num_rings = INTEL_INFO(dev)->num_rings; enum intel_engine_id id; int j, ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7f2754a070a5..7971c76852df 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -793,6 +793,7 @@ struct intel_device_info { u8 gen; u16 gen_mask; u8 ring_mask; /* Rings supported by the HW */ + u8 num_rings; DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG, SEP_SEMICOLON); /* Register offsets for the various display pipes and transcoders */ int pipe_offsets[I915_MAX_TRANSCODERS]; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index bb72af5320b0..547caf26a6b9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -568,7 +568,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ i915.semaphores ? - hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 : + INTEL_INFO(dev_priv)->num_rings - 1 : 0; int len, ret; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 0dd3d1de18aa..186c12d07f99 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -109,6 +109,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv, int intel_engines_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_device_info *device_info = mkwrite_device_info(dev_priv); unsigned int mask = 0; int (*init)(struct intel_engine_cs *engine); unsigned int i; @@ -142,11 +143,10 @@ int intel_engines_init(struct drm_device *dev) * are added to the driver by a warning and disabling the forgotten * engines. 
 	 */
-	if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) {
-		struct intel_device_info *info =
-			(struct intel_device_info *)&dev_priv->info;
-		info->ring_mask = mask;
-	}
+	if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask))
+		device_info->ring_mask = mask;
+
+	device_info->num_rings = hweight32(mask);
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 09f01c641c14..ed19868df9c6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1317,7 +1317,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *req)
 	enum intel_engine_id id;
 	int ret, num_rings;
 
-	num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
+	num_rings = INTEL_INFO(dev_priv)->num_rings;
 	ret = intel_ring_begin(req, (num_rings-1) * 8);
 	if (ret)
 		return ret;
@@ -1354,7 +1354,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *req)
 	enum intel_engine_id id;
 	int ret, num_rings;
 
-	num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
+	num_rings = INTEL_INFO(dev_priv)->num_rings;
 	ret = intel_ring_begin(req, (num_rings-1) * 6);
 	if (ret)
 		return ret;
@@ -1389,7 +1389,7 @@ static int gen6_signal(struct drm_i915_gem_request *req)
 	enum intel_engine_id id;
 	int ret, num_rings;
 
-	num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
+	num_rings = INTEL_INFO(dev_priv)->num_rings;
 	ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2));
 	if (ret)
 		return ret;
--
cgit v1.2.3

From d31d7cb1460c2942e23bfd0c0f4c8066b60353dc Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 12 Aug 2016 12:39:58 +0100
Subject: drm/i915: Support for creating write combined type vmaps

vmap has a provision for controlling the page protection bits, which we
can use to control the mapping type, e.g. WB, WC, UC or even WT. To
allow the caller to choose their mapping type, we add a parameter to
i915_gem_object_pin_map - but we still only allow one vmap to be cached
per object. If the object is currently not pinned, then we recreate the
previous vmap with the new access type, but if it was pinned we report
an error. This effectively limits the access via
i915_gem_object_pin_map to a single mapping type for the lifetime of
the object. Not usually a problem, but something to be aware of when
setting up the object's vmap.

We will want to vary the access type to enable WC mappings of
ringbuffer and context objects on !llc platforms, as well as other
objects where we need coherent access to the GPU's pages without going
through the GTT.

v2: Remove the redundant braces around pin count check and fix the
marker in documentation (Chris)

v3:
- Add a new enum for the vmalloc mapping type & pass that as an
  argument to i915_gem_object_pin_map. (Tvrtko)
- Use PAGE_MASK to extract or filter the mapping type info and remove a
  superfluous BUG_ON. (Tvrtko)

v4: Rename the enums and clean up the pin_map function. (Chris)

v5: Drop the VM_NO_GUARD, minor cosmetics.
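
A self-contained sketch of the low-bit packing this patch uses for
obj->mapping (names are illustrative; the driver's helpers are
ptr_pack_bits()/ptr_unpack_bits() and mask with PAGE_MASK):

	#include <stdint.h>

	enum map_type { MAP_WB = 0, MAP_WC = 1 };

	/* A page-aligned pointer has zero low bits, so a small enum
	 * can ride along in them. Assumes 4K pages.
	 */
	#define LOW_BITS ((uintptr_t)4095)

	static void *pack_ptr(void *ptr, enum map_type type)
	{
		return (void *)((uintptr_t)ptr | (uintptr_t)type);
	}

	static void *unpack_ptr(void *packed, enum map_type *type)
	{
		uintptr_t v = (uintptr_t)packed;

		*type = (enum map_type)(v & LOW_BITS);
		return (void *)(v & ~LOW_BITS);
	}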
Signed-off-by: Chris Wilson Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1471001999-17787-1-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 21 ++++++++++-- drivers/gpu/drm/i915/i915_gem.c | 58 +++++++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 8 ++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 5 files changed, 73 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7971c76852df..654aabe76efc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3144,13 +3144,20 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) obj->pages_pin_count--; } +enum i915_map_type { + I915_MAP_WB = 0, + I915_MAP_WC, +}; + /** * i915_gem_object_pin_map - return a contiguous mapping of the entire object * @obj - the object to map into kernel address space + * @type - the type of mapping, used to select pgprot_t * * Calls i915_gem_object_pin_pages() to prevent reaping of the object's * pages and then returns a contiguous mapping of the backing storage into - * the kernel address space. + * the kernel address space. Based on the @type of mapping, the PTE will be + * set to either WriteBack or WriteCombine (via pgprot_t). * * The caller must hold the struct_mutex, and is responsible for calling * i915_gem_object_unpin_map() when the mapping is no longer required. @@ -3158,7 +3165,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) * Returns the pointer through which to access the mapped object, or an * ERR_PTR() on error. 
*/ -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj); +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj, + enum i915_map_type type); /** * i915_gem_object_unpin_map - releases an earlier mapping @@ -3899,4 +3907,13 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) return false; } +#define ptr_unpack_bits(ptr, bits) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (bits) = __v & ~PAGE_MASK; \ + (typeof(ptr))(__v & PAGE_MASK); \ +}) + +#define ptr_pack_bits(ptr, bits) \ + ((typeof(ptr))((unsigned long)(ptr) | (bits))) + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e68b4c62be88..75a57e2da86e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2077,6 +2077,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) list_del(&obj->global_list); if (obj->mapping) { + /* low bits are ignored by is_vmalloc_addr and kmap_to_page */ if (is_vmalloc_addr(obj->mapping)) vunmap(obj->mapping); else @@ -2253,7 +2254,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) } /* The 'mapping' part of i915_gem_object_pin_map() below */ -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, + enum i915_map_type type) { unsigned long n_pages = obj->base.size >> PAGE_SHIFT; struct sg_table *sgt = obj->pages; @@ -2262,10 +2264,11 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) struct page *stack_pages[32]; struct page **pages = stack_pages; unsigned long i = 0; + pgprot_t pgprot; void *addr; /* A single page can always be kmapped */ - if (n_pages == 1) + if (n_pages == 1 && type == I915_MAP_WB) return kmap(sg_page(sgt->sgl)); if (n_pages > ARRAY_SIZE(stack_pages)) { @@ -2281,7 +2284,15 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) /* Check that we have the expected number of pages */ GEM_BUG_ON(i != n_pages); - addr = vmap(pages, n_pages, 0, PAGE_KERNEL); + switch (type) { + case I915_MAP_WB: + pgprot = PAGE_KERNEL; + break; + case I915_MAP_WC: + pgprot = pgprot_writecombine(PAGE_KERNEL_IO); + break; + } + addr = vmap(pages, n_pages, 0, pgprot); if (pages != stack_pages) drm_free_large(pages); @@ -2290,27 +2301,54 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) } /* get, pin, and map the pages of the object into kernel space */ -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, + enum i915_map_type type) { + enum i915_map_type has_type; + bool pinned; + void *ptr; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); + GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); ret = i915_gem_object_get_pages(obj); if (ret) return ERR_PTR(ret); i915_gem_object_pin_pages(obj); + pinned = obj->pages_pin_count > 1; - if (!obj->mapping) { - obj->mapping = i915_gem_object_map(obj); - if (!obj->mapping) { - i915_gem_object_unpin_pages(obj); - return ERR_PTR(-ENOMEM); + ptr = ptr_unpack_bits(obj->mapping, has_type); + if (ptr && has_type != type) { + if (pinned) { + ret = -EBUSY; + goto err; } + + if (is_vmalloc_addr(ptr)) + vunmap(ptr); + else + kunmap(kmap_to_page(ptr)); + + ptr = obj->mapping = NULL; } - return obj->mapping; + if (!ptr) { + ptr = i915_gem_object_map(obj, type); + if (!ptr) { + ret = -ENOMEM; + goto err; + } + + obj->mapping = ptr_pack_bits(ptr, type); + } + + return ptr; + 
+err: + i915_gem_object_unpin_pages(obj); + return ERR_PTR(ret); } static void diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index c60a8d5bbad0..10265bb35604 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -119,7 +119,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) if (ret) return ERR_PTR(ret); - addr = i915_gem_object_pin_map(obj); + addr = i915_gem_object_pin_map(obj, I915_MAP_WB); mutex_unlock(&dev->struct_mutex); return addr; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c7f4b64b16f6..c24ac39d51f6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -780,7 +780,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ret) goto err; - vaddr = i915_gem_object_pin_map(ce->state); + vaddr = i915_gem_object_pin_map(ce->state, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); goto unpin_ctx_obj; @@ -1755,7 +1755,7 @@ lrc_setup_hws(struct intel_engine_cs *engine, /* The HWSP is part of the default context object in LRC mode. */ engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) + LRC_PPHWSP_PN * PAGE_SIZE; - hws = i915_gem_object_pin_map(dctx_obj); + hws = i915_gem_object_pin_map(dctx_obj, I915_MAP_WB); if (IS_ERR(hws)) return PTR_ERR(hws); engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; @@ -1968,7 +1968,7 @@ populate_lr_context(struct i915_gem_context *ctx, return ret; } - vaddr = i915_gem_object_pin_map(ctx_obj); + vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); @@ -2189,7 +2189,7 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv, if (!ctx_obj) continue; - vaddr = i915_gem_object_pin_map(ctx_obj); + vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); if (WARN_ON(IS_ERR(vaddr))) continue; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ed19868df9c6..75a9f635c3dc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1951,7 +1951,7 @@ int intel_ring_pin(struct intel_ring *ring) if (ret) goto err_unpin; - addr = i915_gem_object_pin_map(obj); + addr = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(addr)) { ret = PTR_ERR(addr); goto err_unpin; -- cgit v1.2.3 From bf3783e52a8929c738c5c8c5fa1df7e267b5271d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Aug 2016 10:48:54 +0100 Subject: drm/i915: Use VMA as the primary object for context state When working with contexts, we most frequently want the GGTT VMA for the context state, first and foremost. Since the object is available via the VMA, we need only then store the VMA. 
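
A before/after sketch of the access pattern (abridged; ce is a struct
intel_context as in the hunks below):

	/* before: the GEM object was primary, VMA looked up on demand */
	obj = ce->state;
	vma = i915_gem_obj_to_ggtt(obj);

	/* after: the VMA is primary, the object hangs off it */
	vma = ce->state;
	obj = vma->obj;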
v2: Formatting tweaks to debugfs output, restored some comments removed in the next patch Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-15-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 34 ++++++++-------- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem_context.c | 51 +++++++++++++----------- drivers/gpu/drm/i915/i915_gpu_error.c | 7 ++-- drivers/gpu/drm/i915/i915_guc_submission.c | 6 +-- drivers/gpu/drm/i915/intel_lrc.c | 64 +++++++++++++++--------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +-- 7 files changed, 86 insertions(+), 85 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 311225249563..eb8753fb107a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -354,7 +354,7 @@ static int per_file_ctx_stats(int id, void *ptr, void *data) for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { if (ctx->engine[n].state) - per_file_stats(0, ctx->engine[n].state, data); + per_file_stats(0, ctx->engine[n].state->obj, data); if (ctx->engine[n].ring) per_file_stats(0, ctx->engine[n].ring->obj, data); } @@ -1977,7 +1977,7 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_printf(m, "%s: ", engine->name); seq_putc(m, ce->initialised ? 'I' : 'i'); if (ce->state) - describe_obj(m, ce->state); + describe_obj(m, ce->state->obj); if (ce->ring) describe_ctx_ring(m, ce->ring); seq_putc(m, '\n'); @@ -1995,36 +1995,34 @@ static void i915_dump_lrc_obj(struct seq_file *m, struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state; + struct i915_vma *vma = ctx->engine[engine->id].state; struct page *page; - uint32_t *reg_state; int j; - unsigned long ggtt_offset = 0; seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id); - if (ctx_obj == NULL) { - seq_puts(m, "\tNot allocated\n"); + if (!vma) { + seq_puts(m, "\tFake context\n"); return; } - if (!i915_gem_obj_ggtt_bound(ctx_obj)) - seq_puts(m, "\tNot bound in GGTT\n"); - else - ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj); + if (vma->flags & I915_VMA_GLOBAL_BIND) + seq_printf(m, "\tBound in GGTT at 0x%08x\n", + lower_32_bits(vma->node.start)); - if (i915_gem_object_get_pages(ctx_obj)) { - seq_puts(m, "\tFailed to get pages for context object\n"); + if (i915_gem_object_get_pages(vma->obj)) { + seq_puts(m, "\tFailed to get pages for context object\n\n"); return; } - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - if (!WARN_ON(page == NULL)) { - reg_state = kmap_atomic(page); + page = i915_gem_object_get_page(vma->obj, LRC_STATE_PN); + if (page) { + u32 *reg_state = kmap_atomic(page); for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { - seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n", - ggtt_offset + 4096 + (j * 4), + seq_printf(m, + "\t[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n", + j * 4, reg_state[j], reg_state[j + 1], reg_state[j + 2], reg_state[j + 3]); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3285c8e2c87a..259425d99e17 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -893,9 +893,8 @@ struct i915_gem_context { u32 ggtt_alignment; struct intel_context { - struct drm_i915_gem_object *state; + struct i915_vma *state; struct intel_ring *ring; - struct i915_vma *lrc_vma; uint32_t 
*lrc_reg_state; u64 lrc_desc; int pin_count; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 547caf26a6b9..3857ce097c84 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -155,7 +155,7 @@ void i915_gem_context_free(struct kref *ctx_ref) if (ce->ring) intel_ring_free(ce->ring); - i915_gem_object_put(ce->state); + i915_vma_put(ce->state); } list_del(&ctx->link); @@ -281,13 +281,24 @@ __create_hw_context(struct drm_device *dev, ctx->ggtt_alignment = get_context_alignment(dev_priv); if (dev_priv->hw_context_size) { - struct drm_i915_gem_object *obj = - i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_alloc_context_obj(dev, + dev_priv->hw_context_size); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_out; } - ctx->engine[RCS].state = obj; + + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + ret = PTR_ERR(vma); + goto err_out; + } + + ctx->engine[RCS].state = vma; } /* Default context will never have a file_priv */ @@ -399,7 +410,7 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx, struct intel_context *ce = &ctx->engine[engine->id]; if (ce->state) - i915_gem_object_ggtt_unpin(ce->state); + i915_vma_unpin(ce->state); i915_gem_context_put(ctx); } @@ -620,9 +631,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_SET_CONTEXT); - intel_ring_emit(ring, - i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) | - flags); + intel_ring_emit(ring, req->ctx->engine[RCS].state->node.start | flags); /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -755,6 +764,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) struct i915_gem_context *to = req->ctx; struct intel_engine_cs *engine = req->engine; struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; + struct i915_vma *vma = to->engine[RCS].state; struct i915_gem_context *from; u32 hw_flags; int ret, i; @@ -763,8 +773,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) return 0; /* Trying to pin first makes error handling easier. */ - ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0, - to->ggtt_alignment, 0); + ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL); if (ret) return ret; @@ -785,9 +794,9 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * * XXX: We need a real interface to do this instead of trickery. 
*/ - ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false); + ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); if (ret) - goto unpin_out; + goto err; if (needs_pd_load_pre(ppgtt, engine, to)) { /* Older GENs and non render rings still want the load first, @@ -797,7 +806,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) trace_switch_mm(engine, to); ret = ppgtt->switch_mm(ppgtt, req); if (ret) - goto unpin_out; + goto err; } if (!to->engine[RCS].initialised || i915_gem_context_is_default(to)) @@ -814,7 +823,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (to != from || (hw_flags & MI_FORCE_RESTORE)) { ret = mi_set_context(req, hw_flags); if (ret) - goto unpin_out; + goto err; } /* The backing object for the context is done after switching to the @@ -824,8 +833,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * MI_SET_CONTEXT instead of when the next seqno has completed. */ if (from != NULL) { - struct drm_i915_gem_object *obj = from->engine[RCS].state; - /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the * whole damn pipeline, we don't need to explicitly mark the * object dirty. The only exception is that the context must be @@ -833,11 +840,9 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * able to defer doing this until we know the object would be * swapped, but there is no way to do that yet. */ - obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; - i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0); - - /* obj is kept alive until the next request by its active ref */ - i915_gem_object_ggtt_unpin(obj); + i915_vma_move_to_active(from->engine[RCS].state, req, 0); + /* state is kept alive until the next request */ + i915_vma_unpin(from->engine[RCS].state); i915_gem_context_put(from); } engine->last_context = i915_gem_context_get(to); @@ -882,8 +887,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) return 0; -unpin_out: - i915_gem_object_ggtt_unpin(to->engine[RCS].state); +err: + i915_vma_unpin(vma); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9ba71dd4a312..61708faebf79 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1103,9 +1103,10 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, i915_error_ggtt_object_create(dev_priv, engine->scratch.obj); - ee->ctx = - i915_error_ggtt_object_create(dev_priv, - request->ctx->engine[i].state); + if (request->ctx->engine[i].state) { + ee->ctx = i915_error_ggtt_object_create(dev_priv, + request->ctx->engine[i].state->obj); + } if (request->pid) { struct task_struct *task; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 29de8cec1b58..4f0f173f9754 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -358,7 +358,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ - gfx_addr = i915_gem_obj_ggtt_offset(ce->state); + gfx_addr = ce->state->node.start; lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); @@ -1080,7 +1080,7 @@ int intel_guc_suspend(struct drm_device *dev) /* any value greater than GUC_POWER_D0 */ data[1] = GUC_POWER_D1; /* first page is shared data with GuC */ - data[2] = 
i915_gem_obj_ggtt_offset(ctx->engine[RCS].state); + data[2] = ctx->engine[RCS].state->node.start; return host2guc_action(guc, data, ARRAY_SIZE(data)); } @@ -1105,7 +1105,7 @@ int intel_guc_resume(struct drm_device *dev) data[0] = HOST2GUC_ACTION_EXIT_S_STATE; data[1] = GUC_POWER_D0; /* first page is shared data with GuC */ - data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state); + data[2] = ctx->engine[RCS].state->node.start; return host2guc_action(guc, data, ARRAY_SIZE(data)); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c24ac39d51f6..5538e5c541fe 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -315,7 +315,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, desc = ctx->desc_template; /* bits 3-4 */ desc |= engine->ctx_desc_template; /* bits 0-11 */ - desc |= ce->lrc_vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE; + desc |= ce->state->node.start + LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ @@ -763,7 +763,6 @@ void intel_execlists_cancel_requests(struct intel_engine_cs *engine) static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = ctx->i915; struct intel_context *ce = &ctx->engine[engine->id]; void *vaddr; u32 *lrc_reg_state; @@ -774,16 +773,15 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ce->pin_count++) return 0; - ret = i915_gem_object_ggtt_pin(ce->state, NULL, - 0, GEN8_LR_CONTEXT_ALIGN, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL); if (ret) goto err; - vaddr = i915_gem_object_pin_map(ce->state, I915_MAP_WB); + vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); - goto unpin_ctx_obj; + goto unpin_vma; } lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; @@ -792,24 +790,25 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ret) goto unpin_map; - ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state); intel_lr_context_descriptor_update(ctx, engine); lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start; ce->lrc_reg_state = lrc_reg_state; - ce->state->dirty = true; + ce->state->obj->dirty = true; /* Invalidate GuC TLB. */ - if (i915.enable_guc_submission) + if (i915.enable_guc_submission) { + struct drm_i915_private *dev_priv = ctx->i915; I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + } i915_gem_context_get(ctx); return 0; unpin_map: - i915_gem_object_unpin_map(ce->state); -unpin_ctx_obj: - i915_gem_object_ggtt_unpin(ce->state); + i915_gem_object_unpin_map(ce->state->obj); +unpin_vma: + __i915_vma_unpin(ce->state); err: ce->pin_count = 0; return ret; @@ -828,12 +827,8 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, intel_ring_unpin(ce->ring); - i915_gem_object_unpin_map(ce->state); - i915_gem_object_ggtt_unpin(ce->state); - - ce->lrc_vma = NULL; - ce->lrc_desc = 0; - ce->lrc_reg_state = NULL; + i915_gem_object_unpin_map(ce->state->obj); + i915_vma_unpin(ce->state); i915_gem_context_put(ctx); } @@ -1747,19 +1742,18 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) } static int -lrc_setup_hws(struct intel_engine_cs *engine, - struct drm_i915_gem_object *dctx_obj) +lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma) { void *hws; /* The HWSP is part of the default context object in LRC mode. 
*/ - engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) + - LRC_PPHWSP_PN * PAGE_SIZE; - hws = i915_gem_object_pin_map(dctx_obj, I915_MAP_WB); + engine->status_page.gfx_addr = + vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE; + hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); if (IS_ERR(hws)) return PTR_ERR(hws); engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; - engine->status_page.obj = dctx_obj; + engine->status_page.obj = vma->obj; return 0; } @@ -2131,6 +2125,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, { struct drm_i915_gem_object *ctx_obj; struct intel_context *ce = &ctx->engine[engine->id]; + struct i915_vma *vma; uint32_t context_size; struct intel_ring *ring; int ret; @@ -2148,6 +2143,12 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, return PTR_ERR(ctx_obj); } + vma = i915_vma_create(ctx_obj, &ctx->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto error_deref_obj; + } + ring = intel_engine_create_ring(engine, ctx->ring_size); if (IS_ERR(ring)) { ret = PTR_ERR(ring); @@ -2161,7 +2162,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, } ce->ring = ring; - ce->state = ctx_obj; + ce->state = vma; ce->initialised = engine->init_context == NULL; return 0; @@ -2170,8 +2171,6 @@ error_ring_free: intel_ring_free(ring); error_deref_obj: i915_gem_object_put(ctx_obj); - ce->ring = NULL; - ce->state = NULL; return ret; } @@ -2182,24 +2181,23 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv, for_each_engine(engine, dev_priv) { struct intel_context *ce = &ctx->engine[engine->id]; - struct drm_i915_gem_object *ctx_obj = ce->state; void *vaddr; uint32_t *reg_state; - if (!ctx_obj) + if (!ce->state) continue; - vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); if (WARN_ON(IS_ERR(vaddr))) continue; reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ctx_obj->dirty = true; reg_state[CTX_RING_HEAD+1] = 0; reg_state[CTX_RING_TAIL+1] = 0; - i915_gem_object_unpin_map(ctx_obj); + ce->state->obj->dirty = true; + i915_gem_object_unpin_map(ce->state->obj); ce->ring->head = 0; ce->ring->tail = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 75a9f635c3dc..2318a27341c8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2092,8 +2092,8 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, return 0; if (ce->state) { - ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0, - ctx->ggtt_alignment, PIN_HIGH); + ret = i915_vma_pin(ce->state, 0, ctx->ggtt_alignment, + PIN_GLOBAL | PIN_HIGH); if (ret) goto error; } @@ -2127,7 +2127,7 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx, return; if (ce->state) - i915_gem_object_ggtt_unpin(ce->state); + i915_vma_unpin(ce->state); i915_gem_context_put(ctx); } -- cgit v1.2.3 From 7abc98fadfdd060e54fdb7172e2753a2f9808366 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Aug 2016 10:48:55 +0100 Subject: drm/i915: Only change the context object's domain when binding We know that the only access to the context object is via the GPU, and the only time when it can be out of the GPU domain is when it is swapped out and unbound. Therefore we only need to clflush the object when binding, thus avoiding any potential stall on touching the domain on an active context. 
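
Extracted from the i915_gem_context.c hunk below for clarity, the
resulting ordering in do_rcs_switch() is (a sketch, not the full
function):

	/* Only clflush when about to (re)bind into the GGTT; once
	 * bound, the context is only ever written by the GPU.
	 */
	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
		if (ret)
			return ret;
	}

	ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL);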
Signed-off-by: Chris Wilson
Reviewed-by: Joonas Lahtinen
Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-16-git-send-email-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_context.c | 19 +++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.c |  4 ++++
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3857ce097c84..824dfe14bcd0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -772,6 +772,13 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 	if (skip_rcs_switch(ppgtt, engine, to))
 		return 0;
 
+	/* Clear this page out of any CPU caches for coherent swap-in/out. */
+	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
+		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
+		if (ret)
+			return ret;
+	}
+
 	/* Trying to pin first makes error handling easier. */
 	ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL);
 	if (ret)
@@ -786,18 +793,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 	 */
 	from = engine->last_context;
 
-	/*
-	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
-	 * that thanks to write = false in this call and us not setting any gpu
-	 * write domains when putting a context object onto the active list
-	 * (when switching away from it), this won't block.
-	 *
-	 * XXX: We need a real interface to do this instead of trickery.
-	 */
-	ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
-	if (ret)
-		goto err;
-
 	if (needs_pd_load_pre(ppgtt, engine, to)) {
 		/* Older GENs and non render rings still want the load first,
 		 * "PP_DCLV followed by PP_DIR_BASE register through Load
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2318a27341c8..81dc69d1ff05 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2092,6 +2092,10 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
 		return 0;
 
 	if (ce->state) {
+		ret = i915_gem_object_set_to_gtt_domain(ce->state->obj, false);
+		if (ret)
+			goto error;
+
 		ret = i915_vma_pin(ce->state, 0, ctx->ggtt_alignment,
 				   PIN_GLOBAL | PIN_HIGH);
 		if (ret)
--
cgit v1.2.3

From e5cdb22b2799f2729930ef6394378570c66da251 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Mon, 15 Aug 2016 10:48:56 +0100
Subject: drm/i915: Move assertion for iomap access to i915_vma_pin_iomap

Access through the GTT requires the device to be awake. Ideally
i915_vma_pin_iomap() is short-lived and the pinning demarcates the
access through the iomap. This is not entirely true; we have a mixture
of long-lived pins that exceed the wakelock (such as legacy
ringbuffers) and short-lived pins that do live within the wakelock
(such as execlist ringbuffers).
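
The assertion's new home, abridged from the i915_gem_gtt.c hunk below:

	void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
	{
		/* Every GTT mapping path now trips this check,
		 * instead of relying on each caller to remember it.
		 */
		assert_rpm_wakelock_held(to_i915(vma->vm->dev));
		lockdep_assert_held(&vma->vm->dev->struct_mutex);
		/* ... map through the aperture ... */
	}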
Signed-off-by: Chris Wilson
Reviewed-by: Joonas Lahtinen
Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-17-git-send-email-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 3 +++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1bec50bd651b..738a474c5afa 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3650,6 +3650,9 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 {
 	void __iomem *ptr;
 
+	/* Access through the GTT requires the device to be awake. */
+	assert_rpm_wakelock_held(to_i915(vma->vm->dev));
+
 	lockdep_assert_held(&vma->vm->dev->struct_mutex);
 	if (WARN_ON(!vma->obj->map_and_fenceable))
 		return IO_ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 81dc69d1ff05..4a614e567353 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1966,9 +1966,6 @@ int intel_ring_pin(struct intel_ring *ring)
 	if (ret)
 		goto err_unpin;
 
-	/* Access through the GTT requires the device to be awake. */
-	assert_rpm_wakelock_held(dev_priv);
-
 	addr = (void __force *)
 		i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj));
 	if (IS_ERR(addr)) {
--
cgit v1.2.3

From 57e8853181198065bfd96b3690f6dee68d744745 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Mon, 15 Aug 2016 10:48:57 +0100
Subject: drm/i915: Use VMA for ringbuffer tracking

Use the GGTT VMA as the primary cookie for handling ring objects, as
the most common actions upon the ring are mapping and unmapping, which
act upon the VMA itself. By restructuring the code to work with the
ring VMA, we can shrink the code and remove a few cycles from context
pinning.

v2: Move the flush of the object back to before the first pin. We use
the am-I-bound? query to only have to check the flush on the first bind
and so avoid stalling on active rings. Lots of little renames and small
hoops.
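
One of the small hoops, abridged from the intel_ringbuffer.c changes
below: the ring now records at creation whether access must go through
the mappable GTT (iomap) or can use a plain WB kernel map:

	/* stolen objects have no struct pages, and !LLC platforms
	 * need WC access through the aperture, so both take the
	 * iomap path.
	 */
	ring->vma = vma;
	if (!HAS_LLC(engine->i915) || vma->obj->stolen)
		ring->needs_iomap = true;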
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-18-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/i915_guc_submission.c | 16 +- drivers/gpu/drm/i915/intel_lrc.c | 17 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 243 ++++++++++++++--------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 14 +- 6 files changed, 139 insertions(+), 157 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index eb8753fb107a..6e7cfbaff224 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -356,7 +356,7 @@ static int per_file_ctx_stats(int id, void *ptr, void *data) if (ctx->engine[n].state) per_file_stats(0, ctx->engine[n].state->obj, data); if (ctx->engine[n].ring) - per_file_stats(0, ctx->engine[n].ring->obj, data); + per_file_stats(0, ctx->engine[n].ring->vma->obj, data); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 61708faebf79..27f973fbe80f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1128,12 +1128,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->cpu_ring_tail = ring->tail; ee->ringbuffer = i915_error_ggtt_object_create(dev_priv, - ring->obj); + ring->vma->obj); } ee->hws_page = i915_error_ggtt_object_create(dev_priv, - engine->status_page.obj); + engine->status_page.vma->obj); ee->wa_ctx = i915_error_ggtt_object_create(dev_priv, engine->wa_ctx.obj); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 4f0f173f9754..c40b92e212fa 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -343,7 +343,6 @@ static void guc_init_ctx_desc(struct intel_guc *guc, struct intel_context *ce = &ctx->engine[engine->id]; uint32_t guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; - struct drm_i915_gem_object *obj; /* TODO: We have a design issue to be solved here. Only when we * receive the first batch, we know which engine is used by the @@ -358,17 +357,14 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ - gfx_addr = ce->state->node.start; - lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; + lrc->ring_lcra = + ce->state->node.start + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); - obj = ce->ring->obj; - gfx_addr = i915_gem_obj_ggtt_offset(obj); - - lrc->ring_begin = gfx_addr; - lrc->ring_end = gfx_addr + obj->base.size - 1; - lrc->ring_next_free_location = gfx_addr; + lrc->ring_begin = ce->ring->vma->node.start; + lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; + lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; desc.engines_used |= (1 << guc_engine_id); @@ -943,7 +939,7 @@ static void guc_create_ads(struct intel_guc *guc) * to find it. 
*/ engine = &dev_priv->engine[RCS]; - ads->golden_context_lrca = engine->status_page.gfx_addr; + ads->golden_context_lrca = engine->status_page.ggtt_offset; for_each_engine(engine, dev_priv) ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5538e5c541fe..73dd2f9e0547 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1273,7 +1273,7 @@ static void lrc_init_hws(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; I915_WRITE(RING_HWS_PGA(engine->mmio_base), - (u32)engine->status_page.gfx_addr); + engine->status_page.ggtt_offset); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); } @@ -1695,9 +1695,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - if (engine->status_page.obj) { - i915_gem_object_unpin_map(engine->status_page.obj); - engine->status_page.obj = NULL; + if (engine->status_page.vma) { + i915_gem_object_unpin_map(engine->status_page.vma->obj); + engine->status_page.vma = NULL; } intel_lr_context_unpin(dev_priv->kernel_context, engine); @@ -1744,16 +1744,17 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) static int lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma) { + const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE; void *hws; /* The HWSP is part of the default context object in LRC mode. */ - engine->status_page.gfx_addr = - vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE; hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); if (IS_ERR(hws)) return PTR_ERR(hws); - engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; - engine->status_page.obj = vma->obj; + + engine->status_page.page_addr = hws + hws_offset; + engine->status_page.ggtt_offset = vma->node.start + hws_offset; + engine->status_page.vma = vma; return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4a614e567353..bdb1ab98b4f2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -466,7 +466,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine) mmio = RING_HWS_PGA(engine->mmio_base); } - I915_WRITE(mmio, (u32)engine->status_page.gfx_addr); + I915_WRITE(mmio, engine->status_page.ggtt_offset); POSTING_READ(mmio); /* @@ -531,7 +531,6 @@ static int init_ring_common(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring = engine->buffer; - struct drm_i915_gem_object *obj = ring->obj; int ret = 0; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -571,7 +570,7 @@ static int init_ring_common(struct intel_engine_cs *engine) * registers with the above sequence (the readback of the HEAD registers * also enforces ordering), otherwise the hw might lose the new ring * register values. */ - I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj)); + I915_WRITE_START(engine, ring->vma->node.start); /* WaClearRingBufHeadRegAtInit:ctg,elk */ if (I915_READ_HEAD(engine)) @@ -586,16 +585,16 @@ static int init_ring_common(struct intel_engine_cs *engine) /* If the head is still not zero, the ring is dead */ if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 && - I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) && + I915_READ_START(engine) == ring->vma->node.start && (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) { DRM_ERROR("%s initialization failed " - "ctl %08x (valid? 
%d) head %08x tail %08x start %08x [expected %08lx]\n", + "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08llx]\n", engine->name, I915_READ_CTL(engine), I915_READ_CTL(engine) & RING_VALID, I915_READ_HEAD(engine), I915_READ_TAIL(engine), I915_READ_START(engine), - (unsigned long)i915_gem_obj_ggtt_offset(obj)); + ring->vma->node.start); ret = -EIO; goto out; } @@ -1853,79 +1852,79 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine) static void cleanup_status_page(struct intel_engine_cs *engine) { - struct drm_i915_gem_object *obj; + struct i915_vma *vma; - obj = engine->status_page.obj; - if (obj == NULL) + vma = fetch_and_zero(&engine->status_page.vma); + if (!vma) return; - kunmap(sg_page(obj->pages->sgl)); - i915_gem_object_ggtt_unpin(obj); - i915_gem_object_put(obj); - engine->status_page.obj = NULL; + i915_vma_unpin(vma); + i915_gem_object_unpin_map(vma->obj); + i915_vma_put(vma); } static int init_status_page(struct intel_engine_cs *engine) { - struct drm_i915_gem_object *obj = engine->status_page.obj; - - if (obj == NULL) { - unsigned flags; - int ret; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags; + int ret; - obj = i915_gem_object_create(&engine->i915->drm, 4096); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate status page\n"); - return PTR_ERR(obj); - } + obj = i915_gem_object_create(&engine->i915->drm, 4096); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate status page\n"); + return PTR_ERR(obj); + } - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - if (ret) - goto err_unref; - - flags = 0; - if (!HAS_LLC(engine->i915)) - /* On g33, we cannot place HWS above 256MiB, so - * restrict its pinning to the low mappable arena. - * Though this restriction is not documented for - * gen4, gen5, or byt, they also behave similarly - * and hang if the HWS is placed at the top of the - * GTT. To generalise, it appears that all !llc - * platforms have issues with us placing the HWS - * above the mappable region (even though we never - * actualy map it). - */ - flags |= PIN_MAPPABLE; - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags); - if (ret) { -err_unref: - i915_gem_object_put(obj); - return ret; - } + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + if (ret) + goto err; - engine->status_page.obj = obj; + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; } - engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj); - engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl)); - memset(engine->status_page.page_addr, 0, PAGE_SIZE); + flags = PIN_GLOBAL; + if (!HAS_LLC(engine->i915)) + /* On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actualy map it). 
+ */ + flags |= PIN_MAPPABLE; + ret = i915_vma_pin(vma, 0, 4096, flags); + if (ret) + goto err; - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", - engine->name, engine->status_page.gfx_addr); + engine->status_page.vma = vma; + engine->status_page.ggtt_offset = vma->node.start; + engine->status_page.page_addr = + i915_gem_object_pin_map(obj, I915_MAP_WB); + DRM_DEBUG_DRIVER("%s hws offset: 0x%08llx\n", + engine->name, vma->node.start); return 0; + +err: + i915_gem_object_put(obj); + return ret; } static int init_phys_status_page(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (!dev_priv->status_page_dmah) { - dev_priv->status_page_dmah = - drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); - if (!dev_priv->status_page_dmah) - return -ENOMEM; - } + dev_priv->status_page_dmah = + drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); + if (!dev_priv->status_page_dmah) + return -ENOMEM; engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr; memset(engine->status_page.page_addr, 0, PAGE_SIZE); @@ -1935,52 +1934,43 @@ static int init_phys_status_page(struct intel_engine_cs *engine) int intel_ring_pin(struct intel_ring *ring) { - struct drm_i915_private *dev_priv = ring->engine->i915; - struct drm_i915_gem_object *obj = ring->obj; /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - unsigned flags = PIN_OFFSET_BIAS | 4096; + unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096; + struct i915_vma *vma = ring->vma; void *addr; int ret; - if (HAS_LLC(dev_priv) && !obj->stolen) { - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags); - if (ret) - return ret; + GEM_BUG_ON(ring->vaddr); - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - goto err_unpin; + if (ring->needs_iomap) + flags |= PIN_MAPPABLE; - addr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_unpin; - } - } else { - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, - flags | PIN_MAPPABLE); - if (ret) + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + if (flags & PIN_MAPPABLE) + ret = i915_gem_object_set_to_gtt_domain(vma->obj, true); + else + ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); + if (unlikely(ret)) return ret; + } - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - goto err_unpin; + ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags); + if (unlikely(ret)) + return ret; - addr = (void __force *) - i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_unpin; - } - } + if (flags & PIN_MAPPABLE) + addr = (void __force *)i915_vma_pin_iomap(vma); + else + addr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(addr)) + goto err; ring->vaddr = addr; - ring->vma = i915_gem_obj_to_ggtt(obj); return 0; -err_unpin: - i915_gem_object_ggtt_unpin(obj); - return ret; +err: + i915_vma_unpin(vma); + return PTR_ERR(addr); } void intel_ring_unpin(struct intel_ring *ring) @@ -1988,60 +1978,56 @@ void intel_ring_unpin(struct intel_ring *ring) GEM_BUG_ON(!ring->vma); GEM_BUG_ON(!ring->vaddr); - if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen) - i915_gem_object_unpin_map(ring->obj); - else + if (ring->needs_iomap) i915_vma_unpin_iomap(ring->vma); + else + i915_gem_object_unpin_map(ring->vma->obj); ring->vaddr = NULL; - i915_gem_object_ggtt_unpin(ring->obj); - ring->vma = NULL; -} - -static void intel_destroy_ringbuffer_obj(struct intel_ring *ring) -{ - i915_gem_object_put(ring->obj); - ring->obj = NULL; + 
i915_vma_unpin(ring->vma); } -static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ring *ring) +static struct i915_vma * +intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) { struct drm_i915_gem_object *obj; + struct i915_vma *vma; - obj = NULL; - if (!HAS_LLC(dev)) - obj = i915_gem_object_create_stolen(dev, ring->size); - if (obj == NULL) - obj = i915_gem_object_create(dev, ring->size); + obj = ERR_PTR(-ENODEV); + if (!HAS_LLC(dev_priv)) + obj = i915_gem_object_create_stolen(&dev_priv->drm, size); if (IS_ERR(obj)) - return PTR_ERR(obj); + obj = i915_gem_object_create(&dev_priv->drm, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); /* mark ring buffers as read-only from GPU side by default */ obj->gt_ro = 1; - ring->obj = obj; + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err; - return 0; + return vma; + +err: + i915_gem_object_put(obj); + return vma; } struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size) { struct intel_ring *ring; - int ret; + struct i915_vma *vma; GEM_BUG_ON(!is_power_of_2(size)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (ring == NULL) { - DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", - engine->name); + if (!ring) return ERR_PTR(-ENOMEM); - } ring->engine = engine; - list_add(&ring->link, &engine->buffers); INIT_LIST_HEAD(&ring->request_list); @@ -2057,22 +2043,23 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) ring->last_retired_head = -1; intel_ring_update_space(ring); - ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring); - if (ret) { - DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n", - engine->name, ret); - list_del(&ring->link); + vma = intel_ring_create_vma(engine->i915, size); + if (IS_ERR(vma)) { kfree(ring); - return ERR_PTR(ret); + return ERR_CAST(vma); } + ring->vma = vma; + if (!HAS_LLC(engine->i915) || vma->obj->stolen) + ring->needs_iomap = true; + list_add(&ring->link, &engine->buffers); return ring; } void intel_ring_free(struct intel_ring *ring) { - intel_destroy_ringbuffer_obj(ring); + i915_vma_put(ring->vma); list_del(&ring->link); kfree(ring); } @@ -2166,7 +2153,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) ret = PTR_ERR(ring); goto error; } - engine->buffer = ring; if (I915_NEED_GFX_HWS(dev_priv)) { ret = init_status_page(engine); @@ -2181,11 +2167,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) ret = intel_ring_pin(ring); if (ret) { - DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", - engine->name, ret); - intel_destroy_ringbuffer_obj(ring); + intel_ring_free(ring); goto error; } + engine->buffer = ring; return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ea2735144b2a..bc692d519a72 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -26,10 +26,10 @@ */ #define I915_RING_FREE_SPACE 64 -struct intel_hw_status_page { - u32 *page_addr; - unsigned int gfx_addr; - struct drm_i915_gem_object *obj; +struct intel_hw_status_page { + struct i915_vma *vma; + u32 *page_addr; + u32 ggtt_offset; }; #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) @@ -83,9 +83,8 @@ struct intel_engine_hangcheck { }; struct intel_ring { - struct drm_i915_gem_object *obj; - void *vaddr; struct i915_vma *vma; + void *vaddr; struct intel_engine_cs *engine; struct list_head link; @@ -97,6 +96,7 @@ struct intel_ring { int space; int size; 
int effective_size; + bool needs_iomap; /** We track the position of the requests in the ring buffer, and * when each is retired we increment last_retired_head as the GPU @@ -516,7 +516,7 @@ int init_workarounds_ring(struct intel_engine_cs *engine); static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) { - return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR; + return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; } /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -- cgit v1.2.3 From 56c0f1a7c1ae68cb719fc9c8aba35d1f86149b29 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Aug 2016 10:48:58 +0100 Subject: drm/i915: Use VMA for scratch page tracking Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-19-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 18 +++++------ drivers/gpu/drm/i915/intel_ringbuffer.c | 55 +++++++++++++++++++-------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 ++---- 6 files changed, 46 insertions(+), 43 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 824dfe14bcd0..e566167d9441 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -660,7 +660,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, engine->scratch.gtt_offset); + intel_ring_emit(ring, engine->scratch->node.start); intel_ring_emit(ring, MI_NOOP); } intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 27f973fbe80f..c327733e6735 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1101,7 +1101,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, if (HAS_BROKEN_CS_TLB(dev_priv)) ee->wa_batchbuffer = i915_error_ggtt_object_create(dev_priv, - engine->scratch.obj); + engine->scratch->obj); if (request->ctx->engine[i].state) { ee->ctx = i915_error_ggtt_object_create(dev_priv, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c5c0c35d4f6e..2e7d03c5bf5c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11795,7 +11795,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, req->engine->scratch.gtt_offset + 256); + intel_ring_emit(ring, req->engine->scratch->node.start + 256); if (IS_GEN8(dev)) { intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_NOOP); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 73dd2f9e0547..42999ba02152 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -914,7 +914,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256); + 
wa_ctx_emit(batch, index, engine->scratch->node.start + 256); wa_ctx_emit(batch, index, 0); wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); @@ -932,7 +932,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256); + wa_ctx_emit(batch, index, engine->scratch->node.start + 256); wa_ctx_emit(batch, index, 0); return index; @@ -993,7 +993,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - scratch_addr = engine->scratch.gtt_offset + 2*CACHELINE_BYTES; + scratch_addr = engine->scratch->node.start + 2 * CACHELINE_BYTES; wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | @@ -1072,8 +1072,8 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { - uint32_t scratch_addr - = engine->scratch.gtt_offset + 2*CACHELINE_BYTES; + u32 scratch_addr = + engine->scratch->node.start + 2 * CACHELINE_BYTES; wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | @@ -1215,7 +1215,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) } /* some WA perform writes to scratch page, ensure it is valid */ - if (engine->scratch.obj == NULL) { + if (!engine->scratch) { DRM_ERROR("scratch page not allocated for %s\n", engine->name); return -EINVAL; } @@ -1483,7 +1483,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, { struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 scratch_addr = engine->scratch->node.start + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; u32 flags = 0; int ret; @@ -1844,11 +1844,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine) else engine->init_hw = gen8_init_render_ring; engine->init_context = gen8_init_rcs_context; - engine->cleanup = intel_fini_pipe_control; + engine->cleanup = intel_engine_cleanup_scratch; engine->emit_flush = gen8_emit_flush_render; engine->emit_request = gen8_emit_request_render; - ret = intel_init_pipe_control(engine, 4096); + ret = intel_engine_create_scratch(engine, 4096); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index bdb1ab98b4f2..7ce912f8d96c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -176,7 +176,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { struct intel_ring *ring = req->ring; u32 scratch_addr = - req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + req->engine->scratch->node.start + 2 * CACHELINE_BYTES; int ret; ret = intel_ring_begin(req, 6); @@ -212,7 +212,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; u32 scratch_addr = - req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + req->engine->scratch->node.start + 2 * CACHELINE_BYTES; u32 flags = 0; int ret; @@ -286,7 +286,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring 
*ring = req->ring; u32 scratch_addr = - req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + req->engine->scratch->node.start + 2 * CACHELINE_BYTES; u32 flags = 0; int ret; @@ -370,7 +370,8 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req, static int gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 scratch_addr = + req->engine->scratch->node.start + 2 * CACHELINE_BYTES; u32 flags = 0; int ret; @@ -612,45 +613,51 @@ out: return ret; } -void intel_fini_pipe_control(struct intel_engine_cs *engine) +void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) { - if (engine->scratch.obj == NULL) + struct i915_vma *vma; + + vma = fetch_and_zero(&engine->scratch); + if (!vma) return; - i915_gem_object_ggtt_unpin(engine->scratch.obj); - i915_gem_object_put(engine->scratch.obj); - engine->scratch.obj = NULL; + i915_vma_unpin(vma); + i915_vma_put(vma); } -int intel_init_pipe_control(struct intel_engine_cs *engine, int size) +int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) { struct drm_i915_gem_object *obj; + struct i915_vma *vma; int ret; - WARN_ON(engine->scratch.obj); + WARN_ON(engine->scratch); obj = i915_gem_object_create_stolen(&engine->i915->drm, size); if (!obj) obj = i915_gem_object_create(&engine->i915->drm, size); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); - ret = PTR_ERR(obj); - goto err; + return PTR_ERR(obj); } - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH); + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH); if (ret) goto err_unref; - engine->scratch.obj = obj; - engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", - engine->name, engine->scratch.gtt_offset); + engine->scratch = vma; + DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08llx\n", + engine->name, vma->node.start); return 0; err_unref: - i915_gem_object_put(engine->scratch.obj); -err: + i915_gem_object_put(obj); return ret; } @@ -1305,7 +1312,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) dev_priv->semaphore_obj = NULL; } - intel_fini_pipe_control(engine); + intel_engine_cleanup_scratch(engine); } static int gen8_rcs_signal(struct drm_i915_gem_request *req) @@ -1763,7 +1770,7 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, unsigned int dispatch_flags) { struct intel_ring *ring = req->ring; - u32 cs_offset = req->engine->scratch.gtt_offset; + u32 cs_offset = req->engine->scratch->node.start; int ret; ret = intel_ring_begin(req, 6); @@ -2793,11 +2800,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) return ret; if (INTEL_GEN(dev_priv) >= 6) { - ret = intel_init_pipe_control(engine, 4096); + ret = intel_engine_create_scratch(engine, 4096); if (ret) return ret; } else if (HAS_BROKEN_CS_TLB(dev_priv)) { - ret = intel_init_pipe_control(engine, I830_WA_SIZE); + ret = intel_engine_create_scratch(engine, I830_WA_SIZE); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index bc692d519a72..c6024db75f93 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -198,6 +198,7 @@ struct intel_engine_cs { struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; + struct i915_vma 
*scratch; u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ @@ -320,11 +321,6 @@ struct intel_engine_cs { struct intel_engine_hangcheck hangcheck; - struct { - struct drm_i915_gem_object *obj; - u32 gtt_offset; - } scratch; - bool needs_cmd_parser; /* @@ -476,8 +472,8 @@ void intel_ring_update_space(struct intel_ring *ring); void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); -int intel_init_pipe_control(struct intel_engine_cs *engine, int size); -void intel_fini_pipe_control(struct intel_engine_cs *engine); +int intel_engine_create_scratch(struct intel_engine_cs *engine, int size); +void intel_engine_cleanup_scratch(struct intel_engine_cs *engine); void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); -- cgit v1.2.3 From adc320c4b70916c7bed1420c912b83eb0c2b2c24 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Aug 2016 10:48:59 +0100 Subject: drm/i915: Move common scratch allocation/destroy to intel_engine_cs.c Since the scratch allocation and cleanup is shared by all engine submission backends, move it out of the legacy intel_ringbuffer.c and into the new home for common routines, intel_engine_cs.c Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-20-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 50 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 1 - drivers/gpu/drm/i915/intel_ringbuffer.c | 50 --------------------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +-- 4 files changed, 51 insertions(+), 54 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 186c12d07f99..7104dec5e893 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -195,6 +195,54 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) i915_gem_batch_pool_init(engine, &engine->batch_pool); } +int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + WARN_ON(engine->scratch); + + obj = i915_gem_object_create_stolen(&engine->i915->drm, size); + if (!obj) + obj = i915_gem_object_create(&engine->i915->drm, size); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate scratch page\n"); + return PTR_ERR(obj); + } + + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto err_unref; + + engine->scratch = vma; + DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08llx\n", + engine->name, vma->node.start); + return 0; + +err_unref: + i915_gem_object_put(obj); + return ret; +} + +static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) +{ + struct i915_vma *vma; + + vma = fetch_and_zero(&engine->scratch); + if (!vma) + return; + + i915_vma_unpin(vma); + i915_vma_put(vma); +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. 
@@ -226,6 +274,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine) */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { + intel_engine_cleanup_scratch(engine); + intel_engine_cleanup_cmd_parser(engine); intel_engine_fini_breadcrumbs(engine); i915_gem_batch_pool_fini(&engine->batch_pool); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 42999ba02152..56c904e2dc98 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1844,7 +1844,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine) else engine->init_hw = gen8_init_render_ring; engine->init_context = gen8_init_rcs_context; - engine->cleanup = intel_engine_cleanup_scratch; engine->emit_flush = gen8_emit_flush_render; engine->emit_request = gen8_emit_request_render; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7ce912f8d96c..c89aea55bc10 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -613,54 +613,6 @@ out: return ret; } -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) -{ - struct i915_vma *vma; - - vma = fetch_and_zero(&engine->scratch); - if (!vma) - return; - - i915_vma_unpin(vma); - i915_vma_put(vma); -} - -int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int ret; - - WARN_ON(engine->scratch); - - obj = i915_gem_object_create_stolen(&engine->i915->drm, size); - if (!obj) - obj = i915_gem_object_create(&engine->i915->drm, size); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate scratch page\n"); - return PTR_ERR(obj); - } - - vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_unref; - } - - ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH); - if (ret) - goto err_unref; - - engine->scratch = vma; - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08llx\n", - engine->name, vma->node.start); - return 0; - -err_unref: - i915_gem_object_put(obj); - return ret; -} - static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { struct intel_ring *ring = req->ring; @@ -1311,8 +1263,6 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) i915_gem_object_put(dev_priv->semaphore_obj); dev_priv->semaphore_obj = NULL; } - - intel_engine_cleanup_scratch(engine); } static int gen8_rcs_signal(struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c6024db75f93..dca84258be16 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -472,11 +472,9 @@ void intel_ring_update_space(struct intel_ring *ring); void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); -int intel_engine_create_scratch(struct intel_engine_cs *engine, int size); -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine); - void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); +int intel_engine_create_scratch(struct intel_engine_cs *engine, int size); void intel_engine_cleanup_common(struct intel_engine_cs *engine); static inline int intel_engine_idle(struct intel_engine_cs *engine, -- cgit v1.2.3 From 57f275a22b0812b140bf2e581f5e9855f27b78f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Aug 2016 10:49:00 +0100 Subject: drm/i915: Move common 
seqno reset to intel_engine_cs.c Since the intel_engine_init_seqno() is shared by all engine submission backends, move it out of the legacy intel_ringbuffer.c and into the new home for common routines, intel_engine_cs.c Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-21-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 42 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 42 --------------------------------- 2 files changed, 42 insertions(+), 42 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 7104dec5e893..829624571ca4 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -161,6 +161,48 @@ cleanup: return ret; } +void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) +{ + struct drm_i915_private *dev_priv = engine->i915; + + /* Our semaphore implementation is strictly monotonic (i.e. we proceed + * so long as the semaphore value in the register/page is greater + * than the sync value), so whenever we reset the seqno, + * so long as we reset the tracking semaphore value to 0, it will + * always be before the next request's seqno. If we don't reset + * the semaphore value, then when the seqno moves backwards all + * future waits will complete instantly (causing rendering corruption). + */ + if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { + I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); + I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); + if (HAS_VEBOX(dev_priv)) + I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); + } + if (dev_priv->semaphore_obj) { + struct drm_i915_gem_object *obj = dev_priv->semaphore_obj; + struct page *page = i915_gem_object_get_dirty_page(obj, 0); + void *semaphores = kmap(page); + memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), + 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); + kunmap(page); + } + memset(engine->semaphore.sync_seqno, 0, + sizeof(engine->semaphore.sync_seqno)); + + intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + engine->last_submitted_seqno = seqno; + + engine->hangcheck.seqno = seqno; + + /* After manually advancing the seqno, fake the interrupt in case + * there are any waiters for that seqno. + */ + intel_engine_wakeup(engine); +} + void intel_engine_init_hangcheck(struct intel_engine_cs *engine) { memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c89aea55bc10..6008d54b9152 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2314,48 +2314,6 @@ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) return 0; } -void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) -{ - struct drm_i915_private *dev_priv = engine->i915; - - /* Our semaphore implementation is strictly monotonic (i.e. we proceed - * so long as the semaphore value in the register/page is greater - * than the sync value), so whenever we reset the seqno, - * so long as we reset the tracking semaphore value to 0, it will - * always be before the next request's seqno. 
If we don't reset - * the semaphore value, then when the seqno moves backwards all - * future waits will complete instantly (causing rendering corruption). - */ - if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { - I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); - I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); - if (HAS_VEBOX(dev_priv)) - I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); - } - if (dev_priv->semaphore_obj) { - struct drm_i915_gem_object *obj = dev_priv->semaphore_obj; - struct page *page = i915_gem_object_get_dirty_page(obj, 0); - void *semaphores = kmap(page); - memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), - 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); - kunmap(page); - } - memset(engine->semaphore.sync_seqno, 0, - sizeof(engine->semaphore.sync_seqno)); - - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - engine->last_submitted_seqno = seqno; - - engine->hangcheck.seqno = seqno; - - /* After manually advancing the seqno, fake the interrupt in case - * there are any waiters for that seqno. - */ - intel_engine_wakeup(engine); -} - static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) { struct drm_i915_private *dev_priv = request->i915; -- cgit v1.2.3 From 51d545d0268f3e1b68aa71c906377c2098462e4f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Aug 2016 10:49:02 +0100 Subject: drm/i915: Use VMA as the primary tracker for semaphore page Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-23-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 +-- drivers/gpu/drm/i915/i915_gpu_error.c | 16 ++++----- drivers/gpu/drm/i915/intel_engine_cs.c | 12 ++++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 60 +++++++++++++++++++-------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +-- 6 files changed, 55 insertions(+), 43 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6e7cfbaff224..a922b78350d1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3189,7 +3189,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused) struct page *page; uint64_t *seqno; - page = i915_gem_object_get_page(dev_priv->semaphore_obj, 0); + page = i915_gem_object_get_page(dev_priv->semaphore->obj, 0); seqno = (uint64_t *)kmap_atomic(page); for_each_engine_id(engine, dev_priv, id) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 259425d99e17..50dc3613c61c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -733,7 +733,7 @@ struct drm_i915_error_state { u64 fence[I915_MAX_NUM_FENCES]; struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; - struct drm_i915_error_object *semaphore_obj; + struct drm_i915_error_object *semaphore; struct drm_i915_error_engine { int engine_id; @@ -1750,7 +1750,7 @@ struct drm_i915_private { struct pci_dev *bridge_dev; struct i915_gem_context *kernel_context; struct intel_engine_cs engine[I915_NUM_ENGINES]; - struct drm_i915_gem_object *semaphore_obj; + struct i915_vma *semaphore; u32 next_seqno; struct drm_dma_handle *status_page_dmah; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index c327733e6735..4068630bfc68 
100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -549,7 +549,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } - if ((obj = error->semaphore_obj)) { + if ((obj = error->semaphore)) { err_printf(m, "Semaphore page = 0x%08x\n", lower_32_bits(obj->gtt_offset)); for (elt = 0; elt < PAGE_SIZE/16; elt += 4) { @@ -640,7 +640,7 @@ static void i915_error_state_free(struct kref *error_ref) kfree(ee->waiters); } - i915_error_object_free(error->semaphore_obj); + i915_error_object_free(error->semaphore); for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) kfree(error->active_bo[i]); @@ -876,7 +876,7 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error, struct intel_engine_cs *to; enum intel_engine_id id; - if (!error->semaphore_obj) + if (!error->semaphore) return; for_each_engine_id(to, dev_priv, id) { @@ -889,7 +889,7 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error, signal_offset = (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; - tmp = error->semaphore_obj->pages[0]; + tmp = error->semaphore->pages[0]; idx = intel_engine_sync_index(engine, to); ee->semaphore_mboxes[idx] = tmp[signal_offset]; @@ -1061,11 +1061,9 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_gem_request *request; int i, count; - if (dev_priv->semaphore_obj) { - error->semaphore_obj = - i915_error_ggtt_object_create(dev_priv, - dev_priv->semaphore_obj); - } + error->semaphore = + i915_error_ggtt_object_create(dev_priv, + dev_priv->semaphore->obj); for (i = 0; i < I915_NUM_ENGINES; i++) { struct intel_engine_cs *engine = &dev_priv->engine[i]; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 829624571ca4..573f642a74f8 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -179,12 +179,16 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) if (HAS_VEBOX(dev_priv)) I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); } - if (dev_priv->semaphore_obj) { - struct drm_i915_gem_object *obj = dev_priv->semaphore_obj; - struct page *page = i915_gem_object_get_dirty_page(obj, 0); - void *semaphores = kmap(page); + if (dev_priv->semaphore) { + struct page *page = i915_vma_first_page(dev_priv->semaphore); + void *semaphores; + + /* Semaphores are in noncoherent memory, flush to be safe */ + semaphores = kmap(page); memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); + drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), + I915_NUM_ENGINES * gen8_semaphore_seqno_size); kunmap(page); } memset(engine->semaphore.sync_seqno, 0, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6008d54b9152..30b066140b0c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1257,12 +1257,14 @@ static int init_render_ring(struct intel_engine_cs *engine) static void render_ring_cleanup(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + struct i915_vma *vma; - if (dev_priv->semaphore_obj) { - i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); - i915_gem_object_put(dev_priv->semaphore_obj); - dev_priv->semaphore_obj = NULL; - } + vma = fetch_and_zero(&dev_priv->semaphore); + if (!vma) + return; + + i915_vma_unpin(vma); + i915_vma_put(vma); } static int gen8_rcs_signal(struct 
drm_i915_gem_request *req) @@ -2523,30 +2525,30 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, if (!i915.semaphores) return; - if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) { + if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) { + struct i915_vma *vma; + obj = i915_gem_object_create(&dev_priv->drm, 4096); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n"); - i915.semaphores = 0; - } else { - i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_object_ggtt_pin(obj, NULL, - 0, 0, PIN_HIGH); - if (ret != 0) { - i915_gem_object_put(obj); - DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n"); - i915.semaphores = 0; - } else { - dev_priv->semaphore_obj = obj; - } - } - } + if (IS_ERR(obj)) + goto err; - if (!i915.semaphores) - return; + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err_obj; + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto err_obj; + + ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto err_obj; + + dev_priv->semaphore = vma; + } if (INTEL_GEN(dev_priv) >= 8) { - u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj); + u64 offset = dev_priv->semaphore->node.start; engine->semaphore.sync_to = gen8_ring_sync_to; engine->semaphore.signal = gen8_xcs_signal; @@ -2613,6 +2615,14 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, engine->semaphore.mbox.signal[i] = mbox_reg; } } + + return; + +err_obj: + i915_gem_object_put(obj); +err: + DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n"); + i915.semaphores = 0; } static void intel_ring_init_irq(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index dca84258be16..cb40785e7677 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -57,10 +57,10 @@ struct intel_hw_status_page { #define GEN8_SEMAPHORE_OFFSET(__from, __to) \ (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size) #define GEN8_SIGNAL_OFFSET(__ring, to) \ - (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ + (dev_priv->semaphore->node.start + \ GEN8_SEMAPHORE_OFFSET((__ring)->id, (to))) #define GEN8_WAIT_OFFSET(__ring, from) \ - (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ + (dev_priv->semaphore->node.start + \ GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) enum intel_engine_hangcheck_action { -- cgit v1.2.3 From 19880c4a3f19a8ff116e992c2f79459b7c2d15c7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Aug 2016 10:49:05 +0100 Subject: drm/i915: Consolidate i915_vma_unpin_and_release() In a few places, we repeat a call to clear a pointer to a vma whilst unpinning and releasing a reference to its owner. Refactor those into a common function. 
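In case the pattern is easier to see outside the diff context, a condensed before/after sketch of the refactor (drawn from the hunks below, illustrative only):

	/* Before: callers such as the GuC code open-coded the sequence. */
	static void guc_release_vma(struct i915_vma *vma)
	{
		if (!vma)
			return;

		i915_vma_unpin(vma);
		i915_vma_put(vma);
	}
		/* ... */
		guc_release_vma(fetch_and_zero(&guc->ads_vma));

	/* After: one helper owns unpin + put and also clears the pointer. */
	void i915_vma_unpin_and_release(struct i915_vma **p_vma)
	{
		struct i915_vma *vma;

		vma = fetch_and_zero(p_vma);
		if (!vma)
			return;

		i915_vma_unpin(vma);
		i915_vma_put(vma);
	}
		/* ... */
		i915_vma_unpin_and_release(&guc->ads_vma);
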
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-26-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 12 ++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_guc_submission.c | 21 ++++----------------- drivers/gpu/drm/i915/intel_engine_cs.c | 9 +-------- drivers/gpu/drm/i915/intel_lrc.c | 9 +-------- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 +------- 6 files changed, 20 insertions(+), 40 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 738a474c5afa..d15eb1d71341 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3674,3 +3674,15 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) __i915_vma_pin(vma); return ptr; } + +void i915_vma_unpin_and_release(struct i915_vma **p_vma) +{ + struct i915_vma *vma; + + vma = fetch_and_zero(p_vma); + if (!vma) + return; + + i915_vma_unpin(vma); + i915_vma_put(vma); +} diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index a2691943a404..ec538fcc9c20 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -232,6 +232,7 @@ struct i915_vma * i915_vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *view); +void i915_vma_unpin_and_release(struct i915_vma **p_vma); static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) { diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index c40b92e212fa..e7dbc64ec1da 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -653,19 +653,6 @@ err: return vma; } -/** - * guc_release_vma() - Release gem object allocated for GuC usage - * @vma: gem obj to be released - */ -static void guc_release_vma(struct i915_vma *vma) -{ - if (!vma) - return; - - i915_vma_unpin(vma); - i915_vma_put(vma); -} - static void guc_client_free(struct drm_i915_private *dev_priv, struct i915_guc_client *client) @@ -690,7 +677,7 @@ guc_client_free(struct drm_i915_private *dev_priv, kunmap(kmap_to_page(client->client_base)); } - guc_release_vma(client->vma); + i915_vma_unpin_and_release(&client->vma); if (client->ctx_index != GUC_INVALID_CTX_ID) { guc_fini_ctx_desc(guc, client); @@ -1048,12 +1035,12 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - guc_release_vma(fetch_and_zero(&guc->ads_vma)); - guc_release_vma(fetch_and_zero(&guc->log_vma)); + i915_vma_unpin_and_release(&guc->ads_vma); + i915_vma_unpin_and_release(&guc->log_vma); if (guc->ctx_pool_vma) ida_destroy(&guc->ctx_ids); - guc_release_vma(fetch_and_zero(&guc->ctx_pool_vma)); + i915_vma_unpin_and_release(&guc->ctx_pool_vma); } /** diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 573f642a74f8..f02d66bbec4b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -279,14 +279,7 @@ err_unref: static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) { - struct i915_vma *vma; - - vma = fetch_and_zero(&engine->scratch); - if (!vma) - return; - - i915_vma_unpin(vma); - i915_vma_put(vma); + i915_vma_unpin_and_release(&engine->scratch); } /** diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
b/drivers/gpu/drm/i915/intel_lrc.c index 64cb04e63512..2673fb4f817b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1193,14 +1193,7 @@ err: static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) { - struct i915_vma *vma; - - vma = fetch_and_zero(&engine->wa_ctx.vma); - if (!vma) - return; - - i915_vma_unpin(vma); - i915_vma_put(vma); + i915_vma_unpin_and_release(&engine->wa_ctx.vma); } static int intel_init_workaround_bb(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 30b066140b0c..65ef172e8761 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1257,14 +1257,8 @@ static int init_render_ring(struct intel_engine_cs *engine) static void render_ring_cleanup(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct i915_vma *vma; - - vma = fetch_and_zero(&dev_priv->semaphore); - if (!vma) - return; - i915_vma_unpin(vma); - i915_vma_put(vma); + i915_vma_unpin_and_release(&dev_priv->semaphore); } static int gen8_rcs_signal(struct drm_i915_gem_request *req) -- cgit v1.2.3 From bde13ebdab0778b758b267ff9e38d6c10a42bdc3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Aug 2016 10:49:07 +0100 Subject: drm/i915: Introduce i915_ggtt_offset() This little helper only exists to safely discard the upper unused 32bits of the general 64-bit VMA address - as we know that all Global GTT currently are less than 4GiB in size and so that the upper bits must be zero. In many places, we use a u32 for the global GTT offset and we want to document where we are discarding the full VMA offset. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-28-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 11 +++++------ drivers/gpu/drm/i915/i915_gem_context.c | 6 ++++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 9 +++++++++ drivers/gpu/drm/i915/i915_guc_submission.c | 15 ++++++++------- drivers/gpu/drm/i915/intel_display.c | 10 +++------- drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/intel_fbdev.c | 6 +++--- drivers/gpu/drm/i915/intel_guc_loader.c | 6 +++--- drivers/gpu/drm/i915/intel_lrc.c | 20 +++++++++++--------- drivers/gpu/drm/i915/intel_overlay.c | 10 ++++++---- drivers/gpu/drm/i915/intel_ringbuffer.c | 28 ++++++++++++++-------------- 13 files changed, 70 insertions(+), 59 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 73f3466402c5..019df75be5ac 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2008,7 +2008,7 @@ static void i915_dump_lrc_obj(struct seq_file *m, if (vma->flags & I915_VMA_GLOBAL_BIND) seq_printf(m, "\tBound in GGTT at 0x%08x\n", - lower_32_bits(vma->node.start)); + i915_ggtt_offset(vma)); if (i915_gem_object_get_pages(vma->obj)) { seq_puts(m, "\tFailed to get pages for context object\n\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bbee45acedeb..bd58878de77b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3330,7 +3330,7 @@ static inline unsigned long i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o, const struct i915_ggtt_view *view) { 
- return i915_gem_object_to_ggtt(o, view)->node.start; + return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view)); } /* i915_gem_fence.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 685253a1323b..7e08c774a1aa 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -767,7 +767,7 @@ i915_gem_gtt_pread(struct drm_device *dev, i915_gem_object_pin_pages(obj); } else { - node.start = vma->node.start; + node.start = i915_ggtt_offset(vma); node.allocated = false; ret = i915_gem_object_put_fence(obj); if (ret) @@ -1071,7 +1071,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, i915_gem_object_pin_pages(obj); } else { - node.start = vma->node.start; + node.start = i915_ggtt_offset(vma); node.allocated = false; ret = i915_gem_object_put_fence(obj); if (ret) @@ -1712,7 +1712,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) goto err_unpin; /* Finally, remap it using the new GTT offset */ - pfn = ggtt->mappable_base + vma->node.start; + pfn = ggtt->mappable_base + i915_ggtt_offset(vma); pfn >>= PAGE_SHIFT; if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { @@ -3759,10 +3759,9 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, WARN(i915_vma_is_pinned(vma), "bo is already pinned in ggtt with incorrect alignment:" - " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," + " offset=%08x, req.alignment=%llx, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", - upper_32_bits(vma->node.start), - lower_32_bits(vma->node.start), + i915_ggtt_offset(vma), alignment, !!(flags & PIN_MAPPABLE), obj->map_and_fenceable); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index e566167d9441..98d2956f91f4 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -631,7 +631,8 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_SET_CONTEXT); - intel_ring_emit(ring, req->ctx->engine[RCS].state->node.start | flags); + intel_ring_emit(ring, + i915_ggtt_offset(req->ctx->engine[RCS].state) | flags); /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -660,7 +661,8 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, engine->scratch->node.start); + intel_ring_emit(ring, + i915_ggtt_offset(engine->scratch)); intel_ring_emit(ring, MI_NOOP); } intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 71513b13ca94..d6e4b6529196 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -272,6 +272,15 @@ static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, return vma->active & BIT(engine); } +static inline u32 i915_ggtt_offset(const struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(!vma->node.allocated); + GEM_BUG_ON(upper_32_bits(vma->node.start)); + GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); + return lower_32_bits(vma->node.start); +} + struct i915_page_dma { struct page *page; union { diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index e7dbc64ec1da..bb4079223e39 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ 
b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -358,11 +358,11 @@ static void guc_init_ctx_desc(struct intel_guc *guc, /* The state page is after PPHWSP */ lrc->ring_lcra = - ce->state->node.start + LRC_STATE_PN * PAGE_SIZE; + i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); - lrc->ring_begin = ce->ring->vma->node.start; + lrc->ring_begin = i915_ggtt_offset(ce->ring->vma); lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; @@ -378,7 +378,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, * The doorbell, process descriptor, and workqueue are all parts * of the client object, which the GuC will reference via the GGTT */ - gfx_addr = client->vma->node.start; + gfx_addr = i915_ggtt_offset(client->vma); desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; desc.db_trigger_cpu = (uintptr_t)client->client_base + @@ -864,7 +864,7 @@ static void guc_create_log(struct intel_guc *guc) (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); - offset = vma->node.start >> PAGE_SHIFT; /* in pages */ + offset = i915_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; } @@ -935,7 +935,8 @@ static void guc_create_ads(struct intel_guc *guc) policies = (void *)ads + sizeof(struct guc_ads); init_guc_policies(policies); - ads->scheduler_policies = vma->node.start + sizeof(struct guc_ads); + ads->scheduler_policies = + i915_ggtt_offset(vma) + sizeof(struct guc_ads); /* MMIO reg state */ reg_state = (void *)policies + sizeof(struct guc_policies); @@ -1063,7 +1064,7 @@ int intel_guc_suspend(struct drm_device *dev) /* any value greater than GUC_POWER_D0 */ data[1] = GUC_POWER_D1; /* first page is shared data with GuC */ - data[2] = ctx->engine[RCS].state->node.start; + data[2] = i915_ggtt_offset(ctx->engine[RCS].state); return host2guc_action(guc, data, ARRAY_SIZE(data)); } @@ -1088,7 +1089,7 @@ int intel_guc_resume(struct drm_device *dev) data[0] = HOST2GUC_ACTION_EXIT_S_STATE; data[1] = GUC_POWER_D0; /* first page is shared data with GuC */ - data[2] = ctx->engine[RCS].state->node.start; + data[2] = i915_ggtt_offset(ctx->engine[RCS].state); return host2guc_action(guc, data, ARRAY_SIZE(data)); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e582b4267036..31eaeedfad30 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3241,7 +3241,6 @@ u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct i915_ggtt_view view; struct i915_vma *vma; - u64 offset; intel_fill_fb_ggtt_view(&view, fb, rotation); @@ -3250,11 +3249,7 @@ u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, view.type)) return -1; - offset = vma->node.start; - - WARN_ON(upper_32_bits(offset)); - - return lower_32_bits(offset); + return i915_ggtt_offset(vma); } static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) @@ -11804,7 +11799,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev, intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, req->engine->scratch->node.start + 256); + intel_ring_emit(ring, + i915_ggtt_offset(req->engine->scratch) + 256); if (IS_GEN8(dev)) { 
intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_NOOP); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f02d66bbec4b..5ec8a10fd18b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -268,8 +268,8 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) goto err_unref; engine->scratch = vma; - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08llx\n", - engine->name, vma->node.start); + DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", + engine->name, i915_ggtt_offset(vma)); return 0; err_unref: diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 096e84dabace..4003e4908c09 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -248,7 +248,7 @@ static int intelfb_create(struct drm_fb_helper *helper, info->apertures->ranges[0].base = dev->mode_config.fb_base; info->apertures->ranges[0].size = ggtt->mappable_end; - info->fix.smem_start = dev->mode_config.fb_base + vma->node.start; + info->fix.smem_start = dev->mode_config.fb_base + i915_ggtt_offset(vma); info->fix.smem_len = vma->node.size; vaddr = i915_vma_pin_iomap(vma); @@ -275,8 +275,8 @@ static int intelfb_create(struct drm_fb_helper *helper, /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx\n", - fb->width, fb->height, vma->node.start); + DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n", + fb->width, fb->height, i915_ggtt_offset(vma)); ifbdev->vma = vma; mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 7dd745dfeffb..324812d69b70 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -194,14 +194,14 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv) } if (guc->ads_vma) { - u32 ads = (u32)guc->ads_vma->node.start >> PAGE_SHIFT; + u32 ads = i915_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; } /* If GuC submission is enabled, set up additional parameters here */ if (i915.enable_guc_submission) { - u32 pgs = dev_priv->guc.ctx_pool_vma->node.start; + u32 pgs = i915_ggtt_offset(dev_priv->guc.ctx_pool_vma); u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; pgs >>= PAGE_SHIFT; @@ -271,7 +271,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv, I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); /* Set the source address for the new blob */ - offset = vma->node.start + guc_fw->header_offset; + offset = i915_ggtt_offset(vma) + guc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2673fb4f817b..6b49df4316f4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -315,7 +315,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, desc = ctx->desc_template; /* bits 3-4 */ desc |= engine->ctx_desc_template; /* bits 0-11 */ - desc |= ce->state->node.start + LRC_PPHWSP_PN * PAGE_SIZE; + desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ @@ -792,7 +792,8 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, 
intel_lr_context_descriptor_update(ctx, engine); - lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start; + lrc_reg_state[CTX_RING_BUFFER_START+1] = + i915_ggtt_offset(ce->ring->vma); ce->lrc_reg_state = lrc_reg_state; ce->state->obj->dirty = true; @@ -914,7 +915,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, engine->scratch->node.start + 256); + wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); wa_ctx_emit(batch, index, 0); wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); @@ -932,7 +933,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, engine->scratch->node.start + 256); + wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); wa_ctx_emit(batch, index, 0); return index; @@ -993,7 +994,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - scratch_addr = engine->scratch->node.start + 2 * CACHELINE_BYTES; + scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | @@ -1073,7 +1074,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, /* Actual scratch location is at 128 bytes offset */ if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { u32 scratch_addr = - engine->scratch->node.start + 2 * CACHELINE_BYTES; + i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | @@ -1482,7 +1483,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, { struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; - u32 scratch_addr = engine->scratch->node.start + 2 * CACHELINE_BYTES; + u32 scratch_addr = + i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; u32 flags = 0; int ret; @@ -1752,7 +1754,7 @@ lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma) return PTR_ERR(hws); engine->status_page.page_addr = hws + hws_offset; - engine->status_page.ggtt_offset = vma->node.start + hws_offset; + engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset; engine->status_page.vma = vma; return 0; @@ -2020,7 +2022,7 @@ populate_lr_context(struct i915_gem_context *ctx, RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0); if (engine->wa_ctx.vma) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - u32 ggtt_offset = wa_ctx->vma->node.start; + u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); reg_state[CTX_RCS_INDIRECT_CTX+1] = (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) | diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 402e05f2f1de..72f8990a13d2 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -801,7 +801,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, swidth = params->src_w; swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width); sheight = params->src_h; - 
iowrite32(vma->node.start + params->offset_Y, ®s->OBUF_0Y); + iowrite32(i915_ggtt_offset(vma) + params->offset_Y, ®s->OBUF_0Y); ostride = params->stride_Y; if (params->format & I915_OVERLAY_YUV_PLANAR) { @@ -815,8 +815,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, params->src_w/uv_hscale); swidthsw |= max_t(u32, tmp_U, tmp_V) << 16; sheight |= (params->src_h/uv_vscale) << 16; - iowrite32(vma->node.start + params->offset_U, ®s->OBUF_0U); - iowrite32(vma->node.start + params->offset_V, ®s->OBUF_0V); + iowrite32(i915_ggtt_offset(vma) + params->offset_U, + ®s->OBUF_0U); + iowrite32(i915_ggtt_offset(vma) + params->offset_V, + ®s->OBUF_0V); ostride |= params->stride_UV << 16; } @@ -1412,7 +1414,7 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) ret = PTR_ERR(vma); goto out_free_bo; } - overlay->flip_addr = vma->node.start; + overlay->flip_addr = i915_ggtt_offset(vma); ret = i915_gem_object_set_to_gtt_domain(reg_bo, true); if (ret) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 65ef172e8761..e3327a2ac6e1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -176,7 +176,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { struct intel_ring *ring = req->ring; u32 scratch_addr = - req->engine->scratch->node.start + 2 * CACHELINE_BYTES; + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; int ret; ret = intel_ring_begin(req, 6); @@ -212,7 +212,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; u32 scratch_addr = - req->engine->scratch->node.start + 2 * CACHELINE_BYTES; + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; u32 flags = 0; int ret; @@ -286,7 +286,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; u32 scratch_addr = - req->engine->scratch->node.start + 2 * CACHELINE_BYTES; + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; u32 flags = 0; int ret; @@ -371,7 +371,7 @@ static int gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { u32 scratch_addr = - req->engine->scratch->node.start + 2 * CACHELINE_BYTES; + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; u32 flags = 0; int ret; @@ -571,7 +571,7 @@ static int init_ring_common(struct intel_engine_cs *engine) * registers with the above sequence (the readback of the HEAD registers * also enforces ordering), otherwise the hw might lose the new ring * register values. */ - I915_WRITE_START(engine, ring->vma->node.start); + I915_WRITE_START(engine, i915_ggtt_offset(ring->vma)); /* WaClearRingBufHeadRegAtInit:ctg,elk */ if (I915_READ_HEAD(engine)) @@ -586,16 +586,16 @@ static int init_ring_common(struct intel_engine_cs *engine) /* If the head is still not zero, the ring is dead */ if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 && - I915_READ_START(engine) == ring->vma->node.start && + I915_READ_START(engine) == i915_ggtt_offset(ring->vma) && (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) { DRM_ERROR("%s initialization failed " - "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08llx]\n", + "ctl %08x (valid? 
%d) head %08x tail %08x start %08x [expected %08x]\n", engine->name, I915_READ_CTL(engine), I915_READ_CTL(engine) & RING_VALID, I915_READ_HEAD(engine), I915_READ_TAIL(engine), I915_READ_START(engine), - ring->vma->node.start); + i915_ggtt_offset(ring->vma)); ret = -EIO; goto out; } @@ -1716,7 +1716,7 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, unsigned int dispatch_flags) { struct intel_ring *ring = req->ring; - u32 cs_offset = req->engine->scratch->node.start; + u32 cs_offset = i915_ggtt_offset(req->engine->scratch); int ret; ret = intel_ring_begin(req, 6); @@ -1857,12 +1857,12 @@ static int init_status_page(struct intel_engine_cs *engine) goto err; engine->status_page.vma = vma; - engine->status_page.ggtt_offset = vma->node.start; + engine->status_page.ggtt_offset = i915_ggtt_offset(vma); engine->status_page.page_addr = i915_gem_object_pin_map(obj, I915_MAP_WB); - DRM_DEBUG_DRIVER("%s hws offset: 0x%08llx\n", - engine->name, vma->node.start); + DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", + engine->name, i915_ggtt_offset(vma)); return 0; err: @@ -2542,13 +2542,13 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, } if (INTEL_GEN(dev_priv) >= 8) { - u64 offset = dev_priv->semaphore->node.start; + u32 offset = i915_ggtt_offset(dev_priv->semaphore); engine->semaphore.sync_to = gen8_ring_sync_to; engine->semaphore.signal = gen8_xcs_signal; for (i = 0; i < I915_NUM_ENGINES; i++) { - u64 ring_offset; + u32 ring_offset; if (i != engine->id) ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i); -- cgit v1.2.3 From 21a2c58a9c122151080ecbdddc115257cd7c30d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Aug 2016 10:49:11 +0100 Subject: drm/i915: Record the RING_MODE register for post-mortem debugging Just another useful register to inspect following a GPU hang. v2: Remove partial decoding of RING_MODE to userspace, be consistent and use GEN > 2 guards around RING_MODE everywhere. 
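Condensed from the hunks below, the capture side and the guard cleanup look roughly like this (sketch only, not the full patch):

	/* error_record_engine_registers(): RING_MODE (MI_MODE) exists on gen3+ only */
	if (INTEL_GEN(dev_priv) > 2)
		ee->mode = I915_READ_MODE(engine);

	/* error_print_engine(): dumped next to the other ring registers */
	err_printf(m, "  MODE:  0x%08x\n", ee->mode);

	/* stop_ring()/intel_engine_cleanup(): the old !IS_GEN2() checks become
	 * the equivalent INTEL_GEN(dev_priv) > 2 for consistency with the above.
	 */
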
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-32-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 7 ++++--- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bb7d8130dbfd..35caa9b2f36a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -757,6 +757,7 @@ struct drm_i915_error_state { u32 tail; u32 head; u32 ctl; + u32 mode; u32 hws; u32 ipeir; u32 ipehr; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 776818b86c0c..0c3f30ce85c3 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -236,6 +236,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, err_printf(m, " HEAD: 0x%08x\n", ee->head); err_printf(m, " TAIL: 0x%08x\n", ee->tail); err_printf(m, " CTL: 0x%08x\n", ee->ctl); + err_printf(m, " MODE: 0x%08x\n", ee->mode); err_printf(m, " HWS: 0x%08x\n", ee->hws); err_printf(m, " ACTHD: 0x%08x %08x\n", (u32)(ee->acthd>>32), (u32)ee->acthd); @@ -1005,6 +1006,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error, ee->head = I915_READ_HEAD(engine); ee->tail = I915_READ_TAIL(engine); ee->ctl = I915_READ_CTL(engine); + if (INTEL_GEN(dev_priv) > 2) + ee->mode = I915_READ_MODE(engine); if (I915_NEED_GFX_HWS(dev_priv)) { i915_reg_t mmio; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e3327a2ac6e1..fa22bd87bab0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -498,7 +498,7 @@ static bool stop_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (!IS_GEN2(dev_priv)) { + if (INTEL_GEN(dev_priv) > 2) { I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); if (intel_wait_for_register(dev_priv, RING_MI_MODE(engine->mmio_base), @@ -520,7 +520,7 @@ static bool stop_ring(struct intel_engine_cs *engine) I915_WRITE_HEAD(engine, 0); I915_WRITE_TAIL(engine, 0); - if (!IS_GEN2(dev_priv)) { + if (INTEL_GEN(dev_priv) > 2) { (void)I915_READ_CTL(engine); I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); } @@ -2142,7 +2142,8 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) dev_priv = engine->i915; if (engine->buffer) { - WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0); + WARN_ON(INTEL_GEN(dev_priv) > 2 && + (I915_READ_MODE(engine) & MODE_IDLE) == 0); intel_ring_unpin(engine->buffer); intel_ring_free(engine->buffer); -- cgit v1.2.3 From 318f89ca205fc2df61954e3b415d93a06691817e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 16 Aug 2016 17:04:21 +0100 Subject: drm/i915: Initialize legacy semaphores from engine hw id indexed array Build the legacy semaphore initialisation array using the engine hardware ids instead of driver internal ones. This makes the static array size dependent only on the number of gen6 semaphore engines. Also makes the per-engine semaphore wait and signal tables hardware id indexed saving some more space. v2: Refactor I915_GEN6_NUM_ENGINES to GEN6_SEMAPHORE_LAST. (Chris Wilson) v3: More polish. 
(Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1471363461-9973-1-git-send-email-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.c | 55 +++++++++++++++++---------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++-- 2 files changed, 34 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index fa22bd87bab0..12703ea27259 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1337,8 +1337,7 @@ static int gen6_signal(struct drm_i915_gem_request *req) { struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; - struct intel_engine_cs *useless; - enum intel_engine_id id; + struct intel_engine_cs *engine; int ret, num_rings; num_rings = INTEL_INFO(dev_priv)->num_rings; @@ -1346,9 +1345,13 @@ static int gen6_signal(struct drm_i915_gem_request *req) if (ret) return ret; - for_each_engine_id(useless, dev_priv, id) { - i915_reg_t mbox_reg = req->engine->semaphore.mbox.signal[id]; + for_each_engine(engine, dev_priv) { + i915_reg_t mbox_reg; + + if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) + continue; + mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit_reg(ring, mbox_reg); @@ -1495,7 +1498,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req, u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->id]; + u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; int ret; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); @@ -2569,41 +2572,41 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, * initialized as INVALID. Gen8 will initialize the * sema between VCS2 and RCS later. 
*/ - for (i = 0; i < I915_NUM_ENGINES; i++) { + for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) { static const struct { u32 wait_mbox; i915_reg_t mbox_reg; - } sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = { - [RCS] = { - [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, - [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, - [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, + } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = { + [RCS_HW] = { + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, }, - [VCS] = { - [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, - [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, - [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, + [VCS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, }, - [BCS] = { - [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, - [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, - [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, + [BCS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, }, - [VECS] = { - [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, - [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, - [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, + [VECS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, }, }; u32 wait_mbox; i915_reg_t mbox_reg; - if (i == engine->id || i == VCS2) { + if (i == engine->hw_id) { wait_mbox = MI_SEMAPHORE_SYNC_INVALID; mbox_reg = GEN6_NOSYNC; } else { - wait_mbox = sem_data[engine->id][i].wait_mbox; - mbox_reg = sem_data[engine->id][i].mbox_reg; + wait_mbox = sem_data[engine->hw_id][i].wait_mbox; + mbox_reg = sem_data[engine->hw_id][i].mbox_reg; } engine->semaphore.mbox.wait[i] = wait_mbox; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9d723c24eeff..86612d502c65 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -278,11 +278,14 @@ struct intel_engine_cs { u32 sync_seqno[I915_NUM_ENGINES-1]; union { +#define GEN6_SEMAPHORE_LAST VECS_HW +#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) +#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0) struct { /* our mbox written by others */ - u32 wait[I915_NUM_ENGINES]; + u32 wait[GEN6_NUM_SEMAPHORES]; /* mboxes this ring signals to */ - i915_reg_t signal[I915_NUM_ENGINES]; + i915_reg_t signal[GEN6_NUM_SEMAPHORES]; } mbox; u64 signal_ggtt[I915_NUM_ENGINES]; }; -- cgit v1.2.3 From 9d80841ea4c9df420da3f9a61a819d09a03f2161 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Aug 2016 17:16:56 +0100 Subject: drm/i915: Allow 
ringbuffers to be bound anywhere Now that we have WC vmapping available, we can bind our rings anywhere in the GGTT and do not need to restrict them to the mappable region. Except for stolen objects, for which direct access is verboten and we must use the mappable aperture. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 15 ++++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 12703ea27259..21bfc69e9dc6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1892,17 +1892,20 @@ int intel_ring_pin(struct intel_ring *ring) { /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096; + enum i915_map_type map; struct i915_vma *vma = ring->vma; void *addr; int ret; GEM_BUG_ON(ring->vaddr); - if (ring->needs_iomap) + map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC; + + if (vma->obj->stolen) flags |= PIN_MAPPABLE; if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { - if (flags & PIN_MAPPABLE) + if (flags & PIN_MAPPABLE || map == I915_MAP_WC) ret = i915_gem_object_set_to_gtt_domain(vma->obj, true); else ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); @@ -1914,10 +1917,10 @@ int intel_ring_pin(struct intel_ring *ring) if (unlikely(ret)) return ret; - if (flags & PIN_MAPPABLE) + if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else - addr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + addr = i915_gem_object_pin_map(vma->obj, map); if (IS_ERR(addr)) goto err; @@ -1934,7 +1937,7 @@ void intel_ring_unpin(struct intel_ring *ring) GEM_BUG_ON(!ring->vma); GEM_BUG_ON(!ring->vaddr); - if (ring->needs_iomap) + if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else i915_gem_object_unpin_map(ring->vma->obj); @@ -2005,8 +2008,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) return ERR_CAST(vma); } ring->vma = vma; - if (!HAS_LLC(engine->i915) || vma->obj->stolen) - ring->needs_iomap = true; list_add(&ring->link, &engine->buffers); return ring; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 86612d502c65..84aea549de5d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -96,7 +96,6 @@ struct intel_ring { int space; int size; int effective_size; - bool needs_iomap; /** We track the position of the requests in the ring buffer, and * when each is retired we increment last_retired_head as the GPU -- cgit v1.2.3 From c58b735fc762e891481e92af7124b85cb0a51fce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Aug 2016 17:16:57 +0100 Subject: drm/i915: Allocate rings from stolen If we have stolen available, make use of it for ringbuffer allocation. Previously this was restricted to !llc platforms, as writing to stolen requires a GGTT mapping - but now that we have partial mappable support, the mappable aperture isn't quite so precious so we can use it more freely and ringbuffers are a good user for the otherwise wasted stolen. 
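The policy is simply "try the stolen pool first, fall back to a regular allocation when it cannot satisfy the request", which is the shape of the i915_gem_object_create_stolen()/i915_gem_object_create() pair in the hunk below. A standalone userspace sketch of that fallback pattern follows; the helper names here are hypothetical stand-ins, not the driver functions.

	#include <stdio.h>
	#include <stdlib.h>

	/* Hypothetical stand-in for a stolen-pool allocator: returns NULL when
	 * the reserved pool cannot satisfy the request (here it never can, so
	 * the fallback path is always exercised).
	 */
	static void *alloc_from_stolen(size_t size)
	{
		(void)size;
		return NULL;
	}

	static void *alloc_ring(size_t size)
	{
		void *buf = alloc_from_stolen(size);

		if (!buf)	/* NULL, not an ERR_PTR, signals "fall back" */
			buf = malloc(size);
		return buf;
	}

	int main(void)
	{
		void *ring = alloc_ring(16 * 1024);

		printf("ring allocated at %p\n", ring);
		free(ring);
		return 0;
	}

Note the failure convention matters: the stolen allocator reports "no space" with NULL rather than an error pointer, so the caller can fall back silently instead of propagating an error.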
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-18-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 21bfc69e9dc6..cc5bcd14b6df 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1952,10 +1952,8 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = ERR_PTR(-ENODEV); - if (!HAS_LLC(dev_priv)) - obj = i915_gem_object_create_stolen(&dev_priv->drm, size); - if (IS_ERR(obj)) + obj = i915_gem_object_create_stolen(&dev_priv->drm, size); + if (!obj) obj = i915_gem_object_create(&dev_priv->drm, size); if (IS_ERR(obj)) return ERR_CAST(obj); -- cgit v1.2.3
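Returning to the hw-id indexed semaphore patch earlier in this series: the per-engine filter there is BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK, with the mask covering every hardware id up to GEN6_SEMAPHORE_LAST. The standalone sketch below walks through that membership test; the numeric hardware ids and the GENMASK shape are assumptions for illustration, not taken from the i915 headers.

	#include <stdio.h>

	/* Assumed hardware engine ids for the sketch only. */
	enum hw_id { RCS_HW = 0, VCS_HW = 1, BCS_HW = 2, VECS_HW = 3, VCS2_HW = 4 };

	#define BIT(n)			(1u << (n))
	#define GENMASK_U32(h, l)	(((~0u) >> (31 - (h))) & (~0u << (l)))

	#define GEN6_SEMAPHORE_LAST	VECS_HW
	#define GEN6_SEMAPHORES_MASK	GENMASK_U32(GEN6_SEMAPHORE_LAST, 0)

	int main(void)
	{
		static const char *names[] = { "rcs", "vcs", "bcs", "vecs", "vcs2" };
		int i;

		/* Engines whose hw id falls inside the mask get a gen6 mailbox;
		 * anything above GEN6_SEMAPHORE_LAST (vcs2 here) is skipped,
		 * mirroring the BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK test.
		 */
		for (i = RCS_HW; i <= VCS2_HW; i++)
			printf("%-4s hw_id=%d -> %s\n", names[i], i,
			       (BIT(i) & GEN6_SEMAPHORES_MASK) ?
			       "gen6 semaphore" : "skipped");

		return 0;
	}

With these assumed ids the mask evaluates to 0xf, so rcs/vcs/bcs/vecs pass the test and vcs2 (bit 4) is skipped, which is why the static mailbox tables only need GEN6_NUM_SEMAPHORES entries per engine.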