From b93f9cf14e714c20ce9a544ed1a6070ee7604588 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 25 Jan 2012 15:39:34 -0800 Subject: drm/i915: argument to control retiring behavior Sometimes, when we idle the gpu or wait on something, we don't actually want to process the retiring list. This patch allows callers to choose the behavior. Reviewed-by: Keith Packard Reviewed-by: Eugeni Dodonov Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c01cb2018497..c649e0f255b4 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1186,7 +1186,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * so every billion or so execbuffers, we need to stall * the GPU in order to reset the counters. */ - ret = i915_gpu_idle(dev); + ret = i915_gpu_idle(dev, true); if (ret) goto err; -- cgit v1.2.3 From 96154f2faba540281073243d61108d1705d19c6d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Dec 2011 13:57:00 +0100 Subject: drm/i915: switch ring->id to be a real id ... and add a helper function for the places where we want a flag. This way we can use ring->id to index into arrays. v2: Resurrect the missing beautification-space Chris Wilson noted. I'm moving this space around because I'll reuse ring_str in the next patch. 
Reviewed-by: Chris Wilson Reviewed-by: Ben Widawsky Reviewed-by: Eugeni Dodonov Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 9 +++++---- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 20 ++++++++++---------- 5 files changed, 25 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6c3be86274e1..9c5db4edd685 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -669,9 +669,9 @@ static int i915_ringbuffer_info(struct seq_file *m, void *data) static const char *ring_str(int ring) { switch (ring) { - case RING_RENDER: return " render"; - case RING_BSD: return " bsd"; - case RING_BLT: return " blt"; + case RCS: return "render"; + case VCS: return "bsd"; + case BCS: return "blt"; default: return ""; } } @@ -714,7 +714,7 @@ static void print_error_buffers(struct seq_file *m, seq_printf(m, "%s [%d]:\n", name, count); while (count--) { - seq_printf(m, " %08x %8u %04x %04x %08x%s%s%s%s%s%s", + seq_printf(m, " %08x %8u %04x %04x %08x%s%s%s%s%s%s%s", err->gtt_offset, err->size, err->read_domains, @@ -724,6 +724,7 @@ static void print_error_buffers(struct seq_file *m, tiling_flag(err->tiling), dirty_flag(err->dirty), purgeable_flag(err->purgeable), + err->ring != -1 ? 
" " : "", ring_str(err->ring), cache_level_str(err->cache_level)); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c649e0f255b4..49b3ebc0e7a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -203,9 +203,9 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj, cd->invalidate_domains |= invalidate_domains; cd->flush_domains |= flush_domains; if (flush_domains & I915_GEM_GPU_DOMAINS) - cd->flush_rings |= obj->ring->id; + cd->flush_rings |= intel_ring_flag(obj->ring); if (invalidate_domains & I915_GEM_GPU_DOMAINS) - cd->flush_rings |= ring->id; + cd->flush_rings |= intel_ring_flag(ring); } struct eb_objects { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 275ab6fecbd8..ab53edb9f294 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -804,7 +804,7 @@ static u32 capture_bo_list(struct drm_i915_error_buffer *err, err->tiling = obj->tiling_mode; err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; - err->ring = obj->ring ? obj->ring->id : 0; + err->ring = obj->ring ? 
obj->ring->id : -1; err->cache_level = obj->cache_level; if (++i == count) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b3da17af8997..48042f3b0ea6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -729,13 +729,13 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) */ if (IS_GEN7(dev)) { switch (ring->id) { - case RING_RENDER: + case RCS: mmio = RENDER_HWS_PGA_GEN7; break; - case RING_BLT: + case BCS: mmio = BLT_HWS_PGA_GEN7; break; - case RING_BSD: + case VCS: mmio = BSD_HWS_PGA_GEN7; break; } @@ -1199,7 +1199,7 @@ void intel_ring_advance(struct intel_ring_buffer *ring) static const struct intel_ring_buffer render_ring = { .name = "render ring", - .id = RING_RENDER, + .id = RCS, .mmio_base = RENDER_RING_BASE, .size = 32 * PAGE_SIZE, .init = init_render_ring, @@ -1222,7 +1222,7 @@ static const struct intel_ring_buffer render_ring = { static const struct intel_ring_buffer bsd_ring = { .name = "bsd ring", - .id = RING_BSD, + .id = VCS, .mmio_base = BSD_RING_BASE, .size = 32 * PAGE_SIZE, .init = init_ring_common, @@ -1332,7 +1332,7 @@ gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring) /* ring buffer for Video Codec for Gen6+ */ static const struct intel_ring_buffer gen6_bsd_ring = { .name = "gen6 bsd ring", - .id = RING_BSD, + .id = VCS, .mmio_base = GEN6_BSD_RING_BASE, .size = 32 * PAGE_SIZE, .init = init_ring_common, @@ -1467,7 +1467,7 @@ static void blt_ring_cleanup(struct intel_ring_buffer *ring) static const struct intel_ring_buffer gen6_blt_ring = { .name = "blt ring", - .id = RING_BLT, + .id = BCS, .mmio_base = BLT_RING_BASE, .size = 32 * PAGE_SIZE, .init = blt_ring_init, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 68281c96c558..c8b9cc0cd0dc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -1,13 +1,6 @@ #ifndef 
_INTEL_RINGBUFFER_H_ #define _INTEL_RINGBUFFER_H_ -enum { - RCS = 0x0, - VCS, - BCS, - I915_NUM_RINGS, -}; - struct intel_hw_status_page { u32 __iomem *page_addr; unsigned int gfx_addr; @@ -36,10 +29,11 @@ struct intel_hw_status_page { struct intel_ring_buffer { const char *name; enum intel_ring_id { - RING_RENDER = 0x1, - RING_BSD = 0x2, - RING_BLT = 0x4, + RCS = 0x0, + VCS, + BCS, } id; +#define I915_NUM_RINGS 3 u32 mmio_base; void __iomem *virtual_start; struct drm_device *dev; @@ -119,6 +113,12 @@ struct intel_ring_buffer { void *private; }; +static inline unsigned +intel_ring_flag(struct intel_ring_buffer *ring) +{ + return 1 << ring->id; +} + static inline u32 intel_ring_sync_index(struct intel_ring_buffer *ring, struct intel_ring_buffer *other) -- cgit v1.2.3 From 1690e1eb7a9021826853e181baa48dd77090da28 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 14 Dec 2011 13:57:08 +0100 Subject: drm/i915: Separate fence pin counting from normal bind pin counting In order to correctly account for reserving space in the GTT and fences for a batch buffer, we need to independently track whether the fence is pinned due to a fenced GPU access in the batch or whether the buffer is pinned in the aperture. Currently we count the fence as pinned if the buffer has already been seen in the execbuffer. This leads to a false accounting of available fence registers, causing frequent mass evictions. Worse, if coupled with the change to make i915_gem_object_get_fence() report EDEADLK upon fence starvation, the batchbuffer can fail with only one fence required... Fixes intel-gpu-tools/tests/gem_fenced_exec_thrash Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38735 Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Tested-by: Paul Neumann [danvet: Resolve the functional conflict with Jesse Barnes sprite patches, acked by Chris Wilson on irc.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 19 ++++ drivers/gpu/drm/i915/i915_gem.c | 7 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 139 +++++++++++++++++++---------- drivers/gpu/drm/i915/intel_display.c | 16 +++- drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_sprite.c | 4 +- 6 files changed, 129 insertions(+), 57 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 733f5f57babf..12e8cce79289 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -135,6 +135,7 @@ struct drm_i915_fence_reg { struct list_head lru_list; struct drm_i915_gem_object *obj; uint32_t setup_seqno; + int pin_count; }; struct sdvo_device_mapping { @@ -1159,6 +1160,24 @@ int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj, struct intel_ring_buffer *pipelined); int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj); +static inline void +i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) +{ + if (obj->fence_reg != I915_FENCE_REG_NONE) { + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + dev_priv->fence_regs[obj->fence_reg].pin_count++; + } +} + +static inline void +i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) +{ + if (obj->fence_reg != I915_FENCE_REG_NONE) { + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + dev_priv->fence_regs[obj->fence_reg].pin_count--; + } +} + void i915_gem_retire_requests(struct drm_device *dev); void i915_gem_reset(struct drm_device *dev); void i915_gem_clflush_object(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ff3066c4c76a..c78930ed2e80 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2435,6 +2435,8 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj) if (obj->fence_reg != 
I915_FENCE_REG_NONE) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + + WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count); i915_gem_clear_fence_reg(obj->base.dev, &dev_priv->fence_regs[obj->fence_reg]); @@ -2459,7 +2461,7 @@ i915_find_fence_reg(struct drm_device *dev, if (!reg->obj) return reg; - if (!reg->obj->pin_count) + if (!reg->pin_count) avail = reg; } @@ -2469,7 +2471,7 @@ i915_find_fence_reg(struct drm_device *dev, /* None available, try to steal one or wait for a user to finish */ avail = first = NULL; list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { - if (reg->obj->pin_count) + if (reg->pin_count) continue; if (first == NULL) @@ -2664,6 +2666,7 @@ i915_gem_clear_fence_reg(struct drm_device *dev, list_del_init(&reg->lru_list); reg->obj = NULL; reg->setup_seqno = 0; + reg->pin_count = 0; } /** diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 49b3ebc0e7a6..4a43ef5dba31 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -461,6 +461,54 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, return ret; } +#define __EXEC_OBJECT_HAS_FENCE (1<<31) + +static int +pin_and_fence_object(struct drm_i915_gem_object *obj, + struct intel_ring_buffer *ring) +{ + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; + bool need_fence, need_mappable; + int ret; + + need_fence = + has_fenced_gpu_access && + entry->flags & EXEC_OBJECT_NEEDS_FENCE && + obj->tiling_mode != I915_TILING_NONE; + need_mappable = + entry->relocation_count ? 
true : need_fence; + + ret = i915_gem_object_pin(obj, entry->alignment, need_mappable); + if (ret) + return ret; + + if (has_fenced_gpu_access) { + if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { + if (obj->tiling_mode) { + ret = i915_gem_object_get_fence(obj, ring); + if (ret) + goto err_unpin; + + entry->flags |= __EXEC_OBJECT_HAS_FENCE; + i915_gem_object_pin_fence(obj); + } else { + ret = i915_gem_object_put_fence(obj); + if (ret) + goto err_unpin; + } + } + obj->pending_fenced_gpu_access = need_fence; + } + + entry->offset = obj->gtt_offset; + return 0; + +err_unpin: + i915_gem_object_unpin(obj); + return ret; +} + static int i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, struct drm_file *file, @@ -518,6 +566,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, list_for_each_entry(obj, objects, exec_list) { struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; bool need_fence, need_mappable; + if (!obj->gtt_space) continue; @@ -532,58 +581,47 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, (need_mappable && !obj->map_and_fenceable)) ret = i915_gem_object_unbind(obj); else - ret = i915_gem_object_pin(obj, - entry->alignment, - need_mappable); + ret = pin_and_fence_object(obj, ring); if (ret) goto err; - - entry++; } /* Bind fresh objects */ list_for_each_entry(obj, objects, exec_list) { - struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; - bool need_fence; - - need_fence = - has_fenced_gpu_access && - entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; - - if (!obj->gtt_space) { - bool need_mappable = - entry->relocation_count ? 
true : need_fence; - - ret = i915_gem_object_pin(obj, - entry->alignment, - need_mappable); - if (ret) - break; - } + if (obj->gtt_space) + continue; - if (has_fenced_gpu_access) { - if (need_fence) { - ret = i915_gem_object_get_fence(obj, ring); - if (ret) - break; - } else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode == I915_TILING_NONE) { - /* XXX pipelined! */ - ret = i915_gem_object_put_fence(obj); - if (ret) - break; - } - obj->pending_fenced_gpu_access = need_fence; + ret = pin_and_fence_object(obj, ring); + if (ret) { + int ret_ignore; + + /* This can potentially raise a harmless + * -EINVAL if we failed to bind in the above + * call. It cannot raise -EINTR since we know + * that the bo is freshly bound and so will + * not need to be flushed or waited upon. + */ + ret_ignore = i915_gem_object_unbind(obj); + (void)ret_ignore; + WARN_ON(obj->gtt_space); + break; } - - entry->offset = obj->gtt_offset; } /* Decrement pin count for bound objects */ list_for_each_entry(obj, objects, exec_list) { - if (obj->gtt_space) - i915_gem_object_unpin(obj); + struct drm_i915_gem_exec_object2 *entry; + + if (!obj->gtt_space) + continue; + + entry = obj->exec_entry; + if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { + i915_gem_object_unpin_fence(obj); + entry->flags &= ~__EXEC_OBJECT_HAS_FENCE; + } + + i915_gem_object_unpin(obj); } if (ret != -ENOSPC || retry > 1) @@ -600,16 +638,19 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, } while (1); err: - obj = list_entry(obj->exec_list.prev, - struct drm_i915_gem_object, - exec_list); - while (objects != &obj->exec_list) { - if (obj->gtt_space) - i915_gem_object_unpin(obj); + list_for_each_entry_continue_reverse(obj, objects, exec_list) { + struct drm_i915_gem_exec_object2 *entry; + + if (!obj->gtt_space) + continue; + + entry = obj->exec_entry; + if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { + i915_gem_object_unpin_fence(obj); + entry->flags &= ~__EXEC_OBJECT_HAS_FENCE; + } - obj = 
list_entry(obj->exec_list.prev, - struct drm_i915_gem_object, - exec_list); + i915_gem_object_unpin(obj); } return ret; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0770671fa8af..fc9bc19f6db9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2041,6 +2041,8 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev, ret = i915_gem_object_get_fence(obj, pipelined); if (ret) goto err_unpin; + + i915_gem_object_pin_fence(obj); } dev_priv->mm.interruptible = true; @@ -2053,6 +2055,12 @@ err_interruptible: return ret; } +void intel_unpin_fb_obj(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin_fence(obj); + i915_gem_object_unpin(obj); +} + static int i9xx_update_plane(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y) { @@ -2284,7 +2292,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y, LEAVE_ATOMIC_MODE_SET); if (ret) { - i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj); + intel_unpin_fb_obj(to_intel_framebuffer(crtc->fb)->obj); mutex_unlock(&dev->struct_mutex); DRM_ERROR("failed to update base address\n"); return ret; @@ -2292,7 +2300,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, if (old_fb) { intel_wait_for_vblank(dev, intel_crtc->pipe); - i915_gem_object_unpin(to_intel_framebuffer(old_fb)->obj); + intel_unpin_fb_obj(to_intel_framebuffer(old_fb)->obj); } mutex_unlock(&dev->struct_mutex); @@ -3355,7 +3363,7 @@ static void intel_crtc_disable(struct drm_crtc *crtc) if (crtc->fb) { mutex_lock(&dev->struct_mutex); - i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj); + intel_unpin_fb_obj(to_intel_framebuffer(crtc->fb)->obj); mutex_unlock(&dev->struct_mutex); } } @@ -7158,7 +7166,7 @@ static void intel_unpin_work_fn(struct work_struct *__work) container_of(__work, struct intel_unpin_work, work); mutex_lock(&work->dev->struct_mutex); - 
i915_gem_object_unpin(work->old_fb_obj); + intel_unpin_fb_obj(work->old_fb_obj); drm_gem_object_unreference(&work->pending_flip_obj->base); drm_gem_object_unreference(&work->old_fb_obj->base); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 1348705faf6b..9cec6c3937fa 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -374,6 +374,7 @@ extern void intel_init_emon(struct drm_device *dev); extern int intel_pin_and_fence_fb_obj(struct drm_device *dev, struct drm_i915_gem_object *obj, struct intel_ring_buffer *pipelined); +extern void intel_unpin_fb_obj(struct drm_i915_gem_object *obj); extern int intel_framebuffer_init(struct drm_device *dev, struct intel_framebuffer *ifb, diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index d13989fda501..ad3bd929aec7 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -503,7 +503,7 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, intel_wait_for_vblank(dev, to_intel_crtc(crtc)->pipe); mutex_lock(&dev->struct_mutex); } - i915_gem_object_unpin(old_obj); + intel_unpin_fb_obj(old_obj); } out_unlock: @@ -530,7 +530,7 @@ intel_disable_plane(struct drm_plane *plane) goto out; mutex_lock(&dev->struct_mutex); - i915_gem_object_unpin(intel_plane->obj); + intel_unpin_fb_obj(intel_plane->obj); intel_plane->obj = NULL; mutex_unlock(&dev->struct_mutex); out: -- cgit v1.2.3 From 4ca4a250ac93d5538a2a5c98ee2bcf9195f38be4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Dec 2011 13:57:27 +0100 Subject: drm/i915: reject GTT domain in relocations This confuses our domain tracking and can (for gtt write domains) lead to a subsequent oops. Tested by tests/gem_exec_bad_domains from i-g-t. 
Reviewed-by: Eric Anholt Reviewed-by: Chris Wilson Signed-Off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4a43ef5dba31..123c51445a8b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -303,8 +303,9 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, reloc->write_domain); return ret; } - if (unlikely((reloc->write_domain | reloc->read_domains) & I915_GEM_DOMAIN_CPU)) { - DRM_ERROR("reloc with read/write CPU domains: " + if (unlikely((reloc->write_domain | reloc->read_domains) + & ~I915_GEM_GPU_DOMAINS)) { + DRM_ERROR("reloc with read/write non-GPU domains: " "obj %p target %d offset %d " "read %08x write %08x", obj, reloc->target_handle, -- cgit v1.2.3 From ff240199b6a3b0bec5ae9b6d26403dad38e8cb19 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 31 Jan 2012 21:08:14 +0100 Subject: drm/i915: s/DRM_ERROR/DRM_DEBUG in i915_gem_execbuffer.c These are all user-triggerable, so tune down their loudness a notch. For some of these we have i-g-t tests (because they prevent newly-discovered bugs); without this patch, running the test suite leaves behind a dirty dmesg. 
Reviewed-by: Chris Wilson Signed-Off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 50 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 123c51445a8b..b964998b5e25 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -287,14 +287,14 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, * exec_object list, so it should have a GTT space bound by now. */ if (unlikely(target_offset == 0)) { - DRM_ERROR("No GTT space found for object %d\n", + DRM_DEBUG("No GTT space found for object %d\n", reloc->target_handle); return ret; } /* Validate that the target is in a valid r/w GPU domain */ if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { - DRM_ERROR("reloc with multiple write domains: " + DRM_DEBUG("reloc with multiple write domains: " "obj %p target %d offset %d " "read %08x write %08x", obj, reloc->target_handle, @@ -305,7 +305,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, } if (unlikely((reloc->write_domain | reloc->read_domains) & ~I915_GEM_GPU_DOMAINS)) { - DRM_ERROR("reloc with read/write non-GPU domains: " + DRM_DEBUG("reloc with read/write non-GPU domains: " "obj %p target %d offset %d " "read %08x write %08x", obj, reloc->target_handle, @@ -316,7 +316,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, } if (unlikely(reloc->write_domain && target_obj->pending_write_domain && reloc->write_domain != target_obj->pending_write_domain)) { - DRM_ERROR("Write domain conflict: " + DRM_DEBUG("Write domain conflict: " "obj %p target %d offset %d " "new %08x old %08x\n", obj, reloc->target_handle, @@ -337,7 +337,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Check that the relocation address is valid... 
*/ if (unlikely(reloc->offset > obj->base.size - 4)) { - DRM_ERROR("Relocation beyond object bounds: " + DRM_DEBUG("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", obj, reloc->target_handle, (int) reloc->offset, @@ -345,7 +345,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return ret; } if (unlikely(reloc->offset & 3)) { - DRM_ERROR("Relocation not 4-byte aligned: " + DRM_DEBUG("Relocation not 4-byte aligned: " "obj %p target %d offset %d.\n", obj, reloc->target_handle, (int) reloc->offset); @@ -724,7 +724,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, obj = to_intel_bo(drm_gem_object_lookup(dev, file, exec[i].handle)); if (&obj->base == NULL) { - DRM_ERROR("Invalid object handle %d at index %d\n", + DRM_DEBUG("Invalid object handle %d at index %d\n", exec[i].handle, i); ret = -ENOENT; goto err; @@ -1055,7 +1055,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, int ret, mode, i; if (!i915_gem_check_execbuffer(args)) { - DRM_ERROR("execbuf with invalid offset/length\n"); + DRM_DEBUG("execbuf with invalid offset/length\n"); return -EINVAL; } @@ -1070,20 +1070,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, break; case I915_EXEC_BSD: if (!HAS_BSD(dev)) { - DRM_ERROR("execbuf with invalid ring (BSD)\n"); + DRM_DEBUG("execbuf with invalid ring (BSD)\n"); return -EINVAL; } ring = &dev_priv->ring[VCS]; break; case I915_EXEC_BLT: if (!HAS_BLT(dev)) { - DRM_ERROR("execbuf with invalid ring (BLT)\n"); + DRM_DEBUG("execbuf with invalid ring (BLT)\n"); return -EINVAL; } ring = &dev_priv->ring[BCS]; break; default: - DRM_ERROR("execbuf with unknown ring: %d\n", + DRM_DEBUG("execbuf with unknown ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; } @@ -1109,18 +1109,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } break; default: - DRM_ERROR("execbuf with unknown constants: %d\n", mode); + DRM_DEBUG("execbuf with unknown constants: %d\n", 
mode); return -EINVAL; } if (args->buffer_count < 1) { - DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); + DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); return -EINVAL; } if (args->num_cliprects != 0) { if (ring != &dev_priv->ring[RCS]) { - DRM_ERROR("clip rectangles are only valid with the render ring\n"); + DRM_DEBUG("clip rectangles are only valid with the render ring\n"); return -EINVAL; } @@ -1165,7 +1165,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, obj = to_intel_bo(drm_gem_object_lookup(dev, file, exec[i].handle)); if (&obj->base == NULL) { - DRM_ERROR("Invalid object handle %d at index %d\n", + DRM_DEBUG("Invalid object handle %d at index %d\n", exec[i].handle, i); /* prevent error path from reading uninitialized data */ ret = -ENOENT; @@ -1173,7 +1173,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } if (!list_empty(&obj->exec_list)) { - DRM_ERROR("Object %p [handle %d, index %d] appears more than once in object list\n", + DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", obj, exec[i].handle, i); ret = -EINVAL; goto err; @@ -1211,7 +1211,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* Set the pending read domains for the batch buffer to COMMAND */ if (batch_obj->base.pending_write_domain) { - DRM_ERROR("Attempting to use self-modifying batch buffer\n"); + DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; } @@ -1316,7 +1316,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, int ret, i; if (args->buffer_count < 1) { - DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); + DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); return -EINVAL; } @@ -1324,7 +1324,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); if (exec_list == NULL || 
exec2_list == NULL) { - DRM_ERROR("Failed to allocate exec list for %d buffers\n", + DRM_DEBUG("Failed to allocate exec list for %d buffers\n", args->buffer_count); drm_free_large(exec_list); drm_free_large(exec2_list); @@ -1335,7 +1335,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, (uintptr_t) args->buffers_ptr, sizeof(*exec_list) * args->buffer_count); if (ret != 0) { - DRM_ERROR("copy %d exec entries failed %d\n", + DRM_DEBUG("copy %d exec entries failed %d\n", args->buffer_count, ret); drm_free_large(exec_list); drm_free_large(exec2_list); @@ -1376,7 +1376,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, sizeof(*exec_list) * args->buffer_count); if (ret) { ret = -EFAULT; - DRM_ERROR("failed to copy %d exec entries " + DRM_DEBUG("failed to copy %d exec entries " "back to user (%d)\n", args->buffer_count, ret); } @@ -1396,7 +1396,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, int ret; if (args->buffer_count < 1) { - DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count); + DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); return -EINVAL; } @@ -1406,7 +1406,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); if (exec2_list == NULL) { - DRM_ERROR("Failed to allocate exec list for %d buffers\n", + DRM_DEBUG("Failed to allocate exec list for %d buffers\n", args->buffer_count); return -ENOMEM; } @@ -1415,7 +1415,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, (uintptr_t) args->buffers_ptr, sizeof(*exec2_list) * args->buffer_count); if (ret != 0) { - DRM_ERROR("copy %d exec entries failed %d\n", + DRM_DEBUG("copy %d exec entries failed %d\n", args->buffer_count, ret); drm_free_large(exec2_list); return -EFAULT; @@ -1430,7 +1430,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, sizeof(*exec2_list) * args->buffer_count); if (ret) { ret = -EFAULT; - DRM_ERROR("failed to copy %d exec entries " + DRM_DEBUG("failed 
to copy %d exec entries " "back to user (%d)\n", args->buffer_count, ret); } -- cgit v1.2.3 From 7bddb01fb9697afd5d39bb69dd9f782a28063101 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 9 Feb 2012 17:15:47 +0100 Subject: drm/i915: ppgtt binding/unbinding support This adds support to bind/unbind objects and wires it up. Objects are only put into the ppgtt when necessary, i.e. at execbuf time. Objects are still unconditionally put into the global gtt. v2: Kill the quick hack and explicitly pass cache_level to ppgtt_bind like for the global gtt function. Noticed by Chris Wilson. Reviewed-by: Ben Widawsky Tested-by: Chris Wilson Tested-by: Eugeni Dodonov Reviewed-by: Eugeni Dodonov Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 7 ++ drivers/gpu/drm/i915/i915_gem.c | 11 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 146 +++++++++++++++++++++++++++-- 4 files changed, 167 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 03a9e49fe93d..35c8b5316396 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -850,6 +850,8 @@ struct drm_i915_gem_object { unsigned int cache_level:2; + unsigned int has_aliasing_ppgtt_mapping:1; + struct page **pages; /** @@ -1249,6 +1251,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, /* i915_gem_gtt.c */ int __must_check i915_gem_init_aliasing_ppgtt(struct drm_device *dev); void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev); +void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_object *obj, + enum i915_cache_level cache_level); +void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_object *obj); void i915_gem_restore_gtt_mappings(struct drm_device *dev); int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj); diff 
--git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 27fe07a2fd33..59092997bcfb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2020,6 +2020,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { + drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret = 0; if (obj->gtt_space == NULL) @@ -2064,6 +2065,11 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) trace_i915_gem_object_unbind(obj); i915_gem_gtt_unbind_object(obj); + if (obj->has_aliasing_ppgtt_mapping) { + i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); + obj->has_aliasing_ppgtt_mapping = 0; + } + i915_gem_object_put_pages_gtt(obj); list_del_init(&obj->gtt_list); @@ -2882,6 +2888,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { + struct drm_device *dev = obj->base.dev; + drm_i915_private_t *dev_priv = dev->dev_private; int ret; if (obj->cache_level == cache_level) @@ -2910,6 +2918,9 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, } i915_gem_gtt_rebind_object(obj, cache_level); + if (obj->has_aliasing_ppgtt_mapping) + i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, + obj, cache_level); } if (cache_level == I915_CACHE_NONE) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index b964998b5e25..9835b2efd93e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -515,6 +515,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, struct drm_file *file, struct list_head *objects) { + drm_i915_private_t *dev_priv = ring->dev->dev_private; struct drm_i915_gem_object *obj; int ret, retry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; @@ -623,6 
+624,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, } i915_gem_object_unpin(obj); + + /* ... and ensure ppgtt mapping exist if needed. */ + if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { + i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, + obj, obj->cache_level); + + obj->has_aliasing_ppgtt_mapping = 1; + } } if (ret != -ENOSPC || retry > 1) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f408f8c710db..2eacd78bb93b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -34,22 +34,31 @@ static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt, unsigned first_entry, unsigned num_entries) { - int i, j; uint32_t *pt_vaddr; uint32_t scratch_pte; + unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES; + unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; + unsigned last_pte, i; scratch_pte = GEN6_PTE_ADDR_ENCODE(ppgtt->scratch_page_dma_addr); scratch_pte |= GEN6_PTE_VALID | GEN6_PTE_CACHE_LLC; - for (i = 0; i < ppgtt->num_pd_entries; i++) { - pt_vaddr = kmap_atomic(ppgtt->pt_pages[i]); + while (num_entries) { + last_pte = first_pte + num_entries; + if (last_pte > I915_PPGTT_PT_ENTRIES) + last_pte = I915_PPGTT_PT_ENTRIES; + + pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]); - for (j = 0; j < I915_PPGTT_PT_ENTRIES; j++) - pt_vaddr[j] = scratch_pte; + for (i = first_pte; i < last_pte; i++) + pt_vaddr[i] = scratch_pte; kunmap_atomic(pt_vaddr); - } + num_entries -= last_pte - first_pte; + first_pte = 0; + act_pd++; + } } int i915_gem_init_aliasing_ppgtt(struct drm_device *dev) @@ -168,6 +177,131 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev) kfree(ppgtt); } +static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt, + struct scatterlist *sg_list, + unsigned sg_len, + unsigned first_entry, + uint32_t pte_flags) +{ + uint32_t *pt_vaddr, pte; + unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES; + unsigned 
first_pte = first_entry % I915_PPGTT_PT_ENTRIES; + unsigned i, j, m, segment_len; + dma_addr_t page_addr; + struct scatterlist *sg; + + /* init sg walking */ + sg = sg_list; + i = 0; + segment_len = sg_dma_len(sg) >> PAGE_SHIFT; + m = 0; + + while (i < sg_len) { + pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]); + + for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) { + page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT); + pte = GEN6_PTE_ADDR_ENCODE(page_addr); + pt_vaddr[j] = pte | pte_flags; + + /* grab the next page */ + m++; + if (m == segment_len) { + sg = sg_next(sg); + i++; + if (i == sg_len) + break; + + segment_len = sg_dma_len(sg) >> PAGE_SHIFT; + m = 0; + } + } + + kunmap_atomic(pt_vaddr); + + first_pte = 0; + act_pd++; + } +} + +static void i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt, + unsigned first_entry, unsigned num_entries, + struct page **pages, uint32_t pte_flags) +{ + uint32_t *pt_vaddr, pte; + unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES; + unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; + unsigned last_pte, i; + dma_addr_t page_addr; + + while (num_entries) { + last_pte = first_pte + num_entries; + last_pte = min_t(unsigned, last_pte, I915_PPGTT_PT_ENTRIES); + + pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]); + + for (i = first_pte; i < last_pte; i++) { + page_addr = page_to_phys(*pages); + pte = GEN6_PTE_ADDR_ENCODE(page_addr); + pt_vaddr[i] = pte | pte_flags; + + pages++; + } + + kunmap_atomic(pt_vaddr); + + num_entries -= last_pte - first_pte; + first_pte = 0; + act_pd++; + } +} + +void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_object *obj, + enum i915_cache_level cache_level) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t pte_flags = GEN6_PTE_VALID; + + switch (cache_level) { + case I915_CACHE_LLC_MLC: + pte_flags |= GEN6_PTE_CACHE_LLC_MLC; + break; + case I915_CACHE_LLC: + pte_flags |= GEN6_PTE_CACHE_LLC; + break; 
+ case I915_CACHE_NONE: + pte_flags |= GEN6_PTE_UNCACHED; + break; + default: + BUG(); + } + + if (dev_priv->mm.gtt->needs_dmar) { + BUG_ON(!obj->sg_list); + + i915_ppgtt_insert_sg_entries(ppgtt, + obj->sg_list, + obj->num_sg, + obj->gtt_space->start >> PAGE_SHIFT, + pte_flags); + } else + i915_ppgtt_insert_pages(ppgtt, + obj->gtt_space->start >> PAGE_SHIFT, + obj->base.size >> PAGE_SHIFT, + obj->pages, + pte_flags); +} + +void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_object *obj) +{ + i915_ppgtt_clear_range(ppgtt, + obj->gtt_space->start >> PAGE_SHIFT, + obj->base.size >> PAGE_SHIFT); +} + /* XXX kill agp_type! */ static unsigned int cache_level_to_agp_type(struct drm_device *dev, enum i915_cache_level cache_level) -- cgit v1.2.3