From c0c06bd244179f754d68684fd87674585a153e40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 20 Jan 2011 11:18:48 +0000 Subject: drm/i915/ringbuffer: Kill an annoyingly frequent debug message This is better handled through the tracepoints and just clutters the debug logs. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 03e337072517..94640e0e4edf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -606,7 +606,6 @@ ring_add_request(struct intel_ring_buffer *ring, intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_advance(ring); - DRM_DEBUG_DRIVER("%s %d\n", ring->name, seqno); *result = seqno; return 0; } -- cgit v1.2.3 From 29ee399131395d8b607803874a206b9daf714025 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 24 Jan 2011 16:35:42 +0000 Subject: drm/i915: Silence a few -Wunused-but-set-variable Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 3 --- drivers/gpu/drm/i915/intel_display.c | 3 +-- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++++ 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cf4f74c7c6fb..a70caf8b2688 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1121,7 +1121,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_mmap *args = data; struct drm_gem_object *obj; - loff_t offset; unsigned long addr; if (!(dev->driver->driver_features & DRIVER_GEM)) @@ -1136,8 +1135,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, return -E2BIG; } - offset = args->offset; - down_write(¤t->mm->mmap_sem); addr = do_mmap(obj->filp, 0, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 36958fdabdce..2fbc3da06c29 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4346,7 +4346,7 @@ static void intel_update_watermarks(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; int sr_hdisplay = 0; - unsigned long planea_clock = 0, planeb_clock = 0, sr_clock = 0; + unsigned long planea_clock = 0, planeb_clock = 0; int enabled = 0, pixel_size = 0; int sr_htotal = 0; @@ -4368,7 +4368,6 @@ static void intel_update_watermarks(struct drm_device *dev) planeb_clock = crtc->mode.clock; } sr_hdisplay = crtc->mode.hdisplay; - sr_clock = crtc->mode.clock; sr_htotal = crtc->mode.htotal; if (crtc->fb) pixel_size = crtc->fb->bits_per_pixel / 8; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cacc89f22621..5f7bbaa6a608 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -893,6 +893,10 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) /* Disable the ring buffer. The ring must be idle at this point */ dev_priv = ring->dev->dev_private; ret = intel_wait_ring_buffer(ring, ring->size - 8); + if (ret) + DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", + ring->name, ret); + I915_WRITE_CTL(ring, 0); drm_core_ioremapfree(&ring->map, ring->dev); -- cgit v1.2.3 From 21dd373486956d7789ffd878347c36efad16923d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Jan 2011 15:55:56 +0000 Subject: drm/i915: Defer reporting EIO until we try to use the GPU Instead of reporting EIO upfront in the entrance of an ioctl that may or may not attempt to use the GPU, defer the actual detection of an invalid ioctl to when we issue a GPU instruction. This allows us to continue to use bo in video memory (via pread/pwrite and mmap) after the GPU has hung. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 36 +++++++++++++-------------------- drivers/gpu/drm/i915/i915_gem_tiling.c | 5 ----- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++++ 4 files changed, 18 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a39f8d254502..ff498b98c9bd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1052,7 +1052,6 @@ extern void i915_mem_takedown(struct mem_block **heap); extern void i915_mem_release(struct drm_device * dev, struct drm_file *file_priv, struct mem_block *heap); /* i915_gem.c */ -int i915_gem_check_is_wedged(struct drm_device *dev); int i915_gem_init_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_create_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b9d4de368de3..52dd77b1bb7c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -75,8 +75,8 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, dev_priv->mm.object_memory -= size; } -int -i915_gem_check_is_wedged(struct drm_device *dev) +static int +i915_gem_wait_for_error(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct completion *x = &dev_priv->error_completion; @@ -90,27 +90,24 @@ i915_gem_check_is_wedged(struct drm_device *dev) if (ret) return ret; - /* Success, we reset the GPU! */ - if (!atomic_read(&dev_priv->mm.wedged)) - return 0; - - /* GPU is hung, bump the completion count to account for - * the token we just consumed so that we never hit zero and - * end up waiting upon a subsequent completion event that - * will never happen. - */ - spin_lock_irqsave(&x->wait.lock, flags); - x->done++; - spin_unlock_irqrestore(&x->wait.lock, flags); - return -EIO; + if (atomic_read(&dev_priv->mm.wedged)) { + /* GPU is hung, bump the completion count to account for + * the token we just consumed so that we never hit zero and + * end up waiting upon a subsequent completion event that + * will never happen. + */ + spin_lock_irqsave(&x->wait.lock, flags); + x->done++; + spin_unlock_irqrestore(&x->wait.lock, flags); + } + return 0; } int i915_mutex_lock_interruptible(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; int ret; - ret = i915_gem_check_is_wedged(dev); + ret = i915_gem_wait_for_error(dev); if (ret) return ret; @@ -118,11 +115,6 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) if (ret) return ret; - if (atomic_read(&dev_priv->mm.wedged)) { - mutex_unlock(&dev->struct_mutex); - return -EAGAIN; - } - WARN_ON(i915_verify_lists(dev)); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 22a32b9932c5..a093d67b94e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -284,11 +284,6 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_i915_gem_set_tiling *args = data; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; - int ret; - - ret = i915_gem_check_is_wedged(dev); - if (ret) - return ret; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (obj == NULL) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5f7bbaa6a608..235d9c4b40ae 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -980,9 +980,13 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) int intel_ring_begin(struct intel_ring_buffer *ring, int num_dwords) { + struct drm_i915_private *dev_priv = ring->dev->dev_private; int n = 4*num_dwords; int ret; + if (unlikely(atomic_read(&dev_priv->mm.wedged))) + return -EIO; + if (unlikely(ring->tail + n > ring->effective_size)) { ret = intel_wrap_ring_buffer(ring); if (unlikely(ret)) -- cgit v1.2.3 From db53a302611c06bde01851f61fa0675a84ca018c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 3 Feb 2011 11:57:46 +0000 Subject: drm/i915: Refine tracepoints A lot of minor tweaks to fix the tracepoints, improve the outputting for ftrace, and to generally make the tracepoints useful again. It is a start and enough to begin identifying performance issues and gaps in our coverage. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c | 1 - drivers/gpu/drm/i915/i915_drv.h | 52 ++--- drivers/gpu/drm/i915/i915_gem.c | 174 ++++++++--------- drivers/gpu/drm/i915/i915_gem_debug.c | 45 ----- drivers/gpu/drm/i915/i915_gem_evict.c | 5 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 58 ++---- drivers/gpu/drm/i915/i915_irq.c | 12 +- drivers/gpu/drm/i915/i915_trace.h | 301 +++++++++++++++++++---------- drivers/gpu/drm/i915/intel_overlay.c | 13 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 + 11 files changed, 330 insertions(+), 359 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 126e1747fb0c..c79efbc15c5e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2004,7 +2004,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->error_lock); - dev_priv->trace_irq_seqno = 0; ret = drm_vblank_init(dev, I915_NUM_PIPE); if (ret) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fb5979774c09..bdfda0b8c604 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -76,10 +76,7 @@ enum plane { #define DRIVER_PATCHLEVEL 0 #define WATCH_COHERENCY 0 -#define WATCH_EXEC 0 -#define WATCH_RELOC 0 #define WATCH_LISTS 0 -#define WATCH_PWRITE 0 #define I915_GEM_PHYS_CURSOR_0 1 #define I915_GEM_PHYS_CURSOR_1 2 @@ -289,7 +286,6 @@ typedef struct drm_i915_private { int page_flipping; atomic_t irq_received; - u32 trace_irq_seqno; /* protects the irq masks */ spinlock_t irq_lock; @@ -1001,7 +997,6 @@ extern int i915_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int i915_irq_wait(struct drm_device *dev, void *data, struct drm_file *file_priv); -void i915_trace_irq_get(struct drm_device *dev, u32 seqno); extern irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS); extern void i915_driver_irq_preinstall(struct drm_device * dev); @@ -1095,8 +1090,7 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void i915_gem_load(struct drm_device *dev); int i915_gem_init_object(struct drm_gem_object *obj); -int __must_check i915_gem_flush_ring(struct drm_device *dev, - struct intel_ring_buffer *ring, +int __must_check i915_gem_flush_ring(struct intel_ring_buffer *ring, uint32_t invalidate_domains, uint32_t flush_domains); struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, @@ -1127,10 +1121,9 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) } static inline u32 -i915_gem_next_request_seqno(struct drm_device *dev, - struct intel_ring_buffer *ring) +i915_gem_next_request_seqno(struct intel_ring_buffer *ring) { - drm_i915_private_t *dev_priv = dev->dev_private; + drm_i915_private_t *dev_priv = ring->dev->dev_private; return ring->outstanding_lazy_request = dev_priv->next_seqno; } @@ -1155,14 +1148,12 @@ void i915_gem_do_init(struct drm_device *dev, unsigned long end); int __must_check i915_gpu_idle(struct drm_device *dev); int __must_check i915_gem_idle(struct drm_device *dev); -int __must_check i915_add_request(struct drm_device *dev, - struct drm_file *file_priv, - struct drm_i915_gem_request *request, - struct intel_ring_buffer *ring); -int __must_check i915_do_wait_request(struct drm_device *dev, - uint32_t seqno, - bool interruptible, - struct intel_ring_buffer *ring); +int __must_check i915_add_request(struct intel_ring_buffer *ring, + struct drm_file *file, + struct drm_i915_gem_request *request); +int __must_check i915_wait_request(struct intel_ring_buffer *ring, + uint32_t seqno, + bool interruptible); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); int __must_check i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, @@ -1311,7 +1302,7 @@ extern void intel_display_print_error_state(struct seq_file *m, #define __i915_read(x, y) \ static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \ u##x val = read##y(dev_priv->regs + reg); \ - trace_i915_reg_rw('R', reg, val, sizeof(val)); \ + trace_i915_reg_rw(false, reg, val, sizeof(val)); \ return val; \ } __i915_read(8, b) @@ -1322,7 +1313,7 @@ __i915_read(64, q) #define __i915_write(x, y) \ static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \ - trace_i915_reg_rw('W', reg, val, sizeof(val)); \ + trace_i915_reg_rw(true, reg, val, sizeof(val)); \ write##y(val, dev_priv->regs + reg); \ } __i915_write(8, b) @@ -1371,25 +1362,4 @@ static inline u32 i915_safe_read(struct drm_i915_private *dev_priv, u32 reg) return val; } -static inline void -i915_write(struct drm_i915_private *dev_priv, u32 reg, u64 val, int len) -{ - /* Trace down the write operation before the real write */ - trace_i915_reg_rw('W', reg, val, len); - switch (len) { - case 8: - writeq(val, dev_priv->regs + reg); - break; - case 4: - writel(val, dev_priv->regs + reg); - break; - case 2: - writew(val, dev_priv->regs + reg); - break; - case 1: - writeb(val, dev_priv->regs + reg); - break; - } -} - #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a41c0e716805..f0f8c6ff684f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -518,6 +518,8 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, goto out; } + trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, args->size); @@ -959,6 +961,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, goto out; } + trace_i915_gem_object_pwrite(obj, args->offset, args->size); + /* We can only do the GTT pwrite on untiled buffers, as otherwise * it would end up going through the fenced access, and we'll get * different detiling behavior between reading and writing. @@ -1175,6 +1179,8 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out; + trace_i915_gem_object_fault(obj, page_offset, true, write); + /* Now bind it into the GTT if needed */ if (!obj->map_and_fenceable) { ret = i915_gem_object_unbind(obj); @@ -1668,9 +1674,8 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) } static void -i915_gem_process_flushing_list(struct drm_device *dev, - uint32_t flush_domains, - struct intel_ring_buffer *ring) +i915_gem_process_flushing_list(struct intel_ring_buffer *ring, + uint32_t flush_domains) { struct drm_i915_gem_object *obj, *next; @@ -1683,7 +1688,7 @@ i915_gem_process_flushing_list(struct drm_device *dev, obj->base.write_domain = 0; list_del_init(&obj->gpu_write_list); i915_gem_object_move_to_active(obj, ring, - i915_gem_next_request_seqno(dev, ring)); + i915_gem_next_request_seqno(ring)); trace_i915_gem_object_change_domain(obj, obj->base.read_domains, @@ -1693,27 +1698,22 @@ i915_gem_process_flushing_list(struct drm_device *dev, } int -i915_add_request(struct drm_device *dev, +i915_add_request(struct intel_ring_buffer *ring, struct drm_file *file, - struct drm_i915_gem_request *request, - struct intel_ring_buffer *ring) + struct drm_i915_gem_request *request) { - drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_i915_file_private *file_priv = NULL; + drm_i915_private_t *dev_priv = ring->dev->dev_private; uint32_t seqno; int was_empty; int ret; BUG_ON(request == NULL); - if (file != NULL) - file_priv = file->driver_priv; - ret = ring->add_request(ring, &seqno); if (ret) return ret; - ring->outstanding_lazy_request = false; + trace_i915_gem_request_add(ring, seqno); request->seqno = seqno; request->ring = ring; @@ -1721,7 +1721,9 @@ i915_add_request(struct drm_device *dev, was_empty = list_empty(&ring->request_list); list_add_tail(&request->list, &ring->request_list); - if (file_priv) { + if (file) { + struct drm_i915_file_private *file_priv = file->driver_priv; + spin_lock(&file_priv->mm.lock); request->file_priv = file_priv; list_add_tail(&request->client_list, @@ -1729,6 +1731,8 @@ i915_add_request(struct drm_device *dev, spin_unlock(&file_priv->mm.lock); } + ring->outstanding_lazy_request = false; + if (!dev_priv->mm.suspended) { mod_timer(&dev_priv->hangcheck_timer, jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); @@ -1845,18 +1849,15 @@ void i915_gem_reset(struct drm_device *dev) * This function clears the request list as sequence numbers are passed. */ static void -i915_gem_retire_requests_ring(struct drm_device *dev, - struct intel_ring_buffer *ring) +i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) { - drm_i915_private_t *dev_priv = dev->dev_private; uint32_t seqno; int i; - if (!ring->status_page.page_addr || - list_empty(&ring->request_list)) + if (list_empty(&ring->request_list)) return; - WARN_ON(i915_verify_lists(dev)); + WARN_ON(i915_verify_lists(ring->dev)); seqno = ring->get_seqno(ring); @@ -1874,7 +1875,7 @@ i915_gem_retire_requests_ring(struct drm_device *dev, if (!i915_seqno_passed(seqno, request->seqno)) break; - trace_i915_gem_request_retire(dev, request->seqno); + trace_i915_gem_request_retire(ring, request->seqno); list_del(&request->list); i915_gem_request_remove_from_client(request); @@ -1900,13 +1901,13 @@ i915_gem_retire_requests_ring(struct drm_device *dev, i915_gem_object_move_to_inactive(obj); } - if (unlikely (dev_priv->trace_irq_seqno && - i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) { + if (unlikely(ring->trace_irq_seqno && + i915_seqno_passed(seqno, ring->trace_irq_seqno))) { ring->irq_put(ring); - dev_priv->trace_irq_seqno = 0; + ring->trace_irq_seqno = 0; } - WARN_ON(i915_verify_lists(dev)); + WARN_ON(i915_verify_lists(ring->dev)); } void @@ -1930,7 +1931,7 @@ i915_gem_retire_requests(struct drm_device *dev) } for (i = 0; i < I915_NUM_RINGS; i++) - i915_gem_retire_requests_ring(dev, &dev_priv->ring[i]); + i915_gem_retire_requests_ring(&dev_priv->ring[i]); } static void @@ -1964,11 +1965,11 @@ i915_gem_retire_work_handler(struct work_struct *work) struct drm_i915_gem_request *request; int ret; - ret = i915_gem_flush_ring(dev, ring, 0, - I915_GEM_GPU_DOMAINS); + ret = i915_gem_flush_ring(ring, + 0, I915_GEM_GPU_DOMAINS); request = kzalloc(sizeof(*request), GFP_KERNEL); if (ret || request == NULL || - i915_add_request(dev, NULL, request, ring)) + i915_add_request(ring, NULL, request)) kfree(request); } @@ -1981,11 +1982,16 @@ i915_gem_retire_work_handler(struct work_struct *work) mutex_unlock(&dev->struct_mutex); } +/** + * Waits for a sequence number to be signaled, and cleans up the + * request and object lists appropriately for that event. + */ int -i915_do_wait_request(struct drm_device *dev, uint32_t seqno, - bool interruptible, struct intel_ring_buffer *ring) +i915_wait_request(struct intel_ring_buffer *ring, + uint32_t seqno, + bool interruptible) { - drm_i915_private_t *dev_priv = dev->dev_private; + drm_i915_private_t *dev_priv = ring->dev->dev_private; u32 ier; int ret = 0; @@ -2011,7 +2017,7 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, if (request == NULL) return -ENOMEM; - ret = i915_add_request(dev, NULL, request, ring); + ret = i915_add_request(ring, NULL, request); if (ret) { kfree(request); return ret; @@ -2021,18 +2027,18 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, } if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { - if (HAS_PCH_SPLIT(dev)) + if (HAS_PCH_SPLIT(ring->dev)) ier = I915_READ(DEIER) | I915_READ(GTIER); else ier = I915_READ(IER); if (!ier) { DRM_ERROR("something (likely vbetool) disabled " "interrupts, re-enabling\n"); - i915_driver_irq_preinstall(dev); - i915_driver_irq_postinstall(dev); + i915_driver_irq_preinstall(ring->dev); + i915_driver_irq_postinstall(ring->dev); } - trace_i915_gem_request_wait_begin(dev, seqno); + trace_i915_gem_request_wait_begin(ring, seqno); ring->waiting_seqno = seqno; if (ring->irq_get(ring)) { @@ -2052,7 +2058,7 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, ret = -EBUSY; ring->waiting_seqno = 0; - trace_i915_gem_request_wait_end(dev, seqno); + trace_i915_gem_request_wait_end(ring, seqno); } if (atomic_read(&dev_priv->mm.wedged)) ret = -EAGAIN; @@ -2068,22 +2074,11 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, * a separate wait queue to handle that. */ if (ret == 0) - i915_gem_retire_requests_ring(dev, ring); + i915_gem_retire_requests_ring(ring); return ret; } -/** - * Waits for a sequence number to be signaled, and cleans up the - * request and object lists appropriately for that event. - */ -static int -i915_wait_request(struct drm_device *dev, uint32_t seqno, - struct intel_ring_buffer *ring) -{ - return i915_do_wait_request(dev, seqno, 1, ring); -} - /** * Ensures that all rendering to the object has completed and the object is * safe to unbind from the GTT or access from the CPU. @@ -2092,7 +2087,6 @@ int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, bool interruptible) { - struct drm_device *dev = obj->base.dev; int ret; /* This function only exists to support waiting for existing rendering, @@ -2104,10 +2098,9 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, * it. */ if (obj->active) { - ret = i915_do_wait_request(dev, - obj->last_rendering_seqno, - interruptible, - obj->ring); + ret = i915_wait_request(obj->ring, + obj->last_rendering_seqno, + interruptible); if (ret) return ret; } @@ -2157,6 +2150,8 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (ret == -ERESTARTSYS) return ret; + trace_i915_gem_object_unbind(obj); + i915_gem_gtt_unbind_object(obj); i915_gem_object_put_pages_gtt(obj); @@ -2172,29 +2167,27 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (i915_gem_object_is_purgeable(obj)) i915_gem_object_truncate(obj); - trace_i915_gem_object_unbind(obj); - return ret; } int -i915_gem_flush_ring(struct drm_device *dev, - struct intel_ring_buffer *ring, +i915_gem_flush_ring(struct intel_ring_buffer *ring, uint32_t invalidate_domains, uint32_t flush_domains) { int ret; + trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); + ret = ring->flush(ring, invalidate_domains, flush_domains); if (ret) return ret; - i915_gem_process_flushing_list(dev, flush_domains, ring); + i915_gem_process_flushing_list(ring, flush_domains); return 0; } -static int i915_ring_idle(struct drm_device *dev, - struct intel_ring_buffer *ring) +static int i915_ring_idle(struct intel_ring_buffer *ring) { int ret; @@ -2202,15 +2195,15 @@ static int i915_ring_idle(struct drm_device *dev, return 0; if (!list_empty(&ring->gpu_write_list)) { - ret = i915_gem_flush_ring(dev, ring, + ret = i915_gem_flush_ring(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); if (ret) return ret; } - return i915_wait_request(dev, - i915_gem_next_request_seqno(dev, ring), - ring); + return i915_wait_request(ring, + i915_gem_next_request_seqno(ring), + true); } int @@ -2227,7 +2220,7 @@ i915_gpu_idle(struct drm_device *dev) /* Flush everything onto the inactive list. */ for (i = 0; i < I915_NUM_RINGS; i++) { - ret = i915_ring_idle(dev, &dev_priv->ring[i]); + ret = i915_ring_idle(&dev_priv->ring[i]); if (ret) return ret; } @@ -2418,8 +2411,7 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, if (obj->fenced_gpu_access) { if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->base.dev, - obj->last_fenced_ring, + ret = i915_gem_flush_ring(obj->last_fenced_ring, 0, obj->base.write_domain); if (ret) return ret; @@ -2431,10 +2423,10 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) { if (!ring_passed_seqno(obj->last_fenced_ring, obj->last_fenced_seqno)) { - ret = i915_do_wait_request(obj->base.dev, - obj->last_fenced_seqno, - interruptible, - obj->last_fenced_ring); + ret = i915_wait_request(obj->last_fenced_ring, + obj->last_fenced_seqno, + interruptible); + if (ret) return ret; } @@ -2560,10 +2552,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, if (reg->setup_seqno) { if (!ring_passed_seqno(obj->last_fenced_ring, reg->setup_seqno)) { - ret = i915_do_wait_request(obj->base.dev, - reg->setup_seqno, - interruptible, - obj->last_fenced_ring); + ret = i915_wait_request(obj->last_fenced_ring, + reg->setup_seqno, + interruptible); if (ret) return ret; } @@ -2580,7 +2571,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, } else if (obj->tiling_changed) { if (obj->fenced_gpu_access) { if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->base.dev, obj->ring, + ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); if (ret) return ret; @@ -2597,7 +2588,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, if (obj->tiling_changed) { if (pipelined) { reg->setup_seqno = - i915_gem_next_request_seqno(dev, pipelined); + i915_gem_next_request_seqno(pipelined); obj->last_fenced_seqno = reg->setup_seqno; obj->last_fenced_ring = pipelined; } @@ -2637,7 +2628,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, old->fence_reg = I915_FENCE_REG_NONE; old->last_fenced_ring = pipelined; old->last_fenced_seqno = - pipelined ? i915_gem_next_request_seqno(dev, pipelined) : 0; + pipelined ? i915_gem_next_request_seqno(pipelined) : 0; drm_gem_object_unreference(&old->base); } else if (obj->last_fenced_seqno == 0) @@ -2649,7 +2640,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, obj->last_fenced_ring = pipelined; reg->setup_seqno = - pipelined ? i915_gem_next_request_seqno(dev, pipelined) : 0; + pipelined ? i915_gem_next_request_seqno(pipelined) : 0; obj->last_fenced_seqno = reg->setup_seqno; update: @@ -2846,7 +2837,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, obj->map_and_fenceable = mappable && fenceable; - trace_i915_gem_object_bind(obj, obj->gtt_offset, map_and_fenceable); + trace_i915_gem_object_bind(obj, map_and_fenceable); return 0; } @@ -2869,13 +2860,11 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) static int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) { - struct drm_device *dev = obj->base.dev; - if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) return 0; /* Queue the GPU write cache flushing we need. */ - return i915_gem_flush_ring(dev, obj->ring, 0, obj->base.write_domain); + return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); } /** Flushes the GTT write domain for the object if it's dirty. */ @@ -3024,8 +3013,7 @@ i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj, return 0; if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->base.dev, obj->ring, - 0, obj->base.write_domain); + ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); if (ret) return ret; } @@ -3442,7 +3430,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * flush earlier is beneficial. */ if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(dev, obj->ring, + ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); } else if (obj->ring->outstanding_lazy_request == obj->last_rendering_seqno) { @@ -3453,9 +3441,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, */ request = kzalloc(sizeof(*request), GFP_KERNEL); if (request) - ret = i915_add_request(dev, - NULL, request, - obj->ring); + ret = i915_add_request(obj->ring, NULL,request); else ret = -ENOMEM; } @@ -3465,7 +3451,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * are actually unmasked, and our working set ends up being * larger than required. */ - i915_gem_retire_requests_ring(dev, obj->ring); + i915_gem_retire_requests_ring(obj->ring); args->busy = obj->active; } @@ -3595,6 +3581,8 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) kfree(obj->page_cpu_valid); kfree(obj->bit_17); kfree(obj); + + trace_i915_gem_object_destroy(obj); } void i915_gem_free_object(struct drm_gem_object *gem_obj) @@ -3602,8 +3590,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); struct drm_device *dev = obj->base.dev; - trace_i915_gem_object_destroy(obj); - while (obj->pin_count > 0) i915_gem_object_unpin(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c index 29d014c48ca2..8da1899bd24f 100644 --- a/drivers/gpu/drm/i915/i915_gem_debug.c +++ b/drivers/gpu/drm/i915/i915_gem_debug.c @@ -134,51 +134,6 @@ i915_verify_lists(struct drm_device *dev) } #endif /* WATCH_INACTIVE */ - -#if WATCH_EXEC | WATCH_PWRITE -static void -i915_gem_dump_page(struct page *page, uint32_t start, uint32_t end, - uint32_t bias, uint32_t mark) -{ - uint32_t *mem = kmap_atomic(page, KM_USER0); - int i; - for (i = start; i < end; i += 4) - DRM_INFO("%08x: %08x%s\n", - (int) (bias + i), mem[i / 4], - (bias + i == mark) ? " ********" : ""); - kunmap_atomic(mem, KM_USER0); - /* give syslog time to catch up */ - msleep(1); -} - -void -i915_gem_dump_object(struct drm_i915_gem_object *obj, int len, - const char *where, uint32_t mark) -{ - int page; - - DRM_INFO("%s: object at offset %08x\n", where, obj->gtt_offset); - for (page = 0; page < (len + PAGE_SIZE-1) / PAGE_SIZE; page++) { - int page_len, chunk, chunk_len; - - page_len = len - page * PAGE_SIZE; - if (page_len > PAGE_SIZE) - page_len = PAGE_SIZE; - - for (chunk = 0; chunk < page_len; chunk += 128) { - chunk_len = page_len - chunk; - if (chunk_len > 128) - chunk_len = 128; - i915_gem_dump_page(obj->pages[page], - chunk, chunk + chunk_len, - obj->gtt_offset + - page * PAGE_SIZE, - mark); - } - } -} -#endif - #if WATCH_COHERENCY void i915_gem_object_check_coherency(struct drm_i915_gem_object *obj, int handle) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 3d39005540aa..da05a2692a75 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -30,6 +30,7 @@ #include "drm.h" #include "i915_drv.h" #include "i915_drm.h" +#include "i915_trace.h" static bool mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind) @@ -63,6 +64,8 @@ i915_gem_evict_something(struct drm_device *dev, int min_size, return 0; } + trace_i915_gem_evict(dev, min_size, alignment, mappable); + /* * The goal is to evict objects and amalgamate space in LRU order. * The oldest idle objects reside on the inactive list, which is in @@ -189,6 +192,8 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only) if (lists_empty) return -ENOSPC; + trace_i915_gem_evict_everything(dev, purgeable_only); + /* Flush everything (on to the inactive lists) and evict */ ret = i915_gpu_idle(dev); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index b0a0238c36d1..84fa24e6cca8 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -282,21 +282,6 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, target_offset = to_intel_bo(target_obj)->gtt_offset; -#if WATCH_RELOC - DRM_INFO("%s: obj %p offset %08x target %d " - "read %08x write %08x gtt %08x " - "presumed %08x delta %08x\n", - __func__, - obj, - (int) reloc->offset, - (int) reloc->target_handle, - (int) reloc->read_domains, - (int) reloc->write_domain, - (int) target_offset, - (int) reloc->presumed_offset, - reloc->delta); -#endif - /* The target buffer should have appeared before us in the * exec_object list, so it should have a GTT space bound by now. */ @@ -747,8 +732,7 @@ i915_gem_execbuffer_flush(struct drm_device *dev, if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) { for (i = 0; i < I915_NUM_RINGS; i++) if (flush_rings & (1 << i)) { - ret = i915_gem_flush_ring(dev, - &dev_priv->ring[i], + ret = i915_gem_flush_ring(&dev_priv->ring[i], invalidate_domains, flush_domains); if (ret) @@ -787,7 +771,7 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj, if (request == NULL) return -ENOMEM; - ret = i915_add_request(obj->base.dev, NULL, request, from); + ret = i915_add_request(from, NULL, request); if (ret) { kfree(request); return ret; @@ -815,12 +799,6 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, i915_gem_object_set_to_gpu_domain(obj, ring, &cd); if (cd.invalidate_domains | cd.flush_domains) { -#if WATCH_EXEC - DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", - __func__, - cd.invalidate_domains, - cd.flush_domains); -#endif ret = i915_gem_execbuffer_flush(ring->dev, cd.invalidate_domains, cd.flush_domains, @@ -924,6 +902,10 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, struct drm_i915_gem_object *obj; list_for_each_entry(obj, objects, exec_list) { + u32 old_read = obj->base.read_domains; + u32 old_write = obj->base.write_domain; + + obj->base.read_domains = obj->base.pending_read_domains; obj->base.write_domain = obj->base.pending_write_domain; obj->fenced_gpu_access = obj->pending_fenced_gpu_access; @@ -937,9 +919,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, intel_mark_busy(ring->dev, obj); } - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - obj->base.write_domain); + trace_i915_gem_object_change_domain(obj, old_read, old_write); } } @@ -961,14 +941,14 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev, if (INTEL_INFO(dev)->gen >= 4) invalidate |= I915_GEM_DOMAIN_SAMPLER; if (ring->flush(ring, invalidate, 0)) { - i915_gem_next_request_seqno(dev, ring); + i915_gem_next_request_seqno(ring); return; } /* Add a breadcrumb for the completion of the batch buffer */ request = kzalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL || i915_add_request(dev, file, request, ring)) { - i915_gem_next_request_seqno(dev, ring); + if (request == NULL || i915_add_request(ring, file, request)) { + i915_gem_next_request_seqno(ring); kfree(request); } } @@ -998,10 +978,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) return ret; -#if WATCH_EXEC - DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", - (int) args->buffers_ptr, args->buffer_count, args->batch_len); -#endif switch (args->flags & I915_EXEC_RING_MASK) { case I915_EXEC_DEFAULT: case I915_EXEC_RENDER: @@ -1172,7 +1148,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) goto err; - seqno = i915_gem_next_request_seqno(dev, ring); + seqno = i915_gem_next_request_seqno(ring); for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) { if (seqno < ring->sync_seqno[i]) { /* The GPU can not handle its semaphore value wrapping, @@ -1187,6 +1163,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } } + trace_i915_gem_ring_dispatch(ring, seqno); + exec_start = batch_obj->gtt_offset + args->batch_start_offset; exec_len = args->batch_len; if (cliprects) { @@ -1243,11 +1221,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_i915_gem_exec_object2 *exec2_list = NULL; int ret, i; -#if WATCH_EXEC - DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", - (int) args->buffers_ptr, args->buffer_count, args->batch_len); -#endif - if (args->buffer_count < 1) { DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); return -EINVAL; @@ -1328,11 +1301,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, struct drm_i915_gem_exec_object2 *exec2_list = NULL; int ret; -#if WATCH_EXEC - DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", - (int) args->buffers_ptr, args->buffer_count, args->batch_len); -#endif - if (args->buffer_count < 1) { DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 95304472f0d0..15d6269027e7 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -365,7 +365,7 @@ static void notify_ring(struct drm_device *dev, return; seqno = ring->get_seqno(ring); - trace_i915_gem_request_complete(dev, seqno); + trace_i915_gem_request_complete(ring, seqno); ring->irq_seqno = seqno; wake_up_all(&ring->irq_queue); @@ -1273,16 +1273,6 @@ static int i915_emit_irq(struct drm_device * dev) return dev_priv->counter; } -void i915_trace_irq_get(struct drm_device *dev, u32 seqno) -{ - drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; - struct intel_ring_buffer *ring = LP_RING(dev_priv); - - if (dev_priv->trace_irq_seqno == 0 && - ring->irq_get(ring)) - dev_priv->trace_irq_seqno = seqno; -} - static int i915_wait_irq(struct drm_device * dev, int irq_nr) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 7f0fc3ed61aa..d623fefbfaca 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -7,6 +7,7 @@ #include #include "i915_drv.h" +#include "intel_ringbuffer.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM i915 @@ -16,9 +17,7 @@ /* object tracking */ TRACE_EVENT(i915_gem_object_create, - TP_PROTO(struct drm_i915_gem_object *obj), - TP_ARGS(obj), TP_STRUCT__entry( @@ -35,33 +34,51 @@ TRACE_EVENT(i915_gem_object_create, ); TRACE_EVENT(i915_gem_object_bind, - - TP_PROTO(struct drm_i915_gem_object *obj, u32 gtt_offset, bool mappable), - - TP_ARGS(obj, gtt_offset, mappable), + TP_PROTO(struct drm_i915_gem_object *obj, bool mappable), + TP_ARGS(obj, mappable), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, gtt_offset) + __field(u32, offset) + __field(u32, size) __field(bool, mappable) ), TP_fast_assign( __entry->obj = obj; - __entry->gtt_offset = gtt_offset; + __entry->offset = obj->gtt_space->start; + __entry->size = obj->gtt_space->size; __entry->mappable = mappable; ), - TP_printk("obj=%p, gtt_offset=%08x%s", - __entry->obj, __entry->gtt_offset, + TP_printk("obj=%p, offset=%08x size=%x%s", + __entry->obj, __entry->offset, __entry->size, __entry->mappable ? ", mappable" : "") ); -TRACE_EVENT(i915_gem_object_change_domain, +TRACE_EVENT(i915_gem_object_unbind, + TP_PROTO(struct drm_i915_gem_object *obj), + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_i915_gem_object *, obj) + __field(u32, offset) + __field(u32, size) + ), - TP_PROTO(struct drm_i915_gem_object *obj, uint32_t old_read_domains, uint32_t old_write_domain), + TP_fast_assign( + __entry->obj = obj; + __entry->offset = obj->gtt_space->start; + __entry->size = obj->gtt_space->size; + ), - TP_ARGS(obj, old_read_domains, old_write_domain), + TP_printk("obj=%p, offset=%08x size=%x", + __entry->obj, __entry->offset, __entry->size) +); + +TRACE_EVENT(i915_gem_object_change_domain, + TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write), + TP_ARGS(obj, old_read, old_write), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) @@ -71,177 +88,264 @@ TRACE_EVENT(i915_gem_object_change_domain, TP_fast_assign( __entry->obj = obj; - __entry->read_domains = obj->base.read_domains | (old_read_domains << 16); - __entry->write_domain = obj->base.write_domain | (old_write_domain << 16); + __entry->read_domains = obj->base.read_domains | (old_read << 16); + __entry->write_domain = obj->base.write_domain | (old_write << 16); ), - TP_printk("obj=%p, read=%04x, write=%04x", + TP_printk("obj=%p, read=%02x=>%02x, write=%02x=>%02x", __entry->obj, - __entry->read_domains, __entry->write_domain) + __entry->read_domains >> 16, + __entry->read_domains & 0xffff, + __entry->write_domain >> 16, + __entry->write_domain & 0xffff) ); -DECLARE_EVENT_CLASS(i915_gem_object, +TRACE_EVENT(i915_gem_object_pwrite, + TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), + TP_ARGS(obj, offset, len), - TP_PROTO(struct drm_i915_gem_object *obj), + TP_STRUCT__entry( + __field(struct drm_i915_gem_object *, obj) + __field(u32, offset) + __field(u32, len) + ), - TP_ARGS(obj), + TP_fast_assign( + __entry->obj = obj; + __entry->offset = offset; + __entry->len = len; + ), + + TP_printk("obj=%p, offset=%u, len=%u", + __entry->obj, __entry->offset, __entry->len) +); + +TRACE_EVENT(i915_gem_object_pread, + TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), + TP_ARGS(obj, offset, len), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) + __field(u32, offset) + __field(u32, len) ), TP_fast_assign( __entry->obj = obj; + __entry->offset = offset; + __entry->len = len; ), - TP_printk("obj=%p", __entry->obj) + TP_printk("obj=%p, offset=%u, len=%u", + __entry->obj, __entry->offset, __entry->len) ); -DEFINE_EVENT(i915_gem_object, i915_gem_object_clflush, +TRACE_EVENT(i915_gem_object_fault, + TP_PROTO(struct drm_i915_gem_object *obj, u32 index, bool gtt, bool write), + TP_ARGS(obj, index, gtt, write), + + TP_STRUCT__entry( + __field(struct drm_i915_gem_object *, obj) + __field(u32, index) + __field(bool, gtt) + __field(bool, write) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->index = index; + __entry->gtt = gtt; + __entry->write = write; + ), + TP_printk("obj=%p, %s index=%u %s", + __entry->obj, + __entry->gtt ? "GTT" : "CPU", + __entry->index, + __entry->write ? ", writable" : "") +); + +DECLARE_EVENT_CLASS(i915_gem_object, TP_PROTO(struct drm_i915_gem_object *obj), + TP_ARGS(obj), - TP_ARGS(obj) + TP_STRUCT__entry( + __field(struct drm_i915_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) ); -DEFINE_EVENT(i915_gem_object, i915_gem_object_unbind, +DEFINE_EVENT(i915_gem_object, i915_gem_object_clflush, + TP_PROTO(struct drm_i915_gem_object *obj), + TP_ARGS(obj) +); +DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy, TP_PROTO(struct drm_i915_gem_object *obj), - TP_ARGS(obj) ); -DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy, +TRACE_EVENT(i915_gem_evict, + TP_PROTO(struct drm_device *dev, u32 size, u32 align, bool mappable), + TP_ARGS(dev, size, align, mappable), - TP_PROTO(struct drm_i915_gem_object *obj), + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, size) + __field(u32, align) + __field(bool, mappable) + ), - TP_ARGS(obj) + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->size = size; + __entry->align = align; + __entry->mappable = mappable; + ), + + TP_printk("dev=%d, size=%d, align=%d %s", + __entry->dev, __entry->size, __entry->align, + __entry->mappable ? ", mappable" : "") ); -/* batch tracing */ +TRACE_EVENT(i915_gem_evict_everything, + TP_PROTO(struct drm_device *dev, bool purgeable), + TP_ARGS(dev, purgeable), -TRACE_EVENT(i915_gem_request_submit, + TP_STRUCT__entry( + __field(u32, dev) + __field(bool, purgeable) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->purgeable = purgeable; + ), - TP_PROTO(struct drm_device *dev, u32 seqno), + TP_printk("dev=%d%s", + __entry->dev, + __entry->purgeable ? ", purgeable only" : "") +); - TP_ARGS(dev, seqno), +TRACE_EVENT(i915_gem_ring_dispatch, + TP_PROTO(struct intel_ring_buffer *ring, u32 seqno), + TP_ARGS(ring, seqno), TP_STRUCT__entry( __field(u32, dev) + __field(u32, ring) __field(u32, seqno) ), TP_fast_assign( - __entry->dev = dev->primary->index; + __entry->dev = ring->dev->primary->index; + __entry->ring = ring->id; __entry->seqno = seqno; - i915_trace_irq_get(dev, seqno); + i915_trace_irq_get(ring, seqno); ), - TP_printk("dev=%u, seqno=%u", __entry->dev, __entry->seqno) + TP_printk("dev=%u, ring=%u, seqno=%u", + __entry->dev, __entry->ring, __entry->seqno) ); -TRACE_EVENT(i915_gem_request_flush, - - TP_PROTO(struct drm_device *dev, u32 seqno, - u32 flush_domains, u32 invalidate_domains), - - TP_ARGS(dev, seqno, flush_domains, invalidate_domains), +TRACE_EVENT(i915_gem_ring_flush, + TP_PROTO(struct intel_ring_buffer *ring, u32 invalidate, u32 flush), + TP_ARGS(ring, invalidate, flush), TP_STRUCT__entry( __field(u32, dev) - __field(u32, seqno) - __field(u32, flush_domains) - __field(u32, invalidate_domains) + __field(u32, ring) + __field(u32, invalidate) + __field(u32, flush) ), TP_fast_assign( - __entry->dev = dev->primary->index; - __entry->seqno = seqno; - __entry->flush_domains = flush_domains; - __entry->invalidate_domains = invalidate_domains; + __entry->dev = ring->dev->primary->index; + __entry->ring = ring->id; + __entry->invalidate = invalidate; + __entry->flush = flush; ), - TP_printk("dev=%u, seqno=%u, flush=%04x, invalidate=%04x", - __entry->dev, __entry->seqno, - __entry->flush_domains, __entry->invalidate_domains) + TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x", + __entry->dev, __entry->ring, + __entry->invalidate, __entry->flush) ); DECLARE_EVENT_CLASS(i915_gem_request, - - TP_PROTO(struct drm_device *dev, u32 seqno), - - TP_ARGS(dev, seqno), + TP_PROTO(struct intel_ring_buffer *ring, u32 seqno), + TP_ARGS(ring, seqno), TP_STRUCT__entry( __field(u32, dev) + __field(u32, ring) __field(u32, seqno) ), TP_fast_assign( - __entry->dev = dev->primary->index; + __entry->dev = ring->dev->primary->index; + __entry->ring = ring->id; __entry->seqno = seqno; ), - TP_printk("dev=%u, seqno=%u", __entry->dev, __entry->seqno) + TP_printk("dev=%u, ring=%u, seqno=%u", + __entry->dev, __entry->ring, __entry->seqno) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_complete, - - TP_PROTO(struct drm_device *dev, u32 seqno), +DEFINE_EVENT(i915_gem_request, i915_gem_request_add, + TP_PROTO(struct intel_ring_buffer *ring, u32 seqno), + TP_ARGS(ring, seqno) +); - TP_ARGS(dev, seqno) +DEFINE_EVENT(i915_gem_request, i915_gem_request_complete, + TP_PROTO(struct intel_ring_buffer *ring, u32 seqno), + TP_ARGS(ring, seqno) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, - - TP_PROTO(struct drm_device *dev, u32 seqno), - - TP_ARGS(dev, seqno) + TP_PROTO(struct intel_ring_buffer *ring, u32 seqno), + TP_ARGS(ring, seqno) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_begin, - - TP_PROTO(struct drm_device *dev, u32 seqno), - - TP_ARGS(dev, seqno) + TP_PROTO(struct intel_ring_buffer *ring, u32 seqno), + TP_ARGS(ring, seqno) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, - - TP_PROTO(struct drm_device *dev, u32 seqno), - - TP_ARGS(dev, seqno) + TP_PROTO(struct intel_ring_buffer *ring, u32 seqno), + TP_ARGS(ring, seqno) ); DECLARE_EVENT_CLASS(i915_ring, - - TP_PROTO(struct drm_device *dev), - - TP_ARGS(dev), + TP_PROTO(struct intel_ring_buffer *ring), + TP_ARGS(ring), TP_STRUCT__entry( __field(u32, dev) + __field(u32, ring) ), TP_fast_assign( - __entry->dev = dev->primary->index; + __entry->dev = ring->dev->primary->index; + __entry->ring = ring->id; ), - TP_printk("dev=%u", __entry->dev) + TP_printk("dev=%u, ring=%u", __entry->dev, __entry->ring) ); DEFINE_EVENT(i915_ring, i915_ring_wait_begin, - - TP_PROTO(struct drm_device *dev), - - TP_ARGS(dev) + TP_PROTO(struct intel_ring_buffer *ring), + TP_ARGS(ring) ); DEFINE_EVENT(i915_ring, i915_ring_wait_end, - - TP_PROTO(struct drm_device *dev), - - TP_ARGS(dev) + TP_PROTO(struct intel_ring_buffer *ring), + TP_ARGS(ring) ); TRACE_EVENT(i915_flip_request, @@ -281,26 +385,29 @@ TRACE_EVENT(i915_flip_complete, ); TRACE_EVENT(i915_reg_rw, - TP_PROTO(int cmd, uint32_t reg, uint64_t val, int len), + TP_PROTO(bool write, u32 reg, u64 val, int len), - TP_ARGS(cmd, reg, val, len), + TP_ARGS(write, reg, val, len), TP_STRUCT__entry( - __field(int, cmd) - __field(uint32_t, reg) - __field(uint64_t, val) - __field(int, len) + __field(u64, val) + __field(u32, reg) + __field(u16, write) + __field(u16, len) ), TP_fast_assign( - __entry->cmd = cmd; + __entry->val = (u64)val; __entry->reg = reg; - __entry->val = (uint64_t)val; + __entry->write = write; __entry->len = len; ), - TP_printk("cmd=%c, reg=0x%x, val=0x%llx, len=%d", - __entry->cmd, __entry->reg, __entry->val, __entry->len) + TP_printk("%s reg=0x%x, len=%d, val=(0x%x, 0x%x)", + __entry->write ? "write" : "read", + __entry->reg, __entry->len, + (u32)(__entry->val & 0xffffffff), + (u32)(__entry->val >> 32)) ); #endif /* _I915_TRACE_H_ */ diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 3fbb98b948d6..d2fdfd589c85 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -221,16 +221,15 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay, int ret; BUG_ON(overlay->last_flip_req); - ret = i915_add_request(dev, NULL, request, LP_RING(dev_priv)); + ret = i915_add_request(LP_RING(dev_priv), NULL, request); if (ret) { kfree(request); return ret; } overlay->last_flip_req = request->seqno; overlay->flip_tail = tail; - ret = i915_do_wait_request(dev, - overlay->last_flip_req, true, - LP_RING(dev_priv)); + ret = i915_wait_request(LP_RING(dev_priv), + overlay->last_flip_req, true); if (ret) return ret; @@ -364,7 +363,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, OUT_RING(flip_addr); ADVANCE_LP_RING(); - ret = i915_add_request(dev, NULL, request, LP_RING(dev_priv)); + ret = i915_add_request(LP_RING(dev_priv), NULL, request); if (ret) { kfree(request); return ret; @@ -453,8 +452,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay, if (overlay->last_flip_req == 0) return 0; - ret = i915_do_wait_request(dev, overlay->last_flip_req, - interruptible, LP_RING(dev_priv)); + ret = i915_wait_request(LP_RING(dev_priv), + overlay->last_flip_req, interruptible); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 235d9c4b40ae..ec7175e0dcd8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -62,18 +62,9 @@ render_ring_flush(struct intel_ring_buffer *ring, u32 flush_domains) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; u32 cmd; int ret; -#if WATCH_EXEC - DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, - invalidate_domains, flush_domains); -#endif - - trace_i915_gem_request_flush(dev, dev_priv->next_seqno, - invalidate_domains, flush_domains); - if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) { /* * read/write caches: @@ -122,9 +113,6 @@ render_ring_flush(struct intel_ring_buffer *ring, (IS_G4X(dev) || IS_GEN5(dev))) cmd |= MI_INVALIDATE_ISP; -#if WATCH_EXEC - DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd); -#endif ret = intel_ring_begin(ring, 2); if (ret) return ret; @@ -714,11 +702,8 @@ render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 len) { struct drm_device *dev = ring->dev; - drm_i915_private_t *dev_priv = dev->dev_private; int ret; - trace_i915_gem_request_submit(dev, dev_priv->next_seqno + 1); - if (IS_I830(dev) || IS_845G(dev)) { ret = intel_ring_begin(ring, 4); if (ret) @@ -953,13 +938,13 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) return 0; } - trace_i915_ring_wait_begin (dev); + trace_i915_ring_wait_begin(ring); end = jiffies + 3 * HZ; do { ring->head = I915_READ_HEAD(ring); ring->space = ring_space(ring); if (ring->space >= n) { - trace_i915_ring_wait_end(dev); + trace_i915_ring_wait_end(ring); return 0; } @@ -973,7 +958,7 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) if (atomic_read(&dev_priv->mm.wedged)) return -EAGAIN; } while (!time_after(jiffies, end)); - trace_i915_ring_wait_end (dev); + trace_i915_ring_wait_end(ring); return -EBUSY; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5e14b09f67ce..bd6a5fbfa929 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -58,6 +58,7 @@ struct intel_ring_buffer { u32 irq_refcount; u32 irq_mask; u32 irq_seqno; /* last seq seem at irq time */ + u32 trace_irq_seqno; u32 waiting_seqno; u32 sync_seqno[I915_NUM_RINGS-1]; bool __must_check (*irq_get)(struct intel_ring_buffer *ring); @@ -186,6 +187,12 @@ int intel_init_blt_ring_buffer(struct drm_device *dev); u32 intel_ring_get_active_head(struct intel_ring_buffer *ring); void intel_ring_setup_status_page(struct intel_ring_buffer *ring); +static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno) +{ + if (ring->trace_irq_seqno == 0 && ring->irq_get(ring)) + ring->trace_irq_seqno = seqno; +} + /* DRI warts */ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size); -- cgit v1.2.3 From 36d527deadf7d0c302e3452dde39465e74a65a08 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 19 Mar 2011 22:26:49 +0000 Subject: drm/i915: Restore missing command flush before interrupt on BLT ring We always skipped flushing the BLT ring if the request flush did not include the RENDER domain. However, this neglects that we try to flush the COMMAND domain after every batch and before the breadcrumb interrupt (to make sure the batch is indeed completed prior to the interrupt firing and so insuring CPU coherency). As a result of the missing flush, incoherency did indeed creep in, most notable when using lots of command buffers and so potentially rewritting an active command buffer (i.e. the GPU was still executing from it even though the following interrupt had already fired and the request/buffer retired). As all ring->flush routines now have the same preconditions, de-duplicate and move those checks up into i915_gem_flush_ring(). Fixes gem_linear_blit. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=35284 Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Tested-by: mengmeng.meng@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 7 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 109 ++++++++++++++------------------ 2 files changed, 55 insertions(+), 61 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0b99c30bb19c..edd6098743b2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2219,13 +2219,18 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring, { int ret; + if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) + return 0; + trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); ret = ring->flush(ring, invalidate_domains, flush_domains); if (ret) return ret; - i915_gem_process_flushing_list(ring, flush_domains); + if (flush_domains & I915_GEM_GPU_DOMAINS) + i915_gem_process_flushing_list(ring, flush_domains); + return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 789c47801ba8..e9e6f71418a4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -65,62 +65,60 @@ render_ring_flush(struct intel_ring_buffer *ring, u32 cmd; int ret; - if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) { + /* + * read/write caches: + * + * I915_GEM_DOMAIN_RENDER is always invalidated, but is + * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is + * also flushed at 2d versus 3d pipeline switches. + * + * read-only caches: + * + * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if + * MI_READ_FLUSH is set, and is always flushed on 965. + * + * I915_GEM_DOMAIN_COMMAND may not exist? + * + * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is + * invalidated when MI_EXE_FLUSH is set. + * + * I915_GEM_DOMAIN_VERTEX, which exists on 965, is + * invalidated with every MI_FLUSH. + * + * TLBs: + * + * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND + * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and + * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER + * are flushed at any MI_FLUSH. + */ + + cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; + if ((invalidate_domains|flush_domains) & + I915_GEM_DOMAIN_RENDER) + cmd &= ~MI_NO_WRITE_FLUSH; + if (INTEL_INFO(dev)->gen < 4) { /* - * read/write caches: - * - * I915_GEM_DOMAIN_RENDER is always invalidated, but is - * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is - * also flushed at 2d versus 3d pipeline switches. - * - * read-only caches: - * - * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if - * MI_READ_FLUSH is set, and is always flushed on 965. - * - * I915_GEM_DOMAIN_COMMAND may not exist? - * - * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is - * invalidated when MI_EXE_FLUSH is set. - * - * I915_GEM_DOMAIN_VERTEX, which exists on 965, is - * invalidated with every MI_FLUSH. - * - * TLBs: - * - * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND - * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and - * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER - * are flushed at any MI_FLUSH. + * On the 965, the sampler cache always gets flushed + * and this bit is reserved. */ + if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) + cmd |= MI_READ_FLUSH; + } + if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) + cmd |= MI_EXE_FLUSH; - cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; - if ((invalidate_domains|flush_domains) & - I915_GEM_DOMAIN_RENDER) - cmd &= ~MI_NO_WRITE_FLUSH; - if (INTEL_INFO(dev)->gen < 4) { - /* - * On the 965, the sampler cache always gets flushed - * and this bit is reserved. - */ - if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) - cmd |= MI_READ_FLUSH; - } - if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) - cmd |= MI_EXE_FLUSH; - - if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && - (IS_G4X(dev) || IS_GEN5(dev))) - cmd |= MI_INVALIDATE_ISP; + if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && + (IS_G4X(dev) || IS_GEN5(dev))) + cmd |= MI_INVALIDATE_ISP; - ret = intel_ring_begin(ring, 2); - if (ret) - return ret; + ret = intel_ring_begin(ring, 2); + if (ret) + return ret; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - } + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -568,9 +566,6 @@ bsd_ring_flush(struct intel_ring_buffer *ring, { int ret; - if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0) - return 0; - ret = intel_ring_begin(ring, 2); if (ret) return ret; @@ -1056,9 +1051,6 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring, uint32_t cmd; int ret; - if (((invalidate | flush) & I915_GEM_GPU_DOMAINS) == 0) - return 0; - ret = intel_ring_begin(ring, 4); if (ret) return ret; @@ -1230,9 +1222,6 @@ static int blt_ring_flush(struct intel_ring_buffer *ring, uint32_t cmd; int ret; - if (((invalidate | flush) & I915_GEM_DOMAIN_RENDER) == 0) - return 0; - ret = blt_ring_begin(ring, 4); if (ret) return ret; -- cgit v1.2.3