From 1584f16ca96ef124aad79efa3303cff5f3530e2c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 28 Nov 2018 15:09:25 -0800 Subject: drm/v3d: Add support for submitting jobs to the TFU. The TFU can copy from raster, UIF, and SAND input images to UIF output images, with optional mipmap generation. This will certainly be useful for media EGL image input, but is also useful immediately for mipmap generation without bogging the V3D core down. For now we only run the queue 1 job deep, and don't have any hang recovery (though I don't think we should need it, with TFU). Queuing multiple jobs in the HW will require synchronizing the YUV coefficient regs updates since they don't get FIFOed with the job. v2: Change the ioctl to IOW instead of IOWR, always set COEF0, explain why TFU is AUTH, clarify the syncing docs, drop the unused TFU interrupt regs (you're expected to use the hub's), don't take &bo->base for NULL bos. v3: Fix a little whitespace alignment (noticed by checkpatch), rebase on drm_sched_job_cleanup() changes. Signed-off-by: Eric Anholt Reviewed-by: Dave Emett (v2) Link: https://patchwork.freedesktop.org/patch/264607/ --- drivers/gpu/drm/v3d/v3d_irq.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/v3d/v3d_irq.c') diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index e07514eb11b5..dd7a7b0bd5a1 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -4,8 +4,8 @@ /** * DOC: Interrupt management for the V3D engine * - * When we take a binning or rendering flush done interrupt, we need - * to signal the fence for that job so that the scheduler can queue up + * When we take a bin, render, or TFU done interrupt, we need to + * signal the fence for that job so that the scheduler can queue up * the next one and unblock any waiters. * * When we take the binner out of memory interrupt, we need to @@ -23,7 +23,8 @@ #define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \ V3D_HUB_INT_MMU_PTI | \ - V3D_HUB_INT_MMU_CAP)) + V3D_HUB_INT_MMU_CAP | \ + V3D_HUB_INT_TFUC)) static void v3d_overflow_mem_work(struct work_struct *work) @@ -117,6 +118,11 @@ v3d_hub_irq(int irq, void *arg) /* Acknowledge the interrupts we're handling here. */ V3D_WRITE(V3D_HUB_INT_CLR, intsts); + if (intsts & V3D_HUB_INT_TFUC) { + dma_fence_signal(v3d->tfu_job->done_fence); + status = IRQ_HANDLED; + } + if (intsts & (V3D_HUB_INT_MMU_WRV | V3D_HUB_INT_MMU_PTI | V3D_HUB_INT_MMU_CAP)) { -- cgit v1.2.3 From 55a9b74846ed5e6219c7d81a8e1bf96f25d8ad5e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 30 Nov 2018 16:57:58 -0800 Subject: drm/v3d: Add more tracepoints for V3D GPU rendering. The core scheduler tells us when the job is pushed to the scheduler's queue, and I had the job_run functions saying when they actually queue the job to the hardware. By adding tracepoints for the very top of the ioctls and the IRQs signaling job completion, "perf record -a -e v3d:.\* -e gpu_scheduler:.\* ; perf script" gets you a pretty decent timeline. Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20181201005759.28093-5-eric@anholt.net Reviewed-by: Dave Emett --- drivers/gpu/drm/v3d/v3d_gem.c | 4 ++ drivers/gpu/drm/v3d/v3d_irq.c | 19 ++++++-- drivers/gpu/drm/v3d/v3d_trace.h | 101 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/v3d/v3d_irq.c') diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 8b4af512450f..f565b197cba9 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -521,6 +521,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_syncobj *sync_out; int ret = 0; + trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); + if (args->pad != 0) { DRM_INFO("pad must be zero: %d\n", args->pad); return -EINVAL; @@ -648,6 +650,8 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, int ret = 0; int bo_count; + trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia); + job = kcalloc(1, sizeof(*job), GFP_KERNEL); if (!job) return -ENOMEM; diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index dd7a7b0bd5a1..69338da70ddc 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -15,6 +15,7 @@ #include "v3d_drv.h" #include "v3d_regs.h" +#include "v3d_trace.h" #define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \ V3D_INT_FLDONE | \ @@ -88,12 +89,20 @@ v3d_irq(int irq, void *arg) } if (intsts & V3D_INT_FLDONE) { - dma_fence_signal(v3d->bin_job->bin.done_fence); + struct v3d_fence *fence = + to_v3d_fence(v3d->bin_job->bin.done_fence); + + trace_v3d_bcl_irq(&v3d->drm, fence->seqno); + dma_fence_signal(&fence->base); status = IRQ_HANDLED; } if (intsts & V3D_INT_FRDONE) { - dma_fence_signal(v3d->render_job->render.done_fence); + struct v3d_fence *fence = + to_v3d_fence(v3d->render_job->render.done_fence); + + trace_v3d_rcl_irq(&v3d->drm, fence->seqno); + dma_fence_signal(&fence->base); status = IRQ_HANDLED; } @@ -119,7 +128,11 @@ v3d_hub_irq(int irq, void *arg) V3D_WRITE(V3D_HUB_INT_CLR, intsts); if (intsts & V3D_HUB_INT_TFUC) { - dma_fence_signal(v3d->tfu_job->done_fence); + struct v3d_fence *fence = + to_v3d_fence(v3d->tfu_job->done_fence); + + trace_v3d_tfu_irq(&v3d->drm, fence->seqno); + dma_fence_signal(&fence->base); status = IRQ_HANDLED; } diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h index f54ed9cd3444..edd984afa33f 100644 --- a/drivers/gpu/drm/v3d/v3d_trace.h +++ b/drivers/gpu/drm/v3d/v3d_trace.h @@ -12,6 +12,28 @@ #define TRACE_SYSTEM v3d #define TRACE_INCLUDE_FILE v3d_trace +TRACE_EVENT(v3d_submit_cl_ioctl, + TP_PROTO(struct drm_device *dev, u32 ct1qba, u32 ct1qea), + TP_ARGS(dev, ct1qba, ct1qea), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, ct1qba) + __field(u32, ct1qea) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->ct1qba = ct1qba; + __entry->ct1qea = ct1qea; + ), + + TP_printk("dev=%u, RCL 0x%08x..0x%08x", + __entry->dev, + __entry->ct1qba, + __entry->ct1qea) +); + TRACE_EVENT(v3d_submit_cl, TP_PROTO(struct drm_device *dev, bool is_render, uint64_t seqno, @@ -42,6 +64,85 @@ TRACE_EVENT(v3d_submit_cl, __entry->ctnqea) ); +TRACE_EVENT(v3d_bcl_irq, + TP_PROTO(struct drm_device *dev, + uint64_t seqno), + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u64, seqno) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->seqno = seqno; + ), + + TP_printk("dev=%u, seqno=%llu", + __entry->dev, + __entry->seqno) +); + +TRACE_EVENT(v3d_rcl_irq, + TP_PROTO(struct drm_device *dev, + uint64_t seqno), + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u64, seqno) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->seqno = seqno; + ), + + TP_printk("dev=%u, seqno=%llu", + __entry->dev, + __entry->seqno) +); + +TRACE_EVENT(v3d_tfu_irq, + TP_PROTO(struct drm_device *dev, + uint64_t seqno), + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u64, seqno) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->seqno = seqno; + ), + + TP_printk("dev=%u, seqno=%llu", + __entry->dev, + __entry->seqno) +); + +TRACE_EVENT(v3d_submit_tfu_ioctl, + TP_PROTO(struct drm_device *dev, u32 iia), + TP_ARGS(dev, iia), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, iia) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->iia = iia; + ), + + TP_printk("dev=%u, IIA 0x%08x", + __entry->dev, + __entry->iia) +); + TRACE_EVENT(v3d_submit_tfu, TP_PROTO(struct drm_device *dev, uint64_t seqno), -- cgit v1.2.3