From f5617f9dde5ae2466560f7cb008c741e2b88adab Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 5 Nov 2015 11:41:50 +0800 Subject: drm/amd: add kmem cache for sched fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I45bb8ff10ef05dc3b15e31a77fbcf31117705f11 Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/scheduler/gpu_scheduler.h') diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 929e9aced041..4d05ca6fb057 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -30,6 +30,9 @@ struct amd_gpu_scheduler; struct amd_sched_rq; +extern struct kmem_cache *sched_fence_slab; +extern atomic_t sched_fence_slab_ref; + /** * A scheduler entity is a wrapper around a job queue or a group * of other entities. Entities take turns emitting jobs from their -- cgit v1.2.3 From e284022163716ecf11c37fd1057c35d689ef2c11 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 5 Nov 2015 19:49:48 +0100 Subject: drm/amdgpu: fix incorrect mutex usage v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this patch the scheduler fence was created when we push the job into the queue, so we could only get the fence after pushing it. The mutex now was necessary to prevent the thread pushing the jobs to the hardware from running faster than the thread pushing the jobs into the queue. Otherwise the thread pushing jobs into the queue would have accessed possible freed up memory when it tries to get a reference to the fence. So what you get in the end is thread A: mutex_lock(&job->lock); ... Kick of thread B. ... mutex_unlock(&job->lock); And thread B: mutex_lock(&job->lock); .... mutex_unlock(&job->lock); kfree(job); I'm actually not sure if I'm still up to date on this, but this usage pattern used to be not allowed with mutexes. See here as well https://lwn.net/Articles/575460/. v2: remove unrelated changes, fix missing owner v3: rebased, add more commit message Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 43 +++++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 27 +++++++---------- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 10 +------ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 +- 5 files changed, 37 insertions(+), 48 deletions(-) (limited to 'drivers/gpu/drm/amd/scheduler/gpu_scheduler.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7b02e3455172..0f187027c753 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1225,7 +1225,7 @@ struct amdgpu_job { struct amdgpu_device *adev; struct amdgpu_ib *ibs; uint32_t num_ibs; - struct mutex job_lock; + void *owner; struct amdgpu_user_fence uf; int (*free_job)(struct amdgpu_job *job); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2ae73d5232dd..44cf977ae4f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -845,8 +845,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; if (amdgpu_enable_scheduler && parser.num_ibs) { - struct amdgpu_job *job; struct amdgpu_ring * ring = parser.ibs->ring; + struct amd_sched_fence *fence; + struct amdgpu_job *job; job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); if (!job) { @@ -859,37 +860,41 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) job->adev = parser.adev; job->ibs = parser.ibs; job->num_ibs = parser.num_ibs; - job->base.owner = parser.filp; - mutex_init(&job->job_lock); + job->owner = parser.filp; + job->free_job = amdgpu_cs_free_job; + if (job->ibs[job->num_ibs - 1].user) { job->uf = parser.uf; job->ibs[job->num_ibs - 1].user = &job->uf; parser.uf.bo = NULL; } - parser.ibs = NULL; - parser.num_ibs = 0; - - job->free_job = amdgpu_cs_free_job; - mutex_lock(&job->job_lock); - r = amd_sched_entity_push_job(&job->base); - if (r) { - mutex_unlock(&job->job_lock); + fence = amd_sched_fence_create(job->base.s_entity, + parser.filp); + if (!fence) { + r = -ENOMEM; amdgpu_cs_free_job(job); kfree(job); goto out; } - cs->out.handle = - amdgpu_ctx_add_fence(parser.ctx, ring, - &job->base.s_fence->base); + job->base.s_fence = fence; + fence_get(&fence->base); + + cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring, + &fence->base); job->ibs[job->num_ibs - 1].sequence = cs->out.handle; - list_sort(NULL, &parser.validated, cmp_size_smaller_first); - ttm_eu_fence_buffer_objects(&parser.ticket, - &parser.validated, - &job->base.s_fence->base); + parser.ibs = NULL; + parser.num_ibs = 0; + trace_amdgpu_cs_ioctl(job); - mutex_unlock(&job->job_lock); + amd_sched_entity_push_job(&job->base); + + list_sort(NULL, &parser.validated, cmp_size_smaller_first); + ttm_eu_fence_buffer_objects(&parser.ticket, &parser.validated, + &fence->base); + fence_put(&fence->base); + amdgpu_cs_parser_fini_late(&parser); mutex_unlock(&vm->mutex); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 8ef9e4415fcc..438c05254695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -45,12 +45,8 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job) return NULL; } job = to_amdgpu_job(sched_job); - mutex_lock(&job->job_lock); trace_amdgpu_sched_run_job(job); - r = amdgpu_ib_schedule(job->adev, - job->num_ibs, - job->ibs, - job->base.owner); + r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner); if (r) { DRM_ERROR("Error scheduling IBs (%d)\n", r); goto err; @@ -63,7 +59,6 @@ err: if (job->free_job) job->free_job(job); - mutex_unlock(&job->job_lock); kfree(job); return fence ? &fence->base : NULL; } @@ -89,21 +84,19 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return -ENOMEM; job->base.sched = &ring->sched; job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; + job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner); + if (!job->base.s_fence) { + kfree(job); + return -ENOMEM; + } + *f = fence_get(&job->base.s_fence->base); + job->adev = adev; job->ibs = ibs; job->num_ibs = num_ibs; - job->base.owner = owner; - mutex_init(&job->job_lock); + job->owner = owner; job->free_job = free_job; - mutex_lock(&job->job_lock); - r = amd_sched_entity_push_job(&job->base); - if (r) { - mutex_unlock(&job->job_lock); - kfree(job); - return r; - } - *f = fence_get(&job->base.s_fence->base); - mutex_unlock(&job->job_lock); + amd_sched_entity_push_job(&job->base); } else { r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); if (r) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index ccb7c1554f5e..ea30d6ad4c13 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -276,21 +276,13 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) * * Returns 0 for success, negative error code otherwise. */ -int amd_sched_entity_push_job(struct amd_sched_job *sched_job) +void amd_sched_entity_push_job(struct amd_sched_job *sched_job) { struct amd_sched_entity *entity = sched_job->s_entity; - struct amd_sched_fence *fence = amd_sched_fence_create( - entity, sched_job->owner); - - if (!fence) - return -ENOMEM; - - sched_job->s_fence = fence; wait_event(entity->sched->job_scheduled, amd_sched_entity_in(sched_job)); trace_amd_sched_job(sched_job); - return 0; } /** diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 4d05ca6fb057..939692b14f4b 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -79,7 +79,6 @@ struct amd_sched_job { struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; struct amd_sched_fence *s_fence; - void *owner; }; extern const struct fence_ops amd_sched_fence_ops; @@ -131,7 +130,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, uint32_t jobs); void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity); -int amd_sched_entity_push_job(struct amd_sched_job *sched_job); +void amd_sched_entity_push_job(struct amd_sched_job *sched_job); struct amd_sched_fence *amd_sched_fence_create( struct amd_sched_entity *s_entity, void *owner); -- cgit v1.2.3 From 393a0bd4376a8ecdde079344681a014ec3eb1291 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 5 Nov 2015 12:57:10 +0100 Subject: drm/amdgpu: optimize scheduler fence handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only need to wait for jobs to be scheduled when the dependency is from the same scheduler. Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 51 ++++++++++++++++++++------- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 5 ++- drivers/gpu/drm/amd/scheduler/sched_fence.c | 13 +++++++ 3 files changed, 55 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/scheduler/gpu_scheduler.h') diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index ea30d6ad4c13..b7cd108212c2 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -211,6 +211,41 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb) amd_sched_wakeup(entity->sched); } +static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) +{ + struct amd_gpu_scheduler *sched = entity->sched; + struct fence * fence = entity->dependency; + struct amd_sched_fence *s_fence; + + if (fence->context == entity->fence_context) { + /* We can ignore fences from ourself */ + fence_put(entity->dependency); + return false; + } + + s_fence = to_amd_sched_fence(fence); + if (s_fence && s_fence->sched == sched) { + /* Fence is from the same scheduler */ + if (test_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &fence->flags)) { + /* Ignore it when it is already scheduled */ + fence_put(entity->dependency); + return false; + } + + /* Wait for fence to be scheduled */ + entity->cb.func = amd_sched_entity_wakeup; + list_add_tail(&entity->cb.node, &s_fence->scheduled_cb); + return true; + } + + if (!fence_add_callback(entity->dependency, &entity->cb, + amd_sched_entity_wakeup)) + return true; + + fence_put(entity->dependency); + return false; +} + static struct amd_sched_job * amd_sched_entity_pop_job(struct amd_sched_entity *entity) { @@ -223,20 +258,9 @@ amd_sched_entity_pop_job(struct amd_sched_entity *entity) if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job))) return NULL; - while ((entity->dependency = sched->ops->dependency(sched_job))) { - - if (entity->dependency->context == entity->fence_context) { - /* We can ignore fences from ourself */ - fence_put(entity->dependency); - continue; - } - - if (fence_add_callback(entity->dependency, &entity->cb, - amd_sched_entity_wakeup)) - fence_put(entity->dependency); - else + while ((entity->dependency = sched->ops->dependency(sched_job))) + if (amd_sched_entity_add_dependency_cb(entity)) return NULL; - } return sched_job; } @@ -400,6 +424,7 @@ static int amd_sched_main(void *param) atomic_inc(&sched->hw_rq_count); fence = sched->ops->run_job(sched_job); + amd_sched_fence_scheduled(s_fence); if (fence) { r = fence_add_callback(fence, &s_fence->cb, amd_sched_process_job); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 939692b14f4b..a0f0ae53aacd 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -27,6 +27,8 @@ #include #include +#define AMD_SCHED_FENCE_SCHEDULED_BIT FENCE_FLAG_USER_BITS + struct amd_gpu_scheduler; struct amd_sched_rq; @@ -68,6 +70,7 @@ struct amd_sched_rq { struct amd_sched_fence { struct fence base; struct fence_cb cb; + struct list_head scheduled_cb; struct amd_gpu_scheduler *sched; spinlock_t lock; void *owner; @@ -134,7 +137,7 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job); struct amd_sched_fence *amd_sched_fence_create( struct amd_sched_entity *s_entity, void *owner); +void amd_sched_fence_scheduled(struct amd_sched_fence *fence); void amd_sched_fence_signal(struct amd_sched_fence *fence); - #endif diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index 8d2130b9ff05..87c78eecea64 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -35,6 +35,8 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL); if (fence == NULL) return NULL; + + INIT_LIST_HEAD(&fence->scheduled_cb); fence->owner = owner; fence->sched = s_entity->sched; spin_lock_init(&fence->lock); @@ -55,6 +57,17 @@ void amd_sched_fence_signal(struct amd_sched_fence *fence) FENCE_TRACE(&fence->base, "was already signaled\n"); } +void amd_sched_fence_scheduled(struct amd_sched_fence *s_fence) +{ + struct fence_cb *cur, *tmp; + + set_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &s_fence->base.flags); + list_for_each_entry_safe(cur, tmp, &s_fence->scheduled_cb, node) { + list_del_init(&cur->node); + cur->func(&s_fence->base, cur); + } +} + static const char *amd_sched_fence_get_driver_name(struct fence *fence) { return "amd_sched"; -- cgit v1.2.3