Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_gem.c')
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 233
1 file changed, 188 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index e9c381c42139..d0c6bfb68c4e 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -55,7 +55,7 @@ vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
         unsigned int i;
 
         for (i = 0; i < state->user_state.bo_count; i++)
-                drm_gem_object_unreference_unlocked(state->bo[i]);
+                drm_gem_object_put_unlocked(state->bo[i]);
 
         kfree(state);
 }
@@ -111,26 +111,29 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
                                             &handle);
 
                 if (ret) {
-                        state->bo_count = i - 1;
-                        goto err;
+                        state->bo_count = i;
+                        goto err_delete_handle;
                 }
                 bo_state[i].handle = handle;
                 bo_state[i].paddr = vc4_bo->base.paddr;
                 bo_state[i].size = vc4_bo->base.base.size;
         }
 
-        if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
+        if (copy_to_user(u64_to_user_ptr(get_state->bo),
                          bo_state,
                          state->bo_count * sizeof(*bo_state)))
                 ret = -EFAULT;
 
-        kfree(bo_state);
+err_delete_handle:
+        if (ret) {
+                for (i = 0; i < state->bo_count; i++)
+                        drm_gem_handle_delete(file_priv, bo_state[i].handle);
+        }
 
 err_free:
-
         vc4_free_hang_state(dev, kernel_state);
+        kfree(bo_state);
 
-err:
         return ret;
 }
 
@@ -185,12 +188,12 @@ vc4_save_hang_state(struct drm_device *dev)
                         continue;
 
                 for (j = 0; j < exec[i]->bo_count; j++) {
-                        drm_gem_object_reference(&exec[i]->bo[j]->base);
+                        drm_gem_object_get(&exec[i]->bo[j]->base);
                         kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
                 }
 
                 list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
-                        drm_gem_object_reference(&bo->base.base);
+                        drm_gem_object_get(&bo->base.base);
                         kernel_state->bo[j + prev_idx] = &bo->base.base;
                         j++;
                 }
@@ -463,6 +466,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
         for (i = 0; i < exec->bo_count; i++) {
                 bo = to_vc4_bo(&exec->bo[i]->base);
                 bo->seqno = seqno;
+
+                reservation_object_add_shared_fence(bo->resv, exec->fence);
         }
 
         list_for_each_entry(bo, &exec->unref_list, unref_head) {
@@ -472,9 +477,105 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
         for (i = 0; i < exec->rcl_write_bo_count; i++) {
                 bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
                 bo->write_seqno = seqno;
+
+                reservation_object_add_excl_fence(bo->resv, exec->fence);
         }
 }
 
+static void
+vc4_unlock_bo_reservations(struct drm_device *dev,
+                           struct vc4_exec_info *exec,
+                           struct ww_acquire_ctx *acquire_ctx)
+{
+        int i;
+
+        for (i = 0; i < exec->bo_count; i++) {
+                struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+
+                ww_mutex_unlock(&bo->resv->lock);
+        }
+
+        ww_acquire_fini(acquire_ctx);
+}
+
+/* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+ * We don't lock the RCL the tile alloc/state BOs, or overflow memory
+ * (all of which are on exec->unref_list).  They're entirely private
+ * to vc4, so we don't attach dma-buf fences to them.
+ */
+static int
+vc4_lock_bo_reservations(struct drm_device *dev,
+                         struct vc4_exec_info *exec,
+                         struct ww_acquire_ctx *acquire_ctx)
+{
+        int contended_lock = -1;
+        int i, ret;
+        struct vc4_bo *bo;
+
+        ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
+retry:
+        if (contended_lock != -1) {
+                bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+                ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+                                                       acquire_ctx);
+                if (ret) {
+                        ww_acquire_done(acquire_ctx);
+                        return ret;
+                }
+        }
+
+        for (i = 0; i < exec->bo_count; i++) {
+                if (i == contended_lock)
+                        continue;
+
+                bo = to_vc4_bo(&exec->bo[i]->base);
+
+                ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
+                if (ret) {
+                        int j;
+
+                        for (j = 0; j < i; j++) {
+                                bo = to_vc4_bo(&exec->bo[j]->base);
+                                ww_mutex_unlock(&bo->resv->lock);
+                        }
+
+                        if (contended_lock != -1 && contended_lock >= i) {
+                                bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+
+                                ww_mutex_unlock(&bo->resv->lock);
+                        }
+
+                        if (ret == -EDEADLK) {
+                                contended_lock = i;
+                                goto retry;
+                        }
+
+                        ww_acquire_done(acquire_ctx);
+                        return ret;
+                }
+        }
+
+        ww_acquire_done(acquire_ctx);
+
+        /* Reserve space for our shared (read-only) fence references,
+         * before we commit the CL to the hardware.
+         */
+        for (i = 0; i < exec->bo_count; i++) {
+                bo = to_vc4_bo(&exec->bo[i]->base);
+
+                ret = reservation_object_reserve_shared(bo->resv);
+                if (ret) {
+                        vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+                        return ret;
+                }
+        }
+
+        return 0;
+}
+
 /* Queues a struct vc4_exec_info for execution.  If no job is
  * currently executing, then submits it.
  *
@@ -484,19 +585,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
  * then bump the end address.  That's a change for a later date,
  * though.
  */
-static void
-vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
+static int
+vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
+                 struct ww_acquire_ctx *acquire_ctx)
 {
         struct vc4_dev *vc4 = to_vc4_dev(dev);
         uint64_t seqno;
         unsigned long irqflags;
+        struct vc4_fence *fence;
+
+        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+        if (!fence)
+                return -ENOMEM;
+        fence->dev = dev;
 
         spin_lock_irqsave(&vc4->job_lock, irqflags);
 
         seqno = ++vc4->emit_seqno;
         exec->seqno = seqno;
+
+        dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
+                       vc4->dma_fence_context, exec->seqno);
+        fence->seqno = exec->seqno;
+        exec->fence = &fence->base;
+
         vc4_update_bo_seqnos(exec, seqno);
 
+        vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+
         list_add_tail(&exec->head, &vc4->bin_job_list);
 
         /* If no job was executing, kick ours off.  Otherwise, it'll
@@ -509,6 +625,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
         }
 
         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+        return 0;
 }
 
 /**
@@ -541,26 +659,26 @@ vc4_cl_lookup_bos(struct drm_device *dev,
                 /* See comment on bo_index for why we have to check
                  * this.
                  */
-                DRM_ERROR("Rendering requires BOs to validate\n");
+                DRM_DEBUG("Rendering requires BOs to validate\n");
                 return -EINVAL;
         }
 
-        exec->bo = drm_calloc_large(exec->bo_count,
-                                    sizeof(struct drm_gem_cma_object *));
+        exec->bo = kvmalloc_array(exec->bo_count,
+                                  sizeof(struct drm_gem_cma_object *),
+                                  GFP_KERNEL | __GFP_ZERO);
         if (!exec->bo) {
                 DRM_ERROR("Failed to allocate validated BO pointers\n");
                 return -ENOMEM;
         }
 
-        handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
+        handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL);
         if (!handles) {
                 ret = -ENOMEM;
                 DRM_ERROR("Failed to allocate incoming GEM handles\n");
                 goto fail;
         }
 
-        if (copy_from_user(handles,
-                           (void __user *)(uintptr_t)args->bo_handles,
+        if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles),
                            exec->bo_count * sizeof(uint32_t))) {
                 ret = -EFAULT;
                 DRM_ERROR("Failed to copy in GEM handles\n");
@@ -572,19 +690,19 @@ vc4_cl_lookup_bos(struct drm_device *dev,
                 struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
                                                      handles[i]);
                 if (!bo) {
-                        DRM_ERROR("Failed to look up GEM BO %d: %d\n",
+                        DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
                                   i, handles[i]);
                         ret = -EINVAL;
                         spin_unlock(&file_priv->table_lock);
                         goto fail;
                 }
-                drm_gem_object_reference(bo);
+                drm_gem_object_get(bo);
                 exec->bo[i] = (struct drm_gem_cma_object *)bo;
         }
         spin_unlock(&file_priv->table_lock);
 
 fail:
-        drm_free_large(handles);
+        kvfree(handles);
         return ret;
 }
 
@@ -610,7 +728,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
             args->shader_rec_count >= (UINT_MAX /
                                           sizeof(struct vc4_shader_state)) ||
             temp_size < exec_size) {
-                DRM_ERROR("overflow in exec arguments\n");
+                DRM_DEBUG("overflow in exec arguments\n");
                 ret = -EINVAL;
                 goto fail;
         }
@@ -622,7 +740,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
          * read the contents back for validation, and I think the
          * bo->vaddr is uncached access.
          */
-        temp = drm_malloc_ab(temp_size, 1);
+        temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
         if (!temp) {
                 DRM_ERROR("Failed to allocate storage for copying "
                           "in bin/render CLs.\n");
@@ -636,27 +754,27 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
         exec->shader_state_size = args->shader_rec_count;
 
         if (copy_from_user(bin,
-                           (void __user *)(uintptr_t)args->bin_cl,
+                           u64_to_user_ptr(args->bin_cl),
                            args->bin_cl_size)) {
                 ret = -EFAULT;
                 goto fail;
         }
 
         if (copy_from_user(exec->shader_rec_u,
-                           (void __user *)(uintptr_t)args->shader_rec,
+                           u64_to_user_ptr(args->shader_rec),
                            args->shader_rec_size)) {
                 ret = -EFAULT;
                 goto fail;
         }
 
         if (copy_from_user(exec->uniforms_u,
-                           (void __user *)(uintptr_t)args->uniforms,
+                           u64_to_user_ptr(args->uniforms),
                            args->uniforms_size)) {
                 ret = -EFAULT;
                 goto fail;
         }
 
-        bo = vc4_bo_create(dev, exec_size, true);
+        bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
         if (IS_ERR(bo)) {
                 DRM_ERROR("Couldn't allocate BO for binning\n");
                 ret = PTR_ERR(bo);
@@ -697,7 +815,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
         ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
 
 fail:
-        drm_free_large(temp);
+        kvfree(temp);
         return ret;
 }
 
@@ -705,21 +823,33 @@ static void
 vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 {
         struct vc4_dev *vc4 = to_vc4_dev(dev);
+        unsigned long irqflags;
         unsigned i;
 
+        /* If we got force-completed because of GPU reset rather than
+         * through our IRQ handler, signal the fence now.
+         */
+        if (exec->fence)
+                dma_fence_signal(exec->fence);
+
         if (exec->bo) {
                 for (i = 0; i < exec->bo_count; i++)
-                        drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
-                drm_free_large(exec->bo);
+                        drm_gem_object_put_unlocked(&exec->bo[i]->base);
+                kvfree(exec->bo);
         }
 
         while (!list_empty(&exec->unref_list)) {
                 struct vc4_bo *bo = list_first_entry(&exec->unref_list,
                                                      struct vc4_bo, unref_head);
                 list_del(&bo->unref_head);
-                drm_gem_object_unreference_unlocked(&bo->base.base);
+                drm_gem_object_put_unlocked(&bo->base.base);
         }
 
+        /* Free up the allocation of any bin slots we used. */
+        spin_lock_irqsave(&vc4->job_lock, irqflags);
+        vc4->bin_alloc_used &= ~exec->bin_slots;
+        spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
         mutex_lock(&vc4->power_lock);
         if (--vc4->power_refcount == 0) {
                 pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
@@ -843,7 +973,7 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
         gem_obj = drm_gem_object_lookup(file_priv, args->handle);
         if (!gem_obj) {
-                DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+                DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
                 return -EINVAL;
         }
         bo = to_vc4_bo(gem_obj);
 
@@ -851,7 +981,7 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
         ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
                                               &args->timeout_ns);
 
-        drm_gem_object_unreference_unlocked(gem_obj);
+        drm_gem_object_put_unlocked(gem_obj);
         return ret;
 }
 
@@ -874,10 +1004,14 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
         struct vc4_dev *vc4 = to_vc4_dev(dev);
         struct drm_vc4_submit_cl *args = data;
         struct vc4_exec_info *exec;
+        struct ww_acquire_ctx acquire_ctx;
         int ret = 0;
 
-        if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
-                DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
+        if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
+                             VC4_SUBMIT_CL_FIXED_RCL_ORDER |
+                             VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
+                             VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
+                DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
                 return -EINVAL;
         }
 
@@ -888,13 +1022,16 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
         }
 
         mutex_lock(&vc4->power_lock);
-        if (vc4->power_refcount++ == 0)
+        if (vc4->power_refcount++ == 0) {
                 ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-        mutex_unlock(&vc4->power_lock);
-        if (ret < 0) {
-                kfree(exec);
-                return ret;
+                if (ret < 0) {
+                        mutex_unlock(&vc4->power_lock);
+                        vc4->power_refcount--;
+                        kfree(exec);
+                        return ret;
+                }
         }
+        mutex_unlock(&vc4->power_lock);
 
         exec->args = args;
         INIT_LIST_HEAD(&exec->unref_list);
@@ -916,12 +1053,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
         if (ret)
                 goto fail;
 
+        ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
+        if (ret)
+                goto fail;
+
         /* Clear this out of the struct we'll be putting in the queue,
          * since it's part of our stack.
          */
         exec->args = NULL;
 
-        vc4_queue_submit(dev, exec);
+        ret = vc4_queue_submit(dev, exec, &acquire_ctx);
+        if (ret)
+                goto fail;
 
         /* Return the seqno for our job. */
         args->seqno = vc4->emit_seqno;
@@ -939,6 +1082,8 @@ vc4_gem_init(struct drm_device *dev)
 {
         struct vc4_dev *vc4 = to_vc4_dev(dev);
 
+        vc4->dma_fence_context = dma_fence_context_alloc(1);
+
         INIT_LIST_HEAD(&vc4->bin_job_list);
         INIT_LIST_HEAD(&vc4->render_job_list);
         INIT_LIST_HEAD(&vc4->job_done_list);
@@ -968,13 +1113,11 @@ vc4_gem_destroy(struct drm_device *dev)
         /* V3D should already have disabled its interrupt and cleared
          * the overflow allocation registers.  Now free the object.
          */
-        if (vc4->overflow_mem) {
-                drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
-                vc4->overflow_mem = NULL;
+        if (vc4->bin_bo) {
+                drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
+                vc4->bin_bo = NULL;
         }
 
         if (vc4->hang_state)
                 vc4_free_hang_state(dev, vc4->hang_state);
-
-        vc4_bo_cache_destroy(dev);
 }
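
The heart of this diff is the dma-buf fencing support: before a job is queued, vc4_lock_bo_reservations() takes every referenced BO's reservation lock using the wound/wait ("ww") mutex class shared by all DRM drivers, backing off and retrying from the contended lock when it loses a deadlock race. The sketch below condenses that pattern outside of the vc4 structures; the helper name lock_resvs and the bare array of reservation objects are illustrative only, and it assumes the 4.13-era reservation API used in the diff (reservation_object_reserve_shared() still takes a single argument).

/* Hypothetical, condensed sketch of the wound/wait locking loop used by
 * vc4_lock_bo_reservations(); not part of the patch itself.
 */
#include <linux/reservation.h>
#include <linux/ww_mutex.h>

static int lock_resvs(struct reservation_object **resvs, int count,
                      struct ww_acquire_ctx *ctx)
{
        int contended = -1;     /* index of the lock we lost to another ctx */
        int i, ret;

        ww_acquire_init(ctx, &reservation_ww_class);
retry:
        if (contended != -1) {
                /* Slow path: sleep until the winner releases the lock we
                 * backed off from, then take it first on the next pass.
                 */
                ret = ww_mutex_lock_slow_interruptible(&resvs[contended]->lock,
                                                       ctx);
                if (ret)
                        goto err_fini;
        }

        for (i = 0; i < count; i++) {
                if (i == contended)
                        continue;

                ret = ww_mutex_lock_interruptible(&resvs[i]->lock, ctx);
                if (ret) {
                        int j;

                        /* Drop everything we hold before retrying or
                         * erroring out, including a previously contended
                         * lock that the loop above skipped.
                         */
                        for (j = 0; j < i; j++)
                                ww_mutex_unlock(&resvs[j]->lock);
                        if (contended >= i)
                                ww_mutex_unlock(&resvs[contended]->lock);

                        if (ret == -EDEADLK) {
                                contended = i;
                                goto retry;
                        }
                        goto err_fini;
                }
        }

        ww_acquire_done(ctx);
        return 0;

err_fini:
        ww_acquire_fini(ctx);
        return ret;
}

Once all the locks are held, the driver reserves a shared-fence slot on each object with reservation_object_reserve_shared(), so that attaching the fences in vc4_update_bo_seqnos() cannot fail later, under the job spinlock, at submit time.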
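The other half of the change is the fence lifecycle: vc4_queue_submit() now allocates a struct vc4_fence per job, initializes it on the per-device timeline created by dma_fence_context_alloc() in vc4_gem_init(), attaches it to the BO reservation objects, and the fence is signalled either from the IRQ path or, as the new comment in vc4_complete_exec() notes, when a GPU reset force-completes the job. vc4_fence_ops itself lives in vc4_fence.c rather than in this diff; the following is a generic, hypothetical sketch of that dma_fence lifecycle (all demo_* names are made up), not the driver's actual implementation.

#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Lock protecting the fence's signalling state; dma_fence_init() stores a
 * pointer to it, so it must outlive the fences.
 */
static DEFINE_SPINLOCK(demo_fence_lock);

static const char *demo_get_driver_name(struct dma_fence *fence)
{
        return "demo";
}

static const char *demo_get_timeline_name(struct dma_fence *fence)
{
        return "demo-jobs";
}

static bool demo_enable_signaling(struct dma_fence *fence)
{
        /* Nothing extra to arm here; the job completion IRQ always fires. */
        return true;
}

static const struct dma_fence_ops demo_fence_ops = {
        .get_driver_name = demo_get_driver_name,
        .get_timeline_name = demo_get_timeline_name,
        .enable_signaling = demo_enable_signaling,
        .wait = dma_fence_default_wait,
        .release = dma_fence_free,
};

/* One timeline ("context") per device, one seqno per submitted job. */
static struct dma_fence *demo_create_job_fence(u64 context, u32 seqno)
{
        struct dma_fence *fence = kzalloc(sizeof(*fence), GFP_KERNEL);

        if (!fence)
                return NULL;

        dma_fence_init(fence, &demo_fence_ops, &demo_fence_lock,
                       context, seqno);
        return fence;
}

The completion path then calls dma_fence_signal() on the job's fence; the check added in vc4_complete_exec() covers the case where a GPU reset completed the job without the IRQ handler ever running.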