From 00f06b246a3056bbaa901a90a5a93c9f81ab8e36 Mon Sep 17 00:00:00 2001 From: John Brooks Date: Tue, 27 Jun 2017 22:33:18 -0400 Subject: drm/amdgpu: Throttle visible VRAM moves separately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BO move throttling code is designed to allow VRAM to fill quickly if it is relatively empty. However, this does not take into account situations where the visible VRAM is smaller than total VRAM, and total VRAM may not be close to full but the visible VRAM segment is under pressure. In such situations, visible VRAM would experience unrestricted swapping and performance would drop. Add a separate counter specifically for moves involving visible VRAM, and check it before moving BOs there. v2: Only perform calculations for separate counter if visible VRAM is smaller than total VRAM. (Michel Dänzer) v3: [Michel Dänzer] * Use BO's location rather than the AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag to determine whether to account a move for visible VRAM in most cases. * Use a single if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { block in amdgpu_cs_get_threshold_for_moves. Fixes: 95844d20ae02 (drm/amdgpu: throttle buffer migrations at CS using a fixed MBps limit (v2)) Signed-off-by: John Brooks Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 92 +++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5599c01b265d..33789510e663 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes) * ticks. The accumulated microseconds (us) are converted to bytes and * returned. 
*/ -static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) +static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, + u64 *max_bytes, + u64 *max_vis_bytes) { s64 time_us, increment_us; - u64 max_bytes; u64 free_vram, total_vram, used_vram; /* Allow a maximum of 200 accumulated ms. This is basically per-IB @@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) */ const s64 us_upper_bound = 200000; - if (!adev->mm_stats.log2_max_MBps) - return 0; + if (!adev->mm_stats.log2_max_MBps) { + *max_bytes = 0; + *max_vis_bytes = 0; + return; + } total_vram = adev->mc.real_vram_size - adev->vram_pin_size; used_vram = atomic64_read(&adev->vram_usage); @@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us); } - /* This returns 0 if the driver is in debt to disallow (optional) + /* This is set to 0 if the driver is in debt to disallow (optional) * buffer moves. */ - max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + + /* Do the same for visible VRAM if half of it is free */ + if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { + u64 total_vis_vram = adev->mc.visible_vram_size; + u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage); + + if (used_vis_vram < total_vis_vram) { + u64 free_vis_vram = total_vis_vram - used_vis_vram; + adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis + + increment_us, us_upper_bound); + + if (free_vis_vram >= total_vis_vram / 2) + adev->mm_stats.accum_us_vis = + max(bytes_to_us(adev, free_vis_vram / 2), + adev->mm_stats.accum_us_vis); + } + + *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis); + } else { + *max_vis_bytes = 0; + } spin_unlock(&adev->mm_stats.lock); - return max_bytes; } /* Report how many bytes have really been moved for the last command * submission. 
This can result in a debt that can stop buffer migrations * temporarily. */ -void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes) +void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, + u64 num_vis_bytes) { spin_lock(&adev->mm_stats.lock); adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes); + adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes); spin_unlock(&adev->mm_stats.lock); } @@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; uint32_t domain; int r; @@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, /* Don't move this buffer if we have depleted our allowance * to move it. Don't move anything if the threshold is zero. */ - if (p->bytes_moved < p->bytes_moved_threshold) - domain = bo->prefered_domains; - else + if (p->bytes_moved < p->bytes_moved_threshold) { + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { + /* And don't move a CPU_ACCESS_REQUIRED BO to limited + * visible VRAM if we've depleted our allowance to do + * that. 
+ */ + if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) + domain = bo->prefered_domains; + else + domain = bo->allowed_domains; + } else { + domain = bo->prefered_domains; + } + } else { domain = bo->allowed_domains; + } retry: amdgpu_ttm_placement_from_domain(bo, domain); initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - + initial_bytes_moved; + p->bytes_moved += bytes_moved; + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + p->bytes_moved_vis += bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { domain = bo->allowed_domains; @@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *candidate = p->evictable; struct amdgpu_bo *bo = candidate->robj; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; + bool update_bytes_moved_vis; uint32_t other; /* If we reached our current BO we can forget it */ @@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, /* Good we can try to move this BO somewhere else */ amdgpu_ttm_placement_from_domain(bo, other); + update_bytes_moved_vis = + adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT; initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; + p->bytes_moved += bytes_moved; + if (update_bytes_moved_vis) + 
p->bytes_moved_vis += bytes_moved; if (unlikely(r)) break; @@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, list_splice(&need_pages, &p->validated); } - p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev); + amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, + &p->bytes_moved_vis_threshold); p->bytes_moved = 0; + p->bytes_moved_vis = 0; p->evictable = list_last_entry(&p->validated, struct amdgpu_bo_list_entry, tv.head); @@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_validate; } - amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved); - + amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, + p->bytes_moved_vis); fpriv->vm.last_eviction_counter = atomic64_read(&p->adev->num_evictions); -- cgit v1.2.3 From 7ecc245a8ce32ffcaa6a3e5795e0b14db8e076fc Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 26 Jul 2017 17:02:52 +0200 Subject: drm/amdgpu: consistent use u64_to_user_ptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of open coding the conversion from u64 to pointers. 
Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index d324e1c24028..ccd7697792ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -270,7 +270,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_bo_list *args = data; uint32_t handle = args->in.list_handle; - const void __user *uptr = (const void*)(uintptr_t)args->in.bo_info_ptr; + const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr); struct drm_amdgpu_bo_list_entry *info; struct amdgpu_bo_list *list; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 33789510e663..381b4f99648d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -90,7 +90,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } /* get chunks */ - chunk_array_user = (uint64_t __user *)(uintptr_t)(cs->in.chunks); + chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; @@ -110,7 +110,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; - chunk_ptr = (void __user *)(uintptr_t)chunk_array[i]; + chunk_ptr = u64_to_user_ptr(chunk_array[i]); if (copy_from_user(&user_chunk, chunk_ptr, sizeof(struct drm_amdgpu_cs_chunk))) { ret = -EFAULT; @@ -121,7 +121,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, 
void *data) p->chunks[i].length_dw = user_chunk.length_dw; size = p->chunks[i].length_dw; - cdata = (void __user *)(uintptr_t)user_chunk.chunk_data; + cdata = u64_to_user_ptr(user_chunk.chunk_data); p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); if (p->chunks[i].kdata == NULL) { @@ -1437,7 +1437,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, if (fences == NULL) return -ENOMEM; - fences_user = (void __user *)(uintptr_t)(wait->in.fences); + fences_user = u64_to_user_ptr(wait->in.fences); if (copy_from_user(fences, fences_user, sizeof(struct drm_amdgpu_fence) * fence_count)) { r = -EFAULT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 917ac5e074a0..88085e7f72de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -689,7 +689,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, switch (args->op) { case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: { struct drm_amdgpu_gem_create_in info; - void __user *out = (void __user *)(uintptr_t)args->value; + void __user *out = u64_to_user_ptr(args->value); info.bo_size = robj->gem_base.size; info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; -- cgit v1.2.3 From f62facc2eb78ebbedae70e394ac034ab9407a244 Mon Sep 17 00:00:00 2001 From: Cihangir Akturk Date: Thu, 3 Aug 2017 14:58:16 +0300 Subject: drm/amdgpu: switch to drm_*{get,put} helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_*_reference() and drm_*_unreference() functions are just compatibility alias for drm_*_get() and drm_*_put() and should not be used by new code. So convert all users of compatibility functions to use the new APIs. 
Reviewed-by: Christian König Signed-off-by: Cihangir Akturk Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 22 +++++++++++----------- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 6 +++--- 9 files changed, 30 insertions(+), 30 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index ccd7697792ab..75c6107eaccf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -136,7 +136,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, } bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); if (usermm) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 381b4f99648d..da8209945ef2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -54,7 +54,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, *offset = data->offset; - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { amdgpu_bo_unref(&p->uf_entry.robj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index cdf2ab20166a..6ad243293a78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -482,7 +482,7 @@ static void amdgpu_user_framebuffer_destroy(struct 
drm_framebuffer *fb) { struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); - drm_gem_object_unreference_unlocked(amdgpu_fb->obj); + drm_gem_object_put_unlocked(amdgpu_fb->obj); drm_framebuffer_cleanup(fb); kfree(amdgpu_fb); } @@ -542,14 +542,14 @@ amdgpu_user_framebuffer_create(struct drm_device *dev, amdgpu_fb = kzalloc(sizeof(*amdgpu_fb), GFP_KERNEL); if (amdgpu_fb == NULL) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ERR_PTR(-ENOMEM); } ret = amdgpu_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj); if (ret) { kfree(amdgpu_fb); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index cd95574705ee..9afa9c097e1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -118,7 +118,7 @@ static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj) amdgpu_bo_unpin(abo); amdgpu_bo_unreserve(abo); } - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); } static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, @@ -280,7 +280,7 @@ out: } if (fb && ret) { - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); drm_framebuffer_unregister_private(fb); drm_framebuffer_cleanup(fb); kfree(fb); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index a227d34d3852..86a88a025b17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -91,7 +91,7 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev) spin_lock(&file->table_lock); idr_for_each_entry(&file->object_idr, gobj, handle) { WARN_ONCE(1, "And also active allocations!\n"); - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); } idr_destroy(&file->object_idr); 
spin_unlock(&file->table_lock); @@ -263,7 +263,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (r) return r; @@ -341,7 +341,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (r) return r; @@ -355,7 +355,7 @@ unlock_mmap_sem: up_read(&current->mm->mmap_sem); release_object: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -374,11 +374,11 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp, robj = gem_to_amdgpu_bo(gobj); if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) || (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return -EPERM; } *offset_p = amdgpu_bo_mmap_offset(robj); - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return 0; } @@ -448,7 +448,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, } else r = ret; - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -491,7 +491,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, unreserve: amdgpu_bo_unreserve(robj); out: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -664,7 +664,7 @@ error_backoff: ttm_eu_backoff_reservation(&ticket, &list); error_unref: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -726,7 +726,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, } out: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return
r; } @@ -754,7 +754,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, r = drm_gem_handle_create(file_priv, gobj, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (r) { return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 490e84944851..4e519dc42916 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2431,7 +2431,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2439,7 +2439,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2473,7 +2473,7 @@ unpin: amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 921c6f772f11..11edc75edaa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2506,7 +2506,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2514,7 +2514,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + 
drm_gem_object_put_unlocked(obj); return ret; } @@ -2548,7 +2548,7 @@ unpin: amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 8ad0a659f789..a51e35f824a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2322,7 +2322,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2330,7 +2330,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2364,7 +2364,7 @@ unpin: amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 609438fe8584..9cf14b8b2db9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2335,7 +2335,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2343,7 +2343,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return 
ret; } @@ -2377,7 +2377,7 @@ unpin: amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; -- cgit v1.2.3 From 6d7d9c5aa212d069b5271cd6bb8a9e05a3e1b986 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Tue, 8 Aug 2017 07:58:01 -0400 Subject: drm/amdgpu: Fix preferred typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change "prefered" to "preferred" Signed-off-by: Kent Russell Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 4 ++-- 7 files changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 75c6107eaccf..59089e027f4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -156,11 +156,11 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, entry->tv.bo = &entry->robj->tbo; entry->tv.shared = !entry->robj->prime_shared_count; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) gds_obj = entry->robj; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS) gws_obj = entry->robj; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA) oa_obj = entry->robj; total_size += amdgpu_bo_size(entry->robj); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 69806c5bcd01..8c462b091aff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -166,7 +166,7 @@ static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h r = amdgpu_bo_reserve(obj, true); if (unlikely(r != 0)) return r; - r = amdgpu_bo_pin_restricted(obj, obj->prefered_domains, + r = amdgpu_bo_pin_restricted(obj, obj->preferred_domains, min_offset, max_offset, mcaddr); amdgpu_bo_unreserve(obj); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index da8209945ef2..c05479ec825a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -348,11 +348,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, * that. */ if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) - domain = bo->prefered_domains; + domain = bo->preferred_domains; else domain = bo->allowed_domains; } else { - domain = bo->prefered_domains; + domain = bo->preferred_domains; } } else { domain = bo->allowed_domains; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 86a88a025b17..81127ffcefb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -306,7 +306,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, return r; bo = gem_to_amdgpu_bo(gobj); - bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); if (r) @@ -693,7 +693,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, info.bo_size = robj->gem_base.size; info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; - info.domains = robj->prefered_domains; + info.domains = robj->preferred_domains; 
info.domain_flags = robj->flags; amdgpu_bo_unreserve(robj); if (copy_to_user(out, &info, sizeof(info))) @@ -711,10 +711,10 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, amdgpu_bo_unreserve(robj); break; } - robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | + robj->preferred_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_CPU); - robj->allowed_domains = robj->prefered_domains; + robj->allowed_domains = robj->preferred_domains; if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 16f31cbd9147..6e72fe7901ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -391,13 +391,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, } INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); - bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | + bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_CPU | AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA); - bo->allowed_domains = bo->prefered_domains; + bo->allowed_domains = bo->preferred_domains; if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; @@ -606,7 +606,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) if (bo->pin_count) return 0; - domain = bo->prefered_domains; + domain = bo->preferred_domains; retry: amdgpu_ttm_placement_from_domain(bo, domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index b6d8be84b884..9b7b4fcb047b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -65,7 +65,7 @@ struct amdgpu_bo_va { struct amdgpu_bo { /* Protected by tbo.reserved */ - u32 prefered_domains; + 
u32 preferred_domains; u32 allowed_domains; struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; struct ttm_placement placement; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 509f7a63d40c..9ab58245e518 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -105,12 +105,12 @@ TRACE_EVENT(amdgpu_bo_create, __entry->bo = bo; __entry->pages = bo->tbo.num_pages; __entry->type = bo->tbo.mem.mem_type; - __entry->prefer = bo->prefered_domains; + __entry->prefer = bo->preferred_domains; __entry->allow = bo->allowed_domains; __entry->visible = bo->flags; ), - TP_printk("bo=%p, pages=%u, type=%d, prefered=%d, allowed=%d, visible=%d", + TP_printk("bo=%p, pages=%u, type=%d, preferred=%d, allowed=%d, visible=%d", __entry->bo, __entry->pages, __entry->type, __entry->prefer, __entry->allow, __entry->visible) ); -- cgit v1.2.3 From b636922553ee2c47b9e3955c5665b8996dfcdbd7 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 3 Aug 2017 11:44:01 -0400 Subject: drm/amdgpu: only move VM BOs in the LRU during validation v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should save us a bunch of command submission overhead. v2: move the LRU move to the right place to avoid the move for the root BO and handle the shadow BOs as well. This turned out to be a bug fix because the move needs to happen before the kmap. 
Signed-off-by: Christian König Reviewed-by: Felix Kuehling Acked-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 15 +++------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 58 +++++++--------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 -- 3 files changed, 16 insertions(+), 59 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c05479ec825a..825784b3b193 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -673,10 +673,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } error_validate: - if (r) { - amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm); + if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - } error_free_pages: @@ -724,21 +722,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) * If error is set than unvalidate buffer, otherwise just free memory * used by parsing context. 
**/ -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, + bool backoff) { - struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; unsigned i; - if (!error) { - amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm); - + if (!error) ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, parser->fence); - } else if (backoff) { + else if (backoff) ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - } for (i = 0; i < parser->num_post_dep_syncobjs; i++) drm_syncobj_put(parser->post_dep_syncobjs[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9ce36652029e..ff8ab2074a59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -159,7 +159,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, */ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, int (*validate)(void *, struct amdgpu_bo *), - void *param, bool use_cpu_for_update) + void *param, bool use_cpu_for_update, + struct ttm_bo_global *glob) { unsigned i; int r; @@ -183,12 +184,18 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, if (r) return r; + spin_lock(&glob->lru_lock); + ttm_bo_move_to_lru_tail(&entry->bo->tbo); + if (entry->bo->shadow) + ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo); + spin_unlock(&glob->lru_lock); + /* * Recurse into the sub directory. This is harmless because we * have only a maximum of 5 layers. 
*/ r = amdgpu_vm_validate_level(entry, validate, param, - use_cpu_for_update); + use_cpu_for_update, glob); if (r) return r; } @@ -220,54 +227,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, return 0; return amdgpu_vm_validate_level(&vm->root, validate, param, - vm->use_cpu_for_update); + vm->use_cpu_for_update, + adev->mman.bdev.glob); } /** - * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail - * - * @adev: amdgpu device instance - * @vm: vm providing the BOs - * - * Move the PT BOs to the tail of the LRU. - */ -static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent) -{ - unsigned i; - - if (!parent->entries) - return; - - for (i = 0; i <= parent->last_entry_used; ++i) { - struct amdgpu_vm_pt *entry = &parent->entries[i]; - - if (!entry->bo) - continue; - - ttm_bo_move_to_lru_tail(&entry->bo->tbo); - amdgpu_vm_move_level_in_lru(entry); - } -} - -/** - * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail - * - * @adev: amdgpu device instance - * @vm: vm providing the BOs - * - * Move the PT BOs to the tail of the LRU. 
- */ -void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - struct ttm_bo_global *glob = adev->mman.bdev.glob; - - spin_lock(&glob->lru_lock); - amdgpu_vm_move_level_in_lru(&vm->root); - spin_unlock(&glob->lru_lock); -} - - /** * amdgpu_vm_alloc_levels - allocate the PD/PT levels * * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 217ecba8f4cc..6e94cd2e610c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -223,8 +223,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); -void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, - struct amdgpu_vm *vm); int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size); -- cgit v1.2.3 From 3c848bb38aca1f7fd23edeb867b89d714a2e6ce2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 7 Aug 2017 17:46:49 +0200 Subject: drm/amdgpu: move vram usage tracking into the vram manager v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like a better place for this. 
v2: use atomic64_t members instead Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 9 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 50 ------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 72 ++++++++++++++++++++++++++-- 6 files changed, 79 insertions(+), 62 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 593618e6c186..ad944aea0d4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1482,8 +1482,6 @@ struct amdgpu_device { struct amdgpu_mman mman; struct amdgpu_vram_scratch vram_scratch; struct amdgpu_wb wb; - atomic64_t vram_usage; - atomic64_t vram_vis_usage; atomic64_t num_bytes_moved; atomic64_t num_evictions; atomic64_t num_vram_cpu_page_faults; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 825784b3b193..7e71a511990e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -246,7 +246,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, } total_vram = adev->mc.real_vram_size - adev->vram_pin_size; - used_vram = atomic64_read(&adev->vram_usage); + used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); free_vram = used_vram >= total_vram ? 
0 : total_vram - used_vram; spin_lock(&adev->mm_stats.lock); @@ -292,7 +292,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, /* Do the same for visible VRAM if half of it is free */ if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { u64 total_vis_vram = adev->mc.visible_vram_size; - u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage); + u64 used_vis_vram = + amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); if (used_vis_vram < total_vis_vram) { u64 free_vis_vram = total_vis_vram - used_vis_vram; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 889f96fcfc81..29cd5dabf8b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -455,10 +455,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = atomic64_read(&adev->vram_usage); + ui64 = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: - ui64 = atomic64_read(&adev->vram_vis_usage); + ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return copy_to_user(out, &ui64, min(size, 8u)) ? 
-EFAULT : 0; case AMDGPU_INFO_GTT_USAGE: ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); @@ -497,7 +497,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file mem.vram.total_heap_size = adev->mc.real_vram_size; mem.vram.usable_heap_size = adev->mc.real_vram_size - adev->vram_pin_size; - mem.vram.heap_usage = atomic64_read(&adev->vram_usage); + mem.vram.heap_usage = + amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = @@ -506,7 +507,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file adev->mc.visible_vram_size - (adev->vram_pin_size - adev->invisible_pin_size); mem.cpu_accessible_vram.heap_usage = - atomic64_read(&adev->vram_vis_usage); + amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.cpu_accessible_vram.max_allocation = mem.cpu_accessible_vram.usable_heap_size * 3 / 4; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 57d2cb6bd331..e7e899190bef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -37,53 +37,6 @@ #include "amdgpu.h" #include "amdgpu_trace.h" - - -static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev, - struct ttm_mem_reg *mem) -{ - if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size) - return 0; - - return ((mem->start << PAGE_SHIFT) + mem->size) > - adev->mc.visible_vram_size ? 
- adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) : - mem->size; -} - -static void amdgpu_update_memory_usage(struct amdgpu_device *adev, - struct ttm_mem_reg *old_mem, - struct ttm_mem_reg *new_mem) -{ - u64 vis_size; - if (!adev) - return; - - if (new_mem) { - switch (new_mem->mem_type) { - case TTM_PL_TT: - break; - case TTM_PL_VRAM: - atomic64_add(new_mem->size, &adev->vram_usage); - vis_size = amdgpu_get_vis_part_size(adev, new_mem); - atomic64_add(vis_size, &adev->vram_vis_usage); - break; - } - } - - if (old_mem) { - switch (old_mem->mem_type) { - case TTM_PL_TT: - break; - case TTM_PL_VRAM: - atomic64_sub(old_mem->size, &adev->vram_usage); - vis_size = amdgpu_get_vis_part_size(adev, old_mem); - atomic64_sub(vis_size, &adev->vram_vis_usage); - break; - } - } -} - static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); @@ -92,7 +45,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct amdgpu_bo, tbo); amdgpu_bo_kunmap(bo); - amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL); drm_gem_object_release(&bo->gem_base); amdgpu_bo_unref(&bo->parent); @@ -990,8 +942,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, return; /* move_notify is called before move happens */ - amdgpu_update_memory_usage(adev, &bo->mem, new_mem); - trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 1307ab581449..f22a4758719d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -68,6 +68,9 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem); uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man); +uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); +uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); + int 
amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 1eb8d5d3acf2..26e900627971 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -28,6 +28,8 @@ struct amdgpu_vram_mgr { struct drm_mm mm; spinlock_t lock; + atomic64_t usage; + atomic64_t vis_usage; }; /** @@ -78,6 +80,27 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) return 0; } +/** + * amdgpu_vram_mgr_vis_size - Calculate visible node size + * + * @adev: amdgpu device structure + * @node: MM node structure + * + * Calculate how many bytes of the MM node are inside visible VRAM + */ +static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, + struct drm_mm_node *node) +{ + uint64_t start = node->start << PAGE_SHIFT; + uint64_t end = (node->size + node->start) << PAGE_SHIFT; + + if (start >= adev->mc.visible_vram_size) + return 0; + + return (end > adev->mc.visible_vram_size ? + adev->mc.visible_vram_size : end) - start; +} + /** * amdgpu_vram_mgr_new - allocate new ranges * @@ -93,11 +116,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, const struct ttm_place *place, struct ttm_mem_reg *mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; struct drm_mm *mm = &mgr->mm; struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; unsigned long lpfn, num_nodes, pages_per_node, pages_left; + uint64_t usage = 0, vis_usage = 0; unsigned i; int r; @@ -142,6 +167,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (unlikely(r)) goto error; + usage += nodes[i].size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); + /* Calculate a virtual BO start address to easily check if * everything is CPU accessible. 
*/ @@ -155,6 +183,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, } spin_unlock(&mgr->lock); + atomic64_add(usage, &mgr->usage); + atomic64_add(vis_usage, &mgr->vis_usage); + mem->mm_node = nodes; return 0; @@ -181,8 +212,10 @@ error: static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; struct drm_mm_node *nodes = mem->mm_node; + uint64_t usage = 0, vis_usage = 0; unsigned pages = mem->num_pages; if (!mem->mm_node) @@ -192,14 +225,47 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, while (pages) { pages -= nodes->size; drm_mm_remove_node(nodes); + usage += nodes->size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes); ++nodes; } spin_unlock(&mgr->lock); + atomic64_sub(usage, &mgr->usage); + atomic64_sub(vis_usage, &mgr->vis_usage); + kfree(mem->mm_node); mem->mm_node = NULL; } +/** + * amdgpu_vram_mgr_usage - how many bytes are used in this domain + * + * @man: TTM memory type manager + * + * Returns how many bytes are used in this domain. 
+ */ +uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_vram_mgr *mgr = man->priv; + + return atomic64_read(&mgr->usage); +} + +/** + * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part + * + * @man: TTM memory type manager + * + * Returns how many bytes are used in the visible part of VRAM + */ +uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_vram_mgr *mgr = man->priv; + + return atomic64_read(&mgr->vis_usage); +} + /** * amdgpu_vram_mgr_debug - dump VRAM table * @@ -211,7 +277,6 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, struct drm_printer *printer) { - struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; spin_lock(&mgr->lock); @@ -219,9 +284,8 @@ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, spin_unlock(&mgr->lock); drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", - adev->mman.bdev.man[TTM_PL_VRAM].size, - (u64)atomic64_read(&adev->vram_usage) >> 20, - (u64)atomic64_read(&adev->vram_vis_usage) >> 20); + man->size, amdgpu_vram_mgr_usage(man) >> 20, + amdgpu_vram_mgr_vis_usage(man) >> 20); } const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { -- cgit v1.2.3 From 0f4b3c68626199cd5ce619e2a3105d44b81f2753 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 31 Jul 2017 15:32:40 +0200 Subject: drm/amdgpu: cleanup static CSA handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the CSA bo_va from the VM to the fpriv structure. 
Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 25 ++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 -- 6 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ad944aea0d4b..1f915a5ce9ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -748,6 +748,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); struct amdgpu_fpriv { struct amdgpu_vm vm; struct amdgpu_bo_va *prt_va; + struct amdgpu_bo_va *csa_va; struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 7e71a511990e..3c64248673ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -787,7 +787,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) if (amdgpu_sriov_vf(adev)) { struct dma_fence *f; - bo_va = vm->csa_bo_va; + + bo_va = fpriv->csa_va; BUG_ON(!bo_va); r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 29cd5dabf8b5..1aac5821ac8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -843,7 +843,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) } if (amdgpu_sriov_vf(adev)) { - r = amdgpu_map_static_csa(adev, &fpriv->vm); + r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); if (r) goto out_suspend; } @@ -896,8 +896,8 @@ void 
amdgpu_driver_postclose_kms(struct drm_device *dev, if (amdgpu_sriov_vf(adev)) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); - amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va); - fpriv->vm.csa_bo_va = NULL; + amdgpu_vm_bo_rmv(adev, fpriv->csa_va); + fpriv->csa_va = NULL; amdgpu_bo_unreserve(adev->virt.csa_obj); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8a081e162d13..89208456d360 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -46,14 +46,14 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev) * address within META_DATA init package to support SRIOV gfx preemption. */ -int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va) { - int r; - struct amdgpu_bo_va *bo_va; struct ww_acquire_ctx ticket; struct list_head list; struct amdgpu_bo_list_entry pd; struct ttm_validate_buffer csa_tv; + int r; INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&csa_tv.head); @@ -69,34 +69,33 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) return r; } - bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); - if (!bo_va) { + *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); + if (!*bo_va) { ttm_eu_backoff_reservation(&ticket, &list); DRM_ERROR("failed to create bo_va for static CSA\n"); return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR, - AMDGPU_CSA_SIZE); + r = amdgpu_vm_alloc_pts(adev, (*bo_va)->vm, AMDGPU_CSA_VADDR, + AMDGPU_CSA_SIZE); if (r) { DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, bo_va); + amdgpu_vm_bo_rmv(adev, *bo_va); ttm_eu_backoff_reservation(&ticket, &list); return r; } - r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE, - AMDGPU_PTE_READABLE | 
AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); + r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE, + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, bo_va); + amdgpu_vm_bo_rmv(adev, *bo_va); ttm_eu_backoff_reservation(&ticket, &list); return r; } - vm->csa_bo_va = bo_va; ttm_eu_backoff_reservation(&ticket, &list); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index e5b1baf387c1..afcfb8bcfb65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -90,7 +90,8 @@ static inline bool is_virtual_machine(void) struct amdgpu_vm; int amdgpu_allocate_static_csa(struct amdgpu_device *adev); -int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va); void amdgpu_virt_init_setting(struct amdgpu_device *adev); uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6e94cd2e610c..9c309c5a86f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -141,8 +141,6 @@ struct amdgpu_vm { u64 client_id; /* dedicated to vm */ struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; - /* each VM will map on CSA */ - struct amdgpu_bo_va *csa_bo_va; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ bool use_cpu_for_update; -- cgit v1.2.3 From ec681545afe5a448b43a2fe5c206ee48e19dabb3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 1 Aug 2017 10:51:43 +0200 Subject: drm/amdgpu: separate bo_va structure MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Split that into vm_bo_base and bo_va to allow other uses as well. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 14 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 81 ++++++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 12 +++++ 7 files changed, 66 insertions(+), 55 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 3c64248673ee..75e7141c8de4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1487,7 +1487,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, addr > mapping->last) continue; - *bo = lobj->bo_va->bo; + *bo = lobj->bo_va->base.bo; return mapping; } @@ -1496,7 +1496,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, addr > mapping->last) continue; - *bo = lobj->bo_va->bo; + *bo = lobj->bo_va->base.bo; return mapping; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5ae9941bad7c..7171968f261e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -621,7 +621,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, + r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, args->map_size); if (r) goto error_backoff; @@ -641,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, + r = 
amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, args->map_size); if (r) goto error_backoff; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 9b7b4fcb047b..a288fa6d72c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -33,6 +33,7 @@ #define AMDGPU_BO_INVALID_OFFSET LONG_MAX +/* bo virtual addresses in a vm */ struct amdgpu_bo_va_mapping { struct list_head list; struct rb_node rb; @@ -43,26 +44,19 @@ struct amdgpu_bo_va_mapping { uint64_t flags; }; -/* bo virtual addresses in a specific vm */ +/* User space allocated BO in a VM */ struct amdgpu_bo_va { + struct amdgpu_vm_bo_base base; + /* protected by bo being reserved */ - struct list_head bo_list; struct dma_fence *last_pt_update; unsigned ref_count; - /* protected by vm mutex and spinlock */ - struct list_head vm_status; - /* mappings for this bo_va */ struct list_head invalids; struct list_head valids; - - /* constant after initialization */ - struct amdgpu_vm *vm; - struct amdgpu_bo *bo; }; - struct amdgpu_bo { /* Protected by tbo.reserved */ u32 preferred_domains; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index d8cd3e554488..1c88bd5e29ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -284,7 +284,7 @@ TRACE_EVENT(amdgpu_vm_bo_map, ), TP_fast_assign( - __entry->bo = bo_va ? bo_va->bo : NULL; + __entry->bo = bo_va ? 
bo_va->base.bo : NULL; __entry->start = mapping->start; __entry->last = mapping->last; __entry->offset = mapping->offset; @@ -308,7 +308,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap, ), TP_fast_assign( - __entry->bo = bo_va->bo; + __entry->bo = bo_va->base.bo; __entry->start = mapping->start; __entry->last = mapping->last; __entry->offset = mapping->offset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 89208456d360..ab05121b9272 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -76,7 +76,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->vm, AMDGPU_CSA_VADDR, + r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR, AMDGPU_CSA_SIZE); if (r) { DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 14012e80fa27..f24554f2d0e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -870,8 +870,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, { struct amdgpu_bo_va *bo_va; - list_for_each_entry(bo_va, &bo->va, bo_list) { - if (bo_va->vm == vm) { + list_for_each_entry(bo_va, &bo->va, base.bo_list) { + if (bo_va->base.vm == vm) { return bo_va; } } @@ -1726,7 +1726,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear) { - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; uint64_t gtt_flags, flags; @@ -1735,27 +1736,27 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct dma_fence *exclusive; int r; - if (clear || !bo_va->bo) { + if (clear || !bo_va->base.bo) { mem = NULL; nodes = NULL; exclusive = NULL; } else { struct 
ttm_dma_tt *ttm; - mem = &bo_va->bo->tbo.mem; + mem = &bo_va->base.bo->tbo.mem; nodes = mem->mm_node; if (mem->mem_type == TTM_PL_TT) { - ttm = container_of(bo_va->bo->tbo.ttm, struct - ttm_dma_tt, ttm); + ttm = container_of(bo_va->base.bo->tbo.ttm, + struct ttm_dma_tt, ttm); pages_addr = ttm->dma_address; } - exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); + exclusive = reservation_object_get_excl(bo->tbo.resv); } - if (bo_va->bo) { - flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); - gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && - adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? + if (bo) { + flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); + gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) && + adev == amdgpu_ttm_adev(bo->tbo.bdev)) ? flags : 0; } else { flags = 0x0; @@ -1763,7 +1764,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, } spin_lock(&vm->status_lock); - if (!list_empty(&bo_va->vm_status)) + if (!list_empty(&bo_va->base.vm_status)) list_splice_init(&bo_va->valids, &bo_va->invalids); spin_unlock(&vm->status_lock); @@ -1786,9 +1787,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_splice_init(&bo_va->invalids, &bo_va->valids); - list_del_init(&bo_va->vm_status); + list_del_init(&bo_va->base.vm_status); if (clear) - list_add(&bo_va->vm_status, &vm->cleared); + list_add(&bo_va->base.vm_status, &vm->cleared); spin_unlock(&vm->status_lock); if (vm->use_cpu_for_update) { @@ -2001,7 +2002,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, spin_lock(&vm->status_lock); while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, - struct amdgpu_bo_va, vm_status); + struct amdgpu_bo_va, base.vm_status); spin_unlock(&vm->status_lock); r = amdgpu_vm_bo_update(adev, bo_va, true); @@ -2041,16 +2042,17 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, if (bo_va == NULL) { return NULL; } - bo_va->vm = vm; - bo_va->bo = bo; + 
bo_va->base.vm = vm; + bo_va->base.bo = bo; + INIT_LIST_HEAD(&bo_va->base.bo_list); + INIT_LIST_HEAD(&bo_va->base.vm_status); + bo_va->ref_count = 1; - INIT_LIST_HEAD(&bo_va->bo_list); INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); - INIT_LIST_HEAD(&bo_va->vm_status); if (bo) - list_add_tail(&bo_va->bo_list, &bo->va); + list_add_tail(&bo_va->base.bo_list, &bo->va); return bo_va; } @@ -2075,7 +2077,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, uint64_t size, uint64_t flags) { struct amdgpu_bo_va_mapping *mapping, *tmp; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; /* validate the parameters */ @@ -2086,7 +2089,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; if (saddr >= eaddr || - (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) + (bo && offset + size > amdgpu_bo_size(bo))) return -EINVAL; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -2096,7 +2099,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, if (tmp) { /* bo and tmp overlap, invalid addr */ dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " - "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr, + "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr, tmp->start, tmp->last + 1); return -EINVAL; } @@ -2141,7 +2144,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t size, uint64_t flags) { struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; int r; @@ -2153,7 +2157,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; if (saddr >= eaddr || - (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) + (bo && offset + size > amdgpu_bo_size(bo))) return -EINVAL; /* Allocate all the needed memory */ @@ -2161,7 +2165,7 @@ int 
amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, if (!mapping) return -ENOMEM; - r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size); + r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); if (r) { kfree(mapping); return r; @@ -2201,7 +2205,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, uint64_t saddr) { struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_vm *vm = bo_va->base.vm; bool valid = true; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -2349,12 +2353,12 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va) { struct amdgpu_bo_va_mapping *mapping, *next; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_vm *vm = bo_va->base.vm; - list_del(&bo_va->bo_list); + list_del(&bo_va->base.bo_list); spin_lock(&vm->status_lock); - list_del(&bo_va->vm_status); + list_del(&bo_va->base.vm_status); spin_unlock(&vm->status_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { @@ -2386,13 +2390,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo) { - struct amdgpu_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - spin_lock(&bo_va->vm->status_lock); - if (list_empty(&bo_va->vm_status)) - list_add(&bo_va->vm_status, &bo_va->vm->invalidated); - spin_unlock(&bo_va->vm->status_lock); + struct amdgpu_vm_bo_base *bo_base; + + list_for_each_entry(bo_base, &bo->va, bo_list) { + spin_lock(&bo_base->vm->status_lock); + if (list_empty(&bo_base->vm_status)) + list_add(&bo_base->vm_status, + &bo_base->vm->invalidated); + spin_unlock(&bo_base->vm->status_lock); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index f12c12fec3c0..95e5e81e1026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -99,6 +99,18 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) #define 
AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) +/* base structure for tracking BO usage in a VM */ +struct amdgpu_vm_bo_base { + /* constant after initialization */ + struct amdgpu_vm *vm; + struct amdgpu_bo *bo; + + /* protected by bo being reserved */ + struct list_head bo_list; + + /* protected by spinlock */ + struct list_head vm_status; +}; struct amdgpu_vm_pt { struct amdgpu_bo *bo; -- cgit v1.2.3 From 27c7b9aeecd7c06a3b527795807c19a0bbe25c1e Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 1 Aug 2017 11:27:36 +0200 Subject: drm/amdgpu: rename VM invalidated to moved MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That better describes what happens here with the BO. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 75e7141c8de4..15d4a28d73bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -825,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) } - r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync); + r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync); if (amdgpu_vm_debug && p->bo_list) { /* Invalidate all BOs to test for userspace bugs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f24554f2d0e5..2ed99b8f7da7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1983,25 +1983,25 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, } /** - * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT + * amdgpu_vm_clear_moved - clear moved BOs in 
the PT * * @adev: amdgpu_device pointer * @vm: requested vm * - * Make sure all invalidated BOs are cleared in the PT. + * Make sure all moved BOs are cleared in the PT. * Returns 0 for success. * * PTs have to be reserved and mutex must be locked! */ -int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, - struct amdgpu_vm *vm, struct amdgpu_sync *sync) +int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = NULL; int r = 0; spin_lock(&vm->status_lock); - while (!list_empty(&vm->invalidated)) { - bo_va = list_first_entry(&vm->invalidated, + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, base.vm_status); spin_unlock(&vm->status_lock); @@ -2396,7 +2396,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, spin_lock(&bo_base->vm->status_lock); if (list_empty(&bo_base->vm_status)) list_add(&bo_base->vm_status, - &bo_base->vm->invalidated); + &bo_base->vm->moved); spin_unlock(&bo_base->vm->status_lock); } } @@ -2465,7 +2465,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); - INIT_LIST_HEAD(&vm->invalidated); + INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->freed); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 95e5e81e1026..a740b57e9eee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -129,7 +129,7 @@ struct amdgpu_vm { spinlock_t status_lock; /* BOs moved, but not yet updated in the PT */ - struct list_head invalidated; + struct list_head moved; /* BOs cleared in the PT because of a move */ struct list_head cleared; @@ -247,8 +247,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct 
dma_fence **fence); -int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_sync *sync); +int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_sync *sync); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); -- cgit v1.2.3 From a1d6b1901a2154e9f44f39738491f0afc3f8608e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 23 Aug 2017 07:52:36 +0200 Subject: drm/amdgpu: check memory allocation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check memory allocation failure and return -ENOMEM in such a case. 'num_post_dep_syncobjs' still has to be set to 0 before the test in order to have it initialized if 'amdgpu_cs_parser_fini()' is called to free resources. The calling graph would be, in such a case! failure in amdgpu_cs_process_syncobj_out_dep() ---> error code returned by amdgpu_cs_dependencies() --> amdgpu_cs_parser_fini() is called Reviewed-by: Christian König Signed-off-by: Christophe JAILLET Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 15d4a28d73bb..baa90df90aea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1079,6 +1079,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, GFP_KERNEL); p->num_post_dep_syncobjs = 0; + if (!p->post_dep_syncobjs) + return -ENOMEM; + for (i = 0; i < num_deps; ++i) { p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); if (!p->post_dep_syncobjs[i]) -- cgit v1.2.3 From afaf59237843bf89823c33143beca6b262dff0ca Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:19 -0700 Subject: drm/syncobj: Rename fence_get to find_fence 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function has far more in common with drm_syncobj_find than with any of the get/put functions. Signed-off-by: Jason Ekstrand Acked-by: Christian König (v1) Signed-off-by: Dave Airlie --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/drm_syncobj.c | 10 +++++----- include/drm/drm_syncobj.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 15d4a28d73bb..269b835571eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1035,7 +1035,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, { int r; struct dma_fence *fence; - r = drm_syncobj_fence_get(p->filp, handle, &fence); + r = drm_syncobj_find_fence(p->filp, handle, &fence); if (r) return r; diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index a5b38a80a99a..0412b0b0a342 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -95,9 +95,9 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, } EXPORT_SYMBOL(drm_syncobj_replace_fence); -int drm_syncobj_fence_get(struct drm_file *file_private, - u32 handle, - struct dma_fence **fence) +int drm_syncobj_find_fence(struct drm_file *file_private, + u32 handle, + struct dma_fence **fence) { struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle); int ret = 0; @@ -112,7 +112,7 @@ int drm_syncobj_fence_get(struct drm_file *file_private, drm_syncobj_put(syncobj); return ret; } -EXPORT_SYMBOL(drm_syncobj_fence_get); +EXPORT_SYMBOL(drm_syncobj_find_fence); /** * drm_syncobj_free - free a sync object.
@@ -307,7 +307,7 @@ int drm_syncobj_export_sync_file(struct drm_file *file_private, if (fd < 0) return fd; - ret = drm_syncobj_fence_get(file_private, handle, &fence); + ret = drm_syncobj_find_fence(file_private, handle, &fence); if (ret) goto err_put_fd; diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 89976da542b1..7d4ad777132e 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -81,9 +81,9 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle); void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence); -int drm_syncobj_fence_get(struct drm_file *file_private, - u32 handle, - struct dma_fence **fence); +int drm_syncobj_find_fence(struct drm_file *file_private, + u32 handle, + struct dma_fence **fence); void drm_syncobj_free(struct kref *kref); #endif -- cgit v1.2.3 From 06f10a537ec1d5fe68dc889a9a5d11afa49e6e0d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 23 Aug 2017 07:52:36 +0200 Subject: drm/amdgpu: check memory allocation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check memory allocation failure and return -ENOMEM in such a case. 'num_post_dep_syncobjs' still has to be set to 0 before the test in order to have it initialized if 'amdgpu_cs_parser_fini()' is called to free resources. The calling graph would be, in such a case! 
failure in amdgpu_cs_process_syncobj_out_dep() ---> error code returned by amdgpu_cs_dependencies() --> amdgpu_cs_parser_fini() is called Reviewed-by: Christian König Signed-off-by: Christophe JAILLET Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 269b835571eb..d6ddd5562c16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1079,6 +1079,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, GFP_KERNEL); p->num_post_dep_syncobjs = 0; + if (!p->post_dep_syncobjs) + return -ENOMEM; + for (i = 0; i < num_deps; ++i) { p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); if (!p->post_dep_syncobjs[i]) -- cgit v1.2.3 From 3f3333f8a0e90ac26f84ed7b0aa344efce695c08 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 3 Aug 2017 14:02:13 +0200 Subject: drm/amdgpu: track evicted page tables v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of validating all page tables when one was evicted, track which one needs a validation. 
v2: simplify amdgpu_vm_ready as well Signed-off-by: Christian König Reviewed-by: Alex Deucher (v1) Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 227 +++++++++++++---------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 16 +- 5 files changed, 119 insertions(+), 141 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d6ddd5562c16..8bf178a912f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -636,9 +636,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); - fpriv->vm.last_eviction_counter = - atomic64_read(&p->adev->num_evictions); - if (p->bo_list) { struct amdgpu_bo *gds = p->bo_list->gds_obj; struct amdgpu_bo *gws = p->bo_list->gws_obj; @@ -835,7 +832,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) if (!bo) continue; - amdgpu_vm_bo_invalidate(adev, bo); + amdgpu_vm_bo_invalidate(adev, bo, false); } } @@ -860,7 +857,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, } if (p->job->vm) { - p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo); + p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo); r = amdgpu_bo_vm_update_pte(p); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index ba012933e6aa..d02880640ee7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -160,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, if (bo_va && --bo_va->ref_count == 0) { amdgpu_vm_bo_rmv(adev, bo_va); - if (amdgpu_vm_ready(adev, vm)) { + if (amdgpu_vm_ready(vm)) { struct dma_fence 
*fence = NULL; r = amdgpu_vm_clear_freed(adev, vm, &fence); @@ -481,10 +481,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, struct list_head *list, uint32_t operation) { - int r = -ERESTARTSYS; + int r; - if (!amdgpu_vm_ready(adev, vm)) - goto error; + if (!amdgpu_vm_ready(vm)) + return; r = amdgpu_vm_update_directories(adev, vm); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 9e495da0bb03..52d0109c0d9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -929,7 +929,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, return; abo = container_of(bo, struct amdgpu_bo, tbo); - amdgpu_vm_bo_invalidate(adev, abo); + amdgpu_vm_bo_invalidate(adev, abo, evict); amdgpu_bo_kunmap(abo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1334bbb82634..6ff3c1bf035e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -140,7 +140,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry) { - entry->robj = vm->root.bo; + entry->robj = vm->root.base.bo; entry->priority = 0; entry->tv.bo = &entry->robj->tbo; entry->tv.shared = true; @@ -148,61 +148,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, list_add(&entry->tv.head, validated); } -/** - * amdgpu_vm_validate_layer - validate a single page table level - * - * @parent: parent page table level - * @validate: callback to do the validation - * @param: parameter for the validation callback - * - * Validate the page table BOs on command submission if neccessary. 
- */ -static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, - int (*validate)(void *, struct amdgpu_bo *), - void *param, bool use_cpu_for_update, - struct ttm_bo_global *glob) -{ - unsigned i; - int r; - - if (use_cpu_for_update) { - r = amdgpu_bo_kmap(parent->bo, NULL); - if (r) - return r; - } - - if (!parent->entries) - return 0; - - for (i = 0; i <= parent->last_entry_used; ++i) { - struct amdgpu_vm_pt *entry = &parent->entries[i]; - - if (!entry->bo) - continue; - - r = validate(param, entry->bo); - if (r) - return r; - - spin_lock(&glob->lru_lock); - ttm_bo_move_to_lru_tail(&entry->bo->tbo); - if (entry->bo->shadow) - ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo); - spin_unlock(&glob->lru_lock); - - /* - * Recurse into the sub directory. This is harmless because we - * have only a maximum of 5 layers. - */ - r = amdgpu_vm_validate_level(entry, validate, param, - use_cpu_for_update, glob); - if (r) - return r; - } - - return r; -} - /** * amdgpu_vm_validate_pt_bos - validate the page table BOs * @@ -217,32 +162,43 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { - uint64_t num_evictions; + struct ttm_bo_global *glob = adev->mman.bdev.glob; + int r; - /* We only need to validate the page tables - * if they aren't already valid. 
- */ - num_evictions = atomic64_read(&adev->num_evictions); - if (num_evictions == vm->last_eviction_counter) - return 0; + spin_lock(&vm->status_lock); + while (!list_empty(&vm->evicted)) { + struct amdgpu_vm_bo_base *bo_base; + struct amdgpu_bo *bo; - return amdgpu_vm_validate_level(&vm->root, validate, param, - vm->use_cpu_for_update, - adev->mman.bdev.glob); -} + bo_base = list_first_entry(&vm->evicted, + struct amdgpu_vm_bo_base, + vm_status); + spin_unlock(&vm->status_lock); -/** - * amdgpu_vm_check - helper for amdgpu_vm_ready - */ -static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo) -{ - /* if anything is swapped out don't swap it in here, - just abort and wait for the next CS */ - if (!amdgpu_bo_gpu_accessible(bo)) - return -ERESTARTSYS; + bo = bo_base->bo; + BUG_ON(!bo); + if (bo->parent) { + r = validate(param, bo); + if (r) + return r; - if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow)) - return -ERESTARTSYS; + spin_lock(&glob->lru_lock); + ttm_bo_move_to_lru_tail(&bo->tbo); + if (bo->shadow) + ttm_bo_move_to_lru_tail(&bo->shadow->tbo); + spin_unlock(&glob->lru_lock); + } + + if (vm->use_cpu_for_update) { + r = amdgpu_bo_kmap(bo, NULL); + if (r) + return r; + } + + spin_lock(&vm->status_lock); + list_del_init(&bo_base->vm_status); + } + spin_unlock(&vm->status_lock); return 0; } @@ -250,17 +206,19 @@ static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo) /** * amdgpu_vm_ready - check VM is ready for updates * - * @adev: amdgpu device * @vm: VM to check * * Check if all VM PDs/PTs are ready for updates */ -bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm) +bool amdgpu_vm_ready(struct amdgpu_vm *vm) { - if (amdgpu_vm_check(NULL, vm->root.bo)) - return false; + bool ready; + + spin_lock(&vm->status_lock); + ready = list_empty(&vm->evicted); + spin_unlock(&vm->status_lock); - return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL); + return ready; } /** @@ -326,11 +284,11 @@ static int 
amdgpu_vm_alloc_levels(struct amdgpu_device *adev, /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { - struct reservation_object *resv = vm->root.bo->tbo.resv; + struct reservation_object *resv = vm->root.base.bo->tbo.resv; struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; struct amdgpu_bo *pt; - if (!entry->bo) { + if (!entry->base.bo) { r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, level), AMDGPU_GPU_PAGE_SIZE, true, @@ -351,9 +309,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, /* Keep a reference to the root directory to avoid * freeing them up in the wrong order. */ - pt->parent = amdgpu_bo_ref(vm->root.bo); + pt->parent = amdgpu_bo_ref(vm->root.base.bo); - entry->bo = pt; + entry->base.vm = vm; + entry->base.bo = pt; + list_add_tail(&entry->base.bo_list, &pt->va); + INIT_LIST_HEAD(&entry->base.vm_status); entry->addr = 0; } @@ -1020,7 +981,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner); + amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); r = amdgpu_sync_wait(&sync, true); amdgpu_sync_free(&sync); @@ -1059,10 +1020,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, memset(&params, 0, sizeof(params)); params.adev = adev; - shadow = parent->bo->shadow; + shadow = parent->base.bo->shadow; if (vm->use_cpu_for_update) { - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); if (unlikely(r)) return r; @@ -1078,7 +1039,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, /* assume the worst case */ ndw += parent->last_entry_used * 6; - pd_addr = amdgpu_bo_gpu_offset(parent->bo); + pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); if (shadow) { shadow_addr = amdgpu_bo_gpu_offset(shadow); @@
-1098,7 +1059,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, /* walk over the address space and update the directory */ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { - struct amdgpu_bo *bo = parent->entries[pt_idx].bo; + struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo; uint64_t pde, pt; if (bo == NULL) @@ -1141,7 +1102,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, } if (count) { - if (vm->root.bo->shadow) + if (vm->root.base.bo->shadow) params.func(&params, last_shadow, last_pt, count, incr, AMDGPU_PTE_VALID); @@ -1154,7 +1115,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, amdgpu_job_free(job); } else { amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, + amdgpu_sync_resv(adev, &job->sync, + parent->base.bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); if (shadow) amdgpu_sync_resv(adev, &job->sync, @@ -1167,7 +1129,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_bo_fence(parent->bo, fence, true); + amdgpu_bo_fence(parent->base.bo, fence, true); dma_fence_put(vm->last_dir_update); vm->last_dir_update = dma_fence_get(fence); dma_fence_put(fence); @@ -1180,7 +1142,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - if (!entry->bo) + if (!entry->base.bo) continue; r = amdgpu_vm_update_level(adev, vm, entry, level + 1); @@ -1213,7 +1175,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - if (!entry->bo) + if (!entry->base.bo) continue; entry->addr = ~0ULL; @@ -1268,7 +1230,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, *entry = &p->vm->root; while ((*entry)->entries) { idx = addr >>
(p->adev->vm_manager.block_size * level--); - idx %= amdgpu_bo_size((*entry)->bo) / 8; + idx %= amdgpu_bo_size((*entry)->base.bo) / 8; *parent = *entry; *entry = &(*entry)->entries[idx]; } @@ -1304,7 +1266,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, p->src || !(flags & AMDGPU_PTE_VALID)) { - dst = amdgpu_bo_gpu_offset(entry->bo); + dst = amdgpu_bo_gpu_offset(entry->base.bo); dst = amdgpu_gart_get_vm_pde(p->adev, dst); flags = AMDGPU_PTE_VALID; } else { @@ -1330,18 +1292,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, tmp = p->pages_addr; p->pages_addr = NULL; - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); pde = pd_addr + (entry - parent->entries) * 8; amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); p->pages_addr = tmp; } else { - if (parent->bo->shadow) { - pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); + if (parent->base.bo->shadow) { + pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); pde = pd_addr + (entry - parent->entries) * 8; amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); } - pd_addr = amdgpu_bo_gpu_offset(parent->bo); + pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); pde = pd_addr + (entry - parent->entries) * 8; amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); } @@ -1392,7 +1354,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, if (entry->addr & AMDGPU_PDE_PTE) continue; - pt = entry->bo; + pt = entry->base.bo; if (use_cpu_update) { pe_start = (unsigned long)amdgpu_bo_kptr(pt); } else { @@ -1612,12 +1574,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv, + r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, owner); if (r) goto error_free; - r = reservation_object_reserve_shared(vm->root.bo->tbo.resv); + r = 
reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); if (r) goto error_free; @@ -1632,7 +1594,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_bo_fence(vm->root.bo, f, true); + amdgpu_bo_fence(vm->root.base.bo, f, true); dma_fence_put(*fence); *fence = f; return 0; @@ -1927,7 +1889,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, */ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct reservation_object *resv = vm->root.bo->tbo.resv; + struct reservation_object *resv = vm->root.base.bo->tbo.resv; struct dma_fence *excl, **shared; unsigned i, shared_count; int r; @@ -2414,12 +2376,25 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, * Mark @bo as invalid. */ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, - struct amdgpu_bo *bo) + struct amdgpu_bo *bo, bool evicted) { struct amdgpu_vm_bo_base *bo_base; list_for_each_entry(bo_base, &bo->va, bo_list) { + struct amdgpu_vm *vm = bo_base->vm; + bo_base->moved = true; + if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + spin_lock(&bo_base->vm->status_lock); + list_move(&bo_base->vm_status, &vm->evicted); + spin_unlock(&bo_base->vm->status_lock); + continue; + } + + /* Don't add page tables to the moved state */ + if (bo->tbo.type == ttm_bo_type_kernel) + continue; + spin_lock(&bo_base->vm->status_lock); list_move(&bo_base->vm_status, &bo_base->vm->moved); spin_unlock(&bo_base->vm->status_lock); @@ -2507,6 +2482,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); + INIT_LIST_HEAD(&vm->evicted); INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->freed); @@ -2551,30 +2527,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, AMDGPU_GEM_DOMAIN_VRAM, flags, - NULL, NULL, 
init_pde_value, &vm->root.bo); + NULL, NULL, init_pde_value, &vm->root.base.bo); if (r) goto error_free_sched_entity; - r = amdgpu_bo_reserve(vm->root.bo, false); - if (r) - goto error_free_root; - - vm->last_eviction_counter = atomic64_read(&adev->num_evictions); + vm->root.base.vm = vm; + list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); + INIT_LIST_HEAD(&vm->root.base.vm_status); if (vm->use_cpu_for_update) { - r = amdgpu_bo_kmap(vm->root.bo, NULL); + r = amdgpu_bo_reserve(vm->root.base.bo, false); if (r) goto error_free_root; - } - amdgpu_bo_unreserve(vm->root.bo); + r = amdgpu_bo_kmap(vm->root.base.bo, NULL); + if (r) + goto error_free_root; + amdgpu_bo_unreserve(vm->root.base.bo); + } return 0; error_free_root: - amdgpu_bo_unref(&vm->root.bo->shadow); - amdgpu_bo_unref(&vm->root.bo); - vm->root.bo = NULL; + amdgpu_bo_unref(&vm->root.base.bo->shadow); + amdgpu_bo_unref(&vm->root.base.bo); + vm->root.base.bo = NULL; error_free_sched_entity: amd_sched_entity_fini(&ring->sched, &vm->entity); @@ -2593,9 +2570,11 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) { unsigned i; - if (level->bo) { - amdgpu_bo_unref(&level->bo->shadow); - amdgpu_bo_unref(&level->bo); + if (level->base.bo) { + list_del(&level->base.bo_list); + list_del(&level->base.vm_status); + amdgpu_bo_unref(&level->base.bo->shadow); + amdgpu_bo_unref(&level->base.bo); } if (level->entries) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index ff093d4b5e11..4e465e817fe8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -111,12 +111,12 @@ struct amdgpu_vm_bo_base { }; struct amdgpu_vm_pt { - struct amdgpu_bo *bo; - uint64_t addr; + struct amdgpu_vm_bo_base base; + uint64_t addr; /* array of page tables, one for each directory entry */ - struct amdgpu_vm_pt *entries; - unsigned last_entry_used; + struct amdgpu_vm_pt *entries; + unsigned last_entry_used; }; struct amdgpu_vm { @@ 
-126,6 +126,9 @@ struct amdgpu_vm { /* protecting invalidated */ spinlock_t status_lock; + /* BOs who needs a validation */ + struct list_head evicted; + /* BOs moved, but not yet updated in the PT */ struct list_head moved; @@ -135,7 +138,6 @@ struct amdgpu_vm { /* contains the page directory */ struct amdgpu_vm_pt root; struct dma_fence *last_dir_update; - uint64_t last_eviction_counter; /* protecting freed */ spinlock_t freed_lock; @@ -225,7 +227,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry); -bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm); +bool amdgpu_vm_ready(struct amdgpu_vm *vm); int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); @@ -250,7 +252,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, - struct amdgpu_bo *bo); + struct amdgpu_bo *bo, bool evicted); struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, struct amdgpu_bo *bo); struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, -- cgit v1.2.3 From 73fb16e7ebee12953de32a7a2552e0cf2bf74ebf Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 16 Aug 2017 11:13:48 +0200 Subject: drm/amdgpu: add support for per VM BOs v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per VM BOs are handled like VM PDs and PTs. They are always valid and don't need to be specified in the BO lists. 
v2: validate PDs/PTs first Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 ++- 3 files changed, 60 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8bf178a912f2..b57adc0723cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -822,7 +822,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) } - r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync); + r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync); if (amdgpu_vm_debug && p->bo_list) { /* Invalidate all BOs to test for userspace bugs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d3c48557555c..26eb7dce5fe5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&glob->lru_lock); } - if (vm->use_cpu_for_update) { + if (bo->tbo.type == ttm_bo_type_kernel && + vm->use_cpu_for_update) { r = amdgpu_bo_kmap(bo, NULL); if (r) return r; } spin_lock(&vm->status_lock); - list_move(&bo_base->vm_status, &vm->relocated); + if (bo->tbo.type != ttm_bo_type_kernel) + list_move(&bo_base->vm_status, &vm->moved); + else + list_move(&bo_base->vm_status, &vm->relocated); } spin_unlock(&vm->status_lock); @@ -1985,20 +1989,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, } /** - * amdgpu_vm_clear_moved - clear moved BOs in the PT + * amdgpu_vm_handle_moved - handle moved BOs in the PT * * @adev: amdgpu_device pointer * @vm: requested vm + * @sync: sync object to add fences to * - * Make sure all moved BOs are cleared in 
the PT. + * Make sure all BOs which are moved are updated in the PTs. * Returns 0 for success. * - * PTs have to be reserved and mutex must be locked! + * PTs have to be reserved! */ -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_sync *sync) +int amdgpu_vm_handle_moved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = NULL; + bool clear; int r = 0; spin_lock(&vm->status_lock); @@ -2007,7 +2014,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va, base.vm_status); spin_unlock(&vm->status_lock); - r = amdgpu_vm_bo_update(adev, bo_va, true); + /* Per VM BOs never need to be cleared in the page tables */ + clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv; + + r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; @@ -2059,6 +2069,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, return bo_va; } + +/** + * amdgpu_vm_bo_insert_map - insert a new mapping + * + * @adev: amdgpu_device pointer + * @bo_va: bo_va to store the address + * @mapping: the mapping to insert + * + * Insert a new mapping into all structures.
+ */ +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, + struct amdgpu_bo_va *bo_va, + struct amdgpu_bo_va_mapping *mapping) +{ + struct amdgpu_vm *vm = bo_va->base.vm; + struct amdgpu_bo *bo = bo_va->base.bo; + + list_add(&mapping->list, &bo_va->invalids); + amdgpu_vm_it_insert(mapping, &vm->va); + + if (mapping->flags & AMDGPU_PTE_PRT) + amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + spin_lock(&vm->status_lock); + list_move(&bo_va->base.vm_status, &vm->moved); + spin_unlock(&vm->status_lock); + } + trace_amdgpu_vm_bo_map(bo_va, mapping); +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -2110,18 +2151,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, if (!mapping) return -ENOMEM; - INIT_LIST_HEAD(&mapping->list); mapping->start = saddr; mapping->last = eaddr; mapping->offset = offset; mapping->flags = flags; - list_add(&mapping->list, &bo_va->invalids); - amdgpu_vm_it_insert(mapping, &vm->va); - - if (flags & AMDGPU_PTE_PRT) - amdgpu_vm_prt_get(adev); - trace_amdgpu_vm_bo_map(bo_va, mapping); + amdgpu_vm_bo_insert_map(adev, bo_va, mapping); return 0; } @@ -2148,7 +2183,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo = bo_va->base.bo; - struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; int r; @@ -2182,12 +2216,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, mapping->offset = offset; mapping->flags = flags; - list_add(&mapping->list, &bo_va->invalids); - amdgpu_vm_it_insert(mapping, &vm->va); - - if (flags & AMDGPU_PTE_PRT) - amdgpu_vm_prt_get(adev); - trace_amdgpu_vm_bo_map(bo_va, mapping); + amdgpu_vm_bo_insert_map(adev, bo_va, mapping); return 0; } @@ -2402,7 +2431,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, bo_base->moved = true; if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { spin_lock(&bo_base->vm->status_lock); - list_move(&bo_base->vm_status, &vm->evicted); + if 
(bo->tbo.type == ttm_bo_type_kernel) + list_move(&bo_base->vm_status, &vm->evicted); + else + list_move_tail(&bo_base->vm_status, + &vm->evicted); spin_unlock(&bo_base->vm->status_lock); continue; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index c3753afe9853..90b7741d024b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_sync *sync); +int amdgpu_vm_handle_moved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_sync *sync); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); -- cgit v1.2.3 From a2138eaf97b4e053b229fe07e1bb4ecbe07e6769 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 11 Aug 2017 17:49:48 +0800 Subject: drm/amdgpu: fix wait_any_fence first is incorrect if hit NULL/signaled fence Signed-off-by: Monk Liu Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b57adc0723cb..233b6f2f8427 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1384,6 +1384,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev, array[i] = fence; } else { /* NULL, the fence has been already signaled */ r = 1; + first = i; goto out; } } -- cgit v1.2.3 From a216ab09955d6b77f3af4f0aba9255c5ddf382f5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Sat, 2 Sep 2017 13:21:31 +0200 Subject: drm/amdgpu: fix userptr put_page handling MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move calling put_page into the unpopulate callback. Otherwise we mess up the pages reference count when it is unbound multiple times. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 +++++++++++++- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5809f55e0d9d..cc6de0b46326 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1793,6 +1793,7 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 233b6f2f8427..e58db0c69c6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -474,10 +474,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, /* Check if we have user pages and nobody bound the BO already */ if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) { - size_t size = sizeof(struct page *); - - size *= bo->tbo.ttm->num_pages; - memcpy(bo->tbo.ttm->pages, lobj->user_pages, size); + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, + lobj->user_pages); binding_userptr = true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 841a5699bef0..28e121984332 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -664,6 +664,18 @@ release_pages: return r; } +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +{ + unsigned i; + + for (i = 0; i < ttm->num_pages; ++i) { + if (ttm->pages[i]) + put_page(ttm->pages[i]); + + ttm->pages[i] = pages ? pages[i] : NULL; + } +} + static void amdgpu_trace_dma_map(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); @@ -738,7 +750,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) set_page_dirty(page); mark_page_accessed(page); - put_page(page); } amdgpu_trace_dma_unmap(ttm); @@ -971,6 +982,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); if (gtt && gtt->userptr) { + amdgpu_ttm_tt_set_user_pages(ttm, NULL); kfree(ttm->sg); ttm->page_flags &= ~TTM_PAGE_FLAG_SG; return; -- cgit v1.2.3 From aa4ec7ce7ec52c7230cfa73b06d79288b45fe1c9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 5 Sep 2017 15:10:50 +0200 Subject: drm/amdgpu: revert "fix deadlock of reservation between cs and gpu reset v2" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 10e709cb296c98424c03408d23e3addeddcd4088. The patch doesn't work at all: 1. The CS can still be blocked because of amdgpu_ctx_add_fence(). 2. The order of submission isn't correct any more. 3. We could end up using freed up memory because we now drop the ctx reference too early. This needs to be fixed cleanly by doing the context handling after the BO handling, but this is a larger task; just avoid the obvious crashes for now.
Signed-off-by: Christian König Reviewed-by: Monk Liu monk.liu@amd.com Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e58db0c69c6a..3fe816f6beca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1148,7 +1148,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); job->uf_sequence = cs->out.handle; amdgpu_job_free_resources(job); - amdgpu_cs_parser_fini(p, 0, true); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); @@ -1206,10 +1205,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; r = amdgpu_cs_submit(&parser, cs); - if (r) - goto out; - return 0; out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); return r; -- cgit v1.2.3 From b72cf4fca2bb786e20864b5e8755105aa9626fb4 Mon Sep 17 00:00:00 2001 From: Christian König Date: Sun, 3 Sep 2017 15:22:06 +0200 Subject: drm/amdgpu: move taking mmap_sem into get_user_pages v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This didn't help as intended, just simplify the code.
v2: unlock mmap_sem in the error path as well Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 +----------- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 +++++++- 3 files changed, 8 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 3fe816f6beca..283a216ee758 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -500,18 +500,14 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_bo_list_entry *e; struct list_head duplicates; - bool need_mmap_lock = false; unsigned i, tries = 10; int r; INIT_LIST_HEAD(&p->validated); p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); - if (p->bo_list) { - need_mmap_lock = p->bo_list->first_userptr != - p->bo_list->num_entries; + if (p->bo_list) amdgpu_bo_list_get_list(p->bo_list, &p->validated); - } INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); @@ -519,9 +515,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->uf_entry.robj) list_add(&p->uf_entry.tv.head, &p->validated); - if (need_mmap_lock) - down_read(&current->mm->mmap_sem); - while (1) { struct list_head need_pages; unsigned i; @@ -674,9 +667,6 @@ error_validate: error_free_pages: - if (need_mmap_lock) - up_read(&current->mm->mmap_sem); - if (p->bo_list) { for (i = p->bo_list->first_userptr; i < p->bo_list->num_entries; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index f1e61b3df640..b0d45c8e6bb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -318,8 +318,6 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, }
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - down_read(&current->mm->mmap_sem); - r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (r) @@ -334,8 +332,6 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, amdgpu_bo_unreserve(bo); if (r) goto free_pages; - - up_read(&current->mm->mmap_sem); } r = drm_gem_handle_create(filp, gobj, &handle); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 28e121984332..ea0378c8b049 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -622,6 +622,8 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) flags |= FOLL_WRITE; + down_read(&current->mm->mmap_sem); + if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { /* check that we only use anonymous memory to prevent problems with writeback */ @@ -629,8 +631,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) struct vm_area_struct *vma; vma = find_vma(gtt->usermm, gtt->userptr); - if (!vma || vma->vm_file || vma->vm_end < end) + if (!vma || vma->vm_file || vma->vm_end < end) { + up_read(&current->mm->mmap_sem); return -EPERM; + } } do { @@ -657,10 +661,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) } while (pinned < ttm->num_pages); + up_read(&current->mm->mmap_sem); return 0; release_pages: release_pages(pages, pinned, 0); + up_read(&current->mm->mmap_sem); return r; } -- cgit v1.2.3 From ca666a3c298f838346ccea46ff542c605e68deb5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 5 Sep 2017 14:30:05 +0200 Subject: drm/amdgpu: stop using BO status for user pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead use a counter to figure out if we need to set new pages or not.
Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 11 +++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cc6de0b46326..f3e561136597 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1802,6 +1802,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end); bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); +bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 283a216ee758..4d3f8fbfa59d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -473,7 +473,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, return -EPERM; /* Check if we have user pages and nobody bound the BO already */ - if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) { + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && + lobj->user_pages) { amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); binding_userptr = true; @@ -534,23 +535,25 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, INIT_LIST_HEAD(&need_pages); for (i = p->bo_list->first_userptr; i < p->bo_list->num_entries; ++i) { + struct amdgpu_bo *bo; e = &p->bo_list->array[i]; + bo = e->robj; - if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm, + if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, &e->user_invalidated) && e->user_pages) { /* 
We acquired a page array, but somebody * invalidated it. Free it and try again */ release_pages(e->user_pages, - e->robj->tbo.ttm->num_pages, + bo->tbo.ttm->num_pages, false); kvfree(e->user_pages); e->user_pages = NULL; } - if (e->robj->tbo.ttm->state != tt_bound && + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && !e->user_pages) { list_del(&e->tv.head); list_add(&e->tv.head, &need_pages); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ea0378c8b049..e67785191032 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -609,6 +609,7 @@ struct amdgpu_ttm_tt { spinlock_t guptasklock; struct list_head guptasks; atomic_t mmu_invalidations; + uint32_t last_set_pages; struct list_head list; }; @@ -672,8 +673,10 @@ release_pages: void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { + struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned i; + gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations); for (i = 0; i < ttm->num_pages; ++i) { if (ttm->pages[i]) put_page(ttm->pages[i]); @@ -1025,6 +1028,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, spin_lock_init(&gtt->guptasklock); INIT_LIST_HEAD(&gtt->guptasks); atomic_set(&gtt->mmu_invalidations, 0); + gtt->last_set_pages = 0; return 0; } @@ -1077,6 +1081,16 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, return prev_invalidated != *last_invalidated; } +bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL || !gtt->userptr) + return false; + + return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; +} + bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; -- cgit v1.2.3 From 1b0c0f9dc5ca6c0c8be21eeac92c7aa77bbf1d33 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 5 Sep 2017 14:36:44 +0200 Subject: drm/amdgpu: move userptr BOs to CPU domain
during CS v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of moving them in the MMU notifier move them during CS. v2: still mark pages as accessed/dirty Signed-off-by: Christian König Reviewed-by: Felix Kuehling (v1) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 5 +---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 27 +++++++++++++++++++-------- 4 files changed, 27 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f3e561136597..134a049f87bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1794,6 +1794,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); +void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4d3f8fbfa59d..4877df83b801 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -475,6 +475,12 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, /* Check if we have user pages and nobody bound the BO already */ if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && lobj->user_pages) { + amdgpu_ttm_placement_from_domain(bo, + AMDGPU_GEM_DOMAIN_CPU); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, + false); + if (r) + return r; amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); 
binding_userptr = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 6558a3ed57a7..df85a1314799 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -137,10 +137,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); - amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); - if (r) - DRM_ERROR("(%ld) failed to validate user bo\n", r); + amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); amdgpu_bo_unreserve(bo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e67785191032..fe887f361be8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -685,6 +685,24 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) } } +void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned i; + + for (i = 0; i < ttm->num_pages; ++i) { + struct page *page = ttm->pages[i]; + + if (!page) + continue; + + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) + set_page_dirty(page); + + mark_page_accessed(page); + } +} + static void amdgpu_trace_dma_map(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); @@ -740,7 +758,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct sg_page_iter sg_iter; int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); enum dma_data_direction direction = write ? 
@@ -753,13 +770,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) /* free the sg table and pages again */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - for_each_sg_page(ttm->sg->sgl, &sg_iter, ttm->sg->nents, 0) { - struct page *page = sg_page_iter_page(&sg_iter); - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - set_page_dirty(page); - - mark_page_accessed(page); - } + amdgpu_ttm_tt_mark_user_pages(ttm); amdgpu_trace_dma_unmap(ttm); -- cgit v1.2.3 From 3fe89771cb0a65d3b686bcafb5b7e3ebae0ea604 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 12 Sep 2017 14:25:14 -0400 Subject: drm/amdgpu: stop reserving the BO in the MMU callback v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead take the callback lock during the final parts of CS. This should solve the last remaining locking order problems with BO reservations. v2: rebase, make dummy functions static inline v3: add one more missing inline and comments Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 30 ++++++++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 30 +++++++++++++++++++++--------- 3 files changed, 56 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 134a049f87bd..740683474a1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -178,6 +178,7 @@ struct amdgpu_cs_parser; struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; +struct amdgpu_mn; enum amdgpu_cp_irq { AMDGPU_CP_IRQ_GFX_EOP = 0, @@ -1057,6 +1058,7 @@ struct amdgpu_cs_parser { /* buffer objects */ struct ww_acquire_ctx ticket; struct amdgpu_bo_list *bo_list; + struct amdgpu_mn *mn; struct amdgpu_bo_list_entry vm_pd; 
struct list_head validated; struct dma_fence *fence; @@ -1201,9 +1203,18 @@ void amdgpu_test_moves(struct amdgpu_device *adev); * MMU Notifier */ #if defined(CONFIG_MMU_NOTIFIER) +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); +void amdgpu_mn_lock(struct amdgpu_mn *mn); +void amdgpu_mn_unlock(struct amdgpu_mn *mn); #else +static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} +static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} +static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +{ + return NULL; +} static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { return -ENODEV; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4877df83b801..c2310d4eebc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -513,8 +513,11 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, INIT_LIST_HEAD(&p->validated); p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); - if (p->bo_list) + if (p->bo_list) { amdgpu_bo_list_get_list(p->bo_list, &p->validated); + if (p->bo_list->first_userptr != p->bo_list->num_entries) + p->mn = amdgpu_mn_get(p->adev); + } INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); @@ -722,11 +725,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (!error) - ttm_eu_fence_buffer_objects(&parser->ticket, - &parser->validated, - parser->fence); - else if (backoff) + if (error && backoff) ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); @@ -1127,14 +1126,29 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, struct amdgpu_ring *ring = p->job->ring; struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; struct amdgpu_job *job; + unsigned i; int r; + 
amdgpu_mn_lock(p->mn); + if (p->bo_list) { + for (i = p->bo_list->first_userptr; + i < p->bo_list->num_entries; ++i) { + struct amdgpu_bo *bo = p->bo_list->array[i].robj; + + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { + amdgpu_mn_unlock(p->mn); + return -ERESTARTSYS; + } + } + } + job = p->job; p->job = NULL; r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp); if (r) { amdgpu_job_free(job); + amdgpu_mn_unlock(p->mn); return r; } @@ -1150,6 +1164,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); + + ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); + amdgpu_mn_unlock(p->mn); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 6d216abd0e1d..99edb40b5f99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -106,6 +106,25 @@ static void amdgpu_mn_release(struct mmu_notifier *mn, schedule_work(&rmn->work); } + +/** + * amdgpu_mn_lock - take the write side lock for this mn + */ +void amdgpu_mn_lock(struct amdgpu_mn *mn) +{ + if (mn) + down_write(&mn->lock); +} + +/** + * amdgpu_mn_unlock - drop the write side lock for this mn + */ +void amdgpu_mn_unlock(struct amdgpu_mn *mn) +{ + if (mn) + up_write(&mn->lock); +} + /** * amdgpu_mn_invalidate_node - unmap all BOs of a node * @@ -126,20 +145,12 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) continue; - r = amdgpu_bo_reserve(bo, true); - if (r) { - DRM_ERROR("(%ld) failed to reserve user bo\n", r); - continue; - } - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); - - amdgpu_bo_unreserve(bo); } } @@ -223,7 +234,7 @@ static const struct mmu_notifier_ops amdgpu_mn_ops = { * 
* Creates a notifier context for current->mm. */ -static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) { struct mm_struct *mm = current->mm; struct amdgpu_mn *rmn; @@ -368,3 +379,4 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) up_write(&rmn->lock); mutex_unlock(&adev->mn_lock); } + -- cgit v1.2.3 From 9cca0b8e5df0ac438c65eec5044bfa089d16fbbe Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 6 Sep 2017 16:15:28 +0200 Subject: drm/amdgpu: move amdgpu_cs_sysvm_access_required into find_mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we need to find the mapping we need sysvm access anyway. Signed-off-by: Christian König Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 64 ++++++++++++--------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 16 ++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 11 +++--- 4 files changed, 36 insertions(+), 63 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 740683474a1c..5ee6cea8caaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -179,6 +179,7 @@ struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_mn; +struct amdgpu_bo_va_mapping; enum amdgpu_cp_irq { AMDGPU_CP_IRQ_GFX_EOP = 0, @@ -1900,10 +1901,9 @@ static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } #endif -struct amdgpu_bo_va_mapping * -amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, - uint64_t addr, struct amdgpu_bo **bo); -int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser); +int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, + uint64_t addr, 
struct amdgpu_bo **bo, + struct amdgpu_bo_va_mapping **mapping); #include "amdgpu_object.h" #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c2310d4eebc8..c30110a3024a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -921,11 +921,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, uint64_t offset; uint8_t *kptr; - m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, - &aobj); - if (!aobj) { + r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, + &aobj, &m); + if (r) { DRM_ERROR("IB va_start is invalid\n"); - return -EINVAL; + return r; } if ((chunk_ib->va_start + chunk_ib->ib_bytes) > @@ -1475,15 +1475,16 @@ err_free_fences: * virtual memory address. Returns allocation structure when found, NULL * otherwise. */ -struct amdgpu_bo_va_mapping * -amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, - uint64_t addr, struct amdgpu_bo **bo) +int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, + uint64_t addr, struct amdgpu_bo **bo, + struct amdgpu_bo_va_mapping **map) { struct amdgpu_bo_va_mapping *mapping; unsigned i; + int r; if (!parser->bo_list) - return NULL; + return 0; addr /= AMDGPU_GPU_PAGE_SIZE; @@ -1500,7 +1501,8 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, continue; *bo = lobj->bo_va->base.bo; - return mapping; + *map = mapping; + goto found; } list_for_each_entry(mapping, &lobj->bo_va->invalids, list) { @@ -1509,44 +1511,22 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, continue; *bo = lobj->bo_va->base.bo; - return mapping; + *map = mapping; + goto found; } } - return NULL; -} + return -EINVAL; -/** - * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM - * - * @parser: command submission parser context - * - * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM. 
- */ -int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser) -{ - unsigned i; - int r; +found: + r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); + if (unlikely(r)) + return r; - if (!parser->bo_list) + if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) return 0; - for (i = 0; i < parser->bo_list->num_entries; i++) { - struct amdgpu_bo *bo = parser->bo_list->array[i].robj; - - r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); - if (unlikely(r)) - return r; - - if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) - continue; - - bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_ttm_placement_from_domain(bo, bo->allowed_domains); - r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); - if (unlikely(r)) - return r; - } - - return 0; + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); + return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e19928dae8e3..331e34ac61fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -410,10 +410,10 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx); int r = 0; - mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo); - if (mapping == NULL) { + r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping); + if (r) { DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); - return -EINVAL; + return r; } if (!ctx->parser->adev->uvd.address_64_bit) { @@ -737,10 +737,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx); int r; - mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo); - if (mapping == NULL) { + r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping); + if (r) { DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); - return -EINVAL; + 
return r; } start = amdgpu_bo_gpu_offset(bo); @@ -917,10 +917,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) return -EINVAL; } - r = amdgpu_cs_sysvm_access_required(parser); - if (r) - return r; - ctx.parser = parser; ctx.buf_sizes = buf_sizes; ctx.ib_idx = ib_idx; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index c855366521ab..b46280c1279f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -559,6 +559,7 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo; uint64_t addr; + int r; if (index == 0xffffffff) index = 0; @@ -567,11 +568,11 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; addr += ((uint64_t)size) * ((uint64_t)index); - mapping = amdgpu_cs_find_mapping(p, addr, &bo); - if (mapping == NULL) { + r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); + if (r) { DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", addr, lo, hi, size, index); - return -EINVAL; + return r; } if ((addr + (uint64_t)size) > @@ -652,10 +653,6 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) p->job->vm = NULL; ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); - r = amdgpu_cs_sysvm_access_required(p); - if (r) - return r; - while (idx < ib->length_dw) { uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); -- cgit v1.2.3 From aebc5e6f50f770ec9392c3ca804f18b30797dfa7 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 6 Sep 2017 16:55:16 +0200 Subject: drm/amdgpu: rework amdgpu_cs_find_mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the VM instead of the BO list to find the BO for a virtual address. 
This fixes UVD/VCE in physical mode with VM local BOs. Signed-off-by: Christian König Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 42 +++++++----------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 ++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++ 4 files changed, 30 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c30110a3024a..5f19227b35e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1479,46 +1479,24 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo, struct amdgpu_bo_va_mapping **map) { + struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va_mapping *mapping; - unsigned i; int r; - if (!parser->bo_list) - return 0; - addr /= AMDGPU_GPU_PAGE_SIZE; - for (i = 0; i < parser->bo_list->num_entries; i++) { - struct amdgpu_bo_list_entry *lobj; - - lobj = &parser->bo_list->array[i]; - if (!lobj->bo_va) - continue; - - list_for_each_entry(mapping, &lobj->bo_va->valids, list) { - if (mapping->start > addr || - addr > mapping->last) - continue; - - *bo = lobj->bo_va->base.bo; - *map = mapping; - goto found; - } - - list_for_each_entry(mapping, &lobj->bo_va->invalids, list) { - if (mapping->start > addr || - addr > mapping->last) - continue; + mapping = amdgpu_vm_bo_lookup_mapping(vm, addr); + if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo) + return -EINVAL; - *bo = lobj->bo_va->base.bo; - *map = mapping; - goto found; - } - } + *bo = mapping->bo_va->base.bo; + *map = mapping; - return -EINVAL; + /* Double check that the BO is reserved by this CS */ + if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) + return -EINVAL; -found: r = 
amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); if (unlikely(r)) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 42492e63b3a2..a4891bea2ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -35,6 +35,7 @@ /* bo virtual addresses in a vm */ struct amdgpu_bo_va_mapping { + struct amdgpu_bo_va *bo_va; struct list_head list; struct rb_node rb; uint64_t start; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 545531db66db..758bbb9e77f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2086,6 +2086,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo *bo = bo_va->base.bo; + mapping->bo_va = bo_va; list_add(&mapping->list, &bo_va->invalids); amdgpu_vm_it_insert(mapping, &vm->va); @@ -2263,6 +2264,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); + mapping->bo_va = NULL; trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (valid) @@ -2348,6 +2350,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, if (tmp->last > eaddr) tmp->last = eaddr; + tmp->bo_va = NULL; list_add(&tmp->list, &vm->freed); trace_amdgpu_vm_bo_unmap(NULL, tmp); } @@ -2373,6 +2376,19 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, return 0; } +/** + * amdgpu_vm_bo_lookup_mapping - find mapping by address + * + * @vm: the requested VM + * + * Find a mapping by it's address. 
+ */ +struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, + uint64_t addr) +{ + return amdgpu_vm_it_iter_first(&vm->va, addr, addr); +} + /** * amdgpu_vm_bo_rmv - remove a bo to a specific vm * @@ -2398,6 +2414,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); + mapping->bo_va = NULL; trace_amdgpu_vm_bo_unmap(bo_va, mapping); list_add(&mapping->list, &vm->freed); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 90b7741d024b..c1accd15efc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -276,6 +276,8 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size); +struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, + uint64_t addr); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, -- cgit v1.2.3 From d5884513a31df072879c89c80306d544467ee770 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 8 Sep 2017 14:09:41 +0200 Subject: drm/amdgpu: fix VM sync with always valid BOs v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All users of a VM must always wait for updates with always valid BOs to be completed. 
v2: remove debugging leftovers, rename struct member Signed-off-by: Christian König Reviewed-by: Roger He Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 ++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5f19227b35e9..ff61073b7181 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -761,10 +761,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update); - if (r) - return r; - r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; @@ -819,6 +815,12 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) } r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync); + if (r) + return r; + + r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update); + if (r) + return r; if (amdgpu_vm_debug && p->bo_list) { /* Invalidate all BOs to test for userspace bugs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 758bbb9e77f3..64baa3138965 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1141,9 +1141,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, goto error_free; amdgpu_bo_fence(parent->base.bo, fence, true); - dma_fence_put(vm->last_dir_update); - vm->last_dir_update = dma_fence_get(fence); - dma_fence_put(fence); + dma_fence_put(vm->last_update); + vm->last_update = fence; } } @@ -1804,6 +1803,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, trace_amdgpu_vm_bo_mapping(mapping); } + if (bo_va->base.bo && + bo_va->base.bo->tbo.resv == vm->root.base.bo->tbo.resv) { + 
dma_fence_put(vm->last_update); + vm->last_update = dma_fence_get(bo_va->last_pt_update); + } + return 0; } @@ -2587,7 +2592,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), "CPU update of VM recommended only for large BAR system\n"); - vm->last_dir_update = NULL; + vm->last_update = NULL; flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_VRAM_CLEARED; @@ -2693,7 +2698,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } amdgpu_vm_free_levels(&vm->root); - dma_fence_put(vm->last_dir_update); + dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) amdgpu_vm_free_reserved_vmid(adev, vm, i); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index c1accd15efc8..cb6a6222fc3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -140,7 +140,7 @@ struct amdgpu_vm { /* contains the page directory */ struct amdgpu_vm_pt root; - struct dma_fence *last_dir_update; + struct dma_fence *last_update; /* protecting freed */ spinlock_t freed_lock; -- cgit v1.2.3 From 3d138c14c4174a2c80874a96935c87eee7c82ecf Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 5 Sep 2017 15:10:50 +0200 Subject: drm/amdgpu: revert "fix deadlock of reservation between cs and gpu reset v2" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 10e709cb296c98424c03408d23e3addeddcd4088. The patch doesn't work at all: 1. The CS can still be blocked because of amdgpu_ctx_add_fence(). 2. The order of submission isn't correct any more. 3. We could end up using freed up memory because we now drop the ctx reference too early. This needs to be fixed cleanly by doing the context handling after the BO handling, but this is a larger task; just avoid the obvious crashes for now. 
Signed-off-by: Christian König Reviewed-by: Monk Liu monk.liu@amd.com Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index baa90df90aea..ba10a83535d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1153,7 +1153,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); job->uf_sequence = cs->out.handle; amdgpu_job_free_resources(job); - amdgpu_cs_parser_fini(p, 0, true); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); @@ -1211,10 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; r = amdgpu_cs_submit(&parser, cs); - if (r) - goto out; - return 0; out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); return r; -- cgit v1.2.3 From 4e55eb3879fea6d8c7d414cebaa5bff1da58b4a1 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 11 Sep 2017 16:54:59 +0200 Subject: drm/amdgpu: fix amdgpu_vm_handle_moved as well v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no guarantee that the last BO_VA actually needed an update. In addition, all command submissions must wait for moved BOs to be cleared, not just the first one. v2: Don't overwrite any newer fence. 
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +-- 3 files changed, 12 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ff61073b7181..9f1202a4182f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -814,7 +814,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) } - r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync); + r = amdgpu_vm_handle_moved(adev, vm); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 64baa3138965..2df254cc802e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1743,7 +1743,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, dma_addr_t *pages_addr = NULL; struct ttm_mem_reg *mem; struct drm_mm_node *nodes; - struct dma_fence *exclusive; + struct dma_fence *exclusive, **last_update; uint64_t flags; int r; @@ -1769,6 +1769,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, else flags = 0x0; + if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) + last_update = &vm->last_update; + else + last_update = &bo_va->last_pt_update; + if (!clear && bo_va->base.moved) { bo_va->base.moved = false; list_splice_init(&bo_va->valids, &bo_va->invalids); @@ -1780,7 +1785,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, list_for_each_entry(mapping, &bo_va->invalids, list) { r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, mapping, flags, nodes, - &bo_va->last_pt_update); + last_update); if (r) return r; } @@ -1803,12 +1808,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, 
trace_amdgpu_vm_bo_mapping(mapping); } - if (bo_va->base.bo && - bo_va->base.bo->tbo.resv == vm->root.base.bo->tbo.resv) { - dma_fence_put(vm->last_update); - vm->last_update = dma_fence_get(bo_va->last_pt_update); - } - return 0; } @@ -2006,15 +2005,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * PTs have to be reserved! */ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_sync *sync) + struct amdgpu_vm *vm) { - struct amdgpu_bo_va *bo_va = NULL; bool clear; int r = 0; spin_lock(&vm->status_lock); while (!list_empty(&vm->moved)) { + struct amdgpu_bo_va *bo_va; + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, base.vm_status); spin_unlock(&vm->status_lock); @@ -2030,9 +2029,6 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, } spin_unlock(&vm->status_lock); - if (bo_va) - r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index cb6a6222fc3f..48c58ae4bb3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -250,8 +250,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_sync *sync); + struct amdgpu_vm *vm); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); -- cgit v1.2.3 From eb01abc7c4fd1faa26d0787f410894d9c704eb60 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 15 Sep 2017 13:40:31 +0800 Subject: drm/amdgpu:make ctx_add_fence interruptible(v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise a GPU hang will leave the application unkillable under timedout=0 mode. v2: Fix the job/job->s_fence memory leak; unlock mn; remove the ERROR msg after the wait is interrupted. Signed-off-by: Monk 
Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 ++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 12 +++++++----- 3 files changed, 23 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 33e1d619d060..9cce59f6ada5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -735,8 +735,8 @@ struct amdgpu_ctx_mgr { struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); -uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct dma_fence *fence); +int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, + struct dma_fence *fence, uint64_t *seq); struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9f1202a4182f..c6a214f1e991 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1129,6 +1129,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; struct amdgpu_job *job; unsigned i; + uint64_t seq; + int r; amdgpu_mn_lock(p->mn); @@ -1158,10 +1160,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->fence_ctx = entity->fence_context; p->fence = dma_fence_get(&job->base.s_fence->finished); + r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq); + if (r) { + dma_fence_put(p->fence); + dma_fence_put(&job->base.s_fence->finished); + amdgpu_job_free(job); + amdgpu_mn_unlock(p->mn); + return r; + } + amdgpu_cs_post_dependencies(p); - cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); - job->uf_sequence = 
cs->out.handle; + cs->out.handle = seq; + job->uf_sequence = seq; + amdgpu_job_free_resources(job); trace_amdgpu_cs_ioctl(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a11e44340b23..75c933b1a432 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -246,8 +246,8 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) return 0; } -uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct dma_fence *fence) +int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, + struct dma_fence *fence, uint64_t* handler) { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; uint64_t seq = cring->sequence; @@ -258,9 +258,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, other = cring->fences[idx]; if (other) { signed long r; - r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT); if (r < 0) - DRM_ERROR("Error (%ld) waiting for fence!\n", r); + return r; } dma_fence_get(fence); @@ -271,8 +271,10 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, spin_unlock(&ctx->ring_lock); dma_fence_put(other); + if (handler) + *handler = seq; - return seq; + return 0; } struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, -- cgit v1.2.3 From 7ca24cf2d2269bde25e21c02a77fe81995a081ae Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 12 Sep 2017 22:42:14 +0200 Subject: drm/amdgpu: add FENCE_TO_HANDLE ioctl that returns syncobj or sync_file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit for being able to convert an amdgpu fence into one of the handles. Mesa will use this. 
Reviewed-by: Dave Airlie Signed-off-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 61 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + include/uapi/drm/amdgpu_drm.h | 16 +++++++++ 5 files changed, 82 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0725ab940f49..a23b8af95319 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1311,6 +1311,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c6a214f1e991..ab83dfcabb41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -25,6 +25,7 @@ * Jerome Glisse */ #include +#include #include #include #include @@ -1330,6 +1331,66 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev, return fence; } +int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + union drm_amdgpu_fence_to_handle *info = data; + struct dma_fence *fence; + struct drm_syncobj *syncobj; + struct sync_file *sync_file; + int fd, r; + + if (amdgpu_kms_vram_lost(adev, fpriv)) + return 
-ENODEV; + + fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence); + if (IS_ERR(fence)) + return PTR_ERR(fence); + + switch (info->in.what) { + case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ: + r = drm_syncobj_create(&syncobj, 0, fence); + dma_fence_put(fence); + if (r) + return r; + r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle); + drm_syncobj_put(syncobj); + return r; + + case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD: + r = drm_syncobj_create(&syncobj, 0, fence); + dma_fence_put(fence); + if (r) + return r; + r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle); + drm_syncobj_put(syncobj); + return r; + + case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD: + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + dma_fence_put(fence); + return fd; + } + + sync_file = sync_file_create(fence); + dma_fence_put(fence); + if (!sync_file) { + put_unused_fd(fd); + return -ENOMEM; + } + + fd_install(fd, sync_file->file); + info->out.handle = fd; + return 0; + + default: + return -EINVAL; + } +} + /** * amdgpu_cs_wait_all_fence - wait on all fences to signal * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 9816f50a7498..ad02d3fbb44c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -70,9 +70,10 @@ * - 3.18.0 - Export gpu always on cu bitmap * - 3.19.0 - Add support for UVD MJPEG decode * - 3.20.0 - Add support for local BOs + * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 20 +#define KMS_DRIVER_MINOR 21 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4fd06f8d9768..51841259e23f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1024,6 +1024,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index e055776f2f4c..4c6e8c482ee4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -52,6 +52,7 @@ extern "C" { #define DRM_AMDGPU_GEM_USERPTR 0x11 #define DRM_AMDGPU_WAIT_FENCES 0x12 #define DRM_AMDGPU_VM 0x13 +#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -67,6 +68,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) +#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -515,6 +517,20 @@ struct drm_amdgpu_cs_chunk_sem { __u32 handle; }; +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2 + +union drm_amdgpu_fence_to_handle { + struct { + struct drm_amdgpu_fence fence; + __u32 what; + } in; + struct { + __u32 handle; + } out; +}; + struct 
drm_amdgpu_cs_chunk_data { union { struct drm_amdgpu_cs_chunk_ib ib_data; -- cgit v1.2.3 From 177ae09b5d699a5ebd1cafcee78889db968abf54 Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Fri, 15 Sep 2017 20:44:06 -0400 Subject: drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a flag to signal that access to a BO will be synchronized through an external mechanism. Currently all buffers shared between contexts are subject to implicit synchronization. However, this is only required for protocols that currently don't support an explicit synchronization mechanism (DRI2/3). This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that users can specify when it is safe to disable implicit sync. v2: only disable explicit sync in amdgpu_cs_ioctl Reviewed-by: Christian König Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++---- include/uapi/drm/amdgpu_drm.h | 2 ++ 8 files changed, 29 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ab83dfcabb41..38027a00f8ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -705,7 +705,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, &p->validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, + 
amdgpu_bo_explicit_sync(e->robj)); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b0d45c8e6bb3..21e99366cab3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | + AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + return -EINVAL; /* reject invalid gem domains */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index c26ef53604af..428aae048f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -193,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } } +/** + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced + */ +static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; +} + int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586f44312f9..a4bf21f8f1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, * * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence - * @shared: true if we should only sync to the exclusive fence + * @explicit_sync: true if we should only sync to the exclusive fence * * Sync to the fence */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - 
void *owner) + void *owner, bool explicit_sync) { struct reservation_object_list *flist; struct dma_fence *f; @@ -191,6 +191,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, f = reservation_object_get_excl(resv); r = amdgpu_sync_fence(adev, sync, f); + if (explicit_sync) + return r; + flist = reservation_object_get_list(resv); if (!flist || r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index dc7687993317..70d7e3a279a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - void *owner); + void *owner, + bool explicit_sync); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 10952c3e5eb6..a2282bacf960 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1489,7 +1489,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, job->vm_needs_flush = vm_needs_flush; if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + false); if (r) { DRM_ERROR("sync failed (%d).\n", r); goto error_free; @@ -1581,7 +1582,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, false); if (r) { DRM_ERROR("sync failed (%d).\n", r); goto error_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index eb4a01c14eee..c559d76ff695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1035,7 +1035,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); + amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false); r = amdgpu_sync_wait(&sync, true); amdgpu_sync_free(&sync); @@ -1176,11 +1176,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, parent->base.bo->tbo.resv, - AMDGPU_FENCE_OWNER_VM); + AMDGPU_FENCE_OWNER_VM, false); if (shadow) amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM); + AMDGPU_FENCE_OWNER_VM, false); WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, @@ -1644,7 +1644,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, goto error_free; r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, - owner); + owner, false); if (r) goto error_free; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4c6e8c482ee4..b62484af8ccb 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -91,6 +91,8 @@ extern "C" { #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) /* Flag that BO is always valid in this VM */ #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitly synchronized */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3 From b2ff0e8ac4ce1fb647ae40feb4cf26bc9301e0c9 Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Mon, 20 Feb 2017 17:53:19 -0500 Subject: drm/amdgpu: add framework for HW specific priority settings v9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an initial framework for changing the HW priorities of rings. 
The framework allows requesting priority changes for the lifetime of an amdgpu_job. After the job completes the priority will decay to the next lowest priority for which a request is still valid. A new ring function set_priority() can now be populated to take care of the HW specific programming sequence for priority changes. v2: set priority before emitting IB, and take a ref on amdgpu_job v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: plug amdgpu_ring_restore_priority_cb into amdgpu_job_free_cb v5: use atomic for tracking job priorities instead of last_job v6: rename amdgpu_ring_priority_[get/put]() and align parameters v7: replace spinlocks with mutexes for KIQ compatibility v8: raise ring priority during cs_ioctl, instead of job_run v9: priority_get() before push_job() Reviewed-by: Christian König Acked-by: Christian König Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 76 ++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 15 ++++++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 7 +++ 5 files changed, 103 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 38027a00f8ab..fe7dd44ac9fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1177,6 +1177,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = seq; amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, + amd_sched_get_job_priority(&job->base)); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4510627ae83e..83d13431cbdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -103,6 +103,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); + amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->dep_sync); @@ -139,6 +140,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->fence_ctx = entity->fence_context; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, + amd_sched_get_job_priority(&job->base)); amd_sched_entity_push_job(&job->base); return 0; @@ -203,6 +206,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); + amdgpu_job_free_resources(job); return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 019932a7ea3a..e5ece1fae149 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -154,6 +154,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) ring->funcs->end_use(ring); } +/** + * amdgpu_ring_priority_put - restore a ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Release a request for executing at @priority + */ +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + int i; + + if (!ring->funcs->set_priority) + return; + + if (atomic_dec_return(&ring->num_jobs[priority]) > 0) + return; + + /* no need to restore if the job is already at the lowest priority */ + if (priority == AMD_SCHED_PRIORITY_NORMAL) + return; + + mutex_lock(&ring->priority_mutex); + /* something higher prio is executing, no need to decay */ + if (ring->priority > priority) + goto 
out_unlock; + + /* decay priority to the next level with a job available */ + for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { + if (i == AMD_SCHED_PRIORITY_NORMAL + || atomic_read(&ring->num_jobs[i])) { + ring->priority = i; + ring->funcs->set_priority(ring, i); + break; + } + } + +out_unlock: + mutex_unlock(&ring->priority_mutex); +} + +/** + * amdgpu_ring_priority_get - change the ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Request a ring's priority to be raised to @priority (refcounted). + */ +void amdgpu_ring_priority_get(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + if (!ring->funcs->set_priority) + return; + + atomic_inc(&ring->num_jobs[priority]); + + mutex_lock(&ring->priority_mutex); + if (priority <= ring->priority) + goto out_unlock; + + ring->priority = priority; + ring->funcs->set_priority(ring, priority); + +out_unlock: + mutex_unlock(&ring->priority_mutex); +} + /** * amdgpu_ring_init - init driver ring struct. 
* @@ -169,7 +238,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned max_dw, struct amdgpu_irq_src *irq_src, unsigned irq_type) { - int r; + int r, i; int sched_hw_submission = amdgpu_sched_hw_submission; /* Set the hw submission limit higher for KIQ because @@ -247,9 +316,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->max_dw = max_dw; + ring->priority = AMD_SCHED_PRIORITY_NORMAL; + mutex_init(&ring->priority_mutex); INIT_LIST_HEAD(&ring->lru_list); amdgpu_ring_lru_touch(adev, ring); + for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i) + atomic_set(&ring->num_jobs[i], 0); + if (amdgpu_debugfs_ring_init(adev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 491bd5512dcc..0d9ce141404c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -24,6 +24,7 @@ #ifndef __AMDGPU_RING_H__ #define __AMDGPU_RING_H__ +#include #include "gpu_scheduler.h" /* max number of rings */ @@ -56,6 +57,7 @@ struct amdgpu_device; struct amdgpu_ring; struct amdgpu_ib; struct amdgpu_cs_parser; +struct amdgpu_job; /* * Fences. 
@@ -147,6 +149,9 @@ struct amdgpu_ring_funcs { void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_tmz)(struct amdgpu_ring *ring, bool start); + /* priority functions */ + void (*set_priority) (struct amdgpu_ring *ring, + enum amd_sched_priority priority); }; struct amdgpu_ring { @@ -187,6 +192,12 @@ struct amdgpu_ring { volatile u32 *cond_exe_cpu_addr; unsigned vm_inv_eng; bool has_compute_vm_bug; + + atomic_t num_jobs[AMD_SCHED_PRIORITY_MAX]; + struct mutex priority_mutex; + /* protected by priority_mutex */ + int priority; + #if defined(CONFIG_DEBUG_FS) struct dentry *ent; #endif @@ -197,6 +208,10 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); +void amdgpu_ring_priority_get(struct amdgpu_ring *ring, + enum amd_sched_priority priority); +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 38e622ce06de..dbcaa2e1c5c7 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -170,4 +170,11 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); bool amd_sched_dependency_optimized(struct dma_fence* fence, struct amd_sched_entity *entity); void amd_sched_job_kickout(struct amd_sched_job *s_job); + +static inline enum amd_sched_priority +amd_sched_get_job_priority(struct amd_sched_job *job) +{ + return (job->s_entity->rq - job->sched->sched_rq); +} + #endif -- cgit v1.2.3 From 
ad864d243826cedc53404a1c0db7d1e38ddceb84 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 10 Oct 2017 16:50:16 -0400 Subject: drm/amdgpu: Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables old fence waiting before reservation lock is acquired which in turn is part of a bigger solution to deadlock happening when gpu reset with VRAM recovery occurs during intensive rendering. Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 112 ++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 51 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index fe7dd44ac9fe..9166d5e1e557 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -845,15 +845,60 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_ring *ring = p->job->ring; - int i, r; + int i, j, r; + + for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { + + struct amdgpu_cs_chunk *chunk; + struct amdgpu_ib *ib; + struct drm_amdgpu_cs_chunk_ib *chunk_ib; + + chunk = &p->chunks[i]; + ib = &p->job->ibs[j]; + chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + continue; + + if (p->job->ring->funcs->parse_cs) { + struct amdgpu_bo_va_mapping *m; + struct amdgpu_bo *aobj = NULL; + uint64_t offset; + uint8_t *kptr; + + r = amdgpu_cs_find_mapping(p, chunk_ib->va_start, + &aobj, &m); + if (r) { + DRM_ERROR("IB va_start is invalid\n"); + return r; + } - /* Only for UVD/VCE VM emulation */ - if (ring->funcs->parse_cs) { - for (i = 0; i < p->job->num_ibs; i++) { - r = amdgpu_ring_parse_cs(ring, p, 
i); + if ((chunk_ib->va_start + chunk_ib->ib_bytes) > + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("IB va_start+ib_bytes is invalid\n"); + return -EINVAL; + } + + /* the IB should be reserved at this point */ + r = amdgpu_bo_kmap(aobj, (void **)&kptr); + if (r) { + return r; + } + + offset = m->start * AMDGPU_GPU_PAGE_SIZE; + kptr += chunk_ib->va_start - offset; + + memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); + amdgpu_bo_kunmap(aobj); + + /* Only for UVD/VCE VM emulation */ + r = amdgpu_ring_parse_cs(ring, p, j); if (r) return r; + } + + j++; } if (p->job->vm) { @@ -919,54 +964,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, parser->job->ring = ring; - if (ring->funcs->parse_cs) { - struct amdgpu_bo_va_mapping *m; - struct amdgpu_bo *aobj = NULL; - uint64_t offset; - uint8_t *kptr; - - r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, - &aobj, &m); - if (r) { - DRM_ERROR("IB va_start is invalid\n"); - return r; - } - - if ((chunk_ib->va_start + chunk_ib->ib_bytes) > - (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("IB va_start+ib_bytes is invalid\n"); - return -EINVAL; - } - - /* the IB should be reserved at this point */ - r = amdgpu_bo_kmap(aobj, (void **)&kptr); - if (r) { - return r; - } - - offset = m->start * AMDGPU_GPU_PAGE_SIZE; - kptr += chunk_ib->va_start - offset; - - r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - - memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); - amdgpu_bo_kunmap(aobj); - } else { - r = amdgpu_ib_get(adev, vm, 0, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - + r = amdgpu_ib_get(adev, vm, + ring->funcs->parse_cs ? 
chunk_ib->ib_bytes : 0, + ib); + if (r) { + DRM_ERROR("Failed to get ib !\n"); + return r; } ib->gpu_addr = chunk_ib->va_start; ib->length_dw = chunk_ib->ib_bytes / 4; ib->flags = chunk_ib->flags; + j++; } @@ -1212,6 +1221,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } + r = amdgpu_cs_ib_fill(adev, &parser); + if (r) + goto out; + r = amdgpu_cs_parser_bos(&parser, data); if (r) { if (r == -ENOMEM) @@ -1222,9 +1235,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } reserved_buffers = true; - r = amdgpu_cs_ib_fill(adev, &parser); - if (r) - goto out; r = amdgpu_cs_dependencies(adev, &parser); if (r) { -- cgit v1.2.3 From 0ae94444c08a0adf2fab4aab26be0646ee445a19 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 10 Oct 2017 16:50:17 -0400 Subject: drm/amdgpu: Move old fence waiting before reservation lock is aquired v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helps avoiding deadlock during GPU reset. Added mutex to amdgpu_ctx to preserve order of fences on a ring. 
v2: Put waiting logic in a separate function in amdgpu_ctx.c Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 ++++++++++++++++++++++++------ 3 files changed, 34 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 951c8db01412..76033e2cdba8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -738,6 +738,7 @@ struct amdgpu_ctx { bool preamble_presented; enum amd_sched_priority init_priority; enum amd_sched_priority override_priority; + struct mutex lock; }; struct amdgpu_ctx_mgr { @@ -760,9 +761,12 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); + void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); + /* * file private structure */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9166d5e1e557..5de092eab0fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_chunk; } + mutex_lock(&p->ctx->lock); + /* get chunks */ chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, @@ -737,8 +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, dma_fence_put(parser->fence); - if (parser->ctx) + if (parser->ctx) { + mutex_unlock(&parser->ctx->lock); amdgpu_ctx_put(parser->ctx); + } if (parser->bo_list) 
amdgpu_bo_list_put(parser->bo_list); @@ -895,9 +899,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, r = amdgpu_ring_parse_cs(ring, p, j); if (r) return r; - } - j++; } @@ -985,7 +987,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) return -EINVAL; - return 0; + return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx); } static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a78b03f65c69..4309820658c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, if (!ctx->fences) return -ENOMEM; + mutex_init(&ctx->lock); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { ctx->rings[i].sequence = 1; ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; @@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) &ctx->rings[i].entity); amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); + + mutex_destroy(&ctx->lock); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@ -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, idx = seq & (amdgpu_sched_jobs - 1); other = cring->fences[idx]; - if (other) { - signed long r; - r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT); - if (r < 0) - return r; - } + if (other) + BUG_ON(!dma_fence_is_signaled(other)); dma_fence_get(fence); @@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, } } +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id) +{ + struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id]; + unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1); + struct dma_fence *other = cring->fences[idx]; + + if (other) { + signed long r; + r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); 
+ if (r < 0) { + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + return r; + } + } + + return 0; +} + void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) { mutex_init(&mgr->lock); -- cgit v1.2.3 From 396bcb41e035df7b98fb150ca950bf213e70ae7b Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 9 Oct 2017 14:45:09 +0200 Subject: drm/amdgpu: partial revert VRAM lost handling v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep blocking the CS, but revert everything else. Mapping BOs and info IOCTL are harmless and can still happen even when VRAM content is lost. Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 11 ----------- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 ----- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 14 ++++++++++---- 3 files changed, 10 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5de092eab0fa..0c07df72743c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1272,16 +1272,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; struct dma_fence *fence; long r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; @@ -1350,16 +1346,12 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_fence_to_handle *info = data; 
struct dma_fence *fence; struct drm_syncobj *syncobj; struct sync_file *sync_file; int fd, r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence); if (IS_ERR(fence)) return PTR_ERR(fence); @@ -1521,15 +1513,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_wait_fences *wait = data; uint32_t fence_count = wait->in.fence_count; struct drm_amdgpu_fence *fences_user; struct drm_amdgpu_fence *fences; int r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; /* Get the fences from userspace */ fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 21e99366cab3..fb72edc4c026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -579,11 +579,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->operation); return -EINVAL; } - if ((args->operation == AMDGPU_VA_OP_MAP) || - (args->operation == AMDGPU_VA_OP_REPLACE)) { - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - } INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&duplicates); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 82e8d43b235a..f759836d10ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -270,7 +270,6 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; struct drm_amdgpu_info *info = data; struct amdgpu_mode_info *minfo = &adev->mode_info; void __user *out = (void __user 
*)(uintptr_t)info->return_pointer; @@ -283,8 +282,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (!info->return_size || !info->return_pointer) return -EINVAL; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: @@ -792,10 +789,19 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) vga_switcheroo_process_delayed_switch(); } +/** + * amdgpu_kms_vram_lost - check if VRAM was lost for this client + * + * @adev: amdgpu device + * @fpriv: client private + * + * Check if all CS is blocked for the client because of lost VRAM + */ bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv) { - return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter); + return fpriv->vram_lost_counter != + atomic_read(&adev->vram_lost_counter); } /** -- cgit v1.2.3 From 14e47f93c5cc4a1237dbacc137e174706093b69c Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 9 Oct 2017 15:04:41 +0200 Subject: drm/amdgpu: keep copy of VRAM lost counter in job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of reading the current counter from fpriv. 
Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 13 +++++++------ 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 76033e2cdba8..aa70f8c045b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1125,6 +1125,7 @@ struct amdgpu_job { uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; + uint32_t vram_lost_counter; /* user fence handling */ uint64_t uf_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 0c07df72743c..9daa7cac0ffb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -172,6 +172,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (ret) goto free_all_kdata; + p->job->vram_lost_counter = fpriv->vram_lost_counter; + if (p->uf_entry.robj) p->job->uf_addr = uf_offset; kfree(chunk_array); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 83d13431cbdd..4f2b5acc8743 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -61,6 +61,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; + (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->dep_sync); @@ -180,8 +181,8 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) { struct dma_fence *fence = NULL; + struct amdgpu_device *adev; 
struct amdgpu_job *job; - struct amdgpu_fpriv *fpriv = NULL; int r; if (!sched_job) { @@ -189,17 +190,17 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) return NULL; } job = to_amdgpu_job(sched_job); + adev = job->adev; BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); trace_amdgpu_sched_run_job(job); - if (job->vm) - fpriv = container_of(job->vm, struct amdgpu_fpriv, vm); /* skip ib schedule when vram is lost */ - if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv)) + if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) { DRM_ERROR("Skip scheduling IBs!\n"); - else { - r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); + } else { + r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, + &fence); if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); } -- cgit v1.2.3 From e55f2b646df3318e24f12b8388ab6e5cccb3e92d Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 9 Oct 2017 15:18:43 +0200 Subject: drm/amdgpu: move the VRAM lost counter per context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of per device track the VRAM lost per context and return ECANCELED instead of ENODEV. 
Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 16 ---------------- 4 files changed, 8 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index aa70f8c045b1..67b864436be1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -732,10 +732,11 @@ struct amdgpu_ctx { struct amdgpu_device *adev; struct amdgpu_queue_mgr queue_mgr; unsigned reset_counter; + uint32_t vram_lost_counter; spinlock_t ring_lock; struct dma_fence **fences; struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; - bool preamble_presented; + bool preamble_presented; enum amd_sched_priority init_priority; enum amd_sched_priority override_priority; struct mutex lock; @@ -778,7 +779,6 @@ struct amdgpu_fpriv { struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; - u32 vram_lost_counter; }; /* @@ -1860,8 +1860,6 @@ static inline bool amdgpu_has_atpx(void) { return false; } extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; extern const int amdgpu_max_kms_ioctl; -bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, - struct amdgpu_fpriv *fpriv); int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); void amdgpu_driver_unload_kms(struct drm_device *dev); void amdgpu_driver_lastclose_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9daa7cac0ffb..b355189533d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -172,7 +172,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (ret) goto free_all_kdata; - 
p->job->vram_lost_counter = fpriv->vram_lost_counter; + p->job->vram_lost_counter = atomic_read(&p->adev->vram_lost_counter); + if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { + ret = -ECANCELED; + goto free_all_kdata; + } if (p->uf_entry.robj) p->job->uf_addr = uf_offset; @@ -1205,7 +1209,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_cs *cs = data; struct amdgpu_cs_parser parser = {}; bool reserved_buffers = false; @@ -1213,8 +1216,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!adev->accel_working) return -EBUSY; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; parser.adev = adev; parser.filp = filp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 4309820658c4..c184468e2b2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -75,6 +75,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, } ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); ctx->init_priority = priority; ctx->override_priority = AMD_SCHED_PRIORITY_UNSET; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index f759836d10ef..ff1a416a66c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -789,21 +789,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) vga_switcheroo_process_delayed_switch(); } -/** - * amdgpu_kms_vram_lost - check if VRAM was lost for this client - * - * @adev: amdgpu device - * @fpriv: client private - * - * Check if all CS is blocked for the client because of lost VRAM - */ -bool amdgpu_kms_vram_lost(struct amdgpu_device 
*adev, - struct amdgpu_fpriv *fpriv) -{ - return fpriv->vram_lost_counter != - atomic_read(&adev->vram_lost_counter); -} - /** * amdgpu_driver_open_kms - drm callback for open * @@ -860,7 +845,6 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); - fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter); file_priv->driver_priv = fpriv; out_suspend: -- cgit v1.2.3 From 7a0a48ddf63bc9944b9690c6fa043ea4305f7f79 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 9 Oct 2017 15:51:10 +0200 Subject: drm/amdgpu: set -ECANCELED when dropping jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And return from the wait functions the fence error code. Signed-off-by: Christian König Reviewed-by: Nicolai Hähnle Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b355189533d2..2ae5d523ca10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1298,6 +1298,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, r = PTR_ERR(fence); else if (fence) { r = dma_fence_wait_timeout(fence, true, timeout); + if (r > 0 && fence->error) + r = fence->error; dma_fence_put(fence); } else r = 1; @@ -1435,6 +1437,9 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev, if (r == 0) break; + + if (fence->error) + return fence->error; } memset(wait, 0, sizeof(*wait)); @@ -1495,7 +1500,7 @@ out: wait->out.status = (r > 0); wait->out.first_signaled = first; /* set return value 0 to indicate success */ - r = 0; + r = array[first]->error; err_free_fence_array: for (i = 0; i < fence_count; i++) diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4f2b5acc8743..a8357885776e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -197,6 +197,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) trace_amdgpu_sched_run_job(job); /* skip ib schedule when vram is lost */ if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) { + dma_fence_set_error(&job->base.s_fence->finished, -ECANCELED); DRM_ERROR("Skip scheduling IBs!\n"); } else { r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, -- cgit v1.2.3 From 26eedf6daec4e7937c8f0f1dde5e9b8e3dcebfd3 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 11 Oct 2017 17:02:02 -0400 Subject: drm/amdgpu: Fix extra call to amdgpu_ctx_put. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In amdgpu_cs_parser_init() in case of error handling amdgpu_ctx_put() is called without setting p->ctx to NULL after that, later amdgpu_cs_parser_fini() also calls amdgpu_ctx_put() again and mess up the reference count. 
Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2ae5d523ca10..dfd37785563f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -97,7 +97,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; - goto put_ctx; + goto free_chunk; } p->nchunks = cs->in.num_chunks; @@ -105,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) GFP_KERNEL); if (!p->chunks) { ret = -ENOMEM; - goto put_ctx; + goto free_chunk; } for (i = 0; i < p->nchunks; i++) { @@ -191,8 +191,6 @@ free_partial_kdata: kfree(p->chunks); p->chunks = NULL; p->nchunks = 0; -put_ctx: - amdgpu_ctx_put(p->ctx); free_chunk: kfree(chunk_array); -- cgit v1.2.3 From c5795c555bbaca51192ffc6164bb85845ecdf717 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 12 Oct 2017 12:16:33 +0200 Subject: drm/amdgpu: minor CS optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only need to loop over all IBs for old UVD/VCE command stream patching. 
Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 37 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dfd37785563f..52dd78ee8fd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -853,36 +853,37 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_ring *ring = p->job->ring; - int i, j, r; - - for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { - - struct amdgpu_cs_chunk *chunk; - struct amdgpu_ib *ib; - struct drm_amdgpu_cs_chunk_ib *chunk_ib; - - chunk = &p->chunks[i]; - ib = &p->job->ibs[j]; - chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; + int r; - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) - continue; + /* Only for UVD/VCE VM emulation */ + if (p->job->ring->funcs->parse_cs) { + unsigned i, j; - if (p->job->ring->funcs->parse_cs) { + for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { + struct drm_amdgpu_cs_chunk_ib *chunk_ib; struct amdgpu_bo_va_mapping *m; struct amdgpu_bo *aobj = NULL; + struct amdgpu_cs_chunk *chunk; + struct amdgpu_ib *ib; uint64_t offset; uint8_t *kptr; + chunk = &p->chunks[i]; + ib = &p->job->ibs[j]; + chunk_ib = chunk->kdata; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + continue; + r = amdgpu_cs_find_mapping(p, chunk_ib->va_start, - &aobj, &m); + &aobj, &m); if (r) { DRM_ERROR("IB va_start is invalid\n"); return r; } if ((chunk_ib->va_start + chunk_ib->ib_bytes) > - (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { DRM_ERROR("IB va_start+ib_bytes is invalid\n"); return -EINVAL; } @@ -899,12 +900,12 @@ static int amdgpu_cs_ib_vm_chunk(struct 
amdgpu_device *adev, memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); amdgpu_bo_kunmap(aobj); - /* Only for UVD/VCE VM emulation */ r = amdgpu_ring_parse_cs(ring, p, j); if (r) return r; + + j++; } - j++; } if (p->job->vm) { -- cgit v1.2.3 From c70b78a71e9a283240f72dfdfff8fd2388db51da Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 16 Oct 2017 20:02:08 +0800 Subject: drm/amdgpu:fix duplicated setting job's vram_lost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 52dd78ee8fd0..32cf83e2f2d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -172,7 +172,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (ret) goto free_all_kdata; - p->job->vram_lost_counter = atomic_read(&p->adev->vram_lost_counter); if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { ret = -ECANCELED; goto free_all_kdata; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index a8357885776e..0cfc68db575b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -61,11 +61,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; - (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->dep_sync); amdgpu_sync_create(&(*job)->sched_sync); + (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); return 0; } -- cgit v1.2.3 From 
4b6b691ee38abae8842aed61d442dfb315c45789 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 16 Oct 2017 10:32:04 +0200 Subject: drm/amdgpu: linear validate first then bind to GART MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For VM emulation for old UVD/VCE we need to validate the BO with linear VRAM flag set first and then eventually bind it to GART. Validating with linear VRAM flag set can move the BO to GART making UVD/VCE read/write from an unbound GART BO. Signed-off-by: Christian König Reviewed-by: Alex Deucher CC: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 32cf83e2f2d9..f7fceb63413c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1582,14 +1582,14 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) return -EINVAL; - r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); - if (unlikely(r)) - return r; - - if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) - return 0; + if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, + false); + if (r) + return r; + } - (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); - return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false); + return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); } -- cgit v1.2.3 From cdadab89f8cc37dd6101e2bbab50037d72e0e1d0 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Thu, 9 Nov 2017 17:18:18 +0800 
Subject: drm/amdgpu: Fix null pointer issue in amdgpu_cs_wait_any_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The array[first] may be null when the fence has already been signaled. BUG: SWDEV-136239 Signed-off-by: Emily Deng Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f7fceb63413c..146e05f21d35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1497,8 +1497,11 @@ out: memset(wait, 0, sizeof(*wait)); wait->out.status = (r > 0); wait->out.first_signaled = first; - /* set return value 0 to indicate success */ - r = array[first]->error; + + if (array[first]) + r = array[first]->error; + else + r = 0; err_free_fence_array: for (i = 0; i < fence_count; i++) -- cgit v1.2.3 From c6f92f9fbe7dbcc8903a67229aa88b4077ae4422 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:37:55 -0800 Subject: mm: remove cold parameter for release_pages All callers of release_pages claim the pages being released are cache hot. As no one cares about the hotness of pages being released to the allocator, just ditch the parameter. No performance impact is expected as the overhead is marginal. The parameter is removed simply because it is a bit stupid to have a useless parameter copied everywhere. 
Link: http://lkml.kernel.org/r/20171018075952.10627-7-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Jan Kara Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 6 +++--- drivers/gpu/drm/i915/i915_gem_userptr.c | 4 ++-- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- fs/fuse/dev.c | 2 +- include/linux/pagemap.h | 2 +- include/linux/swap.h | 2 +- mm/swap.c | 8 ++++---- mm/swap_state.c | 2 +- 11 files changed, 18 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 60d8bedb694d..cd664832f9e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -553,8 +553,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, * invalidated it. 
Free it and try again */ release_pages(e->user_pages, - e->robj->tbo.ttm->num_pages, - false); + e->robj->tbo.ttm->num_pages); kvfree(e->user_pages); e->user_pages = NULL; } @@ -691,8 +690,7 @@ error_free_pages: continue; release_pages(e->user_pages, - e->robj->tbo.ttm->num_pages, - false); + e->robj->tbo.ttm->num_pages); kvfree(e->user_pages); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 6149a47fe63d..0bda8f2a188a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -347,7 +347,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, return 0; free_pages: - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false); + release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); unlock_mmap_sem: up_read(&current->mm->mmap_sem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index bc746131987f..d792959fac43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -659,7 +659,7 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) return 0; release_pages: - release_pages(pages, pinned, 0); + release_pages(pages, pinned); return r; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 57881167ccd2..bcc8c2d7c7c9 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -779,7 +779,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages( up_read(&mm->mmap_sem); if (ret < 0) { - release_pages(pvec, pinned, 0); + release_pages(pvec, pinned); kvfree(pvec); return ERR_PTR(ret); } @@ -852,7 +852,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) } } - release_pages(pvec, pinned, 0); + release_pages(pvec, pinned); kvfree(pvec); work = kmalloc(sizeof(*work), GFP_KERNEL); @@ -886,7 +886,7 @@ static void
etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj) if (etnaviv_obj->pages) { int npages = etnaviv_obj->base.size >> PAGE_SHIFT; - release_pages(etnaviv_obj->pages, npages, 0); + release_pages(etnaviv_obj->pages, npages); kvfree(etnaviv_obj->pages); } put_task_struct(etnaviv_obj->userptr.task); diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 709efe2357ea..aa22361bd5a1 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -554,7 +554,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } mutex_unlock(&obj->mm.lock); - release_pages(pvec, pinned, 0); + release_pages(pvec, pinned); kvfree(pvec); i915_gem_object_put(obj); @@ -668,7 +668,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) __i915_gem_userptr_set_active(obj, true); if (IS_ERR(pages)) - release_pages(pvec, pinned, 0); + release_pages(pvec, pinned); kvfree(pvec); return pages; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index bf69bf9086bf..1fdfc7a46072 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -597,7 +597,7 @@ release_sg: kfree(ttm->sg); release_pages: - release_pages(ttm->pages, pinned, 0); + release_pages(ttm->pages, pinned); return r; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a42d89371748..17f0d05bfd4c 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1636,7 +1636,7 @@ out_finish: static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) { - release_pages(req->pages, req->num_pages, false); + release_pages(req->pages, req->num_pages); } static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e0f7181118fe..4c6790bb7afb 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -118,7 +118,7 @@ static inline void mapping_set_gfp_mask(struct address_space 
*m, gfp_t mask) m->gfp_mask = mask; } -void release_pages(struct page **pages, int nr, bool cold); +void release_pages(struct page **pages, int nr); /* * speculatively take a reference to a page. diff --git a/include/linux/swap.h b/include/linux/swap.h index 454f042bcdd5..c2b8128799c1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -510,7 +510,7 @@ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) #define free_page_and_swap_cache(page) \ put_page(page) #define free_pages_and_swap_cache(pages, nr) \ - release_pages((pages), (nr), false); + release_pages((pages), (nr)); static inline void show_swap_cache_info(void) { diff --git a/mm/swap.c b/mm/swap.c index 88a19b6cdf7c..29cf75f1a860 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -210,7 +210,7 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, } if (pgdat) spin_unlock_irqrestore(&pgdat->lru_lock, flags); - release_pages(pvec->pages, pvec->nr, 0); + release_pages(pvec->pages, pvec->nr); pagevec_reinit(pvec); } @@ -740,7 +740,7 @@ void lru_add_drain_all(void) * Decrement the reference count on all the pages in @pages. If it * fell to zero, remove the page from the LRU and free it. 
*/ -void release_pages(struct page **pages, int nr, bool cold) +void release_pages(struct page **pages, int nr) { int i; LIST_HEAD(pages_to_free); @@ -817,7 +817,7 @@ void release_pages(struct page **pages, int nr, bool cold) spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags); mem_cgroup_uncharge_list(&pages_to_free); - free_hot_cold_page_list(&pages_to_free, cold); + free_hot_cold_page_list(&pages_to_free, 0); } EXPORT_SYMBOL(release_pages); @@ -837,7 +837,7 @@ void __pagevec_release(struct pagevec *pvec) lru_add_drain(); pvec->drained = true; } - release_pages(pvec->pages, pagevec_count(pvec), 0); + release_pages(pvec->pages, pagevec_count(pvec)); pagevec_reinit(pvec); } EXPORT_SYMBOL(__pagevec_release); diff --git a/mm/swap_state.c b/mm/swap_state.c index 374d446f7a0a..39ae7cfad90f 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -319,7 +319,7 @@ void free_pages_and_swap_cache(struct page **pages, int nr) lru_add_drain(); for (i = 0; i < nr; i++) free_swap_cache(pagep[i]); - release_pages(pagep, nr, false); + release_pages(pagep, nr); } /* -- cgit v1.2.3 From eb174c77e258f93b0f1046afd23a0aede68be3f2 Mon Sep 17 00:00:00 2001 From: Roger He Date: Fri, 17 Nov 2017 12:45:18 +0800 Subject: drm/amd/amdgpu: fix over-bound accessing in amdgpu_cs_wait_any_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes an oops in amdgpu_cs_wait_any_fence. 
Reviewed-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Roger He Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 146e05f21d35..bdef497a6a26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1498,7 +1498,7 @@ out: wait->out.status = (r > 0); wait->out.first_signaled = first; - if (array[first]) + if (first < fence_count && array[first]) r = array[first]->error; else r = 0; -- cgit v1.2.3