From fc9c8f5459f2dfa7f83bee3f388faaf570ef96ed Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 29 Jun 2017 11:46:15 +0200 Subject: drm/amdgpu: add vm_needs_flush parameter to amdgpu_copy_buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to flush the system VM here. Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 8ee69652be8c..c34cf2c1ae4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -535,7 +535,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr, amdgpu_bo_size(bo), resv, fence, - direct); + direct, false); if (!r) amdgpu_bo_fence(bo, *fence, true); @@ -588,7 +588,7 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr, amdgpu_bo_size(bo), resv, fence, - direct); + direct, false); if (!r) amdgpu_bo_fence(bo, *fence, true); -- cgit v1.2.3 From 5327dd8acf05a48b01804af9e4fad90054c6cdb7 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 3 Jul 2017 15:21:42 +0200 Subject: drm/amdgpu: remove stale TODO comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That is already fixed. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index c34cf2c1ae4e..a85e75327456 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -951,7 +951,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) size = bo->mem.num_pages << PAGE_SHIFT; offset = bo->mem.start << PAGE_SHIFT; - /* TODO: figure out how to map scattered VRAM to the CPU */ if ((offset + size) <= adev->mc.visible_vram_size) return 0; -- cgit v1.2.3 From 00f06b246a3056bbaa901a90a5a93c9f81ab8e36 Mon Sep 17 00:00:00 2001 From: John Brooks Date: Tue, 27 Jun 2017 22:33:18 -0400 Subject: drm/amdgpu: Throttle visible VRAM moves separately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BO move throttling code is designed to allow VRAM to fill quickly if it is relatively empty. However, this does not take into account situations where the visible VRAM is smaller than total VRAM, and total VRAM may not be close to full but the visible VRAM segment is under pressure. In such situations, visible VRAM would experience unrestricted swapping and performance would drop. Add a separate counter specifically for moves involving visible VRAM, and check it before moving BOs there. v2: Only perform calculations for separate counter if visible VRAM is smaller than total VRAM. (Michel Dänzer) v3: [Michel Dänzer] * Use BO's location rather than the AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag to determine whether to account a move for visible VRAM in most cases. * Use a single if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { block in amdgpu_cs_get_threshold_for_moves. Fixes: 95844d20ae02 (drm/amdgpu: throttle buffer migrations at CS using a fixed MBps limit (v2)) Signed-off-by: John Brooks Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 92 ++++++++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 12 +++- 3 files changed, 87 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0d6b0617cdf0..c290b262d7da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1117,7 +1117,9 @@ struct amdgpu_cs_parser { struct list_head validated; struct dma_fence *fence; uint64_t bytes_moved_threshold; + uint64_t bytes_moved_vis_threshold; uint64_t bytes_moved; + uint64_t bytes_moved_vis; struct amdgpu_bo_list_entry *evictable; /* user fence */ @@ -1555,6 +1557,7 @@ struct amdgpu_device { spinlock_t lock; s64 last_update_us; s64 accum_us; /* accumulated microseconds */ + s64 accum_us_vis; /* for visible VRAM */ u32 log2_max_MBps; } mm_stats; @@ -1846,7 +1849,8 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev); bool amdgpu_need_post(struct amdgpu_device *adev); void amdgpu_update_display_priority(struct amdgpu_device *adev); -void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes); +void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, + u64 num_vis_bytes); void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5599c01b265d..33789510e663 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes) * ticks. The accumulated microseconds (us) are converted to bytes and * returned. */ -static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) +static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, + u64 *max_bytes, + u64 *max_vis_bytes) { s64 time_us, increment_us; - u64 max_bytes; u64 free_vram, total_vram, used_vram; /* Allow a maximum of 200 accumulated ms. This is basically per-IB @@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) */ const s64 us_upper_bound = 200000; - if (!adev->mm_stats.log2_max_MBps) - return 0; + if (!adev->mm_stats.log2_max_MBps) { + *max_bytes = 0; + *max_vis_bytes = 0; + return; + } total_vram = adev->mc.real_vram_size - adev->vram_pin_size; used_vram = atomic64_read(&adev->vram_usage); @@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us); } - /* This returns 0 if the driver is in debt to disallow (optional) + /* This is set to 0 if the driver is in debt to disallow (optional) * buffer moves. */ - max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + + /* Do the same for visible VRAM if half of it is free */ + if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { + u64 total_vis_vram = adev->mc.visible_vram_size; + u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage); + + if (used_vis_vram < total_vis_vram) { + u64 free_vis_vram = total_vis_vram - used_vis_vram; + adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis + + increment_us, us_upper_bound); + + if (free_vis_vram >= total_vis_vram / 2) + adev->mm_stats.accum_us_vis = + max(bytes_to_us(adev, free_vis_vram / 2), + adev->mm_stats.accum_us_vis); + } + + *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis); + } else { + *max_vis_bytes = 0; + } spin_unlock(&adev->mm_stats.lock); - return max_bytes; } /* Report how many bytes have really been moved for the last command * submission. This can result in a debt that can stop buffer migrations * temporarily. */ -void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes) +void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, + u64 num_vis_bytes) { spin_lock(&adev->mm_stats.lock); adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes); + adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes); spin_unlock(&adev->mm_stats.lock); } @@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; uint32_t domain; int r; @@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, /* Don't move this buffer if we have depleted our allowance * to move it. Don't move anything if the threshold is zero. */ - if (p->bytes_moved < p->bytes_moved_threshold) - domain = bo->prefered_domains; - else + if (p->bytes_moved < p->bytes_moved_threshold) { + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { + /* And don't move a CPU_ACCESS_REQUIRED BO to limited + * visible VRAM if we've depleted our allowance to do + * that. + */ + if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) + domain = bo->prefered_domains; + else + domain = bo->allowed_domains; + } else { + domain = bo->prefered_domains; + } + } else { domain = bo->allowed_domains; + } retry: amdgpu_ttm_placement_from_domain(bo, domain); initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - + initial_bytes_moved; + p->bytes_moved += bytes_moved; + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + p->bytes_moved_vis += bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { domain = bo->allowed_domains; @@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *candidate = p->evictable; struct amdgpu_bo *bo = candidate->robj; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; + bool update_bytes_moved_vis; uint32_t other; /* If we reached our current BO we can forget it */ @@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, /* Good we can try to move this BO somewhere else */ amdgpu_ttm_placement_from_domain(bo, other); + update_bytes_moved_vis = + adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT; initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; + p->bytes_moved += bytes_moved; + if (update_bytes_moved_vis) + p->bytes_moved_vis += bytes_moved; if (unlikely(r)) break; @@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, list_splice(&need_pages, &p->validated); } - p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev); + amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, + &p->bytes_moved_vis_threshold); p->bytes_moved = 0; + p->bytes_moved_vis = 0; p->evictable = list_last_entry(&p->validated, struct amdgpu_bo_list_entry, tv.head); @@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_validate; } - amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved); - + amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, + p->bytes_moved_vis); fpriv->vm.last_eviction_counter = atomic64_read(&p->adev->num_evictions); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a85e75327456..e429829ae93d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -322,7 +322,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct amdgpu_bo *bo; enum ttm_bo_type type; unsigned long page_align; - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; size_t acc_size; int r; @@ -398,8 +398,14 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); - amdgpu_cs_report_moved_bytes(adev, - atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved); + bytes_moved = atomic64_read(&adev->num_bytes_moved) - + initial_bytes_moved; + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved); + else + amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); if (unlikely(r != 0)) return r; -- cgit v1.2.3 From 96cf8271df546d0f4cfc9ddddbc42dce633e0190 Mon Sep 17 00:00:00 2001 From: John Brooks Date: Fri, 30 Jun 2017 11:31:08 -0400 Subject: drm/amdgpu: Set/clear CPU_ACCESS flag on page fault and move to VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a BO is moved to VRAM, clear AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED. This allows it to potentially later move to invisible VRAM if the CPU does not access it again. Setting the CPU_ACCESS flag in amdgpu_bo_fault_reserve_notify() also means that we can remove the loop to restrict lpfn to the end of visible VRAM, because amdgpu_ttm_placement_init() will do it for us. v3 [Michel Dänzer] * Use AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED instead of a new flag (Christian König) * Clear flag in amdgpu_bo_move instead of amdgpu_move_ram_vram (Christian) * Explicitly mention amdgpu_bo_fault_reserve_notify in amdgpu_bo_move * Also clear flag in amdgpu_bo_create_restricted Suggested-by: Michel Dänzer Signed-off-by: John Brooks Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 20 ++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 9 +++++++++ 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e429829ae93d..93601fbea695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -432,6 +432,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, trace_amdgpu_bo_create(bo); + /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */ + if (type == ttm_bo_type_device) + bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + return 0; fail_unreserve: @@ -945,13 +949,17 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - unsigned long offset, size, lpfn; - int i, r; + unsigned long offset, size; + int r; if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return 0; abo = container_of(bo, struct amdgpu_bo, tbo); + + /* Remember that this BO was accessed by the CPU */ + abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (bo->mem.mem_type != TTM_PL_VRAM) return 0; @@ -967,14 +975,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* hurrah the memory is not visible ! */ atomic64_inc(&adev->num_vram_cpu_page_faults); amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); - lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; - for (i = 0; i < abo->placement.num_placement; i++) { - /* Force into visible VRAM */ - if ((abo->placements[i].flags & TTM_PL_FLAG_VRAM) && - (!abo->placements[i].lpfn || - abo->placements[i].lpfn > lpfn)) - abo->placements[i].lpfn = lpfn; - } r = ttm_bo_validate(bo, &abo->placement, false, false); if (unlikely(r == -ENOMEM)) { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e238084b7142..9bbaffbd5f6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -499,6 +499,15 @@ memcpy: } } + if (bo->type == ttm_bo_type_device && + new_mem->mem_type == TTM_PL_VRAM && + old_mem->mem_type != TTM_PL_VRAM) { + /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU + * accesses the BO after it's moved. + */ + abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + } + /* update statistics */ atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved); return 0; -- cgit v1.2.3 From 41d9a6a728ea34d697dc74bc5fea1b6f6c5fce80 Mon Sep 17 00:00:00 2001 From: John Brooks Date: Tue, 27 Jun 2017 22:33:21 -0400 Subject: drm/amdgpu: Don't force BOs into visible VRAM for page faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need for page faults to force BOs into visible VRAM if it's full, and the time it takes to do so is great enough to cause noticeable stuttering. Add GTT as a possible placement so that if visible VRAM is full, page faults move BOs to GTT instead of evicting other BOs from VRAM. Suggested-by: Michel Dänzer Signed-off-by: John Brooks Reviewed-by: Michel Dänzer Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 93601fbea695..6e24339ecc46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -974,18 +974,21 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* hurrah the memory is not visible ! */ atomic64_inc(&adev->num_vram_cpu_page_faults); - amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); + amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT); + + /* Avoid costly evictions; only set GTT as a busy placement */ + abo->placement.num_busy_placement = 1; + abo->placement.busy_placement = &abo->placements[1]; + r = ttm_bo_validate(bo, &abo->placement, false, false); - if (unlikely(r == -ENOMEM)) { - amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); - return ttm_bo_validate(bo, &abo->placement, false, false); - } else if (unlikely(r != 0)) { + if (unlikely(r != 0)) return r; - } offset = bo->mem.start << PAGE_SHIFT; /* this should never happen */ - if ((offset + size) > adev->mc.visible_vram_size) + if (bo->mem.mem_type == TTM_PL_VRAM && + (offset + size) > adev->mc.visible_vram_size) return -EINVAL; return 0; -- cgit v1.2.3 From 6375bbb4d259416ad867ab887333ee88d1b90323 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 11 Jul 2017 17:25:49 +0200 Subject: drm/amdgpu: make sure BOs are always kunmapped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a BO is moved or destroyed it shouldn't be kmapped any more. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6e24339ecc46..a019556a8e71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -93,6 +93,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct amdgpu_bo, tbo); + amdgpu_bo_kunmap(bo); amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL); drm_gem_object_release(&bo->gem_base); @@ -931,6 +932,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, abo = container_of(bo, struct amdgpu_bo, tbo); amdgpu_vm_bo_invalidate(adev, abo); + amdgpu_bo_kunmap(abo); + /* remember the eviction */ if (evict) atomic64_inc(&adev->num_evictions); -- cgit v1.2.3 From 07306b4f5f7a8ba6a9de96420d3524dcc8698b00 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 12 Jul 2017 12:36:47 +0800 Subject: drm/amdgpu: ttm_bind only when user needs gpu_addr in bo pin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a019556a8e71..3ec43cf9ad78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -735,15 +735,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, dev_err(adev->dev, "%p pin failed\n", bo); goto error; } - r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); - if (unlikely(r)) { - dev_err(adev->dev, "%p bind failed\n", bo); - goto error; - } bo->pin_count = 1; - if (gpu_addr != NULL) + if (gpu_addr != NULL) { + r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); + if (unlikely(r)) { + dev_err(adev->dev, "%p bind failed\n", bo); + goto error; + } *gpu_addr = amdgpu_bo_gpu_offset(bo); + } if (domain == AMDGPU_GEM_DOMAIN_VRAM) { adev->vram_pin_size += amdgpu_bo_size(bo); if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) -- cgit v1.2.3 From f5e1c740afb6fec8969234a066b1628246a6a08d Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 20 Jul 2017 23:45:18 +0200 Subject: drm/amdgpu: cleanup kptr handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't keep around the same pointer twice. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 26 +++++++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- 5 files changed, 19 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0fa6438c152c..241abc140896 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -425,7 +425,6 @@ struct amdgpu_bo { struct ttm_bo_kmap_obj kmap; u64 flags; unsigned pin_count; - void *kptr; u64 tiling_flags; u64 metadata_flags; void *metadata; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 0a8ee2411180..cd95574705ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -250,7 +250,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start; info->fix.smem_start = adev->mc.aper_base + tmp; info->fix.smem_len = amdgpu_bo_size(abo); - info->screen_base = abo->kptr; + info->screen_base = amdgpu_bo_kptr(abo); info->screen_size = amdgpu_bo_size(abo); drm_fb_helper_fill_var(info, &rfbdev->helper, sizes->fb_width, sizes->fb_height); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 3ec43cf9ad78..8fddea4bc146 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -609,16 +609,16 @@ err: int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) { - bool is_iomem; + void *kptr; long r; if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; - if (bo->kptr) { - if (ptr) { - *ptr = bo->kptr; - } + kptr = amdgpu_bo_kptr(bo); + if (kptr) { + if (ptr) + *ptr = kptr; return 0; } @@ -631,19 +631,23 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) if (r) return r; - bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); if (ptr) - *ptr = bo->kptr; + *ptr = amdgpu_bo_kptr(bo); return 0; } +void *amdgpu_bo_kptr(struct amdgpu_bo *bo) +{ + bool is_iomem; + + return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); +} + void amdgpu_bo_kunmap(struct amdgpu_bo *bo) { - if (bo->kptr == NULL) - return; - bo->kptr = NULL; - ttm_bo_kunmap(&bo->kmap); + if (bo->kmap.bo) + ttm_bo_kunmap(&bo->kmap); } struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 833b172a2c2a..f53d53d7a0af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -147,6 +147,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); +void *amdgpu_bo_kptr(struct amdgpu_bo *bo); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo); void amdgpu_bo_unref(struct amdgpu_bo **bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 250c8e80e646..9255c3e579fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1060,7 +1060,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, shadow = parent->bo->shadow; if (vm->use_cpu_for_update) { - pd_addr = (unsigned long)parent->bo->kptr; + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); if (unlikely(r)) return r; @@ -1401,7 +1401,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, pt = entry->bo; if (use_cpu_update) { - pe_start = (unsigned long)pt->kptr; + pe_start = (unsigned long)amdgpu_bo_kptr(pt); } else { if (pt->shadow) { pe_start = amdgpu_bo_gpu_offset(pt->shadow); -- cgit v1.2.3 From 53766e5adac75872e9d413cdbfd0e47ae40701f9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 27 Jul 2017 14:52:53 +0200 Subject: drm/amdgpu: improve amdgpu_bo_create_kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make allocating the new BO optional. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 8fddea4bc146..81d40e3bfbdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -239,15 +239,20 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr) { + bool free = false; int r; - r = amdgpu_bo_create(adev, size, align, true, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, bo_ptr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); - return r; + if (!*bo_ptr) { + r = amdgpu_bo_create(adev, size, align, true, domain, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, bo_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", + r); + return r; + } + free = true; } r = amdgpu_bo_reserve(*bo_ptr, false); @@ -278,7 +283,8 @@ error_unreserve: amdgpu_bo_unreserve(*bo_ptr); error_free: - amdgpu_bo_unref(bo_ptr); + if (free) + amdgpu_bo_unref(bo_ptr); return r; } -- cgit v1.2.3 From 9d903cbd9995bd63e04d71362a8b59ce49437544 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 27 Jul 2017 17:08:54 +0200 Subject: drm/amdgpu: add amdgpu_bo_create_reserved MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same as amdgpu_bo_create_kernel, but keeps the BO reserved. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 48 +++++++++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 +++ 2 files changed, 44 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 81d40e3bfbdb..494b79374828 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -220,7 +220,7 @@ static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, } /** - * amdgpu_bo_create_kernel - create BO for kernel use + * amdgpu_bo_create_reserved - create reserved BO for kernel use * * @adev: amdgpu device object * @size: size for the new BO @@ -230,14 +230,15 @@ static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, * @gpu_addr: GPU addr of the pinned BO * @cpu_addr: optional CPU address mapping * - * Allocates and pins a BO for kernel internal use. + * Allocates and pins a BO for kernel internal use, and returns it still + * reserved. * * Returns 0 on success, negative error code otherwise. */ -int amdgpu_bo_create_kernel(struct amdgpu_device *adev, - unsigned long size, int align, - u32 domain, struct amdgpu_bo **bo_ptr, - u64 *gpu_addr, void **cpu_addr) +int amdgpu_bo_create_reserved(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr) { bool free = false; int r; @@ -275,8 +276,6 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, } } - amdgpu_bo_unreserve(*bo_ptr); - return 0; error_unreserve: @@ -289,6 +288,39 @@ error_free: return r; } +/** + * amdgpu_bo_create_kernel - create BO for kernel use + * + * @adev: amdgpu device object + * @size: size for the new BO + * @align: alignment for the new BO + * @domain: where to place it + * @bo_ptr: resulting BO + * @gpu_addr: GPU addr of the pinned BO + * @cpu_addr: optional CPU address mapping + * + * Allocates and pins a BO for kernel internal use. + * + * Returns 0 on success, negative error code otherwise. + */ +int amdgpu_bo_create_kernel(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr) +{ + int r; + + r = amdgpu_bo_create_reserved(adev, size, align, domain, bo_ptr, + gpu_addr, cpu_addr); + + if (r) + return r; + + amdgpu_bo_unreserve(*bo_ptr); + + return 0; +} + /** * amdgpu_bo_free_kernel - free BO for kernel use * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 3b92d5236670..cbf6e6d54ef0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -201,6 +201,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct ttm_placement *placement, struct reservation_object *resv, struct amdgpu_bo **bo_ptr); +int amdgpu_bo_create_reserved(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, -- cgit v1.2.3 From 2046d46db9166bddc84778f0b3477f6d1e9068ea Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Thu, 20 Jul 2017 18:49:09 -0400 Subject: drm/amdgpu: Add a parameter to amdgpu_bo_create() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter init_value contains the value to which we initialized VRAM bo when AMDGPU_GEM_CREATE_VRAM_CLEARED flag is set. Signed-off-by: Yong Zhao Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 12 +++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- 14 files changed, 29 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 37971d9402e3..3d96bd7371e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -184,7 +184,8 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, return -ENOMEM; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0, + &(*mem)->bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 2fb299afc12b..63ec1e1bb6aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -81,7 +81,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, n = AMDGPU_BENCHMARK_ITERATIONS; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, - NULL, &sobj); + NULL, 0, &sobj); if (r) { goto out_cleanup; } @@ -94,7 +94,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, goto out_cleanup; } r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, - NULL, &dobj); + NULL, 0, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index a99e0bca6812..69806c5bcd01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -124,7 +124,7 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE, true, domain, flags, NULL, &placement, NULL, - &obj); + 0, &obj); if (ret) { DRM_ERROR("(%d) bo create failed\n", ret); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 5cc4987cd887..94c1e2e8e34c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -144,7 +144,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->gart.robj); + NULL, NULL, 0, &adev->gart.robj); if (r) { return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 88085e7f72de..a227d34d3852 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -59,7 +59,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, retry: r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, - flags, NULL, NULL, &robj); + flags, NULL, NULL, 0, &robj); if (r) { if (r != -ERESTARTSYS) { if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 494b79374828..16f31cbd9147 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -247,7 +247,7 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, r = amdgpu_bo_create(adev, size, align, true, domain, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, bo_ptr); + NULL, NULL, 0, bo_ptr); if (r) { dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); @@ -356,6 +356,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct sg_table *sg, struct ttm_placement *placement, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr) { struct amdgpu_bo *bo; @@ -456,7 +457,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); + r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -508,6 +509,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &placement, bo->tbo.resv, + 0, &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); @@ -519,11 +521,15 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } +/* init_value will only take effect when flags contains + * AMDGPU_GEM_CREATE_VRAM_CLEARED. + */ int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr) { struct ttm_placement placement = {0}; @@ -538,7 +544,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain, flags, sg, &placement, - resv, bo_ptr); + resv, init_value, bo_ptr); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index cbf6e6d54ef0..b6d8be84b884 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -193,6 +193,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_restricted(struct amdgpu_device *adev, unsigned long size, int byte_align, @@ -200,6 +201,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct sg_table *sg, struct ttm_placement *placement, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 6bdc866570ab..5b3f92891f89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -69,7 +69,7 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, ww_mutex_lock(&resv->lock, NULL); ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo); + AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, 0, &bo); ww_mutex_unlock(&resv->lock); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 5ca75a456ad2..3144400435b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -64,7 +64,7 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&sa_manager->flist[i]); r = amdgpu_bo_create(adev, size, align, true, domain, - 0, NULL, NULL, &sa_manager->bo); + 0, NULL, NULL, 0, &sa_manager->bo); if (r) { dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 3c4d7574d704..ed8c3739015b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -61,7 +61,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0, - NULL, NULL, &vram_obj); + NULL, NULL, 0, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -82,7 +82,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, gtt_obj + i); + NULL, 0, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index fcfb9d4f7477..d3468a2d8839 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -381,7 +381,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, bo); + NULL, NULL, 0, bo); if (err) { dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); goto failed; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 2ca09f111f08..aefecf6c1e7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1051,7 +1051,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &bo); + NULL, NULL, 0, &bo); if (r) return r; @@ -1101,7 +1101,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &bo); + NULL, NULL, 0, &bo); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index d58ca8240ae1..041e0121590c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -359,7 +359,7 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &bo); + NULL, NULL, 0, &bo); if (r) return r; @@ -411,7 +411,7 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &bo); + NULL, NULL, 0, &bo); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9255c3e579fc..601e899005d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -333,7 +333,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, flags, - NULL, resv, &pt); + NULL, resv, 0, &pt); if (r) return r; @@ -2538,7 +2538,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, AMDGPU_GEM_DOMAIN_VRAM, flags, - NULL, NULL, &vm->root.bo); + NULL, NULL, 0, &vm->root.bo); if (r) goto error_free_sched_entity; -- cgit v1.2.3 From 6d7d9c5aa212d069b5271cd6bb8a9e05a3e1b986 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Tue, 8 Aug 2017 07:58:01 -0400 Subject: drm/amdgpu: Fix preferred typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change "prefered" to "preferred" Signed-off-by: Kent Russell Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 4 ++-- 7 files changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 75c6107eaccf..59089e027f4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -156,11 +156,11 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, entry->tv.bo = &entry->robj->tbo; entry->tv.shared = !entry->robj->prime_shared_count; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) gds_obj = entry->robj; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS) gws_obj = entry->robj; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA) oa_obj = entry->robj; total_size += amdgpu_bo_size(entry->robj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 69806c5bcd01..8c462b091aff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -166,7 +166,7 @@ static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h r = amdgpu_bo_reserve(obj, true); if (unlikely(r != 0)) return r; - r = amdgpu_bo_pin_restricted(obj, obj->prefered_domains, + r = amdgpu_bo_pin_restricted(obj, obj->preferred_domains, min_offset, max_offset, mcaddr); amdgpu_bo_unreserve(obj); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index da8209945ef2..c05479ec825a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -348,11 +348,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, * that. */ if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) - domain = bo->prefered_domains; + domain = bo->preferred_domains; else domain = bo->allowed_domains; } else { - domain = bo->prefered_domains; + domain = bo->preferred_domains; } } else { domain = bo->allowed_domains; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 86a88a025b17..81127ffcefb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -306,7 +306,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, return r; bo = gem_to_amdgpu_bo(gobj); - bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); if (r) @@ -693,7 +693,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, info.bo_size = robj->gem_base.size; info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; - info.domains = robj->prefered_domains; + info.domains = robj->preferred_domains; info.domain_flags = robj->flags; amdgpu_bo_unreserve(robj); if (copy_to_user(out, &info, sizeof(info))) @@ -711,10 +711,10 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, amdgpu_bo_unreserve(robj); break; } - robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | + robj->preferred_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_CPU); - robj->allowed_domains = robj->prefered_domains; + robj->allowed_domains = robj->preferred_domains; if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 16f31cbd9147..6e72fe7901ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -391,13 +391,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, } INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); - bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | + bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_CPU | AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA); - bo->allowed_domains = bo->prefered_domains; + bo->allowed_domains = bo->preferred_domains; if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; @@ -606,7 +606,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) if (bo->pin_count) return 0; - domain = bo->prefered_domains; + domain = bo->preferred_domains; retry: amdgpu_ttm_placement_from_domain(bo, domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index b6d8be84b884..9b7b4fcb047b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -65,7 +65,7 @@ struct amdgpu_bo_va { struct amdgpu_bo { /* Protected by tbo.reserved */ - u32 prefered_domains; + u32 preferred_domains; u32 allowed_domains; struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; struct ttm_placement placement; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 509f7a63d40c..9ab58245e518 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -105,12 +105,12 @@ TRACE_EVENT(amdgpu_bo_create, __entry->bo = bo; __entry->pages = bo->tbo.num_pages; __entry->type = bo->tbo.mem.mem_type; - __entry->prefer = bo->prefered_domains; + __entry->prefer = bo->preferred_domains; __entry->allow = bo->allowed_domains; __entry->visible = bo->flags; ), - TP_printk("bo=%p, pages=%u, type=%d, prefered=%d, allowed=%d, visible=%d", + TP_printk("bo=%p, pages=%u, type=%d, preferred=%d, allowed=%d, visible=%d", __entry->bo, __entry->pages, __entry->type, __entry->prefer, __entry->allow, __entry->visible) ); -- cgit v1.2.3 From 9255d77d8cfd714f336a48168a9dada32912b19a Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 7 Aug 2017 17:11:33 +0200 Subject: drm/amdgpu: move gtt usage tracking into the gtt manager v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It doesn't make much sense to count those numbers twice. v2: use and atomic64_t instead Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 33 +++++++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + 5 files changed, 26 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a5427cf4b19d..593618e6c186 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1484,7 +1484,6 @@ struct amdgpu_device { struct amdgpu_wb wb; atomic64_t vram_usage; atomic64_t vram_vis_usage; - atomic64_t gtt_usage; atomic64_t num_bytes_moved; atomic64_t num_evictions; atomic64_t num_vram_cpu_page_faults; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 97c63ee1e20a..9e05e257729f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -28,7 +28,7 @@ struct amdgpu_gtt_mgr { struct drm_mm mm; spinlock_t lock; - uint64_t available; + atomic64_t available; }; /** @@ -54,7 +54,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, size = (adev->mc.gart_size >> PAGE_SHIFT) - start; drm_mm_init(&mgr->mm, start, size); spin_lock_init(&mgr->lock); - mgr->available = p_size; + atomic64_set(&mgr->available, p_size); man->priv = mgr; return 0; } @@ -173,11 +173,11 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, int r; spin_lock(&mgr->lock); - if (mgr->available < mem->num_pages) { + if (atomic64_read(&mgr->available) < mem->num_pages) { spin_unlock(&mgr->lock); return 0; } - mgr->available -= mem->num_pages; + atomic64_sub(mem->num_pages, &mgr->available); spin_unlock(&mgr->lock); node = kzalloc(sizeof(*node), GFP_KERNEL); @@ -204,9 +204,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, return 0; err_out: - spin_lock(&mgr->lock); - mgr->available += mem->num_pages; - spin_unlock(&mgr->lock); + atomic64_add(mem->num_pages, &mgr->available); return r; } @@ -233,13 +231,27 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, spin_lock(&mgr->lock); if (node->start != AMDGPU_BO_INVALID_OFFSET) drm_mm_remove_node(node); - mgr->available += mem->num_pages; spin_unlock(&mgr->lock); + atomic64_add(mem->num_pages, &mgr->available); kfree(node); mem->mm_node = NULL; } +/** + * amdgpu_gtt_mgr_usage - return usage of GTT domain + * + * @man: TTM memory type manager + * + * Return how many bytes are used in the GTT domain + */ +uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_gtt_mgr *mgr = man->priv; + + return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE; +} + /** * amdgpu_gtt_mgr_debug - dump VRAM table * @@ -251,7 +263,6 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, struct drm_printer *printer) { - struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_gtt_mgr *mgr = man->priv; spin_lock(&mgr->lock); @@ -259,8 +270,8 @@ static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, spin_unlock(&mgr->lock); drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", - man->size, mgr->available, - (u64)atomic64_read(&adev->gtt_usage) >> 20); + man->size, (u64)atomic64_read(&mgr->available), + amdgpu_gtt_mgr_usage(man) >> 20); } const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index c908f972283c..889f96fcfc81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -461,7 +461,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ui64 = atomic64_read(&adev->vram_vis_usage); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GTT_USAGE: - ui64 = atomic64_read(&adev->gtt_usage); + ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GDS_CONFIG: { struct drm_amdgpu_info_gds gds_info; @@ -514,7 +514,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file mem.gtt.total_heap_size *= PAGE_SIZE; mem.gtt.usable_heap_size = mem.gtt.total_heap_size - adev->gart_pin_size; - mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage); + mem.gtt.heap_usage = + amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; return copy_to_user(out, &mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6e72fe7901ec..57d2cb6bd331 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -62,7 +62,6 @@ static void amdgpu_update_memory_usage(struct amdgpu_device *adev, if (new_mem) { switch (new_mem->mem_type) { case TTM_PL_TT: - atomic64_add(new_mem->size, &adev->gtt_usage); break; case TTM_PL_VRAM: atomic64_add(new_mem->size, &adev->vram_usage); @@ -75,7 +74,6 @@ static void amdgpu_update_memory_usage(struct amdgpu_device *adev, if (old_mem) { switch (old_mem->mem_type) { case TTM_PL_TT: - atomic64_sub(old_mem->size, &adev->gtt_usage); break; case TTM_PL_VRAM: atomic64_sub(old_mem->size, &adev->vram_usage); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 0e2399f32de7..1307ab581449 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -66,6 +66,7 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, struct ttm_buffer_object *tbo, const struct ttm_place *place, struct ttm_mem_reg *mem); +uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man); int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, -- cgit v1.2.3 From 3c848bb38aca1f7fd23edeb867b89d714a2e6ce2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 7 Aug 2017 17:46:49 +0200 Subject: drm/amdgpu: move vram usage tracking into the vram manager v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like a better place for this. v2: use atomic64_t members instead Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 9 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 50 ------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 72 ++++++++++++++++++++++++++-- 6 files changed, 79 insertions(+), 62 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 593618e6c186..ad944aea0d4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1482,8 +1482,6 @@ struct amdgpu_device { struct amdgpu_mman mman; struct amdgpu_vram_scratch vram_scratch; struct amdgpu_wb wb; - atomic64_t vram_usage; - atomic64_t vram_vis_usage; atomic64_t num_bytes_moved; atomic64_t num_evictions; atomic64_t num_vram_cpu_page_faults; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 825784b3b193..7e71a511990e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -246,7 +246,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, } total_vram = adev->mc.real_vram_size - adev->vram_pin_size; - used_vram = atomic64_read(&adev->vram_usage); + used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; spin_lock(&adev->mm_stats.lock); @@ -292,7 +292,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, /* Do the same for visible VRAM if half of it is free */ if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { u64 total_vis_vram = adev->mc.visible_vram_size; - u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage); + u64 used_vis_vram = + amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); if (used_vis_vram < total_vis_vram) { u64 free_vis_vram = total_vis_vram - used_vis_vram; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 889f96fcfc81..29cd5dabf8b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -455,10 +455,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = atomic64_read(&adev->vram_usage); + ui64 = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: - ui64 = atomic64_read(&adev->vram_vis_usage); + ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GTT_USAGE: ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); @@ -497,7 +497,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file mem.vram.total_heap_size = adev->mc.real_vram_size; mem.vram.usable_heap_size = adev->mc.real_vram_size - adev->vram_pin_size; - mem.vram.heap_usage = atomic64_read(&adev->vram_usage); + mem.vram.heap_usage = + amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = @@ -506,7 +507,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file adev->mc.visible_vram_size - (adev->vram_pin_size - adev->invisible_pin_size); mem.cpu_accessible_vram.heap_usage = - atomic64_read(&adev->vram_vis_usage); + amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.cpu_accessible_vram.max_allocation = mem.cpu_accessible_vram.usable_heap_size * 3 / 4; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 57d2cb6bd331..e7e899190bef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -37,53 +37,6 @@ #include "amdgpu.h" #include "amdgpu_trace.h" - - -static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev, - struct ttm_mem_reg *mem) -{ - if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size) - return 0; - - return ((mem->start << PAGE_SHIFT) + mem->size) > - adev->mc.visible_vram_size ? - adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) : - mem->size; -} - -static void amdgpu_update_memory_usage(struct amdgpu_device *adev, - struct ttm_mem_reg *old_mem, - struct ttm_mem_reg *new_mem) -{ - u64 vis_size; - if (!adev) - return; - - if (new_mem) { - switch (new_mem->mem_type) { - case TTM_PL_TT: - break; - case TTM_PL_VRAM: - atomic64_add(new_mem->size, &adev->vram_usage); - vis_size = amdgpu_get_vis_part_size(adev, new_mem); - atomic64_add(vis_size, &adev->vram_vis_usage); - break; - } - } - - if (old_mem) { - switch (old_mem->mem_type) { - case TTM_PL_TT: - break; - case TTM_PL_VRAM: - atomic64_sub(old_mem->size, &adev->vram_usage); - vis_size = amdgpu_get_vis_part_size(adev, old_mem); - atomic64_sub(vis_size, &adev->vram_vis_usage); - break; - } - } -} - static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); @@ -92,7 +45,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct amdgpu_bo, tbo); amdgpu_bo_kunmap(bo); - amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL); drm_gem_object_release(&bo->gem_base); amdgpu_bo_unref(&bo->parent); @@ -990,8 +942,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, return; /* move_notify is called before move happens */ - amdgpu_update_memory_usage(adev, &bo->mem, new_mem); - trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 1307ab581449..f22a4758719d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -68,6 +68,9 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem); uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man); +uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); +uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); + int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 1eb8d5d3acf2..26e900627971 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -28,6 +28,8 @@ struct amdgpu_vram_mgr { struct drm_mm mm; spinlock_t lock; + atomic64_t usage; + atomic64_t vis_usage; }; /** @@ -78,6 +80,27 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) return 0; } +/** + * amdgpu_vram_mgr_vis_size - Calculate visible node size + * + * @adev: amdgpu device structure + * @node: MM node structure + * + * Calculate how many bytes of the MM node are inside visible VRAM + */ +static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, + struct drm_mm_node *node) +{ + uint64_t start = node->start << PAGE_SHIFT; + uint64_t end = (node->size + node->start) << PAGE_SHIFT; + + if (start >= adev->mc.visible_vram_size) + return 0; + + return (end > adev->mc.visible_vram_size ? + adev->mc.visible_vram_size : end) - start; +} + /** * amdgpu_vram_mgr_new - allocate new ranges * @@ -93,11 +116,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, const struct ttm_place *place, struct ttm_mem_reg *mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; struct drm_mm *mm = &mgr->mm; struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; unsigned long lpfn, num_nodes, pages_per_node, pages_left; + uint64_t usage = 0, vis_usage = 0; unsigned i; int r; @@ -142,6 +167,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (unlikely(r)) goto error; + usage += nodes[i].size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); + /* Calculate a virtual BO start address to easily check if * everything is CPU accessible. */ @@ -155,6 +183,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, } spin_unlock(&mgr->lock); + atomic64_add(usage, &mgr->usage); + atomic64_add(vis_usage, &mgr->vis_usage); + mem->mm_node = nodes; return 0; @@ -181,8 +212,10 @@ error: static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; struct drm_mm_node *nodes = mem->mm_node; + uint64_t usage = 0, vis_usage = 0; unsigned pages = mem->num_pages; if (!mem->mm_node) @@ -192,14 +225,47 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, while (pages) { pages -= nodes->size; drm_mm_remove_node(nodes); + usage += nodes->size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes); ++nodes; } spin_unlock(&mgr->lock); + atomic64_sub(usage, &mgr->usage); + atomic64_sub(vis_usage, &mgr->vis_usage); + kfree(mem->mm_node); mem->mm_node = NULL; } +/** + * amdgpu_vram_mgr_usage - how many bytes are used in this domain + * + * @man: TTM memory type manager + * + * Returns how many bytes are used in this domain. + */ +uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_vram_mgr *mgr = man->priv; + + return atomic64_read(&mgr->usage); +} + +/** + * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part + * + * @man: TTM memory type manager + * + * Returns how many bytes are used in the visible part of VRAM + */ +uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_vram_mgr *mgr = man->priv; + + return atomic64_read(&mgr->vis_usage); +} + /** * amdgpu_vram_mgr_debug - dump VRAM table * @@ -211,7 +277,6 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, struct drm_printer *printer) { - struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; spin_lock(&mgr->lock); @@ -219,9 +284,8 @@ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, spin_unlock(&mgr->lock); drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", - adev->mman.bdev.man[TTM_PL_VRAM].size, - (u64)atomic64_read(&adev->vram_usage) >> 20, - (u64)atomic64_read(&adev->vram_vis_usage) >> 20); + man->size, amdgpu_vram_mgr_usage(man) >> 20, + amdgpu_vram_mgr_vis_usage(man) >> 20); } const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { -- cgit v1.2.3 From cf273a59ca3068caced2adaf2deeb44d2013c8a9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 18 Aug 2017 15:50:17 +0200 Subject: drm/amdgpu: fix and cleanup shadow handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the shadow flag on the shadow and not the parent, always bind shadow BOs during allocation instead of manually, use the reservation_object wrappers to grab the lock. This fixes a couple of issues with binding the shadow BOs as well as correctly evicting them when memory becomes tight. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 46 +++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ------ 3 files changed, 23 insertions(+), 37 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f7ffb029f6d5..e630d918fefc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2622,12 +2622,6 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, goto err; } - r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem); - if (r) { - DRM_ERROR("%p bind failed\n", bo->shadow); - goto err; - } - r = amdgpu_bo_restore_from_shadow(adev, ring, bo, NULL, fence, true); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e7e899190bef..9e495da0bb03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -91,7 +91,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; - places[c].lpfn = 0; + if (flags & AMDGPU_GEM_CREATE_SHADOW) + places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT; + else + places[c].lpfn = 0; places[c].flags = TTM_PL_FLAG_TT; if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) places[c].flags |= TTM_PL_FLAG_WC | @@ -446,17 +449,16 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, if (bo->shadow) return 0; - bo->flags |= AMDGPU_GEM_CREATE_SHADOW; - memset(&placements, 0, - (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); - - amdgpu_ttm_placement_init(adev, &placement, - placements, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC); + memset(&placements, 0, sizeof(placements)); + amdgpu_ttm_placement_init(adev, &placement, placements, + AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW); r = amdgpu_bo_create_restricted(adev, size, byte_align, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW, NULL, &placement, bo->tbo.resv, 0, @@ -484,30 +486,28 @@ int amdgpu_bo_create(struct amdgpu_device *adev, { struct ttm_placement placement = {0}; struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; + uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; int r; - memset(&placements, 0, - (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); + memset(&placements, 0, sizeof(placements)); + amdgpu_ttm_placement_init(adev, &placement, placements, + domain, parent_flags); - amdgpu_ttm_placement_init(adev, &placement, - placements, domain, flags); - - r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, - domain, flags, sg, &placement, - resv, init_value, bo_ptr); + r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain, + parent_flags, sg, &placement, resv, + init_value, bo_ptr); if (r) return r; - if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) { - if (!resv) { - r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL); - WARN_ON(r != 0); - } + if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { + if (!resv) + WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, + NULL)); r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); if (!resv) - ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock); + reservation_object_unlock((*bo_ptr)->tbo.resv); if (r) amdgpu_bo_unref(bo_ptr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ba475af99332..96ec4e2b56e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -165,14 +165,6 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, unsigned i; int r; - if (parent->bo->shadow) { - struct amdgpu_bo *shadow = parent->bo->shadow; - - r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); - if (r) - return r; - } - if (use_cpu_for_update) { r = amdgpu_bo_kmap(parent->bo, NULL); if (r) -- cgit v1.2.3 From 3f3333f8a0e90ac26f84ed7b0aa344efce695c08 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 3 Aug 2017 14:02:13 +0200 Subject: drm/amdgpu: track evicted page tables v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of validating all page tables when one was evicted, track which one needs a validation. v2: simplify amdgpu_vm_ready as well Signed-off-by: Christian König Reviewed-by: Alex Deucher (v1) Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 227 +++++++++++++---------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 16 +- 5 files changed, 119 insertions(+), 141 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d6ddd5562c16..8bf178a912f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -636,9 +636,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); - fpriv->vm.last_eviction_counter = - atomic64_read(&p->adev->num_evictions); - if (p->bo_list) { struct amdgpu_bo *gds = p->bo_list->gds_obj; struct amdgpu_bo *gws = p->bo_list->gws_obj; @@ -835,7 +832,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) if (!bo) continue; - amdgpu_vm_bo_invalidate(adev, bo); + amdgpu_vm_bo_invalidate(adev, bo, false); } } @@ -860,7 +857,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, } if (p->job->vm) { - p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo); + p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo); r = amdgpu_bo_vm_update_pte(p); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index ba012933e6aa..d02880640ee7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -160,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, if (bo_va && --bo_va->ref_count == 0) { amdgpu_vm_bo_rmv(adev, bo_va); - if (amdgpu_vm_ready(adev, vm)) { + if (amdgpu_vm_ready(vm)) { struct dma_fence *fence = NULL; r = amdgpu_vm_clear_freed(adev, vm, &fence); @@ -481,10 +481,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, struct list_head *list, uint32_t operation) { - int r = -ERESTARTSYS; + int r; - if (!amdgpu_vm_ready(adev, vm)) - goto error; + if (!amdgpu_vm_ready(vm)) + return; r = amdgpu_vm_update_directories(adev, vm); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 9e495da0bb03..52d0109c0d9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -929,7 +929,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, return; abo = container_of(bo, struct amdgpu_bo, tbo); - amdgpu_vm_bo_invalidate(adev, abo); + amdgpu_vm_bo_invalidate(adev, abo, evict); amdgpu_bo_kunmap(abo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1334bbb82634..6ff3c1bf035e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -140,7 +140,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry) { - entry->robj = vm->root.bo; + entry->robj = vm->root.base.bo; entry->priority = 0; entry->tv.bo = &entry->robj->tbo; entry->tv.shared = true; @@ -148,61 +148,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, list_add(&entry->tv.head, validated); } -/** - * amdgpu_vm_validate_layer - validate a single page table level - * - * @parent: parent page table level - * @validate: callback to do the validation - * @param: parameter for the validation callback - * - * Validate the page table BOs on command submission if neccessary. - */ -static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, - int (*validate)(void *, struct amdgpu_bo *), - void *param, bool use_cpu_for_update, - struct ttm_bo_global *glob) -{ - unsigned i; - int r; - - if (use_cpu_for_update) { - r = amdgpu_bo_kmap(parent->bo, NULL); - if (r) - return r; - } - - if (!parent->entries) - return 0; - - for (i = 0; i <= parent->last_entry_used; ++i) { - struct amdgpu_vm_pt *entry = &parent->entries[i]; - - if (!entry->bo) - continue; - - r = validate(param, entry->bo); - if (r) - return r; - - spin_lock(&glob->lru_lock); - ttm_bo_move_to_lru_tail(&entry->bo->tbo); - if (entry->bo->shadow) - ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo); - spin_unlock(&glob->lru_lock); - - /* - * Recurse into the sub directory. This is harmless because we - * have only a maximum of 5 layers. - */ - r = amdgpu_vm_validate_level(entry, validate, param, - use_cpu_for_update, glob); - if (r) - return r; - } - - return r; -} - /** * amdgpu_vm_validate_pt_bos - validate the page table BOs * @@ -217,32 +162,43 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { - uint64_t num_evictions; + struct ttm_bo_global *glob = adev->mman.bdev.glob; + int r; - /* We only need to validate the page tables - * if they aren't already valid. - */ - num_evictions = atomic64_read(&adev->num_evictions); - if (num_evictions == vm->last_eviction_counter) - return 0; + spin_lock(&vm->status_lock); + while (!list_empty(&vm->evicted)) { + struct amdgpu_vm_bo_base *bo_base; + struct amdgpu_bo *bo; - return amdgpu_vm_validate_level(&vm->root, validate, param, - vm->use_cpu_for_update, - adev->mman.bdev.glob); -} + bo_base = list_first_entry(&vm->evicted, + struct amdgpu_vm_bo_base, + vm_status); + spin_unlock(&vm->status_lock); -/** - * amdgpu_vm_check - helper for amdgpu_vm_ready - */ -static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo) -{ - /* if anything is swapped out don't swap it in here, - just abort and wait for the next CS */ - if (!amdgpu_bo_gpu_accessible(bo)) - return -ERESTARTSYS; + bo = bo_base->bo; + BUG_ON(!bo); + if (bo->parent) { + r = validate(param, bo); + if (r) + return r; - if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow)) - return -ERESTARTSYS; + spin_lock(&glob->lru_lock); + ttm_bo_move_to_lru_tail(&bo->tbo); + if (bo->shadow) + ttm_bo_move_to_lru_tail(&bo->shadow->tbo); + spin_unlock(&glob->lru_lock); + } + + if (vm->use_cpu_for_update) { + r = amdgpu_bo_kmap(bo, NULL); + if (r) + return r; + } + + spin_lock(&vm->status_lock); + list_del_init(&bo_base->vm_status); + } + spin_unlock(&vm->status_lock); return 0; } @@ -250,17 +206,19 @@ static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo) /** * amdgpu_vm_ready - check VM is ready for updates * - * @adev: amdgpu device * @vm: VM to check * * Check if all VM PDs/PTs are ready for updates */ -bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm) +bool amdgpu_vm_ready(struct amdgpu_vm *vm) { - if (amdgpu_vm_check(NULL, vm->root.bo)) - return false; + bool ready; + + spin_lock(&vm->status_lock); + ready = list_empty(&vm->evicted); + spin_unlock(&vm->status_lock); - return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL); + return ready; } /** @@ -326,11 +284,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { - struct reservation_object *resv = vm->root.bo->tbo.resv; + struct reservation_object *resv = vm->root.base.bo->tbo.resv; struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; struct amdgpu_bo *pt; - if (!entry->bo) { + if (!entry->base.bo) { r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, level), AMDGPU_GPU_PAGE_SIZE, true, @@ -351,9 +309,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, /* Keep a reference to the root directory to avoid * freeing them up in the wrong order. */ - pt->parent = amdgpu_bo_ref(vm->root.bo); + pt->parent = amdgpu_bo_ref(vm->root.base.bo); - entry->bo = pt; + entry->base.vm = vm; + entry->base.bo = pt; + list_add_tail(&entry->base.bo_list, &pt->va); + INIT_LIST_HEAD(&entry->base.vm_status); entry->addr = 0; } @@ -1020,7 +981,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner); + amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); r = amdgpu_sync_wait(&sync, true); amdgpu_sync_free(&sync); @@ -1059,10 +1020,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; - shadow = parent->bo->shadow; + shadow = parent->base.bo->shadow; if (vm->use_cpu_for_update) { - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); if (unlikely(r)) return r; @@ -1078,7 +1039,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, /* assume the worst case */ ndw += parent->last_entry_used * 6; - pd_addr = amdgpu_bo_gpu_offset(parent->bo); + pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); if (shadow) { shadow_addr = amdgpu_bo_gpu_offset(shadow); @@ -1098,7 +1059,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, /* walk over the address space and update the directory */ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { - struct amdgpu_bo *bo = parent->entries[pt_idx].bo; + struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo; uint64_t pde, pt; if (bo == NULL) @@ -1141,7 +1102,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, } if (count) { - if (vm->root.bo->shadow) + if (vm->root.base.bo->shadow) params.func(¶ms, last_shadow, last_pt, count, incr, AMDGPU_PTE_VALID); @@ -1154,7 +1115,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, amdgpu_job_free(job); } else { amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, + amdgpu_sync_resv(adev, &job->sync, + parent->base.bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); if (shadow) amdgpu_sync_resv(adev, &job->sync, @@ -1167,7 +1129,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_bo_fence(parent->bo, fence, true); + amdgpu_bo_fence(parent->base.bo, fence, true); dma_fence_put(vm->last_dir_update); vm->last_dir_update = dma_fence_get(fence); dma_fence_put(fence); @@ -1180,7 +1142,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - if (!entry->bo) + if (!entry->base.bo) continue; r = amdgpu_vm_update_level(adev, vm, entry, level + 1); @@ -1213,7 +1175,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - if (!entry->bo) + if (!entry->base.bo) continue; entry->addr = ~0ULL; @@ -1268,7 +1230,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, *entry = &p->vm->root; while ((*entry)->entries) { idx = addr >> (p->adev->vm_manager.block_size * level--); - idx %= amdgpu_bo_size((*entry)->bo) / 8; + idx %= amdgpu_bo_size((*entry)->base.bo) / 8; *parent = *entry; *entry = &(*entry)->entries[idx]; } @@ -1304,7 +1266,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, p->src || !(flags & AMDGPU_PTE_VALID)) { - dst = amdgpu_bo_gpu_offset(entry->bo); + dst = amdgpu_bo_gpu_offset(entry->base.bo); dst = amdgpu_gart_get_vm_pde(p->adev, dst); flags = AMDGPU_PTE_VALID; } else { @@ -1330,18 +1292,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, tmp = p->pages_addr; p->pages_addr = NULL; - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); pde = pd_addr + (entry - parent->entries) * 8; amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); p->pages_addr = tmp; } else { - if (parent->bo->shadow) { - pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); + if (parent->base.bo->shadow) { + pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); pde = pd_addr + (entry - parent->entries) * 8; amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); } - pd_addr = amdgpu_bo_gpu_offset(parent->bo); + pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); pde = pd_addr + (entry - parent->entries) * 8; amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); } @@ -1392,7 +1354,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, if (entry->addr & AMDGPU_PDE_PTE) continue; - pt = entry->bo; + pt = entry->base.bo; if (use_cpu_update) { pe_start = (unsigned long)amdgpu_bo_kptr(pt); } else { @@ -1612,12 +1574,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv, + r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, owner); if (r) goto error_free; - r = reservation_object_reserve_shared(vm->root.bo->tbo.resv); + r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); if (r) goto error_free; @@ -1632,7 +1594,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_bo_fence(vm->root.bo, f, true); + amdgpu_bo_fence(vm->root.base.bo, f, true); dma_fence_put(*fence); *fence = f; return 0; @@ -1927,7 +1889,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, */ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct reservation_object *resv = vm->root.bo->tbo.resv; + struct reservation_object *resv = vm->root.base.bo->tbo.resv; struct dma_fence *excl, **shared; unsigned i, shared_count; int r; @@ -2414,12 +2376,25 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, * Mark @bo as invalid. */ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, - struct amdgpu_bo *bo) + struct amdgpu_bo *bo, bool evicted) { struct amdgpu_vm_bo_base *bo_base; list_for_each_entry(bo_base, &bo->va, bo_list) { + struct amdgpu_vm *vm = bo_base->vm; + bo_base->moved = true; + if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + spin_lock(&bo_base->vm->status_lock); + list_move(&bo_base->vm_status, &vm->evicted); + spin_unlock(&bo_base->vm->status_lock); + continue; + } + + /* Don't add page tables to the moved state */ + if (bo->tbo.type == ttm_bo_type_kernel) + continue; + spin_lock(&bo_base->vm->status_lock); list_move(&bo_base->vm_status, &bo_base->vm->moved); spin_unlock(&bo_base->vm->status_lock); @@ -2507,6 +2482,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); + INIT_LIST_HEAD(&vm->evicted); INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->freed); @@ -2551,30 +2527,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, AMDGPU_GEM_DOMAIN_VRAM, flags, - NULL, NULL, init_pde_value, &vm->root.bo); + NULL, NULL, init_pde_value, &vm->root.base.bo); if (r) goto error_free_sched_entity; - r = amdgpu_bo_reserve(vm->root.bo, false); - if (r) - goto error_free_root; - - vm->last_eviction_counter = atomic64_read(&adev->num_evictions); + vm->root.base.vm = vm; + list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); + INIT_LIST_HEAD(&vm->root.base.vm_status); if (vm->use_cpu_for_update) { - r = amdgpu_bo_kmap(vm->root.bo, NULL); + r = amdgpu_bo_reserve(vm->root.base.bo, false); if (r) goto error_free_root; - } - amdgpu_bo_unreserve(vm->root.bo); + r = amdgpu_bo_kmap(vm->root.base.bo, NULL); + if (r) + goto error_free_root; + amdgpu_bo_unreserve(vm->root.base.bo); + } return 0; error_free_root: - amdgpu_bo_unref(&vm->root.bo->shadow); - amdgpu_bo_unref(&vm->root.bo); - vm->root.bo = NULL; + amdgpu_bo_unref(&vm->root.base.bo->shadow); + amdgpu_bo_unref(&vm->root.base.bo); + vm->root.base.bo = NULL; error_free_sched_entity: amd_sched_entity_fini(&ring->sched, &vm->entity); @@ -2593,9 +2570,11 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) { unsigned i; - if (level->bo) { - amdgpu_bo_unref(&level->bo->shadow); - amdgpu_bo_unref(&level->bo); + if (level->base.bo) { + list_del(&level->base.bo_list); + list_del(&level->base.vm_status); + amdgpu_bo_unref(&level->base.bo->shadow); + amdgpu_bo_unref(&level->base.bo); } if (level->entries) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index ff093d4b5e11..4e465e817fe8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -111,12 +111,12 @@ struct amdgpu_vm_bo_base { }; struct amdgpu_vm_pt { - struct amdgpu_bo *bo; - uint64_t addr; + struct amdgpu_vm_bo_base base; + uint64_t addr; /* array of page tables, one for each directory entry */ - struct amdgpu_vm_pt *entries; - unsigned last_entry_used; + struct amdgpu_vm_pt *entries; + unsigned last_entry_used; }; struct amdgpu_vm { @@ -126,6 +126,9 @@ struct amdgpu_vm { /* protecting invalidated */ spinlock_t status_lock; + /* BOs who needs a validation */ + struct list_head evicted; + /* BOs moved, but not yet updated in the PT */ struct list_head moved; @@ -135,7 +138,6 @@ struct amdgpu_vm { /* contains the page directory */ struct amdgpu_vm_pt root; struct dma_fence *last_dir_update; - uint64_t last_eviction_counter; /* protecting freed */ spinlock_t freed_lock; @@ -225,7 +227,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry); -bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm); +bool amdgpu_vm_ready(struct amdgpu_vm *vm); int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); @@ -250,7 +252,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, - struct amdgpu_bo *bo); + struct amdgpu_bo *bo, bool evicted); struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, struct amdgpu_bo *bo); struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, -- cgit v1.2.3 From c09312a6532a9a976ec4e72eb3b7fa10e87a8b07 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 12 Sep 2017 10:56:17 +0200 Subject: drm/amdgpu: fix and cleanup amdgpu_bo_create v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We adjusted the BO flags for USWC handling, but those never took effect because the placement was passed in instead of generated inside this function. v2: better commit message Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 83 +++++++++--------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 --- 2 files changed, 23 insertions(+), 68 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 52d0109c0d9c..726a662f43f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -64,11 +64,12 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) return false; } -static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, - struct ttm_placement *placement, - struct ttm_place *places, - u32 domain, u64 flags) +void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) { + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); + struct ttm_placement *placement = &abo->placement; + struct ttm_place *places = abo->placements; + u64 flags = abo->flags; u32 c = 0; if (domain & AMDGPU_GEM_DOMAIN_VRAM) { @@ -151,27 +152,6 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, placement->busy_placement = places; } -void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); - - amdgpu_ttm_placement_init(adev, &abo->placement, abo->placements, - domain, abo->flags); -} - -static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, - struct ttm_placement *placement) -{ - BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1)); - - memcpy(bo->placements, placement->placement, - placement->num_placement * sizeof(struct ttm_place)); - bo->placement.num_placement = placement->num_placement; - bo->placement.num_busy_placement = placement->num_busy_placement; - bo->placement.placement = bo->placements; - bo->placement.busy_placement = bo->placements; -} - /** * amdgpu_bo_create_reserved - create reserved BO for kernel use * @@ -303,14 +283,13 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, *cpu_addr = NULL; } -int amdgpu_bo_create_restricted(struct amdgpu_device *adev, - unsigned long size, int byte_align, - bool kernel, u32 domain, u64 flags, - struct sg_table *sg, - struct ttm_placement *placement, - struct reservation_object *resv, - uint64_t init_value, - struct amdgpu_bo **bo_ptr) +static int amdgpu_bo_do_create(struct amdgpu_device *adev, + unsigned long size, int byte_align, + bool kernel, u32 domain, u64 flags, + struct sg_table *sg, + struct reservation_object *resv, + uint64_t init_value, + struct amdgpu_bo **bo_ptr) { struct amdgpu_bo *bo; enum ttm_bo_type type; @@ -384,10 +363,11 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; #endif - amdgpu_fill_placement_to_bo(bo, placement); - /* Kernel allocation are uninterruptible */ + bo->tbo.bdev = &adev->mman.bdev; + amdgpu_ttm_placement_from_domain(bo, domain); initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); + /* Kernel allocation are uninterruptible */ r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); @@ -442,27 +422,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, unsigned long size, int byte_align, struct amdgpu_bo *bo) { - struct ttm_placement placement = {0}; - struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; int r; if (bo->shadow) return 0; - memset(&placements, 0, sizeof(placements)); - amdgpu_ttm_placement_init(adev, &placement, placements, - AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_SHADOW); - - r = amdgpu_bo_create_restricted(adev, size, byte_align, true, - AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_SHADOW, - NULL, &placement, - bo->tbo.resv, - 0, - &bo->shadow); + r = amdgpu_bo_do_create(adev, size, byte_align, true, + AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW, + NULL, bo->tbo.resv, 0, + &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); mutex_lock(&adev->shadow_list_lock); @@ -484,18 +454,11 @@ int amdgpu_bo_create(struct amdgpu_device *adev, uint64_t init_value, struct amdgpu_bo **bo_ptr) { - struct ttm_placement placement = {0}; - struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; int r; - memset(&placements, 0, sizeof(placements)); - amdgpu_ttm_placement_init(adev, &placement, placements, - domain, parent_flags); - - r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain, - parent_flags, sg, &placement, resv, - init_value, bo_ptr); + r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain, + parent_flags, sg, resv, init_value, bo_ptr); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index a4891bea2ca8..39b6bf6fb051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -195,14 +195,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev, struct reservation_object *resv, uint64_t init_value, struct amdgpu_bo **bo_ptr); -int amdgpu_bo_create_restricted(struct amdgpu_device *adev, - unsigned long size, int byte_align, - bool kernel, u32 domain, u64 flags, - struct sg_table *sg, - struct ttm_placement *placement, - struct reservation_object *resv, - uint64_t init_value, - struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, -- cgit v1.2.3 From e9c7577c09b5062a4f1464b8ef4472c14bdfd2e7 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 11 Sep 2017 17:29:26 +0200 Subject: drm/amdgpu: simplify pinning into visible VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just set the CPU access required flag when we pin it. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 726a662f43f4..6982baeccd14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -635,7 +635,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); int r, i; - unsigned fpfn, lpfn; if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) return -EPERM; @@ -667,22 +666,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, } bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + /* force to pin into visible video ram */ + if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; amdgpu_ttm_placement_from_domain(bo, domain); for (i = 0; i < bo->placement.num_placement; i++) { - /* force to pin into visible video ram */ - if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) && - !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) && - (!max_offset || max_offset > - adev->mc.visible_vram_size)) { - if (WARN_ON_ONCE(min_offset > - adev->mc.visible_vram_size)) - return -EINVAL; - fpfn = min_offset >> PAGE_SHIFT; - lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; - } else { - fpfn = min_offset >> PAGE_SHIFT; - lpfn = max_offset >> PAGE_SHIFT; - } + unsigned fpfn, lpfn; + + fpfn = min_offset >> PAGE_SHIFT; + lpfn = max_offset >> PAGE_SHIFT; + if (fpfn > bo->placements[i].fpfn) bo->placements[i].fpfn = fpfn; if (!bo->placements[i].lpfn || -- cgit v1.2.3 From b82485fd384a56c27fae44e649552eca6334237a Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Fri, 15 Sep 2017 21:05:19 -0400 Subject: drm/amdgpu: add helper to convert a ttm bo to amdgpu_bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian König Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 +++----- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 9 +++++---- 3 files changed, 13 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6982baeccd14..8b4ed8a98a18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -40,9 +40,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_bo *bo; - - bo = container_of(tbo, struct amdgpu_bo, tbo); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); amdgpu_bo_kunmap(bo); @@ -884,7 +882,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return; - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); amdgpu_vm_bo_invalidate(adev, abo, evict); amdgpu_bo_kunmap(abo); @@ -911,7 +909,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return 0; - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 39b6bf6fb051..c26ef53604af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -94,6 +94,11 @@ struct amdgpu_bo { }; }; +static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) +{ + return container_of(tbo, struct amdgpu_bo, tbo); +} + /** * amdgpu_mem_type_to_domain - return domain corresponding to mem_type * @mem_type: ttm memory type diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1f68a146e26c..10952c3e5eb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -44,6 +44,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_object.h" #include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" @@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, placement->num_busy_placement = 1; return; } - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: if (adev->mman.buffer_funcs && @@ -257,7 +258,7 @@ gtt: static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { - struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; @@ -484,7 +485,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, int r; /* Can't move a pinned BO */ - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); if (WARN_ON_ONCE(abo->pin_count > 0)) return -EINVAL; @@ -1142,7 +1143,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len, int write) { - struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); struct drm_mm_node *nodes = abo->tbo.mem.mm_node; uint32_t value = 0; -- cgit v1.2.3 From a695e43712242c354748e9bae5d137d4337a7694 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 31 Oct 2017 09:36:13 +0100 Subject: drm/amdgpu: fix error handling in amdgpu_bo_do_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bo structure is freed up in case of an error, so we can't do any accounting if that happens. Signed-off-by: Christian König Reviewed-by: Michel Dänzer CC: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 8b4ed8a98a18..ea25164e7f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -369,6 +369,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); + if (unlikely(r != 0)) + return r; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; if (adev->mc.visible_vram_size < adev->mc.real_vram_size && @@ -378,9 +381,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, else amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); - if (unlikely(r != 0)) - return r; - if (kernel) bo->tbo.priority = 1; -- cgit v1.2.3