From a667386cb997a136e169de3cf70f007223bb74ee Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 15 Jul 2016 18:37:05 -0400 Subject: drm/amdgpu: Make SDMA phase quantum configurable Set a configurable SDMA phase quantum when enabling SDMA context switching. The default value significantly reduces SDMA latency in page table updates when user-mode SDMA queues have concurrent activity, compared to the initial HW setting. Signed-off-by: Felix Kuehling Reviewed-by: Andres Rodriguez Reviewed-by: Shaoyun Liu Acked-by: Chunming Zhou Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 4a65697ccc94..591f3e7fb508 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -493,13 +493,45 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) { - u32 f32_cntl; + u32 f32_cntl, phase_quantum = 0; int i; + if (amdgpu_sdma_phase_quantum) { + unsigned value = amdgpu_sdma_phase_quantum; + unsigned unit = 0; + + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { + value = (value + 1) >> 1; + unit++; + } + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); + WARN_ONCE(1, + "clamping sdma_phase_quantum to %uK clock cycles\n", + value << unit); + } + phase_quantum = + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; + } + for (i = 0; i < adev->sdma.num_instances; i++) { f32_cntl = 
RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, enable ? 1 : 0); + if (enable && amdgpu_sdma_phase_quantum) { + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE0_QUANTUM), + phase_quantum); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE1_QUANTUM), + phase_quantum); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE2_QUANTUM), + phase_quantum); + } WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl); } -- cgit v1.2.3 From 51668b0b1bc23d2b0cedf19fd1cc210e85a823a2 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Wed, 28 Jun 2017 20:02:04 +0800 Subject: drm/amdgpu/sdma4: Enable sdma poll mem addr on vega10 for SRIOV While doing FLR on VFs, it is possible to lose the doorbell write for sdma, so enable poll mem for sdma; the sdma fw then checks the poll mem holding the wptr. Signed-off-by: Frank Min Signed-off-by: Xiangliang.Yu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 48 +++++++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d02f248b2028..0fa6438c152c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1209,6 +1209,7 @@ struct amdgpu_sdma_instance { struct amdgpu_ring ring; bool burst_nop; + uint32_t poll_mem_offs; }; struct amdgpu_sdma { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 591f3e7fb508..5c247082f699 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -35,6 +35,7 @@ #include "vega10/MMHUB/mmhub_1_0_offset.h" #include "vega10/MMHUB/mmhub_1_0_sh_mask.h" #include "vega10/HDP/hdp_4_0_offset.h" +#include "vega10/NBIO/nbio_6_1_offset.h" #include
"raven1/SDMA0/sdma0_4_1_default.h" #include "soc15_common.h" @@ -287,6 +288,8 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) */ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) { + int i; + u32 offset; struct amdgpu_device *adev = ring->adev; DRM_DEBUG("Setting write pointer\n"); @@ -303,6 +306,17 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); + + if (amdgpu_sriov_vf(adev)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + if (&adev->sdma.instance[i].ring == ring) { + offset = adev->sdma.instance[i].poll_mem_offs; + atomic64_set((atomic64_t *)&adev->wb.wb[offset], + (ring->wptr << 2)); + nbio_v6_1_hdp_flush(adev); + } + } + } WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; @@ -573,9 +587,9 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - u32 rb_cntl, ib_cntl; + u32 rb_cntl, ib_cntl, wptr_poll_addr_lo, wptr_poll_addr_hi, wptr_poll_cntl; u32 rb_bufsz; - u32 wb_offset; + u32 wb_offset, poll_offset; u32 doorbell; u32 doorbell_offset; u32 temp; @@ -687,6 +701,21 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + + if (amdgpu_sriov_vf(adev)) { + poll_offset = adev->sdma.instance[i].poll_mem_offs * 4; + + wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + wptr_poll_addr_lo = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO)); + wptr_poll_addr_lo = REG_SET_FIELD(wptr_poll_addr_lo, SDMA0_GFX_RB_WPTR_POLL_ADDR_LO, ADDR, + lower_32_bits(adev->wb.gpu_addr + poll_offset) >> 2); + wptr_poll_addr_hi = 
upper_32_bits(adev->wb.gpu_addr + poll_offset); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); + + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), wptr_poll_addr_lo); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), wptr_poll_addr_hi); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + } } return 0; @@ -1247,6 +1276,15 @@ static int sdma_v4_0_sw_init(void *handle) (i == 0) ? AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1); + + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_wb_get_64bit(adev, + &adev->sdma.instance[i].poll_mem_offs); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate SDMA poll mem wb.\n", r); + return r; + } + } if (r) return r; } @@ -1259,9 +1297,13 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); + if (amdgpu_sriov_vf(adev)) + amdgpu_wb_free_64bit(adev, + adev->sdma.instance[i].poll_mem_offs); + } return 0; } -- cgit v1.2.3 From 5060baa0e5511aaece8e1f08ee22a3bbf8a30377 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Jul 2017 15:24:49 -0400 Subject: drm/amdgpu/sdma4: drop unused register header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nbio registers are not used in this file. 
Reviewed-by: Christian König Cc: Frank Min Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 5c247082f699..7cb5320d2424 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -35,7 +35,6 @@ #include "vega10/MMHUB/mmhub_1_0_offset.h" #include "vega10/MMHUB/mmhub_1_0_sh_mask.h" #include "vega10/HDP/hdp_4_0_offset.h" -#include "vega10/NBIO/nbio_6_1_offset.h" #include "raven1/SDMA0/sdma0_4_1_default.h" #include "soc15_common.h" -- cgit v1.2.3 From 575a07d2dabca6d05c3b59262fe754f9a52b29a6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Jul 2017 15:28:14 -0400 Subject: drm/amdgpu/sdma4: set wptr shadow atomically (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change until wptr polling uses this location (future patch). 
v2: use WRITE_ONCE Cc: Frank Min Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 7cb5320d2424..f970a4a6b666 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -293,6 +293,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("Setting write pointer\n"); if (ring->use_doorbell) { + u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs]; + DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " "lower_32_bits(ring->wptr) << 2 == 0x%08x " @@ -301,8 +303,7 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2); - adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); + WRITE_ONCE(*wb, (ring->wptr << 2)); DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); -- cgit v1.2.3 From a67094432da75d2769e7c35323f26c6873908e6c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Jul 2017 15:30:27 -0400 Subject: drm/amdgpu/sdma4: drop hdp flush from wptr shadow update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wb buffer is in system memory, not vram so the flush is useless. 
Cc: Frank Min Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index f970a4a6b666..587bf508c794 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -313,7 +313,6 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) offset = adev->sdma.instance[i].poll_mem_offs; atomic64_set((atomic64_t *)&adev->wb.wb[offset], (ring->wptr << 2)); - nbio_v6_1_hdp_flush(adev); } } } -- cgit v1.2.3 From 34c3a82b5aa7ddbc26f0d4d7ecec7720d73faed1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Jul 2017 19:04:21 -0400 Subject: drm/amdgpu/sdma4: drop allocation of poll_mem_offs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already allocate this as part of the ring structure, use that instead. 
Cc: Frank Min Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 45 +++++++--------------------------- 2 files changed, 9 insertions(+), 37 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 87afe8785bb2..d492ff79c296 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1149,7 +1149,6 @@ struct amdgpu_sdma_instance { struct amdgpu_ring ring; bool burst_nop; - uint32_t poll_mem_offs; }; struct amdgpu_sdma { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 587bf508c794..85b856182bc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -287,8 +287,6 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) */ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) { - int i; - u32 offset; struct amdgpu_device *adev = ring->adev; DRM_DEBUG("Setting write pointer\n"); @@ -306,16 +304,6 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) WRITE_ONCE(*wb, (ring->wptr << 2)); DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); - - if (amdgpu_sriov_vf(adev)) { - for (i = 0; i < adev->sdma.num_instances; i++) { - if (&adev->sdma.instance[i].ring == ring) { - offset = adev->sdma.instance[i].poll_mem_offs; - atomic64_set((atomic64_t *)&adev->wb.wb[offset], - (ring->wptr << 2)); - } - } - } WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { int me = (ring == &ring->adev->sdma.instance[0].ring) ? 
0 : 1; @@ -586,12 +574,13 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - u32 rb_cntl, ib_cntl, wptr_poll_addr_lo, wptr_poll_addr_hi, wptr_poll_cntl; + u32 rb_cntl, ib_cntl, wptr_poll_cntl; u32 rb_bufsz; - u32 wb_offset, poll_offset; + u32 wb_offset; u32 doorbell; u32 doorbell_offset; u32 temp; + u64 wptr_gpu_addr; int i, r; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -702,17 +691,14 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); if (amdgpu_sriov_vf(adev)) { - poll_offset = adev->sdma.instance[i].poll_mem_offs * 4; - + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); - wptr_poll_addr_lo = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO)); - wptr_poll_addr_lo = REG_SET_FIELD(wptr_poll_addr_lo, SDMA0_GFX_RB_WPTR_POLL_ADDR_LO, ADDR, - lower_32_bits(adev->wb.gpu_addr + poll_offset) >> 2); - wptr_poll_addr_hi = upper_32_bits(adev->wb.gpu_addr + poll_offset); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), wptr_poll_addr_lo); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), wptr_poll_addr_hi); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); } } @@ -1275,15 +1261,6 @@ static int sdma_v4_0_sw_init(void *handle) (i == 0) ? 
AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1); - - if (amdgpu_sriov_vf(adev)) { - r = amdgpu_wb_get_64bit(adev, - &adev->sdma.instance[i].poll_mem_offs); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate SDMA poll mem wb.\n", r); - return r; - } - } if (r) return r; } @@ -1296,13 +1273,9 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { + for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); - if (amdgpu_sriov_vf(adev)) - amdgpu_wb_free_64bit(adev, - adev->sdma.instance[i].poll_mem_offs); - } return 0; } -- cgit v1.2.3 From 68c3c67fe6e527f121b13ad9b4e1714377b3d106 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Jul 2017 15:43:59 -0400 Subject: drm/amdgpu/sdma4: move wptr polling setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move it up before ring enablement with all of the other engine setup and explicitly disable it for bare metal. 
Cc: Frank Min Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 85b856182bc9..79a9e44dc8eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -662,6 +662,19 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), temp); } + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + if (amdgpu_sriov_vf(adev)) + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); + else + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -690,17 +703,6 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); - if (amdgpu_sriov_vf(adev)) { - wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); - - WREG32(sdma_v4_0_get_reg_offset(i, 
mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), - lower_32_bits(wptr_gpu_addr)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), - upper_32_bits(wptr_gpu_addr)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); - } } return 0; -- cgit v1.2.3 From b416bf14bdd2019ed62b1a20876a33cf2480bbfd Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 10 Aug 2017 11:24:43 +0800 Subject: drm/amdgpu: ignore digest_size when loading sdma fw for raven digest_size has been retired from sdma v4 fw Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Reviewed-by: Evan Quan Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 79a9e44dc8eb..fd7c72aaafa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -799,15 +799,12 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) const struct sdma_firmware_header_v1_0 *hdr; const __le32 *fw_data; u32 fw_size; - u32 digest_size = 0; int i, j; /* halt the MEs */ sdma_v4_0_enable(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { - uint16_t version_major; - uint16_t version_minor; if (!adev->sdma.instance[i].fw) return -EINVAL; @@ -815,23 +812,12 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) amdgpu_ucode_print_sdma_hdr(&hdr->header); fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - version_major = le16_to_cpu(hdr->header.header_version_major); - version_minor = le16_to_cpu(hdr->header.header_version_minor); - - if (version_major == 1 && version_minor >= 1) { - const struct sdma_firmware_header_v1_1 *sdma_v1_1_hdr = (const struct sdma_firmware_header_v1_1 *) hdr; - digest_size = le32_to_cpu(sdma_v1_1_hdr->digest_size); - } - - fw_size -= 
digest_size; - fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), 0); - for (j = 0; j < fw_size; j++) WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); -- cgit v1.2.3