summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c176
1 files changed, 153 insertions, 23 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index c5f46d264b23..fafbad3cf08d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -53,7 +53,7 @@
* 2. Async ring
*/
#define GFX10_NUM_GFX_RINGS_NV1X 1
-#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1
+#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2
#define GFX10_MEC_HPD_SIZE 2048
#define F32_CE_PROGRAM_RAM_SIZE 65536
@@ -3780,11 +3780,12 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
unsigned i;
int r;
- WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
+ WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
@@ -3793,13 +3794,13 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
}
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
+ amdgpu_ring_write(ring, scratch -
PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
+ tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@@ -3975,6 +3976,23 @@ static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
}
+static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_4 *rlc_hdr;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes);
+}
+
static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
{
bool ret = false;
@@ -4152,8 +4170,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
if (version_major == 2) {
if (version_minor >= 1)
gfx_v10_0_init_rlc_ext_microcode(adev);
- if (version_minor == 2)
+ if (version_minor >= 2)
gfx_v10_0_init_rlc_iram_dram_microcode(adev);
+ if (version_minor == 4) {
+ gfx_v10_0_init_tap_delays_microcode(adev);
+ }
}
}
@@ -4250,8 +4271,39 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
}
+
}
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
+
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
info->fw = adev->gfx.mec_fw;
@@ -4711,6 +4763,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
{
struct amdgpu_ring *ring;
unsigned int irq_type;
+ unsigned int hw_prio;
ring = &adev->gfx.gfx_ring[ring_id];
@@ -4728,8 +4781,10 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
+ hw_prio, NULL);
}
static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
@@ -4791,7 +4846,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_pipe_per_me = 2;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@@ -6581,6 +6636,24 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
}
}
+static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v10_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ bool priority = 0;
+ u32 tmp;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = 1;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
@@ -6609,11 +6682,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
- /* set up default queue priority level
- * 0x0 = low priority, 0x1 = high priority */
- tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
- tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
- mqd->cp_gfx_hqd_queue_priority = tmp;
+ /* set up gfx queue priority */
+ gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop);
/* set up time quantum */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
@@ -8506,14 +8576,45 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
+ uint64_t wptr_tmp;
- if (ring->use_doorbell) {
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
+
+ wptr_tmp = ring->wptr & ring->buf_mask;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
+ *wptr_saved = wptr_tmp;
+ /* assume doorbell always being used by mes mapped queue */
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+ } else {
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ }
} else {
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
}
}
@@ -8538,13 +8639,42 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
+ uint64_t wptr_tmp;
- /* XXX check if swapping is necessary on BE */
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
+
+ wptr_tmp = ring->wptr & ring->buf_mask;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
+ *wptr_saved = wptr_tmp;
+ /* assume doorbell always used by mes mapped queue */
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+ } else {
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ }
} else {
- BUG(); /* only DOORBELL method supported on gfx10 now */
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx10 now */
+ }
}
}