From c68cbbfd54c68485ac67b5a04c06feacdce15fec Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 15 Aug 2023 08:34:27 +0200 Subject: drm/amdgpu: cleanup conditional execution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First of all calculating the number of dw to patch into a conditional execution is not something HW generation specific. This is just standard ring buffer calculations. While at it also reduce the BUG_ON() into WARN_ON(). Then instead of a random bit pattern use 0 as default value for the number of dw skipped, this way it's not mandatory any more to patch the conditional execution. And last make the address to check a parameter of the conditional execution instead of getting this from the ring. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index b9a15d51eb5c..8cedee059c8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -546,34 +546,21 @@ static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); } -static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned int ret; amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch 
later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + ret = ring->wptr & ring->buf_mask; + amdgpu_ring_write(ring, 0); return ret; } -static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) -{ - unsigned int cur; - - WARN_ON_ONCE(offset > ring->buf_mask); - WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - static int vpe_ring_preempt_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -864,7 +851,6 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .test_ring = vpe_ring_test_ring, .test_ib = vpe_ring_test_ib, .init_cond_exec = vpe_ring_init_cond_exec, - .patch_cond_exec = vpe_ring_patch_cond_exec, .preempt_ib = vpe_ring_preempt_ib, .begin_use = vpe_ring_begin_use, .end_use = vpe_ring_end_use, -- cgit v1.2.3 From 26f5f34e6e44f995d97b8917484373c22715fd8d Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Mon, 16 Jan 2023 13:11:59 +0800 Subject: drm/amdgpu/vpe: add collaborate mode support for VPE Under collaborate mode, multiple VPE instances share a ring buffer and work together to finish a job. 
Signed-off-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 20 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h | 1 + 2 files changed, 21 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 8cedee059c8a..9d2415f26b7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -457,6 +457,18 @@ static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid) return csa_mc_addr; } +static void vpe_ring_emit_pred_exec(struct amdgpu_ring *ring, + uint32_t device_select, + uint32_t exec_count) +{ + if (!ring->adev->vpe.collaborate_mode) + return; + + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_PRED_EXE, 0) | + (device_select << 16)); + amdgpu_ring_write(ring, exec_count & 0x1fff); +} + static void vpe_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, @@ -481,6 +493,8 @@ static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr, { int i = 0; + vpe_ring_emit_pred_exec(ring, 0, 10); + do { /* write the fence */ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0)); @@ -505,6 +519,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) uint32_t seq = ring->fence_drv.sync_seq; uint64_t addr = ring->fence_drv.gpu_addr; + vpe_ring_emit_pred_exec(ring, 0, 6); + /* wait for idle */ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) | @@ -520,6 +536,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { + vpe_ring_emit_pred_exec(ring, 0, 3); + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0)); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, val); @@ -528,6 +546,8 @@ static void vpe_ring_emit_wreg(struct 
amdgpu_ring *ring, uint32_t reg, uint32_t static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { + vpe_ring_emit_pred_exec(ring, 0, 6); + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) | VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h index ee6db04cf27b..231d86d0953e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h @@ -78,6 +78,7 @@ struct amdgpu_vpe { bool context_started; uint32_t num_instances; + bool collaborate_mode; }; int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev); -- cgit v1.2.3 From d40f6213b52c161fd4634933acbc32103a283363 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Wed, 31 Jan 2024 15:40:42 +0800 Subject: drm/amdgpu/vpe: don't emit cond exec command under collaborate mode Not ready now. Signed-off-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 9d2415f26b7c..eea2487d2ea2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -571,6 +571,9 @@ static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring, { unsigned int ret; + if (ring->adev->vpe.collaborate_mode) + return ~0; + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0)); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); -- cgit v1.2.3 From f9070b0f2f9edb503b20b12782d4d601cafc6d5e Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Wed, 15 Nov 2023 11:56:14 +0800 Subject: drm/amdgpu/vpe: add VPE 6.1.1 support Add initial support for VPE 6.1.1. 
v2: squash in updates (Alex) Signed-off-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 32 ++-- drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c | 281 +++++++++++++++++++++----------- 2 files changed, 207 insertions(+), 106 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index eea2487d2ea2..70c5cc80ecdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -297,6 +297,10 @@ static int vpe_early_init(void *handle) case IP_VERSION(6, 1, 0): vpe_v6_1_set_funcs(vpe); break; + case IP_VERSION(6, 1, 1): + vpe_v6_1_set_funcs(vpe); + vpe->collaborate_mode = true; + break; default: return -EINVAL; } @@ -304,6 +308,8 @@ static int vpe_early_init(void *handle) vpe_set_ring_funcs(adev); vpe_set_regs(vpe); + dev_info(adev->dev, "VPE: collaborate mode %s", vpe->collaborate_mode ? "true" : "false"); + return 0; } @@ -493,8 +499,6 @@ static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr, { int i = 0; - vpe_ring_emit_pred_exec(ring, 0, 10); - do { /* write the fence */ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0)); @@ -705,16 +709,22 @@ static void vpe_ring_set_wptr(struct amdgpu_ring *ring) upper_32_bits(ring->wptr << 2)); atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + if (vpe->collaborate_mode) + WDOORBELL64(ring->doorbell_index + 4, ring->wptr << 2); } else { - dev_dbg(adev->dev, "Not using doorbell, \ - regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ - regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n", - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo), - lower_32_bits(ring->wptr << 2)); - WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi), - upper_32_bits(ring->wptr << 2)); + int i; + + for (i = 0; 
i < vpe->num_instances; i++) { + dev_dbg(adev->dev, "Not using doorbell, \ + regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ + regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n", + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_lo), + lower_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_hi), + upper_32_bits(ring->wptr << 2)); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index d20060a51e05..769eb8f7bb3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -33,14 +33,38 @@ #include "vpe/vpe_6_1_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/vpe_6_1_0.bin"); +MODULE_FIRMWARE("amdgpu/vpe_6_1_1.bin"); #define VPE_THREAD1_UCODE_OFFSET 0x8000 +#define regVPEC_COLLABORATE_CNTL 0x0013 +#define regVPEC_COLLABORATE_CNTL_BASE_IDX 0 +#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN__SHIFT 0x0 +#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN_MASK 0x00000001L + +#define regVPEC_COLLABORATE_CFG 0x0014 +#define regVPEC_COLLABORATE_CFG_BASE_IDX 0 +#define VPEC_COLLABORATE_CFG__MASTER_ID__SHIFT 0x0 +#define VPEC_COLLABORATE_CFG__MASTER_EN__SHIFT 0x3 +#define VPEC_COLLABORATE_CFG__SLAVE0_ID__SHIFT 0x4 +#define VPEC_COLLABORATE_CFG__SLAVE0_EN__SHIFT 0x7 +#define VPEC_COLLABORATE_CFG__MASTER_ID_MASK 0x00000007L +#define VPEC_COLLABORATE_CFG__MASTER_EN_MASK 0x00000008L +#define VPEC_COLLABORATE_CFG__SLAVE0_ID_MASK 0x00000070L +#define VPEC_COLLABORATE_CFG__SLAVE0_EN_MASK 0x00000080L + +#define regVPEC_CNTL_6_1_1 0x0016 +#define regVPEC_CNTL_6_1_1_BASE_IDX 0 +#define regVPEC_QUEUE_RESET_REQ_6_1_1 0x002c +#define regVPEC_QUEUE_RESET_REQ_6_1_1_BASE_IDX 0 +#define regVPEC_PUB_DUMMY2_6_1_1 0x004c +#define regVPEC_PUB_DUMMY2_6_1_1_BASE_IDX 0 + static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset) { uint32_t base; - base = 
vpe->ring.adev->reg_offset[VPE_HWIP][0][0]; + base = vpe->ring.adev->reg_offset[VPE_HWIP][inst][0]; return base + offset; } @@ -48,12 +72,14 @@ static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, u static void vpe_v6_1_halt(struct amdgpu_vpe *vpe, bool halt) { struct amdgpu_device *adev = vpe->ring.adev; - uint32_t f32_cntl; + uint32_t i, f32_cntl; - f32_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); - f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL), f32_cntl); + for (i = 0; i < vpe->num_instances; i++) { + f32_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL), f32_cntl); + } } static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) @@ -70,20 +96,58 @@ static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) return 0; } +static void vpe_v6_1_set_collaborate_mode(struct amdgpu_vpe *vpe, bool enable) +{ + struct amdgpu_device *adev = vpe->ring.adev; + uint32_t vpe_colla_cntl, vpe_colla_cfg, i; + + if (!vpe->collaborate_mode) + return; + + for (i = 0; i < vpe->num_instances; i++) { + vpe_colla_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL)); + vpe_colla_cntl = REG_SET_FIELD(vpe_colla_cntl, VPEC_COLLABORATE_CNTL, + COLLABORATE_MODE_EN, enable ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL), vpe_colla_cntl); + + vpe_colla_cfg = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG)); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_ID, 0); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_EN, enable ? 
1 : 0); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_ID, 1); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_EN, enable ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG), vpe_colla_cfg); + } +} + static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; const struct vpe_firmware_header_v1_0 *vpe_hdr; const __le32 *data; uint32_t ucode_offset[2], ucode_size[2]; - uint32_t i, size_dw; + uint32_t i, j, size_dw; uint32_t ret; - // disable UMSCH_INT_ENABLE - ret = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); - ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), ret); + /* disable UMSCH_INT_ENABLE */ + for (j = 0; j < vpe->num_instances; j++) { + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1)); + else + ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL)); + + ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1), ret); + else + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret); + } + + /* + * For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well. + * Here use instance 0 as master. 
+ */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { uint32_t f32_offset, f32_cntl; @@ -96,8 +160,7 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; amdgpu_vpe_psp_update_sram(adev); - - /* Config DPM */ + vpe_v6_1_set_collaborate_mode(vpe, true); amdgpu_vpe_configure_dpm(vpe); return 0; @@ -114,25 +177,26 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) vpe_v6_1_halt(vpe, true); - for (i = 0; i < 2; i++) { - if (i > 0) - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); - else - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), 0); - - data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); - size_dw = ucode_size[i] / sizeof(__le32); - - while (size_dw--) { - if (amdgpu_emu_mode && size_dw % 500 == 0) - msleep(1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + for (j = 0; j < vpe->num_instances; j++) { + for (i = 0; i < 2; i++) { + if (i > 0) + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); + else + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), 0); + + data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); + size_dw = ucode_size[i] / sizeof(__le32); + + while (size_dw--) { + if (amdgpu_emu_mode && size_dw % 500 == 0) + msleep(1); + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + } } - } vpe_v6_1_halt(vpe, false); - /* Config DPM */ + vpe_v6_1_set_collaborate_mode(vpe, true); amdgpu_vpe_configure_dpm(vpe); return 0; @@ -142,68 +206,68 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) { struct amdgpu_ring *ring = &vpe->ring; struct amdgpu_device *adev = ring->adev; - uint32_t rb_bufsz, rb_cntl; - uint32_t ib_cntl; uint32_t doorbell, doorbell_offset; + uint32_t rb_bufsz, rb_cntl; + uint32_t ib_cntl, i; int ret; - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(vpe_get_reg_offset(vpe, 0, 
regVPEC_QUEUE0_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_HI), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), 0); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); - - ring->wptr = 0; - - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - - /* set minor_ptr_update to 0 after wptr programed */ - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); - - doorbell = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL)); - doorbell_offset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET)); - - doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 
1 : 0); - doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL), doorbell); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - - adev->nbio.funcs->vpe_doorbell_range(adev, 0, ring->use_doorbell, ring->doorbell_index, 2); - - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); - - ib_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL), ib_cntl); - - ring->sched.ready = true; + for (i = 0; i < vpe->num_instances; i++) { + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_HI), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), 0); + + /* set the wb address whether it's enabled or not */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + + rb_cntl = 
REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + /* set minor_ptr_update to 0 after wptr programed */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); + + doorbell_offset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET)); + doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index + i*4); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); + + doorbell = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL)); + doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 
1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL), doorbell); + + adev->nbio.funcs->vpe_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index + i*4, 4); + + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + ib_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL), ib_cntl); + } ret = amdgpu_ring_test_helper(ring); - if (ret) { - ring->sched.ready = false; + if (ret) return ret; - } return 0; } @@ -211,17 +275,30 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) static int vpe_v_6_1_ring_stop(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; - uint32_t queue_reset; + uint32_t queue_reset, i; int ret; - queue_reset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ)); - queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ), queue_reset); + for (i = 0; i < vpe->num_instances; i++) { + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1)); + else + queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ)); + + queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) { + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1), queue_reset); + ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ_6_1_1, 0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + } else { + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ), queue_reset); + ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ, 
0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + } - ret = SOC15_WAIT_ON_RREG(VPE, 0, regVPEC_QUEUE_RESET_REQ, 0, - VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); - if (ret) - dev_err(adev->dev, "VPE queue reset failed\n"); + if (ret) + dev_err(adev->dev, "VPE queue reset failed\n"); + } vpe->ring.sched.ready = false; @@ -236,10 +313,18 @@ static int vpe_v6_1_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_vpe *vpe = &adev->vpe; uint32_t vpe_cntl; - vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1)); + else + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + vpe_cntl = REG_SET_FIELD(vpe_cntl, VPEC_CNTL, TRAP_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1), vpe_cntl); + else + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); return 0; } @@ -264,13 +349,19 @@ static int vpe_v6_1_process_trap_irq(struct amdgpu_device *adev, static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe) { + struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe); + vpe->regs.queue0_rb_rptr_lo = regVPEC_QUEUE0_RB_RPTR; vpe->regs.queue0_rb_rptr_hi = regVPEC_QUEUE0_RB_RPTR_HI; vpe->regs.queue0_rb_wptr_lo = regVPEC_QUEUE0_RB_WPTR; vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI; vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT; - vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2_6_1_1; + else + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4; vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3; vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4; -- cgit v1.2.3 
From 1b7eec6bf360145bbca959a6c036e885dc5cf8f5 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Mon, 18 Mar 2024 18:31:30 +0800 Subject: Revert "drm/amdgpu/vpe: don't emit cond exec command under collaborate mode" Ready now. Remove this workaround. This reverts commit d40f6213b52c161fd4634933acbc32103a283363. Signed-off-by: Lang Yu Tested-by: Alan Liu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 70c5cc80ecdc..7a65a2b128ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -575,9 +575,6 @@ static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring, { unsigned int ret; - if (ring->adev->vpe.collaborate_mode) - return ~0; - amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0)); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); -- cgit v1.2.3 From eed14eb48ee176fe0144c6a999d00c855d0b199b Mon Sep 17 00:00:00 2001 From: Peyton Lee Date: Wed, 13 Mar 2024 16:53:49 +0800 Subject: drm/amdgpu/vpe: power on vpe when hw_init To fix mode2 reset failure. Should power on VPE when hw_init. 
Signed-off-by: Peyton Lee Reviewed-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 7a65a2b128ec..6695481f870f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -396,6 +396,12 @@ static int vpe_hw_init(void *handle) struct amdgpu_vpe *vpe = &adev->vpe; int ret; + /* Power on VPE */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_UNGATE); + if (ret) + return ret; + ret = vpe_load_microcode(vpe); if (ret) return ret; -- cgit v1.2.3