diff options
author | Mario Limonciello <mario.limonciello@amd.com> | 2025-05-06 15:49:46 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2025-05-07 17:48:11 -0400 |
commit | d1a46cdd00539a573b7b7b67f7e5668344ac5231 (patch) | |
tree | b0933dd2f737c9f26b3bc54db8444dd8ae4fd9bf | |
parent | 5c937b4a6050316af37ef214825b6340b5e9e391 (diff) |
drm/amd: Add per-ring reset for vcn v4.0.5 use
There is a problem occurring on VCN 4.0.5 where in some situations a job
is timing out. This triggers a job timeout which then causes a GPU
reset for recovery. That has exposed a number of issues with GPU reset
that have since been fixed. But also a GPU reset isn't actually needed
for this circumstance. Just restarting the ring is enough.
Add a reset callback for the ring which will stop and start VCN if the
issue happens.
Link: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12528
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3909
Link: https://lore.kernel.org/r/20250506204948.12048-2-mario.limonciello@amd.com
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 558469744f3a..ed00d35039c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -219,6 +219,13 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; } + adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -1440,6 +1447,20 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + vcn_v4_0_5_stop(vinst); + vcn_v4_0_5_start(vinst); + + return amdgpu_ring_test_helper(ring); +} + static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1467,6 +1488,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_5_ring_reset, }; /** |