diff options
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm | 13 |
2 files changed, 9 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h index 5255378af53c..f67569ccf9f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h @@ -43,9 +43,9 @@ static const u32 gfx_10_1_10_cleaner_shader_hex[] = { 0xd70f6a01, 0x000202ff, 0x00000400, 0x80828102, 0xbf84fff7, 0xbefc03ff, - 0x00000068, 0xbe803080, - 0xbe813080, 0xbe823080, - 0xbe833080, 0x80fc847c, + 0x00000068, 0xbe803000, + 0xbe813000, 0xbe823000, + 0xbe833000, 0x80fc847c, 0xbf84fffa, 0xbeea0480, 0xbeec0480, 0xbeee0480, 0xbef00480, 0xbef20480, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm index 9ba3359253c9..54f7ed9e2801 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm @@ -40,7 +40,6 @@ shader main type(CS) wave_size(32) // Note: original source code from SQ team - // // Create 32 waves in a threadgroup (CS waves) // Each allocates 64 VGPRs @@ -71,8 +70,8 @@ label_0005: s_sub_u32 s2, s2, 8 s_cbranch_scc0 label_0005 // - s_mov_b32 s2, 0x80000000 // Bit31 is first_wave - s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup // CLEAR LDS // @@ -99,10 +98,10 @@ label_001F: label_0023: s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) label_sgpr_loop: - s_movreld_b32 s0, 0 - s_movreld_b32 s1, 0 - s_movreld_b32 s2, 0 - s_movreld_b32 s3, 0 + s_movreld_b32 s0, s0 + s_movreld_b32 s1, s0 + s_movreld_b32 s2, s0 + s_movreld_b32 s3, s0 s_sub_u32 m0, m0, 4 s_cbranch_scc0 label_sgpr_loop |