summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm13
2 files changed, 9 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
index 5255378af53c..f67569ccf9f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
@@ -43,9 +43,9 @@ static const u32 gfx_10_1_10_cleaner_shader_hex[] = {
0xd70f6a01, 0x000202ff,
0x00000400, 0x80828102,
0xbf84fff7, 0xbefc03ff,
- 0x00000068, 0xbe803080,
- 0xbe813080, 0xbe823080,
- 0xbe833080, 0x80fc847c,
+ 0x00000068, 0xbe803000,
+ 0xbe813000, 0xbe823000,
+ 0xbe833000, 0x80fc847c,
0xbf84fffa, 0xbeea0480,
0xbeec0480, 0xbeee0480,
0xbef00480, 0xbef20480,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
index 9ba3359253c9..54f7ed9e2801 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
@@ -40,7 +40,6 @@ shader main
type(CS)
wave_size(32)
// Note: original source code from SQ team
-
//
// Create 32 waves in a threadgroup (CS waves)
// Each allocates 64 VGPRs
@@ -71,8 +70,8 @@ label_0005:
s_sub_u32 s2, s2, 8
s_cbranch_scc0 label_0005
//
- s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
- s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
// CLEAR LDS
//
@@ -99,10 +98,10 @@ label_001F:
label_0023:
s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
label_sgpr_loop:
- s_movreld_b32 s0, 0
- s_movreld_b32 s1, 0
- s_movreld_b32 s2, 0
- s_movreld_b32 s3, 0
+ s_movreld_b32 s0, s0
+ s_movreld_b32 s1, s0
+ s_movreld_b32 s2, s0
+ s_movreld_b32 s3, s0
s_sub_u32 m0, m0, 4
s_cbranch_scc0 label_sgpr_loop