diff options
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 60 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 25 |
2 files changed, 49 insertions, 36 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 5931a17b4da3..388b44ed5928 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -4124,7 +4124,7 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { }; static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { - 0xbf820001, 0xbf8202ea, + 0xbf820001, 0xbf8202d8, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, @@ -4321,9 +4321,9 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, 0xb8fb5306, 0x867bc17b, - 0xbf840064, 0xbf8a0000, + 0xbf840052, 0xbf8a0000, 0x867aff6f, 0x04000000, - 0xbf840060, 0x8e7b867b, + 0xbf84004e, 0x8e7b867b, 0x8e7b827b, 0xbef6007b, 0xb8f02985, 0x80708170, 0x8e708a70, 0x8e708170, @@ -4336,8 +4336,8 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850030, - 0x24040682, 0xd86e4000, + 0x10000000, 0xbf85001d, + 0x24040682, 0xd86c0000, 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, @@ -4348,29 +4348,20 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0x680404ff, 0x00000200, + 0xbf84ffee, 0x680404ff, + 0x00000100, 0xd0c9006a, + 0x0000f702, 0xbf87ffe5, + 0xbf820016, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x701d0002, 0x68040702, 0xd0c9006a, 0x0000f702, - 0xbf87ffd2, 0xbf820015, - 0xd1060002, 0x00011103, - 0x7e0602ff, 0x00000200, - 0xbefc00ff, 0x00010000, - 0xbe800077, 0x8677ff77, - 0xff7fffff, 0x8777ff77, - 0x00058000, 0xd8ec0000, - 0x00000002, 0xbf8cc07f, - 0xe0765000, 0x701d0002, - 0x68040702, 0xd0c9006a, - 0x0000f702, 0xbf87fff7, + 0xbefe016a, 0xbf87fff6, 0xbef70000, 0xbef000ff, 0x00000400, 0xbefe00c1, 0xbeff00c1, 0xb8fb2b05, @@ -4497,15 +4488,15 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, 0xbf85ffeb, - 0xbf9c0000, 0xbf8200ee, + 0xbf9c0000, 0xbf8200f4, 0xbef4007e, 0x8675ff7f, 0x0000ffff, 0x8775ff75, 0x00040000, 0xbef60080, 0xbef700ff, 0x00807fac, 0x866eff7f, 0x04000000, - 0xbf84001f, 0xbefe00c1, + 0xbf840025, 0xbefe00c1, 0xbeff00c1, 0xb8ef5306, - 0x866fc16f, 0xbf84001a, + 0x866fc16f, 0xbf840020, 0x8e6f866f, 0x8e6f826f, 0xbef6006f, 0xb8f82985, 0x80788178, 0x8e788a78, @@ -4516,9 +4507,12 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { 0x01000000, 0xbefc0080, 0xe0510000, 0x781d0000, 0xe0510100, 0x781d0000, - 0x807cff7c, 0x00000200, - 0x8078ff78, 0x00000200, - 0xbf0a6f7c, 0xbf85fff6, + 0xe0510200, 0x781d0000, + 0xe0510300, 0x781d0000, + 0xe0510400, 0x781d0000, + 0x807cff7c, 0x00000500, + 0x8078ff78, 0x00000500, + 0xbf0a6f7c, 0xbf85fff0, 0xbefe00c1, 0xbeff00c1, 0xbef600ff, 0x01000000, 0xb8ef2b05, 0x806f816f, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 0f4877a60140..0eabb7a8cab9 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -75,8 +75,10 @@ var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 #if ASIC_FAMILY >= CHIP_GC_9_5_0 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 11 +var LDS_RESTORE_GRANULARITY_BYTES = 1280 #else var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var LDS_RESTORE_GRANULARITY_BYTES = 512 #endif var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits @@ -572,12 +574,21 @@ if SAVE_AFTER_XNACK_ERROR v_lshlrev_b32 v2, 2, v3 L_SAVE_LDS_LOOP_SQC: +#if ASIC_FAMILY < CHIP_GC_9_5_0 ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40 s_waitcnt lgkmcnt(0) - write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset) v_add_u32 v2, 0x200, v2 +#else + // gfx950 needs to save in multiple of 256 bytes. + ds_read_b32 v0, v2 + s_waitcnt lgkmcnt(0) + write_vgprs_to_mem_with_sqc(v0, 1, s_save_buf_rsrc0, s_save_mem_offset) + + v_add_u32 v2, 0x100, v2 +#endif + v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC @@ -601,6 +612,9 @@ L_SAVE_LDS_LOOP_VECTOR: // v_add_u32 v2, vcc[0:1], v2, v3 v_add_u32 v2, v2, v3 v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size +#if ASIC_FAMILY >= CHIP_GC_9_5_0 + s_mov_b64 exec, vcc +#endif s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR // restore rsrc3 @@ -763,8 +777,13 @@ L_RESTORE: L_RESTORE_LDS_LOOP: buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW - s_add_u32 m0, m0, 256*2 // 128 DW - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW +#if ASIC_FAMILY >= CHIP_GC_9_5_0 + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:512 // third 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:768 // forth 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:1024 // fifth 64DW +#endif + s_add_u32 m0, m0, LDS_RESTORE_GRANULARITY_BYTES // 128/320 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, LDS_RESTORE_GRANULARITY_BYTES //mem offset increased by 128/320 DW s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete? |