From 9884c2b1c38c33a9152f5aff162473f348fe3acd Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 31 Jul 2019 20:23:01 +0800 Subject: drm/amdgpu: add umc v6_1 query error count support Implement umc query_ras_error_count function to support querying both correctable and uncorrectable errors Signed-off-by: Hawking Zhang Signed-off-by: Tao Zhou Reviewed-by: Dennis Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 162 ++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v6_1.c (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c new file mode 100644 index 000000000000..1ca5ae642946 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -0,0 +1,162 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "umc_v6_1.h" +#include "amdgpu_ras.h" +#include "amdgpu.h" + +#include "rsmu/rsmu_0_0_2_offset.h" +#include "rsmu/rsmu_0_0_2_sh_mask.h" +#include "umc/umc_6_1_1_offset.h" +#include "umc/umc_6_1_1_sh_mask.h" + +static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, + uint32_t umc_instance) +{ + uint32_t rsmu_umc_index; + + rsmu_umc_index = RREG32_SOC15(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 1); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_INSTANCE, umc_instance); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_WREN, 1 << umc_instance); + WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + rsmu_umc_index); +} + +static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) +{ + WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 0); +} + +static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + + /* select the lower chip and check the error count */ + ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + *error_count += + 
REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt); + /* clear the lower chip err count */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, 0); + + /* select the higher chip and check the err counter */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + *error_count += + REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt); + /* clear the higher chip err count */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, 0); + + /* check for SRAM correctable error + MCUMC_STATUS is a 64 bit register */ + mc_umc_status = + RREG32(mc_umc_status_addr + umc_reg_offset); + mc_umc_status |= + (uint64_t)RREG32(mc_umc_status_addr + umc_reg_offset + 1) << 32; + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; + + /* clear the MCUMC_STATUS */ + WREG32(mc_umc_status_addr + umc_reg_offset, 0); + WREG32(mc_umc_status_addr + umc_reg_offset + 1, 0); +} + +static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + + /* check the MCUMC_STATUS */ + mc_umc_status = RREG32(mc_umc_status_addr + umc_reg_offset); + mc_umc_status |= + (uint64_t)RREG32(mc_umc_status_addr + umc_reg_offset + 1) << 32; + + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + 
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; + + /* clear the MCUMC_STATUS */ + WREG32(mc_umc_status_addr + umc_reg_offset, 0); + WREG32(mc_umc_status_addr + umc_reg_offset + 1, 0); +} + +static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t umc_inst, channel_inst, umc_reg_offset; + + for (umc_inst = 0; umc_inst < UMC_V6_1_UMC_INSTANCE_NUM; umc_inst++) { + /* enable the index mode to query eror count per channel */ + umc_v6_1_enable_umc_index_mode(adev, umc_inst); + for (channel_inst = 0; channel_inst < UMC_V6_1_CHANNEL_INSTANCE_NUM; channel_inst++) { + /* calc the register offset according to channel instance */ + umc_reg_offset = UMC_V6_1_PER_CHANNEL_OFFSET * channel_inst; + umc_v6_1_query_correctable_error_count(adev, umc_reg_offset, + &(err_data->ce_count)); + umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, + &(err_data->ue_count)); + } + } + umc_v6_1_disable_umc_index_mode(adev); +} + +const struct amdgpu_umc_funcs umc_v6_1_funcs = { + .query_ras_error_count = umc_v6_1_query_ras_error_count, +}; -- cgit v1.2.3 From 5bbfb64a177f36d3d208e39c61ce6df3968df4d4 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 23 Jul 2019 11:57:15 +0800 Subject: drm/amdgpu: use 64bit operation macros for umc replace some 32bit macros with 64bit operations to simplify code Signed-off-by: Tao Zhou Reviewed-by: Dennis Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 1ca5ae642946..8fbd81d3ce70 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ 
b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -94,18 +94,11 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, /* check for SRAM correctable error MCUMC_STATUS is a 64 bit register */ - mc_umc_status = - RREG32(mc_umc_status_addr + umc_reg_offset); - mc_umc_status |= - (uint64_t)RREG32(mc_umc_status_addr + umc_reg_offset + 1) << 32; + mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) *error_count += 1; - - /* clear the MCUMC_STATUS */ - WREG32(mc_umc_status_addr + umc_reg_offset, 0); - WREG32(mc_umc_status_addr + umc_reg_offset + 1, 0); } static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev, @@ -119,10 +112,7 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); /* check the MCUMC_STATUS */ - mc_umc_status = RREG32(mc_umc_status_addr + umc_reg_offset); - mc_umc_status |= - (uint64_t)RREG32(mc_umc_status_addr + umc_reg_offset + 1) << 32; - + mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || @@ -130,17 +120,16 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) *error_count += 1; - - /* clear the MCUMC_STATUS */ - WREG32(mc_umc_status_addr + umc_reg_offset, 0); - WREG32(mc_umc_status_addr + umc_reg_offset + 1, 0); } static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, void 
*ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - uint32_t umc_inst, channel_inst, umc_reg_offset; + uint32_t umc_inst, channel_inst, umc_reg_offset, mc_umc_status_addr; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); for (umc_inst = 0; umc_inst < UMC_V6_1_UMC_INSTANCE_NUM; umc_inst++) { /* enable the index mode to query eror count per channel */ @@ -152,6 +141,8 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, &(err_data->ce_count)); umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, &(err_data->ue_count)); + /* clear umc status */ + WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL); } } umc_v6_1_disable_umc_index_mode(adev); -- cgit v1.2.3 From f1ed4afa130291be918e4b65a3759108d8fc868b Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 23 Jul 2019 12:25:16 +0800 Subject: drm/amdgpu: update algorithm of umc uncorrectable error counting remove the check of ErrorCodeExt v2: refine the if condition for ue counting Signed-off-by: Tao Zhou Reviewed-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 8fbd81d3ce70..5b1ccb81b3a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -113,12 +113,12 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev /* check the MCUMC_STATUS */ mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset); - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, 
MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) *error_count += 1; } -- cgit v1.2.3 From c2742aef4d17cca71346dc9327eef5840878a7d7 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 22 Jul 2019 18:30:59 +0800 Subject: drm/amdgpu: add structures for umc error address translation add related registers, callback function and channel index table Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 ++ drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index f5d6def96414..dfa1a39e57af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -24,6 +24,8 @@ struct amdgpu_umc_funcs { void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); + void (*query_ras_error_address)(struct amdgpu_device *adev, + void *ras_error_status); }; struct amdgpu_umc { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 5b1ccb81b3a2..e05f3e68edb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -29,6 +29,16 @@ #include "umc/umc_6_1_1_offset.h" #include "umc/umc_6_1_1_sh_mask.h" +#define 
smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 + +static uint32_t + umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { + {2, 18, 11, 27}, {4, 20, 13, 29}, + {1, 17, 8, 24}, {7, 23, 14, 30}, + {10, 26, 3, 19}, {12, 28, 5, 21}, + {9, 25, 0, 16}, {15, 31, 6, 22} +}; + static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, uint32_t umc_instance) { -- cgit v1.2.3 From 8c94810357fad9d583e37785534a8caec558bb24 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 24 Jul 2019 21:43:45 +0800 Subject: drm/amdgpu: query umc ras error address query umc ras error address, translate it to gpu 4k page view and save it. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Reviewed-by: Dennis Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 80 +++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index e05f3e68edb0..bff1a12f2cc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -31,6 +31,16 @@ #define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 +/* + * (addr / 256) * 8192, the higher 26 bits in ErrorAddr + * is the index of 8KB block + */ +#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) +/* channel index is the index of 256B block */ +#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) +/* offset in 256B block */ +#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) + static uint32_t umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { {2, 18, 11, 27}, {4, 20, 13, 29}, @@ -158,6 +168,76 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, umc_v6_1_disable_umc_index_mode(adev); } +static void umc_v6_1_query_error_address(struct amdgpu_device *adev, + uint32_t umc_reg_offset, uint32_t channel_index, + struct ras_err_data *err_data) +{ + uint32_t lsb; + 
uint64_t mc_umc_status, err_addr; + uint32_t mc_umc_status_addr; + + /* skip error address process if -ENOMEM */ + if (!err_data->err_addr) + return; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset); + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4); + + /* the lowest lsb bits should be ignored */ + lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + err_addr &= ~((0x1ULL << lsb) - 1); + + /* translate umc channel address to soc pa, 3 parts are included */ + err_data->err_addr[err_data->err_addr_cnt] = + ADDR_OF_8KB_BLOCK(err_addr) + | ADDR_OF_256B_BLOCK(channel_index) + | OFFSET_IN_256B_BLOCK(err_addr); + + err_data->err_addr_cnt++; + } +} + +static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t umc_inst, channel_inst, umc_reg_offset; + uint32_t channel_index, mc_umc_status_addr; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + + for (umc_inst = 0; umc_inst < UMC_V6_1_UMC_INSTANCE_NUM; umc_inst++) { + /* enable the index mode to query eror count per channel */ + umc_v6_1_enable_umc_index_mode(adev, umc_inst); + for (channel_inst = 0; channel_inst < UMC_V6_1_CHANNEL_INSTANCE_NUM; channel_inst++) { + /* calc the register offset according to channel instance */ + umc_reg_offset = UMC_V6_1_PER_CHANNEL_OFFSET * channel_inst; + /* get channel index of interleaved memory */ + channel_index = 
umc_v6_1_channel_idx_tbl[umc_inst][channel_inst]; + + umc_v6_1_query_error_address(adev, umc_reg_offset, + channel_index, err_data); + + /* clear umc status */ + WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL); + /* clear error address register */ + WREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4, 0x0ULL); + } + } + + umc_v6_1_disable_umc_index_mode(adev); +} + const struct amdgpu_umc_funcs umc_v6_1_funcs = { .query_ras_error_count = umc_v6_1_query_ras_error_count, + .query_ras_error_address = umc_v6_1_query_ras_error_address, }; -- cgit v1.2.3 From a55c8d7bda4f83e86e2b7ed7b1704e762ed50db3 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 29 Jul 2019 10:28:57 +0800 Subject: drm/amdgpu: remove the clear of MCA_ADDR clearing MCA_STATUS is enough to reset the whole MCA, writing zero to MCA_ADDR is unnecessary Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index bff1a12f2cc9..035e4fea472c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -229,8 +229,6 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, /* clear umc status */ WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL); - /* clear error address register */ - WREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4, 0x0ULL); } } -- cgit v1.2.3 From 3aacf4ea1102f24c8dc63eb6f3d734cbc8bad86e Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 29 Jul 2019 14:28:35 +0800 Subject: drm/amdgpu: initialize new parameters and functions for amdgpu_umc structure add initialization for new members of amdgpu_umc structure Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 +++++-- 
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 10 +++++++++- drivers/gpu/drm/amd/amdgpu/umc_v6_1.h | 3 +++ 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 7c4d9d99c6d1..24387026fdee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -635,8 +635,11 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA20: - adev->umc.max_ras_err_cnt_per_query = - UMC_V6_1_UMC_INSTANCE_NUM * UMC_V6_1_CHANNEL_INSTANCE_NUM; + adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; + adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET; + adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; adev->umc.funcs = &umc_v6_1_funcs; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 035e4fea472c..9ba015d7eb57 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -41,7 +41,7 @@ /* offset in 256B block */ #define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) -static uint32_t +const uint32_t umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { {2, 18, 11, 27}, {4, 20, 13, 29}, {1, 17, 8, 24}, {7, 23, 14, 30}, @@ -235,7 +235,15 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, umc_v6_1_disable_umc_index_mode(adev); } +static void umc_v6_1_ras_init(struct amdgpu_device *adev) +{ + +} + const struct amdgpu_umc_funcs umc_v6_1_funcs = { + .ras_init = umc_v6_1_ras_init, .query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_address = umc_v6_1_query_ras_error_address, + .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode, + 
.disable_umc_index_mode = umc_v6_1_disable_umc_index_mode, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h index bddaf14a77f9..ad4598c0e495 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h @@ -24,6 +24,7 @@ #define __UMC_V6_1_H__ #include "soc15_common.h" +#include "amdgpu.h" /* HBM Memory Channel Width */ #define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH 128 @@ -37,5 +38,7 @@ #define UMC_V6_1_PER_CHANNEL_OFFSET 0x800 extern const struct amdgpu_umc_funcs umc_v6_1_funcs; +extern const uint32_t + umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; #endif -- cgit v1.2.3 From 2b671b6049efafc7ae6de9f67acb964b9c534f3a Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 1 Aug 2019 11:37:25 +0800 Subject: drm/amdgpu: apply umc_for_each_channel macro to umc_6_1 use umc_for_each_channel to make code simpler Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 84 ++++++++++++----------------------- 1 file changed, 28 insertions(+), 56 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 9ba015d7eb57..5747a0252624 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -142,46 +142,39 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev *error_count += 1; } +static void umc_v6_1_query_error_count(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint32_t umc_reg_offset, + uint32_t channel_index) +{ + umc_v6_1_query_correctable_error_count(adev, umc_reg_offset, + &(err_data->ce_count)); + umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, + &(err_data->ue_count)); +} + static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data 
*err_data = (struct ras_err_data *)ras_error_status; - uint32_t umc_inst, channel_inst, umc_reg_offset, mc_umc_status_addr; - - mc_umc_status_addr = - SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); - - for (umc_inst = 0; umc_inst < UMC_V6_1_UMC_INSTANCE_NUM; umc_inst++) { - /* enable the index mode to query eror count per channel */ - umc_v6_1_enable_umc_index_mode(adev, umc_inst); - for (channel_inst = 0; channel_inst < UMC_V6_1_CHANNEL_INSTANCE_NUM; channel_inst++) { - /* calc the register offset according to channel instance */ - umc_reg_offset = UMC_V6_1_PER_CHANNEL_OFFSET * channel_inst; - umc_v6_1_query_correctable_error_count(adev, umc_reg_offset, - &(err_data->ce_count)); - umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, - &(err_data->ue_count)); - /* clear umc status */ - WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL); - } - } - umc_v6_1_disable_umc_index_mode(adev); + amdgpu_umc_for_each_channel(umc_v6_1_query_error_count); } static void umc_v6_1_query_error_address(struct amdgpu_device *adev, - uint32_t umc_reg_offset, uint32_t channel_index, - struct ras_err_data *err_data) + struct ras_err_data *err_data, + uint32_t umc_reg_offset, uint32_t channel_index) { - uint32_t lsb; + uint32_t lsb, mc_umc_status_addr; uint64_t mc_umc_status, err_addr; - uint32_t mc_umc_status_addr; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); /* skip error address process if -ENOMEM */ - if (!err_data->err_addr) + if (!err_data->err_addr) { + /* clear umc status */ + WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL); return; + } - mc_umc_status_addr = - SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset); /* calculate error address if ue/ce error is detected */ @@ -197,42 +190,21 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, /* translate umc channel address to soc pa, 3 parts are included */ 
err_data->err_addr[err_data->err_addr_cnt] = - ADDR_OF_8KB_BLOCK(err_addr) - | ADDR_OF_256B_BLOCK(channel_index) - | OFFSET_IN_256B_BLOCK(err_addr); + ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); err_data->err_addr_cnt++; } + + /* clear umc status */ + WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL); } static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - uint32_t umc_inst, channel_inst, umc_reg_offset; - uint32_t channel_index, mc_umc_status_addr; - - mc_umc_status_addr = - SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); - - for (umc_inst = 0; umc_inst < UMC_V6_1_UMC_INSTANCE_NUM; umc_inst++) { - /* enable the index mode to query eror count per channel */ - umc_v6_1_enable_umc_index_mode(adev, umc_inst); - for (channel_inst = 0; channel_inst < UMC_V6_1_CHANNEL_INSTANCE_NUM; channel_inst++) { - /* calc the register offset according to channel instance */ - umc_reg_offset = UMC_V6_1_PER_CHANNEL_OFFSET * channel_inst; - /* get channel index of interleaved memory */ - channel_index = umc_v6_1_channel_idx_tbl[umc_inst][channel_inst]; - - umc_v6_1_query_error_address(adev, umc_reg_offset, - channel_index, err_data); - - /* clear umc status */ - WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL); - } - } - - umc_v6_1_disable_umc_index_mode(adev); + amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); } static void umc_v6_1_ras_init(struct amdgpu_device *adev) -- cgit v1.2.3 From b7f92097f5bc5129cb386340ec54e6f40639d6e3 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 29 Jul 2019 17:01:39 +0800 Subject: drm/amdgpu: implement umc ras init function enable umc ce interrupt and initialize ecc error count Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 32 
++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/umc_v6_1.h | 7 +++++++ 2 files changed, 39 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 5747a0252624..0ab2e96b4f77 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -207,9 +207,41 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); } +static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, uint32_t channel_index) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + + /* select the lower chip and check the error count */ + ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrInt, 0x1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + + /* select the higher chip and check the err counter */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); +} + static void umc_v6_1_ras_init(struct amdgpu_device *adev) { + void *ras_error_status = NULL; + amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel); } const struct amdgpu_umc_funcs umc_v6_1_funcs = { diff --git 
a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h index ad4598c0e495..dab9cbd292c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h @@ -37,6 +37,13 @@ /* UMC regiser per channel offset */ #define UMC_V6_1_PER_CHANNEL_OFFSET 0x800 +/* EccErrCnt max value */ +#define UMC_V6_1_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UMC_V6_1_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) + extern const struct amdgpu_umc_funcs umc_v6_1_funcs; extern const uint32_t umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; -- cgit v1.2.3 From b1a5895352dc1a154f1605702745ef2f63a5d797 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 29 Jul 2019 17:19:57 +0800 Subject: drm/amdgpu: update the calc algorithm of umc ecc error count the initial value of ecc error count can be adjusted Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 0ab2e96b4f77..64df37b860dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -98,9 +98,10 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); *error_count += - REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt); + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - + UMC_V6_1_CE_CNT_INIT); /* clear the lower chip err count */ - WREG32(ecc_err_cnt_addr + umc_reg_offset, 0); + WREG32(ecc_err_cnt_addr + umc_reg_offset, 
UMC_V6_1_CE_CNT_INIT); /* select the higher chip and check the err counter */ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, @@ -108,9 +109,10 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); *error_count += - REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt); + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - + UMC_V6_1_CE_CNT_INIT); /* clear the higher chip err count */ - WREG32(ecc_err_cnt_addr + umc_reg_offset, 0); + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); /* check for SRAM correctable error MCUMC_STATUS is a 64 bit register */ -- cgit v1.2.3 From dd21a572c9068e9a59b46dea67e8a65a44aee90b Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Fri, 9 Aug 2019 15:57:50 +0800 Subject: drm/amdgpu: implement UMC 64 bits REG operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit implement 64 bits operations via 32 bits interface v2: make use of lower_32_bits() and upper_32_bits() macros Reviewed-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 9 +++++++++ drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 10 +++++----- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 9efdd66279e5..975afa04df09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -21,6 +21,15 @@ #ifndef __AMDGPU_UMC_H__ #define __AMDGPU_UMC_H__ +/* implement 64 bits REG operations via 32 bits interface */ +#define RREG64_UMC(reg) (RREG32(reg) | \ + ((uint64_t)RREG32((reg) + 1) << 32)) +#define WREG64_UMC(reg, v) \ + do { \ + WREG32((reg), lower_32_bits(v)); \ + 
WREG32((reg) + 1, upper_32_bits(v)); \ + } while (0) + /* * void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data, * uint32_t umc_reg_offset, uint32_t channel_index) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 64df37b860dd..8502e736f721 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -116,7 +116,7 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, /* check for SRAM correctable error MCUMC_STATUS is a 64 bit register */ - mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset); + mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) @@ -134,7 +134,7 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); /* check the MCUMC_STATUS */ - mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset); + mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || @@ -173,11 +173,11 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, /* skip error address process if -ENOMEM */ if (!err_data->err_addr) { /* clear umc status */ - WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL); + WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); return; } - mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset); + mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); /* calculate error address if ue/ce error is detected */ if 
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -200,7 +200,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, } /* clear umc status */ - WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL); + WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); } static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, -- cgit v1.2.3