diff options
author | Ingo Molnar <mingo@kernel.org> | 2020-11-07 12:50:48 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2020-11-07 13:20:17 +0100 |
commit | 666fab4a3ea143315a9c059fad9f3a0f1365d54b (patch) | |
tree | e9e4be3b0eeac79346d52a86183326617d0c9999 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | |
parent | 0a986ea81e1aa8ac17e82cda53cc95158217956e (diff) | |
parent | 659caaf65dc9c7150aa3e80225ec6e66b25ab3ce (diff) |
Merge branch 'linus' into perf/kprobes
Conflicts:
include/asm-generic/atomic-instrumented.h
kernel/kprobes.c
Use the upstream atomic-instrumented.h checksum, and pick
the kprobes version of kernel/kprobes.c, which effectively
reverts this upstream workaround:
645f224e7ba2: ("kprobes: Tell lockdep about kprobe nesting")
Since the new code *should* be fine without nesting.
Knock on wood ...
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 22 |
1 files changed, 19 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index b2667342cf67..6b8d7bb83bb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -31,6 +31,10 @@ #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" +#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) +#define AMDGPU_RAS_FLAG_INIT_NEED_RESET (0x1 << 1) +#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV (0x1 << 2) + enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__SDMA, @@ -336,6 +340,12 @@ struct amdgpu_ras { struct amdgpu_ras_eeprom_control eeprom_control; bool error_query_ready; + + /* bad page count threshold */ + uint32_t bad_page_cnt_threshold; + + /* disable ras error count harvest in recovery */ + bool disable_ras_err_cnt_harvest; }; struct ras_fs_data { @@ -490,6 +500,8 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev); unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); +bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev); + /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, struct eeprom_table_record *bps, int pages); @@ -500,10 +512,14 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - /* save bad page to eeprom before gpu reset, - * i2c may be unstable in gpu reset + /* + * Save bad page to eeprom before gpu reset, i2c may be unstable + * in gpu reset. + * + * Also, exclude the case when ras recovery issuer is + * eeprom page write itself. */ - if (in_task()) + if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task()) amdgpu_ras_reserve_bad_pages(adev); if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) |