From 88f8575bca5fc70ba8608cfc49811f9b4d1eb6f9 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Fri, 5 Mar 2021 16:30:54 -0500 Subject: drm/amdgpu: enable watchdog feature for SQ of aldebaran SQ's watchdog timer monitors forward progress, a mask of which waves caused the watchdog timeout is recorded into ras status registers and then trigger a system fatal error event. v2: 1. change *query_timeout_status to *query_sq_timeout_status. 2. move query_sq_timeout_status into amdgpu_ras_do_recovery. 3. add module parameters to enable/disable fatal error event and modify the watchdog timer. v3: 1. remove unused parameters of *enable_watchdog_timer Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5179d5f032ee..e39d81b68169 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -175,6 +175,10 @@ struct amdgpu_mgpu_info mgpu_info = { int amdgpu_ras_enable = -1; uint amdgpu_ras_mask = 0xffffffff; int amdgpu_bad_page_threshold = 100; +struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { + .timeout_fatal_disable = false, + .period = 0x3f, /* about 8s */ +}; /** * DOC: vramlimit (int) @@ -530,6 +534,20 @@ module_param_named(ras_enable, amdgpu_ras_enable, int, 0444); MODULE_PARM_DESC(ras_mask, "Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1"); module_param_named(ras_mask, amdgpu_ras_mask, uint, 0444); +/** + * DOC: timeout_fatal_disable (bool) + * Disable Watchdog timeout fatal error event + */ +MODULE_PARM_DESC(timeout_fatal_disable, "disable watchdog timeout fatal error (false = default)"); +module_param_named(timeout_fatal_disable, amdgpu_watchdog_timer.timeout_fatal_disable, bool, 0644); + +/** + * DOC: timeout_period (uint) + * Modify the watchdog timeout max_cycles as (1 << period) + */ +MODULE_PARM_DESC(timeout_period, "watchdog timeout period (0x1F = default), timeout maxCycles = (1 << period)"); +module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); + /** * DOC: si_support (int) * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled, -- cgit v1.2.3