drm/amdgpu: bypass querying ras error count registers

Once RAS recovery has been triggered by a RAS sync flood interrupt or a RAS controller interrupt, use this guard to decide whether to bypass or perform the harvest of the RAS error count registers of all IPs.

Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
author: Guchun Chen <guchun.chen@amd.com> 2020-08-04 15:00:53 +0800
committer: Alex Deucher <alexander.deucher@amd.com> 2020-08-14 16:12:22 -0400
commit: f75e94d86829e92a758a26fc5bbdb4c9eba86260 (patch)
tree: aef308d7dcb3e9d3ac29814a597f916f7257a9a8 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent: 0cf0ee983b14af5e2160fa677b2eeb4c5dbb03ad (diff)
download: lwn-f75e94d86829e92a758a26fc5bbdb4c9eba86260.tar.gz
lwn-f75e94d86829e92a758a26fc5bbdb4c9eba86260.zip
1 files changed, 12 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1a55f6f492fd..fbe464c68e5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1547,17 +1547,19 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 	struct list_head device_list, *device_list_handle =  NULL;
 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, false);
 
-	/* Build list of devices to query RAS related errors */
-	if  (hive && adev->gmc.xgmi.num_physical_nodes > 1)
-		device_list_handle = &hive->device_list;
-	else {
-		INIT_LIST_HEAD(&device_list);
-		list_add_tail(&adev->gmc.xgmi.head, &device_list);
-		device_list_handle = &device_list;
-	}
+	if (!ras->disable_ras_err_cnt_harvest) {
+		/* Build list of devices to query RAS related errors */
+		if  (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
+			device_list_handle = &hive->device_list;
+		} else {
+			INIT_LIST_HEAD(&device_list);
+			list_add_tail(&adev->gmc.xgmi.head, &device_list);
+			device_list_handle = &device_list;
+		}
 
-	list_for_each_entry(remote_adev, device_list_handle, gmc.xgmi.head) {
-		amdgpu_ras_log_on_err_counter(remote_adev);
+		list_for_each_entry(remote_adev,
+				device_list_handle, gmc.xgmi.head)
+			amdgpu_ras_log_on_err_counter(remote_adev);
 	}
 
 	if (amdgpu_device_should_recover_gpu(ras->adev))
author	Guchun Chen <guchun.chen@amd.com>	2020-08-04 15:00:53 +0800
committer	Alex Deucher <alexander.deucher@amd.com>	2020-08-14 16:12:22 -0400
commit	f75e94d86829e92a758a26fc5bbdb4c9eba86260 (patch)
tree	aef308d7dcb3e9d3ac29814a597f916f7257a9a8 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent	0cf0ee983b14af5e2160fa677b2eeb4c5dbb03ad (diff)
download	lwn-f75e94d86829e92a758a26fc5bbdb4c9eba86260.tar.gz lwn-f75e94d86829e92a758a26fc5bbdb4c9eba86260.zip