summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
diff options
context:
space:
mode:
author: Amber Lin <Amber.Lin@amd.com> 2026-03-13 05:35:55 -0400
committer: Alex Deucher <alexander.deucher@amd.com> 2026-04-17 15:41:14 -0400
commit: 640482525554cc08370f2355be61b9fdf2b066d4 (patch)
tree: 8773e472afb9f7e3511f445375f108fc9a03842f /drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
parent: a132fc9bc2f8b394a2f75947a0e1f5c22482a94c (diff)
download: linux-next-640482525554cc08370f2355be61b9fdf2b066d4.tar.gz
          linux-next-640482525554cc08370f2355be61b9fdf2b066d4.zip
drm/amdgpu: Fixup detect and reset
Identify hung queues by comparing doorbells shown in hqd_info from MES with
doorbells stored in the driver to find matching queues.

Suggested-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 38
1 files changed, 25 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 0d75d1aa60ec..932518934f5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -466,23 +466,35 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
&input);
- if (r) {
- dev_err(adev->dev, "failed to detect and reset\n");
- } else {
- *hung_db_num = 0;
- for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
- if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
- hung_db_array[i] = db_array[i];
- *hung_db_num += 1;
- }
+
+ if (r && detect_only) {
+ dev_err(adev->dev, "Failed to detect hung queues\n");
+ return r;
+ }
+
+ *hung_db_num = 0;
+ /* MES passes hung queues' doorbell to driver */
+ for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
+ /* Finding hung queues where db_array[i] is a valid doorbell */
+ if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
+ hung_db_array[i] = db_array[i];
+ *hung_db_num += 1;
}
+ }
- /*
- * TODO: return HQD info for MES scheduled user compute queue reset cases
- * stored in hung_db_array hqd info offset to full array size
- */
+ if (r && !hung_db_num) {
+ dev_err(adev->dev, "Failed to detect and reset hung queues\n");
+ return r;
}
+ /*
+ * TODO: return HQD info for MES scheduled user compute queue reset cases
+ * stored in hung_db_array hqd info offset to full array size
+ */
+
+ if (r)
+ dev_err(adev->dev, "failed to reset\n");
+
return r;
}