diff options
| author | Amber Lin <Amber.Lin@amd.com> | 2026-03-13 05:35:55 -0400 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2026-04-17 15:41:14 -0400 |
| commit | 640482525554cc08370f2355be61b9fdf2b066d4 (patch) | |
| tree | 8773e472afb9f7e3511f445375f108fc9a03842f /drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | |
| parent | a132fc9bc2f8b394a2f75947a0e1f5c22482a94c (diff) | |
| download | linux-next-640482525554cc08370f2355be61b9fdf2b066d4.tar.gz linux-next-640482525554cc08370f2355be61b9fdf2b066d4.zip | |
drm/amdgpu: Fixup detect and reset
Identify hung queues by comparing doorbells shown in hqd_info from MES
with doorbells stored in the driver to find matching queues.
Suggested-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 38 |
1 file changed, 25 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 0d75d1aa60ec..932518934f5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -466,23 +466,35 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes, &input); - if (r) { - dev_err(adev->dev, "failed to detect and reset\n"); - } else { - *hung_db_num = 0; - for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { - if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { - hung_db_array[i] = db_array[i]; - *hung_db_num += 1; - } + + if (r && detect_only) { + dev_err(adev->dev, "Failed to detect hung queues\n"); + return r; + } + + *hung_db_num = 0; + /* MES passes hung queues' doorbell to driver */ + for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { + /* Finding hung queues where db_array[i] is a valid doorbell */ + if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { + hung_db_array[i] = db_array[i]; + *hung_db_num += 1; } + } - /* - * TODO: return HQD info for MES scheduled user compute queue reset cases - * stored in hung_db_array hqd info offset to full array size - */ + if (r && !hung_db_num) { + dev_err(adev->dev, "Failed to detect and reset hung queues\n"); + return r; } + /* + * TODO: return HQD info for MES scheduled user compute queue reset cases + * stored in hung_db_array hqd info offset to full array size + */ + + if (r) + dev_err(adev->dev, "failed to reset\n"); + return r; } |
