summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
diff options
context:
space:
mode:
authorAmber Lin <Amber.Lin@amd.com>2026-03-13 05:27:22 -0400
committerAlex Deucher <alexander.deucher@amd.com>2026-04-17 15:41:14 -0400
commita132fc9bc2f8b394a2f75947a0e1f5c22482a94c (patch)
treed4bbbdf1ab2afd2767929768ab5d168da3326610 /drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
parent9315a1e2bdf1ba4aace856cabcb5f9f3a5c09202 (diff)
downloadlinux-next-a132fc9bc2f8b394a2f75947a0e1f5c22482a94c.tar.gz
linux-next-a132fc9bc2f8b394a2f75947a0e1f5c22482a94c.zip
drm/amdgpu: Fixup boost mes detect hang array size
When allocating memory for the hung queues array, we need to take the total number of queues into account so that the buffer is large enough for the worst case, in which every queue hangs. Suggested-by: Jonathan Kim <jonathan.kim@amd.com> Signed-off-by: Amber Lin <Amber.Lin@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c34
1 files changed, 27 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 0d4c77c1b4b5..0d75d1aa60ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -103,7 +103,7 @@ static inline u32 amdgpu_mes_get_hqd_mask(u32 num_pipe,
int amdgpu_mes_init(struct amdgpu_device *adev)
{
- int i, r, num_pipes;
+ int i, r, num_pipes, num_queues = 0;
u32 total_vmid_mask, reserved_vmid_mask;
int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
u32 gfx_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.me.num_pipe_per_me,
@@ -159,7 +159,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
adev->mes.compute_hqd_mask[i] = compute_hqd_mask;
}
- num_pipes = adev->sdma.num_instances;
+ num_pipes = adev->sdma.num_inst_per_xcc ?
+ adev->sdma.num_inst_per_xcc : adev->sdma.num_instances;
if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES)
dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n",
num_pipes, AMDGPU_MES_MAX_SDMA_PIPES);
@@ -216,8 +217,27 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
if (r)
goto error_doorbell;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0)) {
+ /* When queue/pipe reset is done in MES instead of in the
+ * driver, MES passes hung queues information to the driver in
+ * hung_queue_hqd_info. Calculate required space to store this
+ * information.
+ */
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
+ num_queues += hweight32(adev->mes.gfx_hqd_mask[i]);
+
+ for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++)
+ num_queues += hweight32(adev->mes.compute_hqd_mask[i]);
+
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
+ num_queues += hweight32(adev->mes.sdma_hqd_mask[i]) * num_xcc;
+
+ adev->mes.hung_queue_hqd_info_offset = num_queues;
+ adev->mes.hung_queue_db_array_size = num_queues * 2;
+ }
+
if (adev->mes.hung_queue_db_array_size) {
- for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
r = amdgpu_bo_create_kernel(adev,
adev->mes.hung_queue_db_array_size * sizeof(u32),
PAGE_SIZE,
@@ -264,10 +284,10 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
&adev->mes.event_log_cpu_addr);
for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
- amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i],
- &adev->mes.hung_queue_db_array_gpu_addr[i],
- &adev->mes.hung_queue_db_array_cpu_addr[i]);
-
+ if (adev->mes.hung_queue_db_array_gpu_obj[i])
+ amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i],
+ &adev->mes.hung_queue_db_array_gpu_addr[i],
+ &adev->mes.hung_queue_db_array_cpu_addr[i]);
if (adev->mes.sch_ctx_ptr[i])
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
if (adev->mes.query_status_fence_ptr[i])