summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
diff options
context:
space:
mode:
authorLijo Lazar <lijo.lazar@amd.com>2024-11-05 10:30:20 +0530
committerAlex Deucher <alexander.deucher@amd.com>2024-11-08 11:10:00 -0500
commitfa31798582882740f2b13d19e1bd43b4ef918e2f (patch)
treeb10dcd62d5c27ff9b6d8d92298cd6debee5cc1f6 /drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
parent2bb7dced1c2f8c0e705cc74840f776406db492c3 (diff)
downloadlwn-fa31798582882740f2b13d19e1bd43b4ef918e2f.tar.gz
lwn-fa31798582882740f2b13d19e1bd43b4ef918e2f.zip
drm/amdgpu: Fix map/unmap queue logic
In current logic, it calls ring_alloc followed by a ring_test. ring_test in turn will call another ring_alloc. This is illegal usage as a ring_alloc is expected to be closed properly with a ring_commit. Change to commit the map/unmap queue packet first followed by a ring_test. Add a comment about the usage of ring_test. Also, reorder the current pre-condition checks of job hang or kiq ring scheduler not ready. Without them being met, it is not useful to attempt ring or memory allocations. Fixes tag refers to the original patch which introduced this issue which then got carried over into newer code. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Le Ma <le.ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Fixes: 6c10b5cc4eaa ("drm/amdgpu: Remove duplicate code in gfx_v8_0.c")
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c63
1 files changed, 45 insertions, 18 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 6cc6484bde06..5136cb3bdd4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -515,6 +515,17 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ if (!kiq_ring->sched.ready || adev->job_hang)
+ return 0;
+ /**
+ * This is workaround: only skip kiq_ring test
+ * during ras recovery in suspend stage for gfx9.4.3
+ */
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
+ amdgpu_ras_in_recovery(adev))
+ return 0;
+
spin_lock(&kiq->ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_compute_rings)) {
@@ -528,20 +539,15 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
&adev->gfx.compute_ring[j],
RESET_QUEUES, 0, 0);
}
-
- /**
- * This is workaround: only skip kiq_ring test
- * during ras recovery in suspend stage for gfx9.4.3
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that unmap queues that is submitted before got
+ * processed successfully before returning.
*/
- if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
- amdgpu_ras_in_recovery(adev)) {
- spin_unlock(&kiq->ring_lock);
- return 0;
- }
+ r = amdgpu_ring_test_helper(kiq_ring);
- if (kiq_ring->sched.ready && !adev->job_hang)
- r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
return r;
@@ -569,8 +575,11 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
- spin_lock(&kiq->ring_lock);
+ if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang)
+ return 0;
+
if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ spin_lock(&kiq->ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_gfx_rings)) {
spin_unlock(&kiq->ring_lock);
@@ -583,11 +592,17 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
&adev->gfx.gfx_ring[j],
PREEMPT_QUEUES, 0, 0);
}
- }
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
- if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
+ /*
+ * Ring test will do a basic scratch register change check.
+ * Just run this to ensure that unmap queues that is submitted
+ * before got processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
- spin_unlock(&kiq->ring_lock);
+ spin_unlock(&kiq->ring_lock);
+ }
return r;
}
@@ -692,7 +707,13 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
kiq->pmf->kiq_map_queues(kiq_ring,
&adev->gfx.compute_ring[j]);
}
-
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that map queues that is submitted before got
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
if (r)
@@ -743,7 +764,13 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
&adev->gfx.gfx_ring[j]);
}
}
-
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that map queues that is submitted before got
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
if (r)