From a17ef941212bf26e9985ec31486a9606420d8257 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 15 Jan 2026 22:01:15 -0500 Subject: drm/amdgpu: rework ring reset backup and reemit v9 Store the start wptr and ib size in the IB fence. On queue reset, save the ring contents of all IBs. For reemit, reemit the entire IB state for non-guilty contexts. For guilty contexts, replace the IB submission with nops, but reemit the rest. Split the reemit per fence and when we reemit, update the wptr with the new values from reemit. This allows us to reemit jobs repeatedly as the wptrs get properly updated each time. v2: further simplify the logic v3: reemit vm state, not just vm fence v4: just nop the IB and possibly the VM portion of the submission v5: simplify the vm fence check v6: split the vm and ib fences v7: fix commit message v8: use wptr rather than count_dw to calculate offsets v9: fix missing documentation update spotted by the kernel test robot Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 46 +++++++------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 4638a686a84e..a345c3fb8ff4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -90,10 +90,13 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; /* Make sure we aren't trying to allocate more space - * than the maximum for one submission + * than the maximum for one submission. Skip for reemit + * since we may be reemitting several submissions. 
*/ - if (WARN_ON_ONCE(ndw > ring->max_dw)) - return -ENOMEM; + if (!ring->reemit) { + if (WARN_ON_ONCE(ndw > ring->max_dw)) + return -ENOMEM; + } ring->count_dw = ndw; ring->wptr_old = ring->wptr; @@ -104,29 +107,6 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) return 0; } -/** - * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit - * - * @ring: amdgpu_ring structure holding ring information - * @ndw: number of dwords to allocate in the ring buffer - * - * Allocate @ndw dwords in the ring buffer (all asics). - * doesn't check the max_dw limit as we may be reemitting - * several submissions. - */ -static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw) -{ - /* Align requested size with padding so unlock_commit can - * pad safely */ - ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; - - ring->count_dw = ndw; - ring->wptr_old = ring->wptr; - - if (ring->funcs->begin_use) - ring->funcs->begin_use(ring); -} - /** * amdgpu_ring_insert_nop - insert NOP packets * @@ -875,7 +855,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, struct amdgpu_fence *guilty_fence) { - unsigned int i; int r; /* verify that the ring is functional */ @@ -883,16 +862,9 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, if (r) return r; - /* set an error on all fences from the context */ - if (guilty_fence) - amdgpu_fence_driver_update_timedout_fence_state(guilty_fence); - /* Re-emit the non-guilty commands */ - if (ring->ring_backup_entries_to_copy) { - amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy); - for (i = 0; i < ring->ring_backup_entries_to_copy; i++) - amdgpu_ring_write(ring, ring->ring_backup[i]); - amdgpu_ring_commit(ring); - } + /* set an error on all fences from the context and reemit */ + amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence); + return 0; } -- cgit v1.2.3