summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2026-07-03 15:44:49 +0100
committerMark Brown <broonie@kernel.org>2026-07-03 15:44:49 +0100
commit4dac529c9a2c0ed1286801ec62d23538035a3a1d (patch)
treec99335269438629bc089c9895e6156cf9a374a22 /drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
parent5a42c9f18870162ae14f0d8e193671017421a982 (diff)
parent50be7c9b5d5ea55fd40bb411cf324cec99ec7417 (diff)
downloadlinux-next-4dac529c9a2c0ed1286801ec62d23538035a3a1d.tar.gz
linux-next-4dac529c9a2c0ed1286801ec62d23538035a3a1d.zip
Merge branch 'drm-next' of https://gitlab.freedesktop.org/agd5f/linux.git
# Conflicts: # drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c # drivers/gpu/drm/amd/amdgpu/amdgpu_device.c # drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c # drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/mes_userqueue.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.c147
1 files changed, 90 insertions, 57 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 16625c31bfd3..e947c16e694d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -133,8 +133,8 @@ static int mes_userq_map(struct amdgpu_usermode_queue *queue)
queue_input.gang_quantum = 10000;
queue_input.paging = false;
- queue_input.process_context_addr = ctx->gpu_addr;
- queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.process_context_addr = uq_mgr->proc_ctx_obj.gpu_addr;
+ queue_input.gang_context_addr = ctx->gpu_addr;
queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority);
@@ -169,7 +169,8 @@ static int mes_userq_unmap(struct amdgpu_usermode_queue *queue)
memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
queue_input.doorbell_offset = queue->doorbell_index;
- queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.gang_context_addr = ctx->gpu_addr;
+ queue_input.queue_type = queue->queue_type;
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
@@ -179,6 +180,63 @@ static int mes_userq_unmap(struct amdgpu_usermode_queue *queue)
return r;
}
+int mes_userq_reset(struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct mes_reset_queue_input queue_input;
+ int r;
+
+ /* XXX: add a FW version check for SDMA per queue reset */
+ memset(&queue_input, 0x0, sizeof(struct mes_reset_queue_input));
+ queue_input.doorbell_offset = queue->doorbell_index;
+ queue_input.queue_type = queue->queue_type;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ return r;
+ return mes_userq_unmap(queue);
+}
+
+int mes_userq_reset_queue(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *guilty_uq,
+ int queue_type,
+ unsigned int pipe,
+ unsigned int queue,
+ unsigned int db)
+{
+ struct amdgpu_usermode_queue *uq;
+ bool use_mmio = adev->gfx.mec.use_mmio_for_reset;
+ unsigned long uq_id;
+ int r;
+
+ xa_for_each(&adev->userq_doorbell_xa, uq_id, uq) {
+ if (uq->queue_type == queue_type) {
+ if (uq == guilty_uq)
+ continue;
+ if (uq->doorbell_index == db) {
+ uq->state = AMDGPU_USERQ_STATE_HUNG;
+ if (use_mmio)
+ r = amdgpu_mes_reset_queue_mmio(adev, queue_type, 0, 1, pipe, queue, 0);
+ else
+ r = amdgpu_mes_reset_user_queue(adev, queue_type, db, 0);
+ if (r)
+ return r;
+ r = mes_userq_unmap(uq);
+ if (r)
+ return r;
+ atomic_inc(&adev->gpu_reset_counter);
+ amdgpu_userq_fence_driver_force_completion(uq);
+ drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue,
struct drm_amdgpu_userq_in *mqd_user)
@@ -186,12 +244,8 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_userq_obj *ctx = &queue->fw_obj;
int r, size;
- /*
- * The FW expects at least one page space allocated for
- * process ctx and gang ctx each. Create an object
- * for the same.
- */
- size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
+ /* The FW expects at least one page space allocated for gang ctx. */
+ size = AMDGPU_USERQ_GANG_CTX_SZ;
r = amdgpu_bo_create_kernel(uq_mgr->adev, size, 0,
AMDGPU_GEM_DOMAIN_GTT,
&ctx->obj, &ctx->gpu_addr,
@@ -205,54 +259,26 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
return 0;
}
-static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
- int queue_type)
+static int mes_userq_create_proc_ctx_space(struct amdgpu_userq_mgr *uq_mgr)
{
- int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev);
- struct mes_detect_and_reset_queue_input input;
- struct amdgpu_usermode_queue *queue;
- unsigned int hung_db_num = 0;
- unsigned long queue_id;
- u32 db_array[8];
- bool found_hung_queue = false;
- int r, i;
-
- if (db_array_size > 8) {
- dev_err(adev->dev, "DB array size (%d vs 8) too small\n",
- db_array_size);
- return -EINVAL;
- }
-
- memset(&input, 0x0, sizeof(struct mes_detect_and_reset_queue_input));
+ int r = 0;
- input.queue_type = queue_type;
-
- amdgpu_mes_lock(&adev->mes);
- r = amdgpu_mes_detect_and_reset_hung_queues(adev, queue_type, false,
- &hung_db_num, db_array, 0);
- amdgpu_mes_unlock(&adev->mes);
- if (r) {
- dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r);
- } else if (hung_db_num) {
- xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
- if (queue->queue_type == queue_type) {
- for (i = 0; i < hung_db_num; i++) {
- if (queue->doorbell_index == db_array[i]) {
- queue->state = AMDGPU_USERQ_STATE_HUNG;
- found_hung_queue = true;
- atomic_inc(&adev->gpu_reset_counter);
- amdgpu_userq_fence_driver_force_completion(queue);
- drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
- }
- }
- }
- }
+ mutex_lock(&uq_mgr->proc_ctx_lock);
+ /* This check is a necessary because amdgpu_bo_create_kernel()
+ * calls helpers like amdgpu_bo_pin() and memset() unconditionally
+ */
+ if (!uq_mgr->proc_ctx_obj.obj) {
+ r = amdgpu_bo_create_kernel(uq_mgr->adev, AMDGPU_USERQ_PROC_CTX_SZ,
+ 0, AMDGPU_GEM_DOMAIN_GTT,
+ &uq_mgr->proc_ctx_obj.obj,
+ &uq_mgr->proc_ctx_obj.gpu_addr,
+ &uq_mgr->proc_ctx_obj.cpu_ptr);
+
+ if (!r)
+ memset(uq_mgr->proc_ctx_obj.cpu_ptr, 0, AMDGPU_USERQ_PROC_CTX_SZ);
}
- if (found_hung_queue) {
- /* Resume scheduling after hang recovery */
- r = amdgpu_mes_resume(adev, input.xcc_id);
- }
+ mutex_unlock(&uq_mgr->proc_ctx_lock);
return r;
}
@@ -429,7 +455,14 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
goto free_mqd;
}
- /* Create BO for FW operations */
+ /* Create per-process MES process context BO */
+ r = mes_userq_create_proc_ctx_space(uq_mgr);
+ if (r) {
+ DRM_ERROR("Failed to allocate MES process context space bo, error: %d\n", r);
+ goto free_mqd;
+ }
+
+ /* Create BO of a gang for FW operations */
r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
if (r) {
DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
@@ -497,7 +530,7 @@ static int mes_userq_preempt(struct amdgpu_usermode_queue *queue)
*fence_ptr = 0;
memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
- queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.gang_context_addr = ctx->gpu_addr;
queue_input.suspend_fence_addr = fence_gpu_addr;
queue_input.suspend_fence_value = 1;
amdgpu_mes_lock(&adev->mes);
@@ -534,7 +567,7 @@ static int mes_userq_restore(struct amdgpu_usermode_queue *queue)
return 0;
memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
- queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.gang_context_addr = ctx->gpu_addr;
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);
@@ -549,7 +582,7 @@ const struct amdgpu_userq_funcs userq_mes_funcs = {
.mqd_destroy = mes_userq_mqd_destroy,
.unmap = mes_userq_unmap,
.map = mes_userq_map,
- .detect_and_reset = mes_userq_detect_and_reset,
.preempt = mes_userq_preempt,
.restore = mes_userq_restore,
+ .reset = mes_userq_reset,
};