summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c83
1 files changed, 54 insertions, 29 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index a8abc3091801..0f7aa51b629e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -204,11 +204,12 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
size_t exception_data_size)
{
struct kfd_process *p;
+ struct kfd_process_device *pdd = NULL;
bool signaled_to_debugger_or_runtime = false;
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, &pdd);
- if (!p)
+ if (!pdd)
return false;
if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
@@ -238,9 +239,8 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
mutex_unlock(&p->mutex);
} else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
- kfd_dqm_evict_pasid(dev->dqm, p->pasid);
- kfd_signal_vm_fault_event(dev, p->pasid, NULL,
- exception_data);
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, exception_data);
signaled_to_debugger_or_runtime = true;
}
@@ -276,8 +276,8 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
data = (struct kfd_hsa_memory_exception_data *)
pdd->vm_fault_exc_data;
- kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
- kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data);
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, data);
error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
}
@@ -357,12 +357,13 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
return 0;
if (!pdd->proc_ctx_cpu_ptr) {
- r = amdgpu_amdkfd_alloc_gtt_mem(adev,
- AMDGPU_MES_PROC_CTX_SIZE,
- &pdd->proc_ctx_bo,
- &pdd->proc_ctx_gpu_addr,
- &pdd->proc_ctx_cpu_ptr,
- false);
+ r = amdgpu_amdkfd_alloc_kernel_mem(adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
if (r) {
dev_err(adev->dev,
"failed to allocate process context bo\n");
@@ -371,8 +372,10 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
}
- return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
- pdd->watch_points, flags, sq_trap_en);
+ return amdgpu_mes_set_shader_debugger(pdd->dev->adev,
+ pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
+ pdd->watch_points, flags, sq_trap_en,
+ ffs(pdd->dev->xcc_mask) - 1);
}
#define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1
@@ -401,27 +404,25 @@ static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_i
return -ENOMEM;
}
-static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
+static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, u32 watch_id)
{
spin_lock(&pdd->dev->watch_points_lock);
/* process owns device watch point so safe to clear */
- if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
- pdd->alloc_watch_ids &= ~(0x1 << watch_id);
- pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
+ if (pdd->alloc_watch_ids & BIT(watch_id)) {
+ pdd->alloc_watch_ids &= ~BIT(watch_id);
+ pdd->dev->alloc_watch_ids &= ~BIT(watch_id);
}
spin_unlock(&pdd->dev->watch_points_lock);
}
-static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
+static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, u32 watch_id)
{
bool owns_watch_id = false;
spin_lock(&pdd->dev->watch_points_lock);
- owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
- ((pdd->alloc_watch_ids >> watch_id) & 0x1);
-
+ owns_watch_id = pdd->alloc_watch_ids & BIT(watch_id);
spin_unlock(&pdd->dev->watch_points_lock);
return owns_watch_id;
@@ -432,6 +433,9 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
{
int r;
+ if (watch_id >= MAX_WATCH_ADDRESSES)
+ return -EINVAL;
+
if (!kfd_dbg_owns_dev_watch_id(pdd, watch_id))
return -EINVAL;
@@ -469,6 +473,9 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
if (r)
return r;
+ if (*watch_id >= MAX_WATCH_ADDRESSES)
+ return -EINVAL;
+
if (!pdd->dev->kfd->shared_resources.enable_mes) {
r = debug_lock_and_unmap(pdd->dev->dqm);
if (r) {
@@ -516,9 +523,15 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
int i, r = 0, rewind_count = 0;
for (i = 0; i < target->n_pdds; i++) {
+ uint32_t caps;
+ uint32_t caps2;
struct kfd_topology_device *topo_dev =
- kfd_topology_device_by_id(target->pdds[i]->dev->id);
- uint32_t caps = topo_dev->node_props.capability;
+ kfd_topology_device_by_id(target->pdds[i]->dev->id);
+ if (!topo_dev)
+ return -EINVAL;
+
+ caps = topo_dev->node_props.capability;
+ caps2 = topo_dev->node_props.capability2;
if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) &&
(*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) {
@@ -531,6 +544,12 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
*flags = prev_flags;
return -EACCES;
}
+
+ if (!(caps2 & HSA_CAP2_TRAP_DEBUG_LDS_OUT_OF_ADDR_RANGE_SUPPORTED) &&
+ (*flags & KFD_DBG_TRAP_FLAG_LDS_OUT_OF_ADDR_RANGE)) {
+ *flags = prev_flags;
+ return -EACCES;
+ }
}
target->dbg_flags = *flags;
@@ -565,9 +584,9 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
continue;
if (!pdd->dev->kfd->shared_resources.enable_mes)
- debug_refresh_runlist(pdd->dev->dqm);
+ (void)debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd, true);
+ (void)kfd_dbg_set_mes_debug_mode(pdd, true);
}
}
@@ -627,9 +646,10 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
pr_err("Failed to release debug vmid on [%i]\n", pdd->dev->id);
if (!pdd->dev->kfd->shared_resources.enable_mes)
- debug_refresh_runlist(pdd->dev->dqm);
+ (void)debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
+ (void)kfd_dbg_set_mes_debug_mode(pdd,
+ !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
kfd_dbg_set_workaround(target, false);
@@ -1071,6 +1091,10 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
for (i = 0; i < tmp_num_devices; i++) {
struct kfd_process_device *pdd = target->pdds[i];
struct kfd_topology_device *topo_dev = kfd_topology_device_by_id(pdd->dev->id);
+ if (!topo_dev) {
+ r = -EINVAL;
+ break;
+ }
device_info.gpu_id = pdd->dev->id;
device_info.exception_status = pdd->exception_status;
@@ -1098,6 +1122,7 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
device_info.num_xcc = NUM_XCC(pdd->dev->xcc_mask);
device_info.capability = topo_dev->node_props.capability;
device_info.debug_prop = topo_dev->node_props.debug_prop;
+ device_info.capability2 = topo_dev->node_props.capability2;
if (exception_clear_mask)
pdd->exception_status &= ~exception_clear_mask;