summary refs log tree commit diff
path: root/drivers
diff options
context:
space:
mode:
author Perry Yuan <perry.yuan@amd.com> 2026-02-09 00:42:07 +0800
committer Alex Deucher <alexander.deucher@amd.com> 2026-05-11 15:55:56 -0400
commit dd61e27535a6f5cfb32a847b282d2e3d5aebf46f (patch)
tree 2014c59f8faf39adde7a8c99a53e98d333b6b2eb /drivers
parent d51a0439358d4eacd8beb39df619998d8340d232 (diff)
download lwn-dd61e27535a6f5cfb32a847b282d2e3d5aebf46f.tar.gz
lwn-dd61e27535a6f5cfb32a847b282d2e3d5aebf46f.zip
drm/amdkfd: Add PTL control IOCTL Option and unify refcount logic
Introduce a new IOCTL option to allow userspace explicit control over the Peak Tops Limiter (PTL) state for profiling.
Link: https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-sdk
Signed-off-by: Perry Yuan <perry.yuan@amd.com>
Reviewed-by: Yifan Zhang <yifan1.zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 102
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h 8
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_process.c 4
-rw-r--r-- drivers/gpu/drm/amd/include/amdgpu_ptl.h 2
5 files changed, 118 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 467a3dbe1bfa..aab6a4de54fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2400,6 +2400,8 @@ static int gfx_v9_4_3_perf_monitor_ptl_init(struct amdgpu_device *adev, bool ena
ptl->hw_supported = true;
+ atomic_set(&ptl->disable_ref, 0);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index fc00d0418684..883de31df04d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1774,6 +1774,104 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
}
#endif
+/*
+ * kfd_ptl_control - switch the Peak Tops Limiter on or off for one device
+ * @pdd: per-process device data identifying the target device
+ * @enable: true to enable PTL, false to disable it
+ *
+ * Issues PSP_PTL_PERF_MON_SET through the kfd2kgd ptl_ctrl hook, passing the
+ * device's current fmt1/fmt2 values as the preferred formats.
+ *
+ * Return: the hook's result, or -EOPNOTSUPP when PTL is not flagged as
+ * hardware-supported or the hook is not provided.
+ */
+static int kfd_ptl_control(struct kfd_process_device *pdd, bool enable)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_ptl *ptl = &adev->psp.ptl;
+	enum amdgpu_ptl_fmt fmt1, fmt2;
+	uint32_t state;
+
+	if (!ptl->hw_supported)
+		return -EOPNOTSUPP;
+
+	if (!(pdd->dev->kfd2kgd && pdd->dev->kfd2kgd->ptl_ctrl))
+		return -EOPNOTSUPP;
+
+	/* The hook takes pointers, so stage every argument in a local. */
+	state = enable;
+	fmt1 = ptl->fmt1;
+	fmt2 = ptl->fmt2;
+
+	return pdd->dev->kfd2kgd->ptl_ctrl(adev, PSP_PTL_PERF_MON_SET,
+					   &state, &fmt1, &fmt2);
+}
+
+/*
+ * kfd_ptl_disable_request - record that this pdd wants PTL kept disabled
+ * @pdd: per-process device data placing the request
+ * @p: owning process (not used by this function)
+ *
+ * A pdd that already holds a request returns 0 without touching the device
+ * reference count. Otherwise disable_ref is incremented, and the requester
+ * that takes it from 0 to 1 actually disables PTL. If that fails, the
+ * reference is dropped again and the pdd is left unmarked.
+ *
+ * Return: 0 on success or when the request was already held, otherwise the
+ * error from kfd_ptl_control().
+ */
+int kfd_ptl_disable_request(struct kfd_process_device *pdd,
+			    struct kfd_process *p)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_ptl *ptl = &adev->psp.ptl;
+	int ret = 0;
+
+	mutex_lock(&ptl->mutex);
+
+	if (pdd->ptl_disable_req)
+		goto out;
+
+	if (atomic_inc_return(&ptl->disable_ref) == 1) {
+		ret = kfd_ptl_control(pdd, false);
+		if (ret) {
+			atomic_dec(&ptl->disable_ref);
+			/*
+			 * -EOPNOTSUPP only means the device has no PTL. The
+			 * profiler lock path calls this unconditionally, so
+			 * warning here would flood the log on such hardware.
+			 */
+			if (ret != -EOPNOTSUPP)
+				dev_warn(adev->dev,
+					 "Failed to disable PTL on request: %d\n",
+					 ret);
+			goto out;
+		}
+	}
+	pdd->ptl_disable_req = true;
+
+out:
+	mutex_unlock(&ptl->mutex);
+	return ret;
+}
+
+/*
+ * kfd_ptl_disable_release - drop this pdd's request to keep PTL disabled
+ * @pdd: per-process device data releasing its request
+ * @p: owning process (not used by this function)
+ *
+ * Does nothing when the pdd holds no request. Otherwise disable_ref is
+ * decremented, and the releaser that brings it to 0 re-enables PTL. If the
+ * re-enable fails, the reference is restored and the pdd stays marked.
+ *
+ * Return: 0 on success or when there was nothing to release, otherwise the
+ * error from kfd_ptl_control().
+ */
+int kfd_ptl_disable_release(struct kfd_process_device *pdd,
+			    struct kfd_process *p)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_ptl *ptl = &adev->psp.ptl;
+	int err = 0;
+
+	mutex_lock(&ptl->mutex);
+
+	if (pdd->ptl_disable_req) {
+		/* Only the last holder switches PTL back on. */
+		if (!atomic_dec_return(&ptl->disable_ref))
+			err = kfd_ptl_control(pdd, true);
+
+		if (err) {
+			/* Undo the decrement; the request is still held. */
+			atomic_inc(&ptl->disable_ref);
+			dev_warn(adev->dev, "Failed to enable PTL on release: %d\n", err);
+		} else {
+			pdd->ptl_disable_req = false;
+		}
+	}
+
+	mutex_unlock(&ptl->mutex);
+	return err;
+}
+
+/*
+ * kfd_profiler_ptl_control - handle the profiler PTL control request
+ * @p: calling process
+ * @args: request arguments; gpu_id selects the device, enable the action
+ *
+ * Looks up the process device for args->gpu_id under p->mutex, then places
+ * a PTL-disable request (enable == 0) or releases it (enable != 0).
+ *
+ * Return: -EINVAL when no usable device matches gpu_id, otherwise the
+ * result of the request/release helper.
+ */
+static int kfd_profiler_ptl_control(struct kfd_process *p,
+				    struct kfd_ioctl_ptl_control *args)
+{
+	struct kfd_process_device *pdd;
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	mutex_unlock(&p->mutex);
+
+	if (!(pdd && pdd->dev && pdd->dev->kfd))
+		return -EINVAL;
+
+	/* Non-zero enable withdraws the disable request; zero places one. */
+	if (args->enable)
+		return kfd_ptl_disable_release(pdd, p);
+
+	return kfd_ptl_disable_request(pdd, p);
+}
+
static int criu_checkpoint_process(struct kfd_process *p,
uint8_t __user *user_priv_data,
uint64_t *priv_offset)
@@ -3242,6 +3340,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
if (!kfd->profiler_process) {
kfd->profiler_process = p;
status = 0;
+ kfd_ptl_disable_request(pdd, p);
} else if (kfd->profiler_process == p) {
status = -EALREADY;
} else {
@@ -3250,6 +3349,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
} else if (op == 0 && kfd->profiler_process == p) {
kfd->profiler_process = NULL;
status = 0;
+ kfd_ptl_disable_release(pdd, p);
}
mutex_unlock(&kfd->profiler_lock);
@@ -3292,6 +3392,8 @@ static int kfd_ioctl_profiler(struct file *filep, struct kfd_process *p, void *d
return 0;
case KFD_IOC_PROFILER_PMC:
return kfd_profiler_pmc(p, &args->pmc);
+ case KFD_IOC_PROFILER_PTL_CONTROL:
+ return kfd_profiler_ptl_control(p, &args->ptl);
}
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 903386e0740b..482bcfa10f82 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -872,6 +872,8 @@ struct kfd_process_device {
bool has_reset_queue;
u32 pasid;
+ /* Indicates this process has requested PTL stay disabled */
+ bool ptl_disable_req;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -1603,6 +1605,12 @@ static inline bool kfd_is_first_node(struct kfd_node *node)
return (node == node->kfd->nodes[0]);
}
+/*
+ * PTL support: per-pdd requests to keep the Peak Tops Limiter disabled.
+ * kfd_ptl_disable_request() places a request for @pdd and
+ * kfd_ptl_disable_release() drops it. Both return 0 on success, including
+ * when there is nothing to do, or the error from the PTL control call.
+ */
+int kfd_ptl_disable_request(struct kfd_process_device *pdd,
+ struct kfd_process *p);
+int kfd_ptl_disable_release(struct kfd_process_device *pdd,
+ struct kfd_process *p);
+
/* Debugfs */
#if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1a8cb512dfe3..368283d53077 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1128,6 +1128,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
pdd->dev->id, p->lead_thread->pid);
kfd_process_profiler_release(p, pdd);
+
+ if (pdd->ptl_disable_req)
+ kfd_ptl_disable_release(pdd, p);
+
kfd_process_device_destroy_cwsr_dgpu(pdd);
kfd_process_device_destroy_ib_mem(pdd);
diff --git a/drivers/gpu/drm/amd/include/amdgpu_ptl.h b/drivers/gpu/drm/amd/include/amdgpu_ptl.h
index ffed443a14ae..9e63a9a9680a 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_ptl.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_ptl.h
@@ -39,6 +39,8 @@ struct amdgpu_ptl {
enum amdgpu_ptl_fmt fmt2;
bool enabled;
bool hw_supported;
+ /* PTL disable reference counting */
+ atomic_t disable_ref;
struct mutex mutex;
};