summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_perf.c
diff options
context:
space:
mode:
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>2019-10-14 21:14:04 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2019-10-14 21:30:28 +0100
commit9cd20ef7803cc53a00c6eb7198b3d870ac7b3766 (patch)
treea2d4767b15a9b9084eecd2ffd6083ba490c6a5bf /drivers/gpu/drm/i915/i915_perf.c
parent7831e9a965ea2ca91855995d62197bc8078bb762 (diff)
downloadlwn-9cd20ef7803cc53a00c6eb7198b3d870ac7b3766.tar.gz
lwn-9cd20ef7803cc53a00c6eb7198b3d870ac7b3766.zip
drm/i915/perf: allow holding preemption on filtered ctx
We would like to make use of perf in Vulkan. The Vulkan API is much lower level than OpenGL, with applications directly exposed to the concept of command buffers (pretty much equivalent to our batch buffers). In Vulkan, queries are always limited in scope to a command buffer. In OpenGL, the lack of command buffer concept meant that queries' duration could span multiple command buffers. With that restriction gone in Vulkan, we would like to simplify measuring performance just by measuring the deltas between the counter snapshots written by 2 MI_RECORD_PERF_COUNT commands, rather than the more complex scheme we currently have in the GL driver, using 2 MI_RECORD_PERF_COUNT commands and doing some post processing on the stream of OA reports, coming from the global OA buffer, to remove any unrelated deltas in between the 2 MI_RECORD_PERF_COUNT. Disabling preemption only apply to a single context with which want to query performance counters for and is considered a privileged operation, by default protected by CAP_SYS_ADMIN. It is possible to enable it for a normal user by disabling the paranoid stream setting. v2: Store preemption setting in intel_context (Chris) v3: Use priorities to avoid preemption rather than the HW mechanism v4: Just modify the port priority reporting function v5: Add nopreempt flag on gem context and always flag requests appropriately, regarless of OA reconfiguration. Link: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-4-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c34
1 files changed, 31 insertions, 3 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 372c91f6a28e..54ec1c4190ac 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -344,6 +344,8 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
* struct perf_open_properties - for validated properties given to open a stream
* @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
* @single_context: Whether a single or all gpu contexts should be monitored
+ * @hold_preemption: Whether the preemption is disabled for the filtered
+ * context
* @ctx_handle: A gem ctx handle for use with @single_context
* @metrics_set: An ID for an OA unit metric set advertised via sysfs
* @oa_format: An OA unit HW report format
@@ -359,6 +361,7 @@ struct perf_open_properties {
u32 sample_flags;
u64 single_context:1;
+ u64 hold_preemption:1;
u64 ctx_handle;
/* OA sampling state */
@@ -2514,6 +2517,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
if (WARN_ON(stream->oa_buffer.format_size == 0))
return -EINVAL;
+ stream->hold_preemption = props->hold_preemption;
+
stream->oa_buffer.format =
perf->oa_formats[props->oa_format].format;
@@ -2834,6 +2839,9 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream)
if (stream->ops->enable)
stream->ops->enable(stream);
+
+ if (stream->hold_preemption)
+ i915_gem_context_set_nopreempt(stream->ctx);
}
/**
@@ -2858,6 +2866,9 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream)
/* Allow stream->ops->disable() to refer to this */
stream->enabled = false;
+ if (stream->hold_preemption)
+ i915_gem_context_clear_nopreempt(stream->ctx);
+
if (stream->ops->disable)
stream->ops->disable(stream);
}
@@ -3067,6 +3078,15 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
}
}
+ if (props->hold_preemption) {
+ if (!props->single_context) {
+ DRM_DEBUG("preemption disable with no context\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ privileged_op = true;
+ }
+
/*
* On Haswell the OA unit supports clock gating off for a specific
* context and in this mode there's no visibility of metrics for the
@@ -3081,7 +3101,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
* MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
* enable the OA unit by default.
*/
- if (IS_HASWELL(perf->i915) && specific_ctx)
+ if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption)
privileged_op = false;
/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
@@ -3091,7 +3111,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
*/
if (privileged_op &&
i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
- DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
+ DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
ret = -EACCES;
goto err_ctx;
}
@@ -3293,6 +3313,9 @@ static int read_properties_unlocked(struct i915_perf *perf,
props->oa_periodic = true;
props->oa_period_exponent = value;
break;
+ case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
+ props->hold_preemption = !!value;
+ break;
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
return -EINVAL;
@@ -4066,8 +4089,13 @@ int i915_perf_ioctl_version(void)
*
* 2: Added runtime modification of OA config.
* I915_PERF_IOCTL_CONFIG
+ *
+ * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold
+ * preemption on a particular context so that performance data is
+ * accessible from a delta of MI_RPC reports without looking at the
+ * OA buffer.
*/
- return 2;
+ return 3;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)