From 7ceb94e87bffff7c12b61eb29749e1d8ac976896 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 2 Feb 2024 14:00:27 -0500 Subject: drm/amd: Add gfx12 swizzle mode defs Add GFX12 swizzle mode definitions for use with DCN401 Signed-off-by: Aurabindo Pillai Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- include/uapi/drm/drm_fourcc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 84d502e42961..4168445fbb8b 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1476,6 +1476,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_TILE_VER_GFX10 2 #define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3 #define AMD_FMT_MOD_TILE_VER_GFX11 4 +#define AMD_FMT_MOD_TILE_VER_GFX12 5 /* * 64K_S is the same for GFX9/GFX10/GFX10_RBPLUS and hence has GFX9 as canonical @@ -1486,6 +1487,8 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) /* * 64K_D for non-32 bpp is the same for GFX9/GFX10/GFX10_RBPLUS and hence has * GFX9 as canonical version. + * + * 64K_D_2D on GFX12 is identical to 64K_D on GFX11. */ #define AMD_FMT_MOD_TILE_GFX9_64K_D 10 #define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25 @@ -1493,6 +1496,19 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27 #define AMD_FMT_MOD_TILE_GFX11_256K_R_X 31 +/* Gfx12 swizzle modes: + * 0 - LINEAR + * 1 - 256B_2D - 2D block dimensions + * 2 - 4KB_2D + * 3 - 64KB_2D + * 4 - 256KB_2D + * 5 - 4KB_3D - 3D block dimensions + * 6 - 64KB_3D + * 7 - 256KB_3D + */ +#define AMD_FMT_MOD_TILE_GFX12_64K_2D 3 +#define AMD_FMT_MOD_TILE_GFX12_256K_2D 4 + #define AMD_FMT_MOD_DCC_BLOCK_64B 0 #define AMD_FMT_MOD_DCC_BLOCK_128B 1 #define AMD_FMT_MOD_DCC_BLOCK_256B 2 -- cgit v1.2.3 From 7d09d80b93ab15c1ff969facb8f6111fb9c084ce Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 19 Apr 2024 12:17:56 -0400 Subject: drm/amd: define new gfx12 uapi flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit define new gfx12 uapi flags Signed-off-by: Marek Olšák Signed-off-by: Aurabindo Pillai Acked-by: Hawking Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 96e32dafd4f0..feb47623458a 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -392,7 +392,7 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_NUM_BANKS_SHIFT 21 #define AMDGPU_TILING_NUM_BANKS_MASK 0x3 -/* GFX9 and later: */ +/* GFX9 - GFX11: */ #define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 #define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f #define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5 @@ -406,6 +406,17 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_SCANOUT_SHIFT 63 #define AMDGPU_TILING_SCANOUT_MASK 0x1 +/* GFX12 and later: */ +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 +/* These are DCC recompression setting for memory management: */ +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ + /* Set/Get helpers for tiling flags. */ #define AMDGPU_TILING_SET(field, value) \ (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) @@ -1268,6 +1279,7 @@ struct drm_amdgpu_info_gpuvm_fault { #define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ #if defined(__cplusplus) } -- cgit v1.2.3 From 96557f785a7701c7e0c327bd25b701d0eb5dcee0 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 5 Mar 2024 14:38:31 -0500 Subject: drm/amd: GFX12 changes for converting tiling flags to modifiers GFX12 swizzle mode and GCC formats changed and is much simpler. Use a seperate function for the same. Changes: * Swizzle mode is now 3 bits only * DCC enablement doesn't come from tiling_flags, it is always set in modifiers * DCC max compressed block size of 128B Signed-off-by: Aurabindo Pillai Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 37 +++++++++++++++++++++++++++++ include/uapi/drm/drm_fourcc.h | 3 +++ 2 files changed, 40 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 3ecc7ef95172..cfec85563bc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -718,6 +718,37 @@ extract_render_dcc_offset(struct amdgpu_device *adev, return 0; } +static int convert_tiling_flags_to_modifier_gfx12(struct amdgpu_framebuffer *afb) +{ + struct amdgpu_device *adev = drm_to_adev(afb->base.dev); + const struct drm_format_info *format_info; + u64 modifier = 0; + int tile = 0; + int swizzle = 0; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) { + tile = AMD_FMT_MOD_TILE_VER_GFX12; + swizzle = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE); + } + + modifier = + AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, swizzle) | + AMD_FMT_MOD_SET(TILE_VERSION, tile) | + AMD_FMT_MOD_SET(DCC, 0) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, 0); + + format_info = amdgpu_lookup_format_info(afb->base.format->format, + modifier); + if (!format_info) + return -EINVAL; + + afb->base.modifier = modifier; + afb->base.flags |= DRM_MODE_FB_MODIFIERS; + + return 0; +} + static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) { struct amdgpu_device *adev = drm_to_adev(afb->base.dev); @@ -742,6 +773,12 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) int pipes = ilog2(num_pipes); uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B); + + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) { + convert_tiling_flags_to_modifier_gfx12(afb); + return 0; + } + switch (swizzle >> 2) { case 0: /* 256B */ block_size_bits = 8; diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 4168445fbb8b..d0063ac6e09f 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1540,6 +1540,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18 #define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 +#define AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ + /* * DCC supports embedding some clear colors directly in the DCC surface. * However, on older GPUs the rendering HW ignores the embedded clear color -- cgit v1.2.3 From 226e4ca5d4f683b3013947e495c6b433b35718b7 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Mon, 13 Feb 2023 18:19:27 +0800 Subject: drm/amdgpu: Add gfx v12_0_0 family id Add gfx v12_0_0 family id Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index feb47623458a..5b6c0055cfcf 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1279,7 +1279,7 @@ struct drm_amdgpu_info_gpuvm_fault { #define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ -#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ +#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ #if defined(__cplusplus) } -- cgit v1.2.3 From 0f1bb41bf39695c84c83ce6f69e125b562d1d7ab Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 May 2024 15:59:39 +0100 Subject: drm/i915: Support replaying GPU hangs with captured context image When debugging GPU hangs Mesa developers are finding it useful to replay the captured error state against the simulator. But due various simulator limitations which prevent replicating all hangs, one step further is being able to replay against a real GPU. This is almost doable today with the missing part being able to upload the captured context image into the driver state prior to executing the uploaded hanging batch and all the buffers. To enable this last part we add a new context parameter called I915_CONTEXT_PARAM_CONTEXT_IMAGE. It follows the existing SSEU configuration pattern of being able to select which context to apply against, paired with the actual image and its size. Since this is adding a new concept of debug only uapi, we hide it behind a new kconfig option and also require activation with a module parameter. Together with a warning banner printed at driver load, all those combined should be sufficient to guard against inadvertently enabling the feature. In terms of implementation we allow the legacy context set param to be used since that removes the need to record the per context data in the proto context, while still allowing flexibility of specifying context images for any context. Mesa MR using the uapi can be seen at: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27594 v2: * Fix whitespace alignment as per checkpatch. * Added warning on userspace misuse. * Rebase for extracting ce->default_state shadowing. v3: * Rebase for I915_CONTEXT_PARAM_LOW_LATENCY. Signed-off-by: Tvrtko Ursulin Cc: Lionel Landwerlin Cc: Carlos Santa Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Tested-by: Carlos Santa Signed-off-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240514145939.87427-2-tursulin@igalia.com --- drivers/gpu/drm/i915/Kconfig.debug | 17 ++++ drivers/gpu/drm/i915/gem/i915_gem_context.c | 113 ++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_context.c | 2 + drivers/gpu/drm/i915/gt/intel_context.h | 22 +++++ drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 3 +- drivers/gpu/drm/i915/i915_params.c | 5 ++ drivers/gpu/drm/i915/i915_params.h | 3 +- include/uapi/drm/i915_drm.h | 27 ++++++ 10 files changed, 193 insertions(+), 3 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index d8397065c3f0..1852e0804942 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -16,6 +16,23 @@ config DRM_I915_WERROR If in doubt, say "N". +config DRM_I915_REPLAY_GPU_HANGS_API + bool "Enable GPU hang replay userspace API" + depends on DRM_I915 + depends on EXPERT + default n + help + Choose this option if you want to enable special and unstable + userspace API used for replaying GPU hangs on a running system. + + This API is intended to be used by userspace graphics stack developers + and provides no stability guarantees. + + The API needs to be activated at boot time using the + enable_debug_only_api module parameter. + + If in doubt, say "N". + config DRM_I915_DEBUG bool "Enable additional driver debugging" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 81f65cab1330..c0543c35cd6a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -78,6 +78,7 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" +#include "gt/shmem_utils.h" #include "pxp/intel_pxp.h" @@ -957,6 +958,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: + case I915_CONTEXT_PARAM_CONTEXT_IMAGE: default: ret = -EINVAL; break; @@ -2104,6 +2106,95 @@ static int get_protected(struct i915_gem_context *ctx, return 0; } +static int set_context_image(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_gem_context_param_context_image user; + struct intel_context *ce; + struct file *shmem_state; + unsigned long lookup; + void *state; + int ret = 0; + + if (!IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API)) + return -EINVAL; + + if (!ctx->i915->params.enable_debug_only_api) + return -EINVAL; + + if (args->size < sizeof(user)) + return -EINVAL; + + if (copy_from_user(&user, u64_to_user_ptr(args->value), sizeof(user))) + return -EFAULT; + + if (user.mbz) + return -EINVAL; + + if (user.flags & ~(I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX)) + return -EINVAL; + + lookup = 0; + if (user.flags & I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX) + lookup |= LOOKUP_USER_INDEX; + + ce = lookup_user_engine(ctx, lookup, &user.engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + if (user.size < ce->engine->context_size) { + ret = -EINVAL; + goto out_ce; + } + + if (drm_WARN_ON_ONCE(&ctx->i915->drm, + test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { + /* + * This is racy but for a debug only API, if userspace is keen + * to create and configure contexts, while simultaneously using + * them from a second thread, let them suffer by potentially not + * executing with the context image they just raced to apply. + */ + ret = -EBUSY; + goto out_ce; + } + + state = kmalloc(ce->engine->context_size, GFP_KERNEL); + if (!state) { + ret = -ENOMEM; + goto out_ce; + } + + if (copy_from_user(state, u64_to_user_ptr(user.image), + ce->engine->context_size)) { + ret = -EFAULT; + goto out_state; + } + + shmem_state = shmem_create_from_data(ce->engine->name, + state, ce->engine->context_size); + if (IS_ERR(shmem_state)) { + ret = PTR_ERR(shmem_state); + goto out_state; + } + + if (intel_context_set_own_state(ce)) { + ret = -EBUSY; + fput(shmem_state); + goto out_state; + } + + ce->default_state = shmem_state; + + args->size = sizeof(user); + +out_state: + kfree(state); +out_ce: + intel_context_put(ce); + return ret; +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -2156,6 +2247,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_persistence(ctx, args); break; + case I915_CONTEXT_PARAM_CONTEXT_IMAGE: + ret = set_context_image(ctx, args); + break; + case I915_CONTEXT_PARAM_PROTECTED_CONTENT: case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: @@ -2500,6 +2595,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_ENGINES: case I915_CONTEXT_PARAM_RINGSIZE: + case I915_CONTEXT_PARAM_CONTEXT_IMAGE: default: ret = -EINVAL; break; @@ -2612,5 +2708,22 @@ int __init i915_gem_context_module_init(void) if (!slab_luts) return -ENOMEM; + if (IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API)) { + pr_notice("**************************************************************\n"); + pr_notice("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_notice("** **\n"); + if (i915_modparams.enable_debug_only_api) + pr_notice("** i915.enable_debug_only_api is intended to be set **\n"); + else + pr_notice("** CONFIG_DRM_I915_REPLAY_GPU_HANGS_API builds are intended **\n"); + pr_notice("** for specific userspace graphics stack developers only! **\n"); + pr_notice("** **\n"); + pr_notice("** If you are seeing this message please report this to the **\n"); + pr_notice("** provider of your kernel build. **\n"); + pr_notice("** **\n"); + pr_notice("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_notice("**************************************************************\n"); + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index a2f1245741bb..b1b8695ba7c9 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -27,6 +27,8 @@ static void rcu_context_free(struct rcu_head *rcu) struct intel_context *ce = container_of(rcu, typeof(*ce), rcu); trace_intel_context_free(ce); + if (intel_context_has_own_state(ce)) + fput(ce->default_state); kmem_cache_free(slab_ce, ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 25564c01507e..9f523999acd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -375,6 +375,28 @@ intel_context_clear_nopreempt(struct intel_context *ce) clear_bit(CONTEXT_NOPREEMPT, &ce->flags); } +#if IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) +static inline bool intel_context_has_own_state(const struct intel_context *ce) +{ + return test_bit(CONTEXT_OWN_STATE, &ce->flags); +} + +static inline bool intel_context_set_own_state(struct intel_context *ce) +{ + return test_and_set_bit(CONTEXT_OWN_STATE, &ce->flags); +} +#else +static inline bool intel_context_has_own_state(const struct intel_context *ce) +{ + return false; +} + +static inline bool intel_context_set_own_state(struct intel_context *ce) +{ + return true; +} +#endif + u64 intel_context_get_total_runtime_ns(struct intel_context *ce); u64 intel_context_get_avg_runtime_ns(struct intel_context *ce); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 6ae8abfeccdb..98c7f6052069 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -133,6 +133,7 @@ struct intel_context { #define CONTEXT_IS_PARKING 12 #define CONTEXT_EXITING 13 #define CONTEXT_LOW_LATENCY 14 +#define CONTEXT_OWN_STATE 15 struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d4ffb352403c..7bd5d2c29056 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1130,7 +1130,8 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) GEM_BUG_ON(ce->state); - ce->default_state = engine->default_state; + if (!intel_context_has_own_state(ce)) + ce->default_state = engine->default_state; vma = __lrc_alloc_state(ce, engine); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 8625e88e785f..72277bc8322e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -569,7 +569,8 @@ static int ring_context_alloc(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; - ce->default_state = engine->default_state; + if (!intel_context_has_own_state(ce)) + ce->default_state = engine->default_state; /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 8c00169e3ab7..316e55f3e87b 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -131,6 +131,11 @@ i915_param_named_unsafe(lmem_size, uint, 0400, i915_param_named_unsafe(lmem_bar_size, uint, 0400, "Set the lmem bar size(in MiB)."); +#if IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) +i915_param_named(enable_debug_only_api, bool, 0400, + "Enable support for unstable debug only userspace API. (default:false)"); +#endif + static void _param_print_bool(struct drm_printer *p, const char *name, bool val) { diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 2eb3f2115ff2..0fbcb5b6d7bf 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -63,7 +63,8 @@ struct drm_printer; /* leave bools at the end to not create holes */ \ param(bool, enable_hangcheck, true, 0600) \ param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \ - param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) + param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) \ + param(bool, enable_debug_only_api, false, IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) ? 0400 : 0) #define MEMBER(T, member, ...) T member; struct i915_params { diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d4d86e566e07..535cb68fdb5c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. -- cgit v1.2.3 From 995f7dafd110eecbeef1e02846d897d64839d838 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 16 Apr 2024 14:50:37 +0000 Subject: drm/xe/uapi: Expose the L3 bank mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The L3 bank mask is already generated and stored internally with the rest of the GT topology. In user space, the compute runtime now needs this information to be added to the device properties therefore the topology mask query is extended to provide a new mask which represents the L3 banks enabled on the GT. The changes in the compute runtime are ready and approved, see link below. v2: Rewrite commit message and add a link to the compute runtime PR (Francois Dugast) Cc: Matt Roper Cc: Robert Krzemien Cc: Mateusz Jablonski Link: https://github.com/intel/compute-runtime/pull/722 Signed-off-by: Francois Dugast Acked-by: Mateusz Jablonski Reviewed-by: José Roberto de Souza Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240416145037.7-2-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_query.c | 9 ++++++++- include/uapi/drm/xe_drm.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 29f847debb5c..995effcb904b 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -455,9 +455,10 @@ static int query_hwconfig(struct xe_device *xe, static size_t calc_topo_query_size(struct xe_device *xe) { return xe->info.gt_count * - (3 * sizeof(struct drm_xe_query_topology_mask) + + (4 * sizeof(struct drm_xe_query_topology_mask) + sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) + sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) + + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) + sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss)); } @@ -511,6 +512,12 @@ static int query_gt_topology(struct xe_device *xe, if (err) return err; + topo.type = DRM_XE_TOPO_L3_BANK; + err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask, + sizeof(gt->fuse_topo.l3_bank_mask)); + if (err) + return err; + topo.type = DRM_XE_TOPO_EU_PER_DSS; err = copy_mask(&query_ptr, &topo, gt->fuse_topo.eu_mask_per_dss, diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1446c3bae515..d7b0903c22b2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -508,6 +508,7 @@ struct drm_xe_query_gt_list { * containing the following in mask: * ``DSS_COMPUTE ff ff ff ff 00 00 00 00`` * means 32 DSS are available for compute. + * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks * - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU) * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: @@ -520,6 +521,7 @@ struct drm_xe_query_topology_mask { #define DRM_XE_TOPO_DSS_GEOMETRY 1 #define DRM_XE_TOPO_DSS_COMPUTE 2 +#define DRM_XE_TOPO_L3_BANK 3 #define DRM_XE_TOPO_EU_PER_DSS 4 /** @type: type of mask */ __u16 type; -- cgit v1.2.3 From 73e1d104ef7f5c9843abf4686513b3706538572a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 29 Apr 2023 07:49:07 -0400 Subject: drm/amdgpu: define new gfx12 uapi flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit define new gfx12 uapi flags Signed-off-by: Marek Olšák Acked-by: Hawking Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 5b6c0055cfcf..e44362e74fa1 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -171,6 +171,8 @@ extern "C" { * may override the MTYPE selected in AMDGPU_VA_OP_MAP. */ #define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) +/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ +#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3 From 52c2e956dcebecc8901911217a9647203ebcaf3c Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:53 -0700 Subject: drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the plan is to support multiple types of perf counter streams (OA is only one type of these streams). Rather than introduce NxM ioctls for these (N perf streams with M ioctl's per perf stream), we decide to multiplex these (N different stream types and the M ops for each of these stream types) through a single PERF ioctl. This multiplexing is the purpose of the PERF layer. In addition to PERF DRM ioctl's, another set of ioctl's on the PERF fd are defined. These are expected to be common to different PERF stream types and therefore defined at the PERF layer itself. v2: Add param_size to 'struct drm_xe_perf_param' (Umesh) v3: Rename 'enum drm_xe_perf_ops' to 'enum drm_xe_perf_ioctls' (Guy Zadicario) Add DRM_ prefix to ioctl names to indicate uapi names v4: Add 'enum drm_xe_perf_op' previously missed out (Guy Zadicario) v5: Squash the ops and PERF layer patches into a single patch (Umesh) Remove param_size from struct 'drm_xe_perf_param' (Umesh) v6: Add DRM_XE_PERF_IOCTL_STATUS v7: Add DRM_XE_PERF_IOCTL_INFO v8: Fix Copyright years, fix DRM_XE_PERF_TYPE_MAX, move '#include "xe_perf.h"' to xe_perf.c, add kernel doc (Michal) Acked-by: Rodrigo Vivi Acked-by: Guy Zadicario Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-2-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_perf.c | 34 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_perf.h | 14 +++++++++ include/uapi/drm/xe_drm.h | 66 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_perf.c create mode 100644 drivers/gpu/drm/xe/xe_perf.h (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index cbf961b90237..f99492449e5d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -95,6 +95,7 @@ xe-y += xe_bb.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ + xe_perf.o \ xe_pm.o \ xe_preempt_fence.o \ xe_pt.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 64691a56d59c..a44093cbbb71 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -44,6 +44,7 @@ #include "xe_module.h" #include "xe_pat.h" #include "xe_pcode.h" +#include "xe_perf.h" #include "xe_pm.h" #include "xe_query.h" #include "xe_sriov.h" @@ -141,6 +142,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_PERF, xe_perf_ioctl, DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c new file mode 100644 index 000000000000..2963174ecd0e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include + +#include "xe_perf.h" + +/** + * xe_perf_ioctl - The top level perf layer ioctl + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + * + * The function is called for different perf streams types and allows execution + * of different operations supported by those perf stream types. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_xe_perf_param *arg = data; + + if (arg->extensions) + return -EINVAL; + + switch (arg->perf_type) { + default: + return -EINVAL; + } +} diff --git a/drivers/gpu/drm/xe/xe_perf.h b/drivers/gpu/drm/xe/xe_perf.h new file mode 100644 index 000000000000..e7e258eaf0a9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_perf.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_PERF_H_ +#define _XE_PERF_H_ + +struct drm_device; +struct drm_file; + +int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index d7b0903c22b2..c1626027dc69 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,6 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE + * - &DRM_IOCTL_XE_PERF */ /* @@ -100,6 +101,8 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a +#define DRM_XE_PERF 0x0b + /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -113,6 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) +#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) /** * DOC: Xe IOCTL Extensions @@ -1370,6 +1374,68 @@ struct drm_xe_wait_user_fence { __u64 reserved[2]; }; +/** + * enum drm_xe_perf_type - Perf stream types + */ +enum drm_xe_perf_type { + __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ +}; + +/** + * enum drm_xe_perf_op - Perf stream ops + */ +enum drm_xe_perf_op { + /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ + DRM_XE_PERF_OP_STREAM_OPEN, + + /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ + DRM_XE_PERF_OP_ADD_CONFIG, + + /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ + DRM_XE_PERF_OP_REMOVE_CONFIG, +}; + +/** + * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * + * The perf layer enables multiplexing perf counter streams of multiple + * types. The actual params for a particular stream operation are supplied + * via the @param pointer (use __copy_from_user to get these params). + */ +struct drm_xe_perf_param { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ + __u64 perf_type; + /** @perf_op: Perf op, of enum @drm_xe_perf_op */ + __u64 perf_op; + /** @param: Pointer to actual stream params */ + __u64 param; +}; + +/** + * enum drm_xe_perf_ioctls - Perf fd ioctl's + * + * Information exchanged between userspace and kernel for perf fd ioctl's + * is stream type specific + */ +enum drm_xe_perf_ioctls { + /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ + DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), + + /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ + DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + + /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ + DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + + /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ + DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + + /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ + DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 67977882a2f1339f0a7d32576ad61967828b2ca5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:55 -0700 Subject: drm/xe/oa/uapi: Add OA data formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add and initialize supported OA data formats for various platforms (including Xe2). User can request OA data in any supported format. Bspec: 52198, 60942, 61101 v2: Start 'xe_oa_format_name' enum from 0 (Umesh) Fix error rewind with OA (Umesh) v3: Use graphics versions rather than absolute platform names v4: Add missing kernel doc for struct memebers and enum and other minor changes (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-4-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 11 +++- drivers/gpu/drm/xe/xe_device_types.h | 4 ++ drivers/gpu/drm/xe/xe_oa.c | 111 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 16 +++++ drivers/gpu/drm/xe/xe_oa_types.h | 83 ++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 19 ++++++ 7 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_oa.c create mode 100644 drivers/gpu/drm/xe/xe_oa.h create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f99492449e5d..7039008be234 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -92,6 +92,7 @@ xe-y += xe_bb.o \ xe_mmio.o \ xe_mocs.o \ xe_module.o \ + xe_oa.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a44093cbbb71..1195c64a715a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -656,10 +656,14 @@ int xe_device_probe(struct xe_device *xe) xe_heci_gsc_init(xe); - err = xe_display_init(xe); + err = xe_oa_init(xe); if (err) goto err_fini_gt; + err = xe_display_init(xe); + if (err) + goto err_fini_oa; + err = drm_dev_register(&xe->drm, 0); if (err) goto err_fini_display; @@ -675,6 +679,9 @@ int xe_device_probe(struct xe_device *xe) err_fini_display: xe_display_driver_remove(xe); +err_fini_oa: + xe_oa_fini(xe); + err_fini_gt: for_each_gt(gt, xe, id) { if (id < last_gt) @@ -707,6 +714,8 @@ void xe_device_remove(struct xe_device *xe) xe_display_fini(xe); + xe_oa_fini(xe); + xe_heci_gsc_fini(xe); for_each_gt(gt, xe, id) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 52bc461171d5..185986e1d586 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -17,6 +17,7 @@ #include "xe_gt_types.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" +#include "xe_oa.h" #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" @@ -462,6 +463,9 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @oa: oa perf counter subsystem */ + struct xe_oa oa; + /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c new file mode 100644 index 000000000000..5c0179ff4f60 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_macros.h" +#include "xe_oa.h" + +#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x + +static const struct xe_oa_format oa_formats[] = { + [XE_OA_FORMAT_C4_B8] = { 7, 64, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A12] = { 0, 64, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A12_B8_C8] = { 2, 128, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, + [XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAR) }, + [XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, + [XE_OAC_FORMAT_A24u64_B8_C8] = { 1, 320, DRM_FMT(OAC), HDR_64_BIT }, + [XE_OAC_FORMAT_A22u32_R2u32_B8_C8] = { 2, 192, DRM_FMT(OAC), HDR_64_BIT }, + [XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT }, + [XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, DRM_FMT(OAM_MPEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC64u64] = { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC64u64_B8_C8] = { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 }, + [XE_OA_FORMAT_PEC64u32] = { 1, 320, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC32u64_G1] = { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC32u32_G1] = { 5, 192, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC32u64_G2] = { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC32u32_G2] = { 6, 192, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, +}; + +static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) +{ + __set_bit(format, oa->format_mask); +} + +static void xe_oa_init_supported_formats(struct xe_oa *oa) +{ + if (GRAPHICS_VER(oa->xe) >= 20) { + /* Xe2+ */ + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_PEC64u64); + oa_format_add(oa, XE_OA_FORMAT_PEC64u64_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_PEC64u32); + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G1); + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G1); + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G2); + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G2); + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_32_G2_4); + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_4_G2_32); + } else if (GRAPHICS_VERx100(oa->xe) >= 1270) { + /* XE_METEORLAKE */ + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); + } else if (GRAPHICS_VERx100(oa->xe) >= 1255) { + /* XE_DG2, XE_PVC */ + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); + } else { + /* Gen12+ */ + xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12); + oa_format_add(oa, XE_OA_FORMAT_A12); + oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_C4_B8); + } +} + +/** + * xe_oa_init - OA initialization during device probe + * @xe: @xe_device + * + * Return: 0 on success or a negative error code on failure + */ +int xe_oa_init(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + /* Support OA only with GuC submission and Gen12+ */ + if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12)) + return 0; + + oa->xe = xe; + oa->oa_formats = oa_formats; + + xe_oa_init_supported_formats(oa); + return 0; +} + +/** + * xe_oa_fini - OA de-initialization during device remove + * @xe: @xe_device + */ +void xe_oa_fini(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + oa->xe = NULL; +} diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h new file mode 100644 index 000000000000..2647c1947746 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OA_H_ +#define _XE_OA_H_ + +#include "xe_oa_types.h" + +struct xe_device; + +int xe_oa_init(struct xe_device *xe); +void xe_oa_fini(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h new file mode 100644 index 000000000000..99940e25b1c6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OA_TYPES_H_ +#define _XE_OA_TYPES_H_ + +#include +#include + +enum xe_oa_report_header { + HDR_32_BIT = 0, + HDR_64_BIT, +}; + +enum xe_oa_format_name { + XE_OA_FORMAT_C4_B8, + + /* Gen8+ */ + XE_OA_FORMAT_A12, + XE_OA_FORMAT_A12_B8_C8, + XE_OA_FORMAT_A32u40_A4u32_B8_C8, + + /* DG2 */ + XE_OAR_FORMAT_A32u40_A4u32_B8_C8, + XE_OA_FORMAT_A24u40_A14u32_B8_C8, + + /* DG2/MTL OAC */ + XE_OAC_FORMAT_A24u64_B8_C8, + XE_OAC_FORMAT_A22u32_R2u32_B8_C8, + + /* MTL OAM */ + XE_OAM_FORMAT_MPEC8u64_B8_C8, + XE_OAM_FORMAT_MPEC8u32_B8_C8, + + /* Xe2+ */ + XE_OA_FORMAT_PEC64u64, + XE_OA_FORMAT_PEC64u64_B8_C8, + XE_OA_FORMAT_PEC64u32, + XE_OA_FORMAT_PEC32u64_G1, + XE_OA_FORMAT_PEC32u32_G1, + XE_OA_FORMAT_PEC32u64_G2, + XE_OA_FORMAT_PEC32u32_G2, + XE_OA_FORMAT_PEC36u64_G1_32_G2_4, + XE_OA_FORMAT_PEC36u64_G1_4_G2_32, + + __XE_OA_FORMAT_MAX, +}; + +/** + * struct xe_oa_format - Format fields for supported OA formats. OA format + * properties are specified in PRM/Bspec 52198 and 60942 + */ +struct xe_oa_format { + /** @counter_select: counter select value (see Bspec 52198/60942) */ + u32 counter_select; + /** @size: record size as written by HW (multiple of 64 byte cachelines) */ + int size; + /** @type: of enum @drm_xe_oa_format_type */ + int type; + /** @header: 32 or 64 bit report headers */ + enum xe_oa_report_header header; + /** @counter_size: counter size value (see Bspec 60942) */ + u16 counter_size; + /** @bc_report: BC report value (see Bspec 60942) */ + u16 bc_report; +}; + +/** + * struct xe_oa - OA device level information + */ +struct xe_oa { + /** @xe: back pointer to xe device */ + struct xe_device *xe; + + /** @oa_formats: tracks all OA formats across platforms */ + const struct xe_oa_format *oa_formats; + + /** @format_mask: tracks valid OA formats for a platform */ + unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)]; +}; +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c1626027dc69..7e10874bfb33 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1436,6 +1436,25 @@ enum drm_xe_perf_ioctls { DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), }; +/** + * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec + * 52198/60942 + */ +enum drm_xe_oa_format_type { + /** @DRM_XE_OA_FMT_TYPE_OAG: OAG report format */ + DRM_XE_OA_FMT_TYPE_OAG, + /** @DRM_XE_OA_FMT_TYPE_OAR: OAR report format */ + DRM_XE_OA_FMT_TYPE_OAR, + /** @DRM_XE_OA_FMT_TYPE_OAM: OAM report format */ + DRM_XE_OA_FMT_TYPE_OAM, + /** @DRM_XE_OA_FMT_TYPE_OAC: OAC report format */ + DRM_XE_OA_FMT_TYPE_OAC, + /** @DRM_XE_OA_FMT_TYPE_OAM_MPEC: OAM SAMEDIA or OAM MPEC report format */ + DRM_XE_OA_FMT_TYPE_OAM_MPEC, + /** @DRM_XE_OA_FMT_TYPE_PEC: PEC report format */ + DRM_XE_OA_FMT_TYPE_PEC, +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From a9f905ae7b6f29a337dda2ad773c08b92dafe9a5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:56 -0700 Subject: drm/xe/oa/uapi: Initialize OA units MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize OA unit data struct's for each gt during device probe. Also assign OA units for hardware engines. v2: Remove XE_OA_UNIT_OAG/XE_OA_UNIT_OAM_SAMEDIA_0 enum (Umesh) Change mtl_oa_base to 0x13000 (Umesh) v3: Switch to drmm_ functions and other cleanups (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-5-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/regs/xe_oa_regs.h | 92 +++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 4 + drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 + drivers/gpu/drm/xe/xe_oa.c | 156 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 54 +++++++++++ include/uapi/drm/xe_drm.h | 14 +++ 6 files changed, 322 insertions(+) create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h new file mode 100644 index 000000000000..99bad563d51d --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __XE_OA_REGS__ +#define __XE_OA_REGS__ + +#define RPM_CONFIG1 XE_REG(0xd04) +#define GT_NOA_ENABLE REG_BIT(9) + +#define EU_PERF_CNTL0 XE_REG(0xe458) +#define EU_PERF_CNTL4 XE_REG(0xe45c) +#define EU_PERF_CNTL1 XE_REG(0xe558) +#define EU_PERF_CNTL5 XE_REG(0xe55c) +#define EU_PERF_CNTL2 XE_REG(0xe658) +#define EU_PERF_CNTL6 XE_REG(0xe65c) +#define EU_PERF_CNTL3 XE_REG(0xe758) + +#define OA_TLB_INV_CR XE_REG(0xceec) + +/* OAR unit */ +#define OAR_OACONTROL XE_REG(0x2960) +#define OAR_OACONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAR_OACONTROL_COUNTER_ENABLE REG_BIT(0) + +#define OACTXCONTROL(base) XE_REG((base) + 0x360) +#define OAR_OASTATUS XE_REG(0x2968) +#define OA_COUNTER_RESUME REG_BIT(0) + +/* OAG unit */ +#define OAG_OAGLBCTXCTRL XE_REG(0x2b28) +#define OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK REG_GENMASK(7, 2) +#define OAG_OAGLBCTXCTRL_TIMER_ENABLE REG_BIT(1) +#define OAG_OAGLBCTXCTRL_COUNTER_RESUME REG_BIT(0) + +#define OAG_OAHEADPTR XE_REG(0xdb00) +#define OAG_OAHEADPTR_MASK REG_GENMASK(31, 6) +#define OAG_OATAILPTR XE_REG(0xdb04) +#define OAG_OATAILPTR_MASK REG_GENMASK(31, 6) + +#define OAG_OABUFFER XE_REG(0xdb08) +#define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) +#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) +#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) +#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) +#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) +#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) +#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) +#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) +#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) +#define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ + +#define OAG_OACONTROL XE_REG(0xdaf4) +#define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16) +#define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2) +#define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0) +/* Common to all OA units */ +#define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9) +#define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) + +#define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) +#define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) +#define OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5) +#define OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1) + +#define OAG_OASTATUS XE_REG(0xdafc) +#define OASTATUS_MMIO_TRG_Q_FULL REG_BIT(6) +#define OASTATUS_COUNTER_OVERFLOW REG_BIT(2) +#define OASTATUS_BUFFER_OVERFLOW REG_BIT(1) +#define OASTATUS_REPORT_LOST REG_BIT(0) +/* OAM unit */ +#define OAM_HEAD_POINTER_OFFSET (0x1a0) +#define OAM_TAIL_POINTER_OFFSET (0x1a4) +#define OAM_BUFFER_OFFSET (0x1a8) +#define OAM_CONTEXT_CONTROL_OFFSET (0x1bc) +#define OAM_CONTROL_OFFSET (0x194) +#define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAM_DEBUG_OFFSET (0x198) +#define OAM_STATUS_OFFSET (0x19c) +#define OAM_MMIO_TRG_OFFSET (0x1d0) + +#define OAM_HEAD_POINTER(base) XE_REG((base) + OAM_HEAD_POINTER_OFFSET) +#define OAM_TAIL_POINTER(base) XE_REG((base) + OAM_TAIL_POINTER_OFFSET) +#define OAM_BUFFER(base) XE_REG((base) + OAM_BUFFER_OFFSET) +#define OAM_CONTEXT_CONTROL(base) XE_REG((base) + OAM_CONTEXT_CONTROL_OFFSET) +#define OAM_CONTROL(base) XE_REG((base) + OAM_CONTROL_OFFSET) +#define OAM_DEBUG(base) XE_REG((base) + OAM_DEBUG_OFFSET) +#define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET) +#define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET) + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 10a9a9529377..24bb95de920f 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -12,6 +12,7 @@ #include "xe_gt_sriov_vf_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" +#include "xe_oa.h" #include "xe_reg_sr_types.h" #include "xe_sa_types.h" #include "xe_uc_types.h" @@ -387,6 +388,9 @@ struct xe_gt { */ u8 instances_per_class[XE_ENGINE_CLASS_MAX]; } user_engines; + + /** @oa: oa perf counter subsystem per gt info */ + struct xe_oa_gt oa; }; #endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 580bbd7e83b2..70e6434f150d 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -148,6 +148,8 @@ struct xe_hw_engine { enum xe_hw_engine_id engine_id; /** @eclass: pointer to per hw engine class interface */ struct xe_hw_engine_class_intf *eclass; + /** @oa_unit: oa unit for this hw engine */ + struct xe_oa_unit *oa_unit; }; /** diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 5c0179ff4f60..e836fafa9fb3 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,13 +3,20 @@ * Copyright © 2023-2024 Intel Corporation */ +#include #include +#include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_macros.h" +#include "xe_mmio.h" #include "xe_oa.h" +#define XE_OA_UNIT_INVALID U32_MAX + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -34,6 +41,142 @@ static const struct xe_oa_format oa_formats[] = { [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, }; +static u32 num_oa_units_per_gt(struct xe_gt *gt) +{ + return 1; +} + +static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) +{ + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { + /* + * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices + * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA + */ + xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); + + return 0; + } + + return XE_OA_UNIT_INVALID; +} + +static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) +{ + switch (hwe->class) { + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + return 0; + + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return __hwe_oam_unit(hwe); + + default: + return XE_OA_UNIT_INVALID; + } +} + +static struct xe_oa_regs __oam_regs(u32 base) +{ + return (struct xe_oa_regs) { + base, + OAM_HEAD_POINTER(base), + OAM_TAIL_POINTER(base), + OAM_BUFFER(base), + OAM_CONTEXT_CONTROL(base), + OAM_CONTROL(base), + OAM_DEBUG(base), + OAM_STATUS(base), + OAM_CONTROL_COUNTER_SEL_MASK, + }; +} + +static struct xe_oa_regs __oag_regs(void) +{ + return (struct xe_oa_regs) { + 0, + OAG_OAHEADPTR, + OAG_OATAILPTR, + OAG_OABUFFER, + OAG_OAGLBCTXCTRL, + OAG_OACONTROL, + OAG_OA_DEBUG, + OAG_OASTATUS, + OAG_OACONTROL_OA_COUNTER_SEL_MASK, + }; +} + +static void __xe_oa_init_oa_units(struct xe_gt *gt) +{ + const u32 mtl_oa_base[] = { 0x13000 }; + int i, num_units = gt->oa.num_oa_units; + + for (i = 0; i < num_units; i++) { + struct xe_oa_unit *u = >->oa.oa_unit[i]; + + if (gt->info.type != XE_GT_TYPE_MEDIA) { + u->regs = __oag_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_OAG; + } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + u->regs = __oam_regs(mtl_oa_base[i]); + u->type = DRM_XE_OA_UNIT_TYPE_OAM; + } + + /* Set oa_unit_ids now to ensure ids remain contiguous */ + u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++; + } +} + +static int xe_oa_init_gt(struct xe_gt *gt) +{ + u32 num_oa_units = num_oa_units_per_gt(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_oa_unit *u; + + u = drmm_kcalloc(>_to_xe(gt)->drm, num_oa_units, sizeof(*u), GFP_KERNEL); + if (!u) + return -ENOMEM; + + for_each_hw_engine(hwe, gt, id) { + u32 index = __hwe_oa_unit(hwe); + + hwe->oa_unit = NULL; + if (index < num_oa_units) { + u[index].num_engines++; + hwe->oa_unit = &u[index]; + } + } + + /* + * Fused off engines can result in oa_unit's with num_engines == 0. These units + * will appear in OA unit query, but no perf streams can be opened on them. + */ + gt->oa.num_oa_units = num_oa_units; + gt->oa.oa_unit = u; + + __xe_oa_init_oa_units(gt); + + drmm_mutex_init(>_to_xe(gt)->drm, >->oa.gt_lock); + + return 0; +} + +static int xe_oa_init_oa_units(struct xe_oa *oa) +{ + struct xe_gt *gt; + int i, ret; + + for_each_gt(gt, oa->xe, i) { + ret = xe_oa_init_gt(gt); + if (ret) + return ret; + } + + return 0; +} + static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) { __set_bit(format, oa->format_mask); @@ -87,6 +230,7 @@ static void xe_oa_init_supported_formats(struct xe_oa *oa) int xe_oa_init(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; + int ret; /* Support OA only with GuC submission and Gen12+ */ if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12)) @@ -95,8 +239,17 @@ int xe_oa_init(struct xe_device *xe) oa->xe = xe; oa->oa_formats = oa_formats; + ret = xe_oa_init_oa_units(oa); + if (ret) { + drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); + goto exit; + } + xe_oa_init_supported_formats(oa); return 0; +exit: + oa->xe = NULL; + return ret; } /** @@ -107,5 +260,8 @@ void xe_oa_fini(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; + if (!oa->xe) + return; + oa->xe = NULL; } diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 99940e25b1c6..e7b91e31f0e8 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -7,8 +7,12 @@ #define _XE_OA_TYPES_H_ #include +#include #include +#include +#include "regs/xe_reg_defs.h" + enum xe_oa_report_header { HDR_32_BIT = 0, HDR_64_BIT, @@ -67,6 +71,53 @@ struct xe_oa_format { u16 bc_report; }; +/** struct xe_oa_regs - Registers for each OA unit */ +struct xe_oa_regs { + u32 base; + struct xe_reg oa_head_ptr; + struct xe_reg oa_tail_ptr; + struct xe_reg oa_buffer; + struct xe_reg oa_ctx_ctrl; + struct xe_reg oa_ctrl; + struct xe_reg oa_debug; + struct xe_reg oa_status; + u32 oa_ctrl_counter_select_mask; +}; + +/** + * struct xe_oa_unit - Hardware OA unit + */ +struct xe_oa_unit { + /** @oa_unit_id: identifier for the OA unit */ + u16 oa_unit_id; + + /** @type: Type of OA unit - OAM, OAG etc. */ + enum drm_xe_oa_unit_type type; + + /** @regs: OA registers for programming the OA unit */ + struct xe_oa_regs regs; + + /** @num_engines: number of engines attached to this OA unit */ + u32 num_engines; + + /** @exclusive_stream: The stream currently using the OA unit */ + struct xe_oa_stream *exclusive_stream; +}; + +/** + * struct xe_oa_gt - OA per-gt information + */ +struct xe_oa_gt { + /** @gt_lock: lock protecting create/destroy OA streams */ + struct mutex gt_lock; + + /** @num_oa_units: number of oa units for each gt */ + u32 num_oa_units; + + /** @oa_unit: array of oa_units */ + struct xe_oa_unit *oa_unit; +}; + /** * struct xe_oa - OA device level information */ @@ -79,5 +130,8 @@ struct xe_oa { /** @format_mask: tracks valid OA formats for a platform */ unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)]; + + /** @oa_unit_ids: tracks oa unit ids assigned across gt's */ + u16 oa_unit_ids; }; #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 7e10874bfb33..323d899a276b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1436,6 +1436,20 @@ enum drm_xe_perf_ioctls { DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), }; +/** + * enum drm_xe_oa_unit_type - OA unit types + */ +enum drm_xe_oa_unit_type { + /** + * @DRM_XE_OA_UNIT_TYPE_OAG: OAG OA unit. OAR/OAC are considered + * sub-types of OAG. For OAR/OAC, use OAG. + */ + DRM_XE_OA_UNIT_TYPE_OAG, + + /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ + DRM_XE_OA_UNIT_TYPE_OAM, +}; + /** * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec * 52198/60942 -- cgit v1.2.3 From cdf02fe1a94a768cbcd20f5c4e1a1d805f4a06c0 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:57 -0700 Subject: drm/xe/oa/uapi: Add/remove OA config perf ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce add/remove config perf ops for OA. OA configurations consist of a set of event/counter select register address/value pairs. The add_config perf op validates and stores such configurations and also exposes them in the metrics sysfs. These configurations will be programmed to OA unit HW when an OA stream using a configuration is opened. The OA stream can also switch to other stored configurations. v2: Start config id's from 1 and other minor review comments (Umesh) v3: Add 32 bit build v4: Add kernel doc for non-static functions (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-6-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_device.c | 4 + drivers/gpu/drm/xe/xe_oa.c | 434 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 6 + drivers/gpu/drm/xe/xe_oa_types.h | 10 + drivers/gpu/drm/xe/xe_perf.c | 16 ++ include/uapi/drm/xe_drm.h | 25 +++ 6 files changed, 495 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1195c64a715a..31b549f5f03a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -670,6 +670,8 @@ int xe_device_probe(struct xe_device *xe) xe_display_register(xe); + xe_oa_register(xe); + xe_debugfs_register(xe); xe_hwmon_register(xe); @@ -710,6 +712,8 @@ void xe_device_remove(struct xe_device *xe) struct xe_gt *gt; u8 id; + xe_oa_unregister(xe); + xe_device_remove_display(xe); xe_display_fini(xe); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index e836fafa9fb3..4122785735d4 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -14,9 +14,32 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" +#include "xe_perf.h" #define XE_OA_UNIT_INVALID U32_MAX +struct xe_oa_reg { + struct xe_reg addr; + u32 value; +}; + +struct xe_oa_config { + struct xe_oa *oa; + + char uuid[UUID_STRING_LEN + 1]; + int id; + + const struct xe_oa_reg *regs; + u32 regs_len; + + struct attribute_group sysfs_metric; + struct attribute *attrs[2]; + struct kobj_attribute sysfs_metric_id; + + struct kref ref; + struct rcu_head rcu; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -41,6 +64,405 @@ static const struct xe_oa_format oa_formats[] = { [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, }; +static void xe_oa_config_release(struct kref *ref) +{ + struct xe_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); + + kfree(oa_config->regs); + + kfree_rcu(oa_config, rcu); +} + +static void xe_oa_config_put(struct xe_oa_config *oa_config) +{ + if (!oa_config) + return; + + kref_put(&oa_config->ref, xe_oa_config_release); +} + +static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) +{ + static const struct xe_reg flex_eu_regs[] = { + EU_PERF_CNTL0, + EU_PERF_CNTL1, + EU_PERF_CNTL2, + EU_PERF_CNTL3, + EU_PERF_CNTL4, + EU_PERF_CNTL5, + EU_PERF_CNTL6, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { + if (flex_eu_regs[i].addr == addr) + return true; + } + return false; +} + +static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table) +{ + while (table->start && table->end) { + if (addr >= table->start && addr <= table->end) + return true; + + table++; + } + + return false; +} + +static const struct xe_mmio_range xehp_oa_b_counters[] = { + { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ + { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ + {} +}; + +static const struct xe_mmio_range gen12_oa_b_counters[] = { + { .start = 0x2b2c, .end = 0x2b2c }, /* OAG_OA_PESS */ + { .start = 0xd900, .end = 0xd91c }, /* OAG_OASTARTTRIG[1-8] */ + { .start = 0xd920, .end = 0xd93c }, /* OAG_OAREPORTTRIG1[1-8] */ + { .start = 0xd940, .end = 0xd97c }, /* OAG_CEC[0-7][0-1] */ + { .start = 0xdc00, .end = 0xdc3c }, /* OAG_SCEC[0-7][0-1] */ + { .start = 0xdc40, .end = 0xdc40 }, /* OAG_SPCTR_CNF */ + { .start = 0xdc44, .end = 0xdc44 }, /* OAA_DBG_REG */ + {} +}; + +static const struct xe_mmio_range mtl_oam_b_counters[] = { + { .start = 0x393000, .end = 0x39301c }, /* OAM_STARTTRIG1[1-8] */ + { .start = 0x393020, .end = 0x39303c }, /* OAM_REPORTTRIG1[1-8] */ + { .start = 0x393040, .end = 0x39307c }, /* OAM_CEC[0-7][0-1] */ + { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */ + {} +}; + +static const struct xe_mmio_range xe2_oa_b_counters[] = { + { .start = 0x393200, .end = 0x39323C }, /* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */ + { .start = 0x394200, .end = 0x39423C }, /* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */ + { .start = 0x394A00, .end = 0x394A3C }, /* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */ + {}, +}; + +static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr) +{ + return xe_oa_reg_in_range_table(addr, xehp_oa_b_counters) || + xe_oa_reg_in_range_table(addr, gen12_oa_b_counters) || + xe_oa_reg_in_range_table(addr, mtl_oam_b_counters) || + (GRAPHICS_VER(oa->xe) >= 20 && + xe_oa_reg_in_range_table(addr, xe2_oa_b_counters)); +} + +static const struct xe_mmio_range mtl_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */ + {} +}; + +static const struct xe_mmio_range gen12_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */ + {} +}; + +static const struct xe_mmio_range xe2_oa_mux_regs[] = { + { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ + { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ + { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ + { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ + { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ + { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ + {}, +}; + +static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr) +{ + if (GRAPHICS_VER(oa->xe) >= 20) + return xe_oa_reg_in_range_table(addr, xe2_oa_mux_regs); + else if (GRAPHICS_VERx100(oa->xe) >= 1270) + return xe_oa_reg_in_range_table(addr, mtl_oa_mux_regs); + else + return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs); +} + +static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr) +{ + return xe_oa_is_valid_flex_addr(oa, addr) || + xe_oa_is_valid_b_counter_addr(oa, addr) || + xe_oa_is_valid_mux_addr(oa, addr); +} + +static struct xe_oa_reg * +xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr), + u32 __user *regs, u32 n_regs) +{ + struct xe_oa_reg *oa_regs; + int err; + u32 i; + + oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); + if (!oa_regs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < n_regs; i++) { + u32 addr, value; + + err = get_user(addr, regs); + if (err) + goto addr_err; + + if (!is_valid(oa, addr)) { + drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr); + err = -EINVAL; + goto addr_err; + } + + err = get_user(value, regs + 1); + if (err) + goto addr_err; + + oa_regs[i].addr = XE_REG(addr); + oa_regs[i].value = value; + + regs += 2; + } + + return oa_regs; + +addr_err: + kfree(oa_regs); + return ERR_PTR(err); +} + +static ssize_t show_dynamic_id(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_oa_config *oa_config = + container_of(attr, typeof(*oa_config), sysfs_metric_id); + + return sysfs_emit(buf, "%d\n", oa_config->id); +} + +static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa, + struct xe_oa_config *oa_config) +{ + sysfs_attr_init(&oa_config->sysfs_metric_id.attr); + oa_config->sysfs_metric_id.attr.name = "id"; + oa_config->sysfs_metric_id.attr.mode = 0444; + oa_config->sysfs_metric_id.show = show_dynamic_id; + oa_config->sysfs_metric_id.store = NULL; + + oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; + oa_config->attrs[1] = NULL; + + oa_config->sysfs_metric.name = oa_config->uuid; + oa_config->sysfs_metric.attrs = oa_config->attrs; + + return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric); +} + +/** + * xe_oa_add_config_ioctl - Adds one OA config + * @dev: @drm_device + * @data: pointer to struct @drm_xe_oa_config + * @file: @drm_file + * + * The functions adds an OA config to the set of OA configs maintained in + * the kernel. The config determines which OA metrics are collected for an + * OA stream. + */ +int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct drm_xe_oa_config param; + struct drm_xe_oa_config *arg = ¶m; + struct xe_oa_config *oa_config, *tmp; + struct xe_oa_reg *regs; + int err, id; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); + return -EACCES; + } + + err = __copy_from_user(¶m, u64_to_user_ptr(data), sizeof(param)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, arg->extensions) || + XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) || + XE_IOCTL_DBG(oa->xe, !arg->n_regs)) + return -EINVAL; + + oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); + if (!oa_config) + return -ENOMEM; + + oa_config->oa = oa; + kref_init(&oa_config->ref); + + if (!uuid_is_valid(arg->uuid)) { + drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n"); + err = -EINVAL; + goto reg_err; + } + + /* Last character in oa_config->uuid will be 0 because oa_config is kzalloc */ + memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid)); + + oa_config->regs_len = arg->n_regs; + regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr, + u64_to_user_ptr(arg->regs_ptr), + arg->n_regs); + if (IS_ERR(regs)) { + drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n"); + err = PTR_ERR(regs); + goto reg_err; + } + oa_config->regs = regs; + + err = mutex_lock_interruptible(&oa->metrics_lock); + if (err) + goto reg_err; + + /* We shouldn't have too many configs, so this iteration shouldn't be too costly */ + idr_for_each_entry(&oa->metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, oa_config->uuid)) { + drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n"); + err = -EADDRINUSE; + goto sysfs_err; + } + } + + err = create_dynamic_oa_sysfs_entry(oa, oa_config); + if (err) { + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); + goto sysfs_err; + } + + oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL); + if (oa_config->id < 0) { + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); + err = oa_config->id; + goto sysfs_err; + } + + mutex_unlock(&oa->metrics_lock); + + drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); + + return oa_config->id; + +sysfs_err: + mutex_unlock(&oa->metrics_lock); +reg_err: + xe_oa_config_put(oa_config); + drm_dbg(&oa->xe->drm, "Failed to add new OA config\n"); + return err; +} + +/** + * xe_oa_remove_config_ioctl - Removes one OA config + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + */ +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_oa_config *oa_config; + u64 arg, *ptr = u64_to_user_ptr(data); + int ret; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); + return -EACCES; + } + + ret = get_user(arg, ptr); + if (XE_IOCTL_DBG(oa->xe, ret)) + return ret; + + ret = mutex_lock_interruptible(&oa->metrics_lock); + if (ret) + return ret; + + oa_config = idr_find(&oa->metrics_idr, arg); + if (!oa_config) { + drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n"); + ret = -ENOENT; + goto err_unlock; + } + + WARN_ON(arg != oa_config->id); + + sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric); + idr_remove(&oa->metrics_idr, arg); + + mutex_unlock(&oa->metrics_lock); + + drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); + + xe_oa_config_put(oa_config); + + return 0; + +err_unlock: + mutex_unlock(&oa->metrics_lock); + return ret; +} + +/** + * xe_oa_register - Xe OA registration + * @xe: @xe_device + * + * Exposes the metrics sysfs directory upon completion of module initialization + */ +void xe_oa_register(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->xe) + return; + + oa->metrics_kobj = kobject_create_and_add("metrics", + &xe->drm.primary->kdev->kobj); +} + +/** + * xe_oa_unregister - Xe OA de-registration + * @xe: @xe_device + */ +void xe_oa_unregister(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->metrics_kobj) + return; + + kobject_put(oa->metrics_kobj); + oa->metrics_kobj = NULL; +} + static u32 num_oa_units_per_gt(struct xe_gt *gt) { return 1; @@ -239,6 +661,9 @@ int xe_oa_init(struct xe_device *xe) oa->xe = xe; oa->oa_formats = oa_formats; + drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock); + idr_init_base(&oa->metrics_idr, 1); + ret = xe_oa_init_oa_units(oa); if (ret) { drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); @@ -252,6 +677,12 @@ exit: return ret; } +static int destroy_config(int id, void *p, void *data) +{ + xe_oa_config_put(p); + return 0; +} + /** * xe_oa_fini - OA de-initialization during device remove * @xe: @xe_device @@ -263,5 +694,8 @@ void xe_oa_fini(struct xe_device *xe) if (!oa->xe) return; + idr_for_each(&oa->metrics_idr, destroy_config, oa); + idr_destroy(&oa->metrics_idr); + oa->xe = NULL; } diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index 2647c1947746..5ccc772e047a 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -8,9 +8,15 @@ #include "xe_oa_types.h" +struct drm_device; +struct drm_file; struct xe_device; int xe_oa_init(struct xe_device *xe); void xe_oa_fini(struct xe_device *xe); +void xe_oa_register(struct xe_device *xe); +void xe_oa_unregister(struct xe_device *xe); +int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); #endif diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index e7b91e31f0e8..f8a45015cf49 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -7,6 +7,7 @@ #define _XE_OA_TYPES_H_ #include +#include #include #include @@ -125,6 +126,15 @@ struct xe_oa { /** @xe: back pointer to xe device */ struct xe_device *xe; + /** @metrics_kobj: kobj for metrics sysfs */ + struct kobject *metrics_kobj; + + /** @metrics_lock: lock protecting add/remove configs */ + struct mutex metrics_lock; + + /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ + struct idr metrics_idr; + /** @oa_formats: tracks all OA formats across platforms */ const struct xe_oa_format *oa_formats; diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c index f619cf50b453..ca01042d75b1 100644 --- a/drivers/gpu/drm/xe/xe_perf.c +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -8,11 +8,25 @@ #include +#include "xe_oa.h" #include "xe_perf.h" u32 xe_perf_stream_paranoid = true; static struct ctl_table_header *sysctl_header; +static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, + struct drm_file *file) +{ + switch (arg->perf_op) { + case DRM_XE_PERF_OP_ADD_CONFIG: + return xe_oa_add_config_ioctl(dev, arg->param, file); + case DRM_XE_PERF_OP_REMOVE_CONFIG: + return xe_oa_remove_config_ioctl(dev, arg->param, file); + default: + return -EINVAL; + } +} + /** * xe_perf_ioctl - The top level perf layer ioctl * @dev: @drm_device @@ -32,6 +46,8 @@ int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return -EINVAL; switch (arg->perf_type) { + case DRM_XE_PERF_TYPE_OA: + return xe_oa_ioctl(dev, arg, file); default: return -EINVAL; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 323d899a276b..fd9a4bd9e3d4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1378,6 +1378,7 @@ struct drm_xe_wait_user_fence { * enum drm_xe_perf_type - Perf stream types */ enum drm_xe_perf_type { + DRM_XE_PERF_TYPE_OA, __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ }; @@ -1469,6 +1470,30 @@ enum drm_xe_oa_format_type { DRM_XE_OA_FMT_TYPE_PEC, }; +/** + * struct drm_xe_oa_config - OA metric configuration + * + * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * particular config can be specified when opening an OA stream using + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. + */ +struct drm_xe_oa_config { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @uuid: String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" */ + char uuid[36]; + + /** @n_regs: Number of regs in @regs_ptr */ + __u32 n_regs; + + /** + * @regs_ptr: Pointer to (register address, value) pairs for OA config + * registers. Expected length of buffer is: (2 * sizeof(u32) * @n_regs). + */ + __u64 regs_ptr; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From b6fd51c6211910b1db072a3fa2a17ba85cb3dd51 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:58 -0700 Subject: drm/xe/oa/uapi: Define and parse OA stream properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properties for OA streams are specified by user space, when the stream is opened, as a chain of drm_xe_ext_set_property struct's. Parse and validate these stream properties. v2: Remove struct drm_xe_oa_open_param (Harish Chegondi) Drop DRM_XE_OA_PROPERTY_POLL_OA_PERIOD_US (Umesh) Eliminate comparison with xe_oa_max_sample_rate (Umesh) Drop 'struct drm_xe_oa_record_header' (Umesh) v3: s/DRM_XE_OA_PROPERTY_OA_EXPONENT/ \ DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT/ (Jose) v4: Fix 32 bit build v5: Add non-static function kernel doc (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-7-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 364 +++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 5 + drivers/gpu/drm/xe/xe_perf.c | 2 + include/uapi/drm/xe_drm.h | 72 +++++++++ 4 files changed, 443 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4122785735d4..9b23eadf56cd 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,18 +3,23 @@ * Copyright © 2023-2024 Intel Corporation */ +#include + #include #include +#include "regs/xe_gt_regs.h" #include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_device.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" #include "xe_perf.h" +#include "xe_pm.h" #define XE_OA_UNIT_INVALID U32_MAX @@ -40,6 +45,19 @@ struct xe_oa_config { struct rcu_head rcu; }; +struct xe_oa_open_param { + u32 oa_unit_id; + bool sample; + u32 metric_set; + enum xe_oa_format_name oa_format; + int period_exponent; + bool disabled; + int exec_queue_id; + int engine_instance; + struct xe_exec_queue *exec_q; + struct xe_hw_engine *hwe; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -82,6 +100,352 @@ static void xe_oa_config_put(struct xe_oa_config *oa_config) kref_put(&oa_config->ref, xe_oa_config_release); } +/** + * xe_oa_timestamp_frequency - Return OA timestamp frequency + * @gt: @xe_gt + * + * OA timestamp frequency = CS timestamp frequency in most platforms. On some + * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such + * cases, return the adjusted CS timestamp frequency to the user. + */ +u32 xe_oa_timestamp_frequency(struct xe_gt *gt) +{ + u32 reg, shift; + + /* + * Wa_18013179988:dg2 + * Wa_14015568240:pvc + * Wa_14015846243:mtl + */ + switch (gt_to_xe(gt)->info.platform) { + case XE_DG2: + case XE_PVC: + case XE_METEORLAKE: + xe_pm_runtime_get(gt_to_xe(gt)); + reg = xe_mmio_read32(gt, RPM_CONFIG0); + xe_pm_runtime_put(gt_to_xe(gt)); + + shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); + return gt->info.reference_clock << (3 - shift); + + default: + return gt->info.reference_clock; + } +} + +static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) +{ + u64 nom = (2ULL << exponent) * NSEC_PER_SEC; + u32 den = xe_oa_timestamp_frequency(gt); + + return div_u64(nom + den - 1, den); +} + +static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) +{ + switch (hwe->oa_unit->type) { + case DRM_XE_OA_UNIT_TYPE_OAG: + return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || + type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; + case DRM_XE_OA_UNIT_TYPE_OAM: + return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; + default: + return false; + } +} + +static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) +{ + u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); + u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); + u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); + u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); + int idx; + + for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { + const struct xe_oa_format *f = &oa->oa_formats[idx]; + + if (counter_size == f->counter_size && bc_report == f->bc_report && + type == f->type && counter_sel == f->counter_select) { + *name = idx; + return 0; + } + } + + return -EINVAL; +} + +/** + * xe_oa_unit_id - Return OA unit ID for a hardware engine + * @hwe: @xe_hw_engine + * + * Return OA unit ID for a hardware engine when available + */ +u16 xe_oa_unit_id(struct xe_hw_engine *hwe) +{ + return hwe->oa_unit && hwe->oa_unit->num_engines ? + hwe->oa_unit->oa_unit_id : U16_MAX; +} + +static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) +{ + struct xe_gt *gt; + int i, ret = 0; + + if (param->exec_q) { + /* When we have an exec_q, get hwe from the exec_q */ + param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, + param->engine_instance, true); + } else { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + /* Else just get the first hwe attached to the oa unit */ + for_each_gt(gt, oa->xe, i) { + for_each_hw_engine(hwe, gt, id) { + if (xe_oa_unit_id(hwe) == param->oa_unit_id) { + param->hwe = hwe; + goto out; + } + } + } + } +out: + if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { + drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", + param->exec_q ? param->exec_q->class : -1, + param->engine_instance, param->oa_unit_id); + ret = -EINVAL; + } + + return ret; +} + +static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (value >= oa->oa_unit_ids) { + drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); + return -EINVAL; + } + param->oa_unit_id = value; + return 0; +} + +static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->sample = value; + return 0; +} + +static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->metric_set = value; + return 0; +} + +static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + int ret = decode_oa_format(oa, value, ¶m->oa_format); + + if (ret) { + drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); + return ret; + } + return 0; +} + +static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ +#define OA_EXPONENT_MAX 31 + + if (value > OA_EXPONENT_MAX) { + drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); + return -EINVAL; + } + param->period_exponent = value; + return 0; +} + +static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->disabled = value; + return 0; +} + +static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->exec_queue_id = value; + return 0; +} + +static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->engine_instance = value; + return 0; +} + +typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param); +static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, +}; + +static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, + struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) || + XE_IOCTL_DBG(oa->xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); + return xe_oa_set_property_funcs[idx](oa, ext.value, param); +} + +typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, + struct xe_oa_open_param *param); +static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { + [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, +}; + +static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, + struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(oa->xe, ext_number >= DRM_XE_OA_PROPERTY_MAX)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.pad) || + XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); + err = xe_oa_user_extension_funcs[idx](oa, extension, param); + if (XE_IOCTL_DBG(oa->xe, err)) + return err; + + if (ext.next_extension) + return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param); + + return 0; +} + +/** + * xe_oa_stream_open_ioctl - Opens an OA stream + * @dev: @drm_device + * @data: pointer to struct @drm_xe_oa_config + * @file: @drm_file + * + * The functions opens an OA stream. An OA stream, opened with specified + * properties, enables perf counter samples to be collected, either + * periodically (time based sampling), or on request (using perf queries) + */ +int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_file *xef = to_xe_file(file); + struct xe_oa_open_param param = {}; + const struct xe_oa_format *f; + bool privileged_op = true; + int ret; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + ret = xe_oa_user_extensions(oa, data, 0, ¶m); + if (ret) + return ret; + + if (param.exec_queue_id > 0) { + param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id); + if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) + return -ENOENT; + } + + /* + * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC, + * without global stream access, can be an unprivileged operation + */ + if (param.exec_q && !param.sample) + privileged_op = false; + + if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n"); + ret = -EACCES; + goto err_exec_q; + } + + if (!param.exec_q && !param.sample) { + drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n"); + ret = -EINVAL; + goto err_exec_q; + } + + ret = xe_oa_assign_hwe(oa, ¶m); + if (ret) + goto err_exec_q; + + f = &oa->oa_formats[param.oa_format]; + if (!param.oa_format || !f->size || + !engine_supports_oa_format(param.hwe, f->type)) { + drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", + param.oa_format, f->type, f->size, param.hwe->class); + ret = -EINVAL; + goto err_exec_q; + } + + if (param.period_exponent > 0) { + u64 oa_period, oa_freq_hz; + + /* Requesting samples from OAG buffer is a privileged operation */ + if (!param.sample) { + drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n"); + ret = -EINVAL; + goto err_exec_q; + } + oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent); + oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period); + drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); + } +err_exec_q: + if (ret < 0 && param.exec_q) + xe_exec_queue_put(param.exec_q); + return ret; +} + static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) { static const struct xe_reg flex_eu_regs[] = { diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index 5ccc772e047a..87a38820c317 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -11,12 +11,17 @@ struct drm_device; struct drm_file; struct xe_device; +struct xe_gt; +struct xe_hw_engine; int xe_oa_init(struct xe_device *xe); void xe_oa_fini(struct xe_device *xe); void xe_oa_register(struct xe_device *xe); void xe_oa_unregister(struct xe_device *xe); +int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +u32 xe_oa_timestamp_frequency(struct xe_gt *gt); +u16 xe_oa_unit_id(struct xe_hw_engine *hwe); #endif diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c index ca01042d75b1..d6cd74cadf34 100644 --- a/drivers/gpu/drm/xe/xe_perf.c +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -18,6 +18,8 @@ static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, struct drm_file *file) { switch (arg->perf_op) { + case DRM_XE_PERF_OP_STREAM_OPEN: + return xe_oa_stream_open_ioctl(dev, arg->param, file); case DRM_XE_PERF_OP_ADD_CONFIG: return xe_oa_add_config_ioctl(dev, arg->param, file); case DRM_XE_PERF_OP_REMOVE_CONFIG: diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index fd9a4bd9e3d4..307409f968e2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1470,6 +1470,78 @@ enum drm_xe_oa_format_type { DRM_XE_OA_FMT_TYPE_PEC, }; +/** + * enum drm_xe_oa_property_id - OA stream property id's + * + * Stream params are specified as a chain of @drm_xe_ext_set_property + * struct's, with @property values from enum @drm_xe_oa_property_id and + * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. + * @param field in struct @drm_xe_perf_param points to the first + * @drm_xe_ext_set_property struct. + */ +enum drm_xe_oa_property_id { +#define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open + * the OA stream, see @oa_unit_id in 'struct + * drm_xe_query_oa_units'. Defaults to 0 if not provided. + */ + DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1, + + /** + * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests inclusion of raw + * OA unit reports or stream samples in a global buffer attached to an + * OA unit. + */ + DRM_XE_OA_PROPERTY_SAMPLE_OA, + + /** + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA + * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + */ + DRM_XE_OA_PROPERTY_OA_METRIC_SET, + + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + DRM_XE_OA_PROPERTY_OA_FORMAT, + /* + * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, + * in terms of the following quantities: a. enum @drm_xe_oa_format_type + * b. Counter select c. Counter size and d. BC report. Also refer to the + * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. + */ +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) + + /** + * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit + * sampling with sampling frequency proportional to 2^(period_exponent + 1) + */ + DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, + + /** + * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA + * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + */ + DRM_XE_OA_PROPERTY_OA_DISABLED, + + /** + * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific + * @exec_queue_id. Perf queries can be executed on this exec queue. + */ + DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, + + /** + * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to + * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. + */ + DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** @DRM_XE_OA_PROPERTY_MAX: non-ABI */ + DRM_XE_OA_PROPERTY_MAX +}; + /** * struct drm_xe_oa_config - OA metric configuration * -- cgit v1.2.3 From e936f885f1e96f59d9d05fb6cb5a02b9b9b88a05 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:00 -0700 Subject: drm/xe/oa/uapi: Expose OA stream fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OA stream open perf op returns an fd with its own file_operations for the newly initialized OA stream. These file_operations allow userspace to enable or disable the stream, as well as apply a different metric configuration for the OA stream. Userspace can also poll for data availability. OA stream initialization is completed in this commit by enabling the OA stream. When sampling is enabled this starts a hrtimer which periodically checks for data availablility. v2: Use stream properties for stream reconfiguration with DRM_XE_PERF_IOCTL_CONFIG v3: Hold runtime_pm reference across oa buffer alloc/free v4: Fix 32 bit build Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-9-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 380 +++++++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 4 + 2 files changed, 384 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index c2fd2d22677f..a71111859190 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,8 +3,10 @@ * Copyright © 2023-2024 Intel Corporation */ +#include #include #include +#include #include #include @@ -29,6 +31,7 @@ #include "xe_pm.h" #include "xe_sched_job.h" +#define OA_TAKEN(tail, head) (((tail) - (head)) & (XE_OA_BUFFER_SIZE - 1)) #define DEFAULT_POLL_FREQUENCY_HZ 200 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX @@ -147,6 +150,205 @@ static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) return &stream->hwe->oa_unit->regs; } +static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) +{ + return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) & + OAG_OATAILPTR_MASK; +} + +#define oa_report_header_64bit(__s) \ + ((__s)->oa_buffer.format->header == HDR_64_BIT) + +static u64 oa_report_id(struct xe_oa_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; +} + +static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? + *((u64 *)report + 1) : + *((u32 *)report + 1); +} + +static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) +{ + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + int report_size = stream->oa_buffer.format->size; + u32 tail, hw_tail; + unsigned long flags; + bool pollin; + u32 partial_report_size; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + hw_tail = xe_oa_hw_tail_read(stream); + hw_tail -= gtt_offset; + + /* + * The tail pointer increases in 64 byte (cacheline size), not in report_size + * increments. Also report size may not be a power of 2. Compute potential + * partially landed report in OA buffer. + */ + partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); + partial_report_size %= report_size; + + /* Subtract partial amount off the tail */ + hw_tail = OA_TAKEN(hw_tail, partial_report_size); + + tail = hw_tail; + + /* + * Walk the stream backward until we find a report with report id and timestamp + * not 0. We can't tell whether a report has fully landed in memory before the + * report id and timestamp of the following report have landed. + * + * This is assuming that the writes of the OA unit land in memory in the order + * they were written. If not : (╯°□°)╯︵ ┻━┻ + */ + while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) { + void *report = stream->oa_buffer.vaddr + tail; + + if (oa_report_id(stream, report) || oa_timestamp(stream, report)) + break; + + tail = OA_TAKEN(tail, report_size); + } + + if (OA_TAKEN(hw_tail, tail) > report_size) + drm_dbg(&stream->oa->xe->drm, + "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", + stream->oa_buffer.head, tail, hw_tail); + + stream->oa_buffer.tail = tail; + + pollin = OA_TAKEN(stream->oa_buffer.tail, + stream->oa_buffer.head) >= report_size; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + return pollin; +} + +static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) +{ + struct xe_oa_stream *stream = + container_of(hrtimer, typeof(*stream), poll_check_timer); + + if (xe_oa_buffer_check_unlocked(stream)) { + stream->pollin = true; + wake_up(&stream->poll_wq); + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); + + return HRTIMER_RESTART; +} + +static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) +{ + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; + unsigned long flags; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0); + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, + gtt_offset & OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = 0; + + /* + * PRM says: "This MMIO must be set before the OATAILPTR register and after the + * OAHEADPTR register. This is to enable proper functionality of the overflow bit". + */ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf); + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr, + gtt_offset & OAG_OATAILPTR_MASK); + + /* Mark that we need updated tail pointer to read from */ + stream->oa_buffer.tail = 0; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ + memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); +} + +static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) +{ + return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) | + REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) | + REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size); +} + +static void xe_oa_enable(struct xe_oa_stream *stream) +{ + const struct xe_oa_format *format = stream->oa_buffer.format; + const struct xe_oa_regs *regs; + u32 val; + + /* + * BSpec: 46822: Bit 0. Even if stream->sample is 0, for OAR to function, the OA + * buffer must be correctly initialized + */ + xe_oa_init_oa_buffer(stream); + + regs = __oa_regs(stream); + val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | + OAG_OACONTROL_OA_COUNTER_ENABLE; + + xe_mmio_write32(stream->gt, regs->oa_ctrl, val); +} + +static void xe_oa_disable(struct xe_oa_stream *stream) +{ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0); + if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl, + OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) + drm_err(&stream->oa->xe->drm, + "wait for OA to be disabled timed out\n"); + + if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { + /* <= XE_METEORLAKE except XE_PVC */ + xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1); + if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) + drm_err(&stream->oa->xe->drm, + "wait for OA tlb invalidate timed out\n"); + } +} + +static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, + struct file *file, poll_table *wait) +{ + __poll_t events = 0; + + poll_wait(file, &stream->poll_wq, wait); + + /* + * We don't explicitly check whether there's something to read here since this + * path may be hot depending on what else userspace is polling, or on the timeout + * in use. We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there + * are samples to read + */ + if (stream->pollin) + events |= EPOLLIN; + + return events; +} + +static __poll_t xe_oa_poll(struct file *file, poll_table *wait) +{ + struct xe_oa_stream *stream = file->private_data; + __poll_t ret; + + mutex_lock(&stream->stream_lock); + ret = xe_oa_poll_locked(stream, file, wait); + mutex_unlock(&stream->stream_lock); + + return ret; +} + static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) { struct xe_sched_job *job; @@ -246,6 +448,27 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0); } +static void xe_oa_stream_destroy(struct xe_oa_stream *stream) +{ + struct xe_oa_unit *u = stream->hwe->oa_unit; + struct xe_gt *gt = stream->hwe->gt; + + if (WARN_ON(stream != u->exclusive_stream)) + return; + + WRITE_ONCE(u->exclusive_stream, NULL); + + xe_oa_disable_metric_set(stream); + xe_exec_queue_put(stream->k_exec_q); + + xe_oa_free_oa_buffer(stream); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_pm_runtime_put(stream->oa->xe); + + xe_oa_free_configs(stream); +} + static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) { struct xe_bo *bo; @@ -383,6 +606,148 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) return xe_oa_emit_oa_config(stream); } +static void xe_oa_stream_enable(struct xe_oa_stream *stream) +{ + stream->pollin = false; + + xe_oa_enable(stream); + + if (stream->sample) + hrtimer_start(&stream->poll_check_timer, + ns_to_ktime(stream->poll_period_ns), + HRTIMER_MODE_REL_PINNED); +} + +static void xe_oa_stream_disable(struct xe_oa_stream *stream) +{ + xe_oa_disable(stream); + + if (stream->sample) + hrtimer_cancel(&stream->poll_check_timer); +} + +static void xe_oa_enable_locked(struct xe_oa_stream *stream) +{ + if (stream->enabled) + return; + + stream->enabled = true; + + xe_oa_stream_enable(stream); +} + +static void xe_oa_disable_locked(struct xe_oa_stream *stream) +{ + if (!stream->enabled) + return; + + stream->enabled = false; + + xe_oa_stream_disable(stream); +} + +static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) +{ + struct drm_xe_ext_set_property ext; + long ret = stream->oa_config->id; + struct xe_oa_config *config; + int err; + + err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); + if (XE_IOCTL_DBG(stream->oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || + XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || + XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || + XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) + return -EINVAL; + + config = xe_oa_get_oa_config(stream->oa, ext.value); + if (!config) + return -ENODEV; + + if (config != stream->oa_config) { + err = xe_oa_emit_oa_config(stream); + if (!err) + config = xchg(&stream->oa_config, config); + else + ret = err; + } + + xe_oa_config_put(config); + + return ret; +} + +static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, + unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case DRM_XE_PERF_IOCTL_ENABLE: + xe_oa_enable_locked(stream); + return 0; + case DRM_XE_PERF_IOCTL_DISABLE: + xe_oa_disable_locked(stream); + return 0; + case DRM_XE_PERF_IOCTL_CONFIG: + return xe_oa_config_locked(stream, arg); + } + + return -EINVAL; +} + +static long xe_oa_ioctl(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + struct xe_oa_stream *stream = file->private_data; + long ret; + + mutex_lock(&stream->stream_lock); + ret = xe_oa_ioctl_locked(stream, cmd, arg); + mutex_unlock(&stream->stream_lock); + + return ret; +} + +static void xe_oa_destroy_locked(struct xe_oa_stream *stream) +{ + if (stream->enabled) + xe_oa_disable_locked(stream); + + xe_oa_stream_destroy(stream); + + if (stream->exec_q) + xe_exec_queue_put(stream->exec_q); + + kfree(stream); +} + +static int xe_oa_release(struct inode *inode, struct file *file) +{ + struct xe_oa_stream *stream = file->private_data; + struct xe_gt *gt = stream->gt; + + mutex_lock(>->oa.gt_lock); + xe_oa_destroy_locked(stream); + mutex_unlock(>->oa.gt_lock); + + /* Release the reference the perf stream kept on the driver */ + drm_dev_put(>_to_xe(gt)->drm); + + return 0; +} + +static const struct file_operations xe_oa_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .release = xe_oa_release, + .poll = xe_oa_poll, + .unlocked_ioctl = xe_oa_ioctl, +}; + static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { @@ -436,6 +801,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, WRITE_ONCE(u->exclusive_stream, stream); + hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stream->poll_check_timer.function = xe_oa_poll_check_timer_cb; + init_waitqueue_head(&stream->poll_wq); + spin_lock_init(&stream->oa_buffer.ptr_lock); mutex_init(&stream->stream_lock); @@ -479,10 +848,21 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, if (ret) goto err_free; + stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0); + if (stream_fd < 0) { + ret = stream_fd; + goto err_destroy; + } + + if (!param->disabled) + xe_oa_enable_locked(stream); + /* Hold a reference on the drm device till stream_fd is released */ drm_dev_get(&stream->oa->xe->drm); return stream_fd; +err_destroy: + xe_oa_stream_destroy(stream); err_free: kfree(stream); exit: diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 307409f968e2..1e09f786b3e6 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1478,6 +1478,10 @@ enum drm_xe_oa_format_type { * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. * @param field in struct @drm_xe_perf_param points to the first * @drm_xe_ext_set_property struct. + * + * Exactly the same mechanism is also used for stream reconfiguration using + * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of + * properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 -- cgit v1.2.3 From efb315d0a013cdc8b1e49f5c07b1a2972bc624d4 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:01 -0700 Subject: drm/xe/oa/uapi: Read file_operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the OA stream read file_operation. Both blocking and non-blocking reads are supported. As part of read system call, the read copies OA perf data from the OA buffer to the user buffer, after appending packet headers for status and data packets. v2: Drop OA report headers, implement DRM_XE_PERF_IOCTL_STATUS (Umesh) v3: Introduce 'struct drm_xe_oa_stream_status' v4: Define oa_status register bitfields (Umesh) v5: Add extensions to 'struct drm_xe_oa_stream_status' v6: Minor cleanup, eliminate report32 variable v7: Use -EIO to signal to userspace to read OASTATUS using DRM_XE_PERF_IOCTL_STATUS, change previous sites returning -EIO to return -EINVAL Make drm_xe_oa_stream_status bits contiguous (Jose, Umesh) rmw oa_status bits (Umesh) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-10-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 201 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 3 + include/uapi/drm/xe_drm.h | 20 ++++ 3 files changed, 224 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a71111859190..86d56b080eff 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -164,6 +164,14 @@ static u64 oa_report_id(struct xe_oa_stream *stream, void *report) return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; } +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)report = 0; + else + *report = 0; +} + static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) { return oa_report_header_64bit(stream) ? @@ -171,6 +179,14 @@ static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) *((u32 *)report + 1); } +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)&report[2] = 0; + else + report[1] = 0; +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -245,6 +261,95 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) return HRTIMER_RESTART; } +static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset, const u8 *report) +{ + int report_size = stream->oa_buffer.format->size; + int report_size_partial; + u8 *oa_buf_end; + + if ((count - *offset) < report_size) + return -ENOSPC; + + buf += *offset; + + oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + report_size_partial = oa_buf_end - report; + + if (report_size_partial < report_size) { + if (copy_to_user(buf, report, report_size_partial)) + return -EFAULT; + buf += report_size_partial; + + if (copy_to_user(buf, stream->oa_buffer.vaddr, + report_size - report_size_partial)) + return -EFAULT; + } else if (copy_to_user(buf, report, report_size)) { + return -EFAULT; + } + + *offset += report_size; + + return 0; +} + +static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset) +{ + int report_size = stream->oa_buffer.format->size; + u8 *oa_buf_base = stream->oa_buffer.vaddr; + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 mask = (XE_OA_BUFFER_SIZE - 1); + size_t start_offset = *offset; + unsigned long flags; + u32 head, tail; + int ret = 0; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + head = stream->oa_buffer.head; + tail = stream->oa_buffer.tail; + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE); + + for (; OA_TAKEN(tail, head); head = (head + report_size) & mask) { + u8 *report = oa_buf_base + head; + + ret = xe_oa_append_report(stream, buf, count, offset, report); + if (ret) + break; + + if (is_power_of_2(report_size)) { + /* Clear out report id and timestamp to detect unlanded reports */ + oa_report_id_clear(stream, (void *)report); + oa_timestamp_clear(stream, (void *)report); + } else { + u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + u32 part = oa_buf_end - report; + + /* Zero out the entire report */ + if (report_size <= part) { + memset(report, 0, report_size); + } else { + memset(report, 0, part); + memset(oa_buf_base, 0, report_size - part); + } + } + } + + if (start_offset != *offset) { + struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + xe_mmio_write32(stream->gt, oaheadptr, + (head + gtt_offset) & OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = head; + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + } + + return ret; +} + static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -318,6 +423,78 @@ static void xe_oa_disable(struct xe_oa_stream *stream) } } +static int xe_oa_wait_unlocked(struct xe_oa_stream *stream) +{ + /* We might wait indefinitely if periodic sampling is not enabled */ + if (!stream->periodic) + return -EINVAL; + + return wait_event_interruptible(stream->poll_wq, + xe_oa_buffer_check_unlocked(stream)); +} + +#define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \ + OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST) + +static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset) +{ + /* Only clear our bits to avoid side-effects */ + stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status, + OASTATUS_RELEVANT_BITS, 0); + /* + * Signal to userspace that there is non-zero OA status to read via + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl + */ + if (stream->oa_status & OASTATUS_RELEVANT_BITS) + return -EIO; + + return xe_oa_append_reports(stream, buf, count, offset); +} + +static ssize_t xe_oa_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct xe_oa_stream *stream = file->private_data; + size_t offset = 0; + int ret; + + /* Can't read from disabled streams */ + if (!stream->enabled || !stream->sample) + return -EINVAL; + + if (!(file->f_flags & O_NONBLOCK)) { + do { + ret = xe_oa_wait_unlocked(stream); + if (ret) + return ret; + + mutex_lock(&stream->stream_lock); + ret = __xe_oa_read(stream, buf, count, &offset); + mutex_unlock(&stream->stream_lock); + } while (!offset && !ret); + } else { + mutex_lock(&stream->stream_lock); + ret = __xe_oa_read(stream, buf, count, &offset); + mutex_unlock(&stream->stream_lock); + } + + /* + * Typically we clear pollin here in order to wait for the new hrtimer callback + * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC, + * which means that more OA data is available than could fit in the user provided + * buffer. In this case we want the next poll() call to not block. + * + * Also in case of -EIO, we have already waited for data before returning + * -EIO, so need to wait again + */ + if (ret != -ENOSPC && ret != -EIO) + stream->pollin = false; + + /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */ + return offset ?: (ret ?: -EAGAIN); +} + static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, struct file *file, poll_table *wait) { @@ -680,6 +857,27 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) return ret; } +static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) +{ + struct drm_xe_oa_stream_status status = {}; + void __user *uaddr = (void __user *)arg; + + /* Map from register to uapi bits */ + if (stream->oa_status & OASTATUS_REPORT_LOST) + status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST; + if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW) + status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW; + if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW) + status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW; + if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL) + status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL; + + if (copy_to_user(uaddr, &status, sizeof(status))) + return -EFAULT; + + return 0; +} + static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, unsigned int cmd, unsigned long arg) @@ -693,6 +891,8 @@ static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, return 0; case DRM_XE_PERF_IOCTL_CONFIG: return xe_oa_config_locked(stream, arg); + case DRM_XE_PERF_IOCTL_STATUS: + return xe_oa_status_locked(stream, arg); } return -EINVAL; @@ -745,6 +945,7 @@ static const struct file_operations xe_oa_fops = { .llseek = no_llseek, .release = xe_oa_release, .poll = xe_oa_poll, + .read = xe_oa_read, .unlocked_ioctl = xe_oa_ioctl, }; diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 6700383b1a52..5bb8ce0d71c9 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -222,5 +222,8 @@ struct xe_oa_stream { /** @poll_period_ns: hrtimer period for checking OA buffer for available data */ u64 poll_period_ns; + + /** @oa_status: temporary storage for oa_status register value */ + u32 oa_status; }; #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1e09f786b3e6..03a6e479227a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1570,6 +1570,26 @@ struct drm_xe_oa_config { __u64 regs_ptr; }; +/** + * struct drm_xe_oa_stream_status - OA stream status returned from + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to + * query stream status in response to EIO errno from perf fd read(). + */ +struct drm_xe_oa_stream_status { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_status: OA stream status (see Bspec 46717/61226) */ + __u64 oa_status; +#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL (1 << 3) +#define DRM_XE_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define DRM_XE_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define DRM_XE_OASTATUS_REPORT_LOST (1 << 0) + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From dd6b4718c3bab611588922ae8a7736c58eafcc93 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:04 -0700 Subject: drm/xe/oa/uapi: Query OA unit properties Implement query for properties of OA units present on a device. v2: Clean up reserved/pad fields (Umesh) Follow the same scheme as other query structs v3: Skip reporting reserved engines attached to OA units v4: Expose oa_buf_size via DRM_XE_PERF_IOCTL_INFO (Umesh) v5: Don't expose capabilities as OR of properties (Umesh) v6: Add extensions to query output structs: drm_xe_oa_unit, drm_xe_query_oa_units and drm_xe_oa_stream_info v7: Change oa_units[] array to __u64 type Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-13-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 13 +++++++ drivers/gpu/drm/xe/xe_query.c | 77 +++++++++++++++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 85 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 42b0ba014e35..038caeb7c9e7 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1050,6 +1050,17 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) return 0; } +static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) +{ + struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; + void __user *uaddr = (void __user *)arg; + + if (copy_to_user(uaddr, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, unsigned int cmd, unsigned long arg) @@ -1065,6 +1076,8 @@ static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, return xe_oa_config_locked(stream, arg); case DRM_XE_PERF_IOCTL_STATUS: return xe_oa_status_locked(stream, arg); + case DRM_XE_PERF_IOCTL_INFO: + return xe_oa_info_locked(stream, arg); } return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 995effcb904b..4e01df6b1b7a 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -602,6 +602,82 @@ query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query) return 0; } +static size_t calc_oa_unit_query_size(struct xe_device *xe) +{ + size_t size = sizeof(struct drm_xe_query_oa_units); + struct xe_gt *gt; + int i, id; + + for_each_gt(gt, xe, id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + size += sizeof(struct drm_xe_oa_unit); + size += gt->oa.oa_unit[i].num_engines * + sizeof(struct drm_xe_engine_class_instance); + } + } + + return size; +} + +static int query_oa_units(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + void __user *query_ptr = u64_to_user_ptr(query->data); + size_t size = calc_oa_unit_query_size(xe); + struct drm_xe_query_oa_units *qoa; + enum xe_hw_engine_id hwe_id; + struct drm_xe_oa_unit *du; + struct xe_hw_engine *hwe; + struct xe_oa_unit *u; + int gt_id, i, j, ret; + struct xe_gt *gt; + u8 *pdu; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + qoa = kzalloc(size, GFP_KERNEL); + if (!qoa) + return -ENOMEM; + + pdu = (u8 *)&qoa->oa_units[0]; + for_each_gt(gt, xe, gt_id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + u = >->oa.oa_unit[i]; + du = (struct drm_xe_oa_unit *)pdu; + + du->oa_unit_id = u->oa_unit_id; + du->oa_unit_type = u->type; + du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); + du->capabilities = DRM_XE_OA_CAPS_BASE; + + j = 0; + for_each_hw_engine(hwe, gt, hwe_id) { + if (!xe_hw_engine_is_reserved(hwe) && + xe_oa_unit_id(hwe) == u->oa_unit_id) { + du->eci[j].engine_class = + xe_to_user_engine_class[hwe->class]; + du->eci[j].engine_instance = hwe->logical_instance; + du->eci[j].gt_id = gt->info.id; + j++; + } + } + du->num_engines = j; + pdu += sizeof(*du) + j * sizeof(du->eci[0]); + qoa->num_oa_units++; + } + } + + ret = copy_to_user(query_ptr, qoa, size); + kfree(qoa); + + return ret ? -EFAULT : 0; +} + static int (* const xe_query_funcs[])(struct xe_device *xe, struct drm_xe_device_query *query) = { query_engines, @@ -612,6 +688,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe, query_gt_topology, query_engine_cycles, query_uc_fw_version, + query_oa_units, }; int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 03a6e479227a..93e00be44b2d 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -689,6 +689,7 @@ struct drm_xe_device_query { #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7 +#define DRM_XE_DEVICE_QUERY_OA_UNITS 8 /** @query: The type of data to query */ __u32 query; @@ -1451,6 +1452,75 @@ enum drm_xe_oa_unit_type { DRM_XE_OA_UNIT_TYPE_OAM, }; +/** + * struct drm_xe_oa_unit - describe OA unit + */ +struct drm_xe_oa_unit { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_unit_id: OA unit ID */ + __u32 oa_unit_id; + + /** @oa_unit_type: OA unit type of @drm_xe_oa_unit_type */ + __u32 oa_unit_type; + + /** @capabilities: OA capabilities bit-mask */ + __u64 capabilities; +#define DRM_XE_OA_CAPS_BASE (1 << 0) + + /** @oa_timestamp_freq: OA timestamp freq */ + __u64 oa_timestamp_freq; + + /** @reserved: MBZ */ + __u64 reserved[4]; + + /** @num_engines: number of engines in @eci array */ + __u64 num_engines; + + /** @eci: engines attached to this OA unit */ + struct drm_xe_engine_class_instance eci[]; +}; + +/** + * struct drm_xe_query_oa_units - describe OA units + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_OA_UNITS, then the reply uses struct + * drm_xe_query_oa_units in .data. + * + * OA unit properties for all OA units can be accessed using a code block + * such as the one below: + * + * .. code-block:: C + * + * struct drm_xe_query_oa_units *qoa; + * struct drm_xe_oa_unit *oau; + * u8 *poau; + * + * // malloc qoa and issue DRM_XE_DEVICE_QUERY_OA_UNITS. Then: + * poau = (u8 *)&qoa->oa_units[0]; + * for (int i = 0; i < qoa->num_oa_units; i++) { + * oau = (struct drm_xe_oa_unit *)poau; + * // Access 'struct drm_xe_oa_unit' fields here + * poau += sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]); + * } + */ +struct drm_xe_query_oa_units { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @num_oa_units: number of OA units returned in oau[] */ + __u32 num_oa_units; + /** @pad: MBZ */ + __u32 pad; + /** + * @oa_units: struct @drm_xe_oa_unit array returned for this device. + * Written below as a u64 array to avoid problems with nested flexible + * arrays with some compilers + */ + __u64 oa_units[]; +}; + /** * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec * 52198/60942 @@ -1590,6 +1660,21 @@ struct drm_xe_oa_stream_status { __u64 reserved[3]; }; +/** + * struct drm_xe_oa_stream_info - OA stream info returned from + * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + */ +struct drm_xe_oa_stream_info { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_buf_size: OA buffer size */ + __u64 oa_buf_size; + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 41fd54ef74b02233a419b4929d26662e5f105f46 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 30 Apr 2024 11:43:19 +0100 Subject: drm/msm: Add MSM_PARAM_RAYTRACING uapi Expose the value of the software fuse to userspace. Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/592044/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 +++ include/uapi/drm/msm_drm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 71a9acc002fe..1c6626747b98 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -376,6 +376,9 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, case MSM_PARAM_HIGHEST_BANK_BIT: *value = adreno_gpu->ubwc_config.highest_bank_bit; return 0; + case MSM_PARAM_RAYTRACING: + *value = adreno_gpu->has_ray_tracing; + return 0; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index d8a6b3472760..3fca72f73861 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -87,6 +87,7 @@ struct drm_msm_timespec { #define MSM_PARAM_VA_START 0x0e /* RO: start of valid GPU iova range */ #define MSM_PARAM_VA_SIZE 0x0f /* RO: size of valid GPU iova range (bytes) */ #define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */ +#define MSM_PARAM_RAYTRACING 0x11 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From 7e5161da9d267957b726a29f3efe6cb50fdfed04 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Sun, 23 Jun 2024 13:31:19 -0700 Subject: drm/xe/oa: Fix kernel doc in xe_drm.h Fix kernel doc in xe_drm.h. Also eliminate private/non-abi enum definitions. v2: Remove __DRM_XE_PERF_TYPE_MAX since it is unused (Michal) v3: Also remove DRM_XE_OA_PROPERTY_MAX since it can also be eliminated (Michal) Suggested-by: Michal Wajdeczko Signed-off-by: Ashutosh Dixit Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240623203119.3840283-1-ashutosh.dixit@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 3 ++- include/uapi/drm/xe_drm.h | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4168b51cf7b5..9263ae9a864e 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1684,6 +1684,7 @@ static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, }; +#define MAX_USER_EXTENSIONS 16 static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, struct xe_oa_open_param *param) { @@ -1692,7 +1693,7 @@ static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number int err; u32 idx; - if (XE_IOCTL_DBG(oa->xe, ext_number >= DRM_XE_OA_PROPERTY_MAX)) + if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; err = __copy_from_user(&ext, address, sizeof(ext)); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 93e00be44b2d..b410553faa9b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1379,8 +1379,8 @@ struct drm_xe_wait_user_fence { * enum drm_xe_perf_type - Perf stream types */ enum drm_xe_perf_type { + /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ DRM_XE_PERF_TYPE_OA, - __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ }; /** @@ -1611,9 +1611,6 @@ enum drm_xe_oa_property_id { * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. */ DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, - - /** @DRM_XE_OA_PROPERTY_MAX: non-ABI */ - DRM_XE_OA_PROPERTY_MAX }; /** -- cgit v1.2.3 From 406d058dc323ae152d380ac90153eb56a75850c1 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 26 Jun 2024 11:18:17 -0700 Subject: drm/xe/oa/uapi: Allow preemption to be disabled on the stream exec queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa VK_KHR_performance_query use case requires preemption and timeslicing to be disabled for the stream exec queue. Implement this functionality here. v2: Minor change to debug print to print both ret values (Umesh) Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240626181817.1516229-3-ashutosh.dixit@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 70 +++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_oa_types.h | 3 ++ include/uapi/drm/xe_drm.h | 6 ++++ 3 files changed, 78 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a68659fd5386..6cc3f0217341 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -80,6 +80,7 @@ struct xe_oa_open_param { int engine_instance; struct xe_exec_queue *exec_q; struct xe_hw_engine *hwe; + bool no_preempt; }; struct xe_oa_config_bo { @@ -1013,11 +1014,55 @@ static void xe_oa_stream_disable(struct xe_oa_stream *stream) hrtimer_cancel(&stream->poll_check_timer); } +static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream) +{ + struct xe_exec_queue *q = stream->exec_q; + int ret1, ret2; + + /* Best effort recovery: try to revert both to original, irrespective of error */ + ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us); + ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us); + if (ret1 || ret2) + goto err; + return 0; +err: + drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n", __func__, ret1, ret2); + return ret1 ?: ret2; +} + +static int xe_oa_disable_preempt_timeslice(struct xe_oa_stream *stream) +{ + struct xe_exec_queue *q = stream->exec_q; + int ret; + + /* Setting values to 0 will disable timeslice and preempt_timeout */ + ret = q->ops->set_timeslice(q, 0); + if (ret) + goto err; + + ret = q->ops->set_preempt_timeout(q, 0); + if (ret) + goto err; + + return 0; +err: + xe_oa_enable_preempt_timeslice(stream); + drm_dbg(&stream->oa->xe->drm, "%s failed %d\n", __func__, ret); + return ret; +} + static int xe_oa_enable_locked(struct xe_oa_stream *stream) { if (stream->enabled) return 0; + if (stream->no_preempt) { + int ret = xe_oa_disable_preempt_timeslice(stream); + + if (ret) + return ret; + } + xe_oa_stream_enable(stream); stream->enabled = true; @@ -1026,13 +1071,18 @@ static int xe_oa_enable_locked(struct xe_oa_stream *stream) static int xe_oa_disable_locked(struct xe_oa_stream *stream) { + int ret = 0; + if (!stream->enabled) return 0; xe_oa_stream_disable(stream); + if (stream->no_preempt) + ret = xe_oa_enable_preempt_timeslice(stream); + stream->enabled = false; - return 0; + return ret; } static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) @@ -1307,6 +1357,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->sample = param->sample; stream->periodic = param->period_exponent > 0; stream->period_exponent = param->period_exponent; + stream->no_preempt = param->no_preempt; /* * For Xe2+, when overrun mode is enabled, there are no partial reports at the end @@ -1651,6 +1702,13 @@ static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, return 0; } +static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->no_preempt = value; + return 0; +} + typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param); static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { @@ -1662,6 +1720,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, }; static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, @@ -1766,6 +1825,15 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (param.exec_q && !param.sample) privileged_op = false; + if (param.no_preempt) { + if (!param.exec_q) { + drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n"); + ret = -EINVAL; + goto err_exec_q; + } + privileged_op = true; + } + if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) { drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n"); ret = -EACCES; diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 706d45577dae..540c3ec53a6d 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -235,5 +235,8 @@ struct xe_oa_stream { /** @oa_status: temporary storage for oa_status register value */ u32 oa_status; + + /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */ + u32 no_preempt; }; #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index b410553faa9b..12eaa8532b5c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1611,6 +1611,12 @@ enum drm_xe_oa_property_id { * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. */ DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** + * @DRM_XE_OA_PROPERTY_NO_PREEMPT: Allow preemption and timeslicing + * to be disabled for the stream exec queue. + */ + DRM_XE_OA_PROPERTY_NO_PREEMPT, }; /** -- cgit v1.2.3 From 8d9ffd15ff5c9da7bc6171f2536aaaff40bcab6e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 1 Jun 2024 14:56:16 -0400 Subject: drm/amdgpu: remove AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_* definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They were added accidentally. Signed-off-by: Marek Olšák Acked-by: Alex Deucher Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- include/uapi/drm/drm_fourcc.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index d0063ac6e09f..4168445fbb8b 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1540,9 +1540,6 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18 #define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 -#define AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 -#define AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ - /* * DCC supports embedding some clear colors directly in the DCC surface. * However, on older GPUs the rendering HW ignores the embedded clear color -- cgit v1.2.3 From 8dd1426e2c80e32ac1995007330c8f95ffa28ebb Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 1 Jun 2024 19:53:01 -0400 Subject: drm/amdgpu: handle gfx12 in amdgpu_display_verify_sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It verified GFX9-11 swizzle modes on GFX12, which has undefined behavior. Signed-off-by: Marek Olšák Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 27 ++++++++++++++++++++++++++- include/uapi/drm/drm_fourcc.h | 2 ++ 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b69a4e1b864b..3bb4ca9b8a40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1082,6 +1082,30 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) block_width = 256 / format_info->cpp[i]; block_height = 1; block_size_log2 = 8; + } else if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12) { + int swizzle = AMD_FMT_MOD_GET(TILE, modifier); + + switch (swizzle) { + case AMD_FMT_MOD_TILE_GFX12_256B_2D: + block_size_log2 = 8; + break; + case AMD_FMT_MOD_TILE_GFX12_4K_2D: + block_size_log2 = 12; + break; + case AMD_FMT_MOD_TILE_GFX12_64K_2D: + block_size_log2 = 16; + break; + case AMD_FMT_MOD_TILE_GFX12_256K_2D: + block_size_log2 = 18; + break; + default: + drm_dbg_kms(rfb->base.dev, + "Gfx12 swizzle mode with unknown block size: %d\n", swizzle); + return -EINVAL; + } + + get_block_dimensions(block_size_log2, format_info->cpp[i], + &block_width, &block_height); } else { int swizzle = AMD_FMT_MOD_GET(TILE, modifier); @@ -1117,7 +1141,8 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) return ret; } - if (AMD_FMT_MOD_GET(DCC, modifier)) { + if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11 && + AMD_FMT_MOD_GET(DCC, modifier)) { if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) { block_size_log2 = get_dcc_block_size(modifier, false, false); get_block_dimensions(block_size_log2 + 8, format_info->cpp[0], diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 4168445fbb8b..2d84a8052b15 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1506,6 +1506,8 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) * 6 - 64KB_3D * 7 - 256KB_3D */ +#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1 +#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2 #define AMD_FMT_MOD_TILE_GFX12_64K_2D 3 #define AMD_FMT_MOD_TILE_GFX12_256K_2D 4 -- cgit v1.2.3 From 63347fe031e3bd738a2a16aee8eba889376e49a8 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 3 Jul 2024 09:48:01 -0700 Subject: drm/xe/uapi: Rename xe perf layer as xe observation layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the perf layer allows capture of HW counter streams. These HW counters are generally performance related but don't have to be necessarily so. Also, the name "perf" is a carryover from i915 and is not preferred. Here we propose the name "observation" for this common layer which allows capture of different types of these counter streams. v2: Rename observability layer to observation layer (Lucas/Rodrigo) v3: Rename sysctl file to "observation_paranoid" (Jose) Fixes: 52c2e956dceb ("drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types") Fixes: fe8929bdf835 ("drm/xe/perf/uapi: Add perf_stream_paranoid sysctl") Acked-by: Lucas De Marchi Acked-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Acked-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240703164801.2561423-1-ashutosh.dixit@intel.com (cherry picked from commit 8169b2097d88d99d7e4a72e20e4b549efe9eb8d7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/xe_device.c | 4 +- drivers/gpu/drm/xe/xe_device_types.h | 2 +- drivers/gpu/drm/xe/xe_gt_types.h | 2 +- drivers/gpu/drm/xe/xe_module.c | 6 +-- drivers/gpu/drm/xe/xe_oa.c | 34 ++++++------ drivers/gpu/drm/xe/xe_observation.c | 93 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_observation.h | 20 +++++++ drivers/gpu/drm/xe/xe_perf.c | 92 ------------------------------- drivers/gpu/drm/xe/xe_perf.h | 20 ------- include/uapi/drm/xe_drm.h | 102 ++++++++++++++++++----------------- 11 files changed, 190 insertions(+), 187 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_observation.c create mode 100644 drivers/gpu/drm/xe/xe_observation.h delete mode 100644 drivers/gpu/drm/xe/xe_perf.c delete mode 100644 drivers/gpu/drm/xe/xe_perf.h (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index b1e03bfe4a68..628c245c4822 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -96,10 +96,10 @@ xe-y += xe_bb.o \ xe_mocs.o \ xe_module.o \ xe_oa.o \ + xe_observation.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ - xe_perf.o \ xe_pm.o \ xe_preempt_fence.o \ xe_pt.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index cfda7cb5df2c..03492fbcb8fb 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -42,9 +42,9 @@ #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_observation.h" #include "xe_pat.h" #include "xe_pcode.h" -#include "xe_perf.h" #include "xe_pm.h" #include "xe_query.h" #include "xe_sriov.h" @@ -142,7 +142,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XE_PERF, xe_perf_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c37be471d11c..3bca6d344744 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -463,7 +463,7 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; - /** @oa: oa perf counter subsystem */ + /** @oa: oa observation subsystem */ struct xe_oa oa; /** @needs_flr_on_fini: requests function-reset on fini */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 24bb95de920f..6b5e0b45efb0 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -389,7 +389,7 @@ struct xe_gt { u8 instances_per_class[XE_ENGINE_CLASS_MAX]; } user_engines; - /** @oa: oa perf counter subsystem per gt info */ + /** @oa: oa observation subsystem per gt info */ struct xe_oa_gt oa; }; diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 893858a2eea0..499540add465 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -11,7 +11,7 @@ #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" -#include "xe_perf.h" +#include "xe_observation.h" #include "xe_sched_job.h" struct xe_modparam xe_modparam = { @@ -80,8 +80,8 @@ static const struct init_funcs init_funcs[] = { .exit = xe_unregister_pci_driver, }, { - .init = xe_perf_sysctl_register, - .exit = xe_perf_sysctl_unregister, + .init = xe_observation_sysctl_register, + .exit = xe_observation_sysctl_unregister, }, }; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4188516a7816..6d69f751bf78 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -32,7 +32,7 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" -#include "xe_perf.h" +#include "xe_observation.h" #include "xe_pm.h" #include "xe_sched_job.h" #include "xe_sriov.h" @@ -481,7 +481,7 @@ static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, OASTATUS_RELEVANT_BITS, 0); /* * Signal to userspace that there is non-zero OA status to read via - * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl + * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl */ if (stream->oa_status & OASTATUS_RELEVANT_BITS) return -EIO; @@ -1158,15 +1158,15 @@ static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, unsigned long arg) { switch (cmd) { - case DRM_XE_PERF_IOCTL_ENABLE: + case DRM_XE_OBSERVATION_IOCTL_ENABLE: return xe_oa_enable_locked(stream); - case DRM_XE_PERF_IOCTL_DISABLE: + case DRM_XE_OBSERVATION_IOCTL_DISABLE: return xe_oa_disable_locked(stream); - case DRM_XE_PERF_IOCTL_CONFIG: + case DRM_XE_OBSERVATION_IOCTL_CONFIG: return xe_oa_config_locked(stream, arg); - case DRM_XE_PERF_IOCTL_STATUS: + case DRM_XE_OBSERVATION_IOCTL_STATUS: return xe_oa_status_locked(stream, arg); - case DRM_XE_PERF_IOCTL_INFO: + case DRM_XE_OBSERVATION_IOCTL_INFO: return xe_oa_info_locked(stream, arg); } @@ -1209,7 +1209,7 @@ static int xe_oa_release(struct inode *inode, struct file *file) xe_oa_destroy_locked(stream); mutex_unlock(>->oa.gt_lock); - /* Release the reference the perf stream kept on the driver */ + /* Release the reference the OA stream kept on the driver */ drm_dev_put(>_to_xe(gt)->drm); return 0; @@ -1222,7 +1222,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) unsigned long start = vma->vm_start; int i, ret; - if (xe_perf_stream_paranoid && !perfmon_capable()) { + if (xe_observation_paranoid && !perfmon_capable()) { drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n"); return -EACCES; } @@ -1789,8 +1789,8 @@ static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number * @file: @drm_file * * The functions opens an OA stream. An OA stream, opened with specified - * properties, enables perf counter samples to be collected, either - * periodically (time based sampling), or on request (using perf queries) + * properties, enables OA counter samples to be collected, either + * periodically (time based sampling), or on request (using OA queries) */ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) { @@ -1836,8 +1836,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f privileged_op = true; } - if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) { - drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n"); + if (privileged_op && xe_observation_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n"); ret = -EACCES; goto err_exec_q; } @@ -2097,7 +2097,7 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi return -ENODEV; } - if (xe_perf_stream_paranoid && !perfmon_capable()) { + if (xe_observation_paranoid && !perfmon_capable()) { drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); return -EACCES; } @@ -2181,7 +2181,7 @@ reg_err: /** * xe_oa_remove_config_ioctl - Removes one OA config * @dev: @drm_device - * @data: pointer to struct @drm_xe_perf_param + * @data: pointer to struct @drm_xe_observation_param * @file: @drm_file */ int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) @@ -2197,7 +2197,7 @@ int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file return -ENODEV; } - if (xe_perf_stream_paranoid && !perfmon_capable()) { + if (xe_observation_paranoid && !perfmon_capable()) { drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); return -EACCES; } @@ -2381,7 +2381,7 @@ static int xe_oa_init_gt(struct xe_gt *gt) /* * Fused off engines can result in oa_unit's with num_engines == 0. These units - * will appear in OA unit query, but no perf streams can be opened on them. + * will appear in OA unit query, but no OA streams can be opened on them. */ gt->oa.num_oa_units = num_oa_units; gt->oa.oa_unit = u; diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c new file mode 100644 index 000000000000..fcb584b42a7d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include +#include + +#include + +#include "xe_oa.h" +#include "xe_observation.h" + +u32 xe_observation_paranoid = true; +static struct ctl_table_header *sysctl_header; + +static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_observation_param *arg, + struct drm_file *file) +{ + switch (arg->observation_op) { + case DRM_XE_OBSERVATION_OP_STREAM_OPEN: + return xe_oa_stream_open_ioctl(dev, arg->param, file); + case DRM_XE_OBSERVATION_OP_ADD_CONFIG: + return xe_oa_add_config_ioctl(dev, arg->param, file); + case DRM_XE_OBSERVATION_OP_REMOVE_CONFIG: + return xe_oa_remove_config_ioctl(dev, arg->param, file); + default: + return -EINVAL; + } +} + +/** + * xe_observation_ioctl - The top level observation layer ioctl + * @dev: @drm_device + * @data: pointer to struct @drm_xe_observation_param + * @file: @drm_file + * + * The function is called for different observation streams types and + * allows execution of different operations supported by those stream + * types. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_observation_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_xe_observation_param *arg = data; + + if (arg->extensions) + return -EINVAL; + + switch (arg->observation_type) { + case DRM_XE_OBSERVATION_TYPE_OA: + return xe_oa_ioctl(dev, arg, file); + default: + return -EINVAL; + } +} + +static struct ctl_table observation_ctl_table[] = { + { + .procname = "observation_paranoid", + .data = &xe_observation_paranoid, + .maxlen = sizeof(xe_observation_paranoid), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +/** + * xe_observation_sysctl_register - Register xe_observation_paranoid sysctl + * + * Normally only superuser/root can access observation stream + * data. However, superuser can set xe_observation_paranoid sysctl to 0 to + * allow non-privileged users to also access observation data. + * + * Return: always returns 0 + */ +int xe_observation_sysctl_register(void) +{ + sysctl_header = register_sysctl("dev/xe", observation_ctl_table); + return 0; +} + +/** + * xe_observation_sysctl_unregister - Unregister xe_observation_paranoid sysctl + */ +void xe_observation_sysctl_unregister(void) +{ + unregister_sysctl_table(sysctl_header); +} diff --git a/drivers/gpu/drm/xe/xe_observation.h b/drivers/gpu/drm/xe/xe_observation.h new file mode 100644 index 000000000000..17816998e966 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_observation.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OBSERVATION_H_ +#define _XE_OBSERVATION_H_ + +#include + +struct drm_device; +struct drm_file; + +extern u32 xe_observation_paranoid; + +int xe_observation_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int xe_observation_sysctl_register(void); +void xe_observation_sysctl_unregister(void); + +#endif diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c deleted file mode 100644 index d6cd74cadf34..000000000000 --- a/drivers/gpu/drm/xe/xe_perf.c +++ /dev/null @@ -1,92 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023-2024 Intel Corporation - */ - -#include -#include - -#include - -#include "xe_oa.h" -#include "xe_perf.h" - -u32 xe_perf_stream_paranoid = true; -static struct ctl_table_header *sysctl_header; - -static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, - struct drm_file *file) -{ - switch (arg->perf_op) { - case DRM_XE_PERF_OP_STREAM_OPEN: - return xe_oa_stream_open_ioctl(dev, arg->param, file); - case DRM_XE_PERF_OP_ADD_CONFIG: - return xe_oa_add_config_ioctl(dev, arg->param, file); - case DRM_XE_PERF_OP_REMOVE_CONFIG: - return xe_oa_remove_config_ioctl(dev, arg->param, file); - default: - return -EINVAL; - } -} - -/** - * xe_perf_ioctl - The top level perf layer ioctl - * @dev: @drm_device - * @data: pointer to struct @drm_xe_perf_param - * @file: @drm_file - * - * The function is called for different perf streams types and allows execution - * of different operations supported by those perf stream types. - * - * Return: 0 on success or a negative error code on failure. - */ -int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) -{ - struct drm_xe_perf_param *arg = data; - - if (arg->extensions) - return -EINVAL; - - switch (arg->perf_type) { - case DRM_XE_PERF_TYPE_OA: - return xe_oa_ioctl(dev, arg, file); - default: - return -EINVAL; - } -} - -static struct ctl_table perf_ctl_table[] = { - { - .procname = "perf_stream_paranoid", - .data = &xe_perf_stream_paranoid, - .maxlen = sizeof(xe_perf_stream_paranoid), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - {} -}; - -/** - * xe_perf_sysctl_register - Register "perf_stream_paranoid" sysctl - * - * Normally only superuser/root can access perf counter data. However, - * superuser can set perf_stream_paranoid sysctl to 0 to allow non-privileged - * users to also access perf data. - * - * Return: always returns 0 - */ -int xe_perf_sysctl_register(void) -{ - sysctl_header = register_sysctl("dev/xe", perf_ctl_table); - return 0; -} - -/** - * xe_perf_sysctl_unregister - Unregister "perf_stream_paranoid" sysctl - */ -void xe_perf_sysctl_unregister(void) -{ - unregister_sysctl_table(sysctl_header); -} diff --git a/drivers/gpu/drm/xe/xe_perf.h b/drivers/gpu/drm/xe/xe_perf.h deleted file mode 100644 index 53a8377a1bb1..000000000000 --- a/drivers/gpu/drm/xe/xe_perf.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023-2024 Intel Corporation - */ - -#ifndef _XE_PERF_H_ -#define _XE_PERF_H_ - -#include - -struct drm_device; -struct drm_file; - -extern u32 xe_perf_stream_paranoid; - -int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -int xe_perf_sysctl_register(void); -void xe_perf_sysctl_unregister(void); - -#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 12eaa8532b5c..33544ef78d3e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,7 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE - * - &DRM_IOCTL_XE_PERF + * - &DRM_IOCTL_XE_OBSERVATION */ /* @@ -101,7 +101,7 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a -#define DRM_XE_PERF 0x0b +#define DRM_XE_OBSERVATION 0x0b /* Must be kept compact -- no holes */ @@ -116,7 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) -#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) +#define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) /** * DOC: Xe IOCTL Extensions @@ -1376,66 +1376,67 @@ struct drm_xe_wait_user_fence { }; /** - * enum drm_xe_perf_type - Perf stream types + * enum drm_xe_observation_type - Observation stream types */ -enum drm_xe_perf_type { - /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ - DRM_XE_PERF_TYPE_OA, +enum drm_xe_observation_type { + /** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */ + DRM_XE_OBSERVATION_TYPE_OA, }; /** - * enum drm_xe_perf_op - Perf stream ops + * enum drm_xe_observation_op - Observation stream ops */ -enum drm_xe_perf_op { - /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ - DRM_XE_PERF_OP_STREAM_OPEN, +enum drm_xe_observation_op { + /** @DRM_XE_OBSERVATION_OP_STREAM_OPEN: Open an observation stream */ + DRM_XE_OBSERVATION_OP_STREAM_OPEN, - /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ - DRM_XE_PERF_OP_ADD_CONFIG, + /** @DRM_XE_OBSERVATION_OP_ADD_CONFIG: Add observation stream config */ + DRM_XE_OBSERVATION_OP_ADD_CONFIG, - /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ - DRM_XE_PERF_OP_REMOVE_CONFIG, + /** @DRM_XE_OBSERVATION_OP_REMOVE_CONFIG: Remove observation stream config */ + DRM_XE_OBSERVATION_OP_REMOVE_CONFIG, }; /** - * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * struct drm_xe_observation_param - Input of &DRM_XE_OBSERVATION * - * The perf layer enables multiplexing perf counter streams of multiple - * types. The actual params for a particular stream operation are supplied - * via the @param pointer (use __copy_from_user to get these params). + * The observation layer enables multiplexing observation streams of + * multiple types. The actual params for a particular stream operation are + * supplied via the @param pointer (use __copy_from_user to get these + * params). */ -struct drm_xe_perf_param { +struct drm_xe_observation_param { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ - __u64 perf_type; - /** @perf_op: Perf op, of enum @drm_xe_perf_op */ - __u64 perf_op; + /** @observation_type: observation stream type, of enum @drm_xe_observation_type */ + __u64 observation_type; + /** @observation_op: observation stream op, of enum @drm_xe_observation_op */ + __u64 observation_op; /** @param: Pointer to actual stream params */ __u64 param; }; /** - * enum drm_xe_perf_ioctls - Perf fd ioctl's + * enum drm_xe_observation_ioctls - Observation stream fd ioctl's * - * Information exchanged between userspace and kernel for perf fd ioctl's - * is stream type specific + * Information exchanged between userspace and kernel for observation fd + * ioctl's is stream type specific */ -enum drm_xe_perf_ioctls { - /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ - DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), +enum drm_xe_observation_ioctls { + /** @DRM_XE_OBSERVATION_IOCTL_ENABLE: Enable data capture for an observation stream */ + DRM_XE_OBSERVATION_IOCTL_ENABLE = _IO('i', 0x0), - /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ - DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + /** @DRM_XE_OBSERVATION_IOCTL_DISABLE: Disable data capture for a observation stream */ + DRM_XE_OBSERVATION_IOCTL_DISABLE = _IO('i', 0x1), - /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ - DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + /** @DRM_XE_OBSERVATION_IOCTL_CONFIG: Change observation stream configuration */ + DRM_XE_OBSERVATION_IOCTL_CONFIG = _IO('i', 0x2), - /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ - DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + /** @DRM_XE_OBSERVATION_IOCTL_STATUS: Return observation stream status */ + DRM_XE_OBSERVATION_IOCTL_STATUS = _IO('i', 0x3), - /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ - DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), + /** @DRM_XE_OBSERVATION_IOCTL_INFO: Return observation stream info */ + DRM_XE_OBSERVATION_IOCTL_INFO = _IO('i', 0x4), }; /** @@ -1546,12 +1547,12 @@ enum drm_xe_oa_format_type { * Stream params are specified as a chain of @drm_xe_ext_set_property * struct's, with @property values from enum @drm_xe_oa_property_id and * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. - * @param field in struct @drm_xe_perf_param points to the first + * @param field in struct @drm_xe_observation_param points to the first * @drm_xe_ext_set_property struct. * - * Exactly the same mechanism is also used for stream reconfiguration using - * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of - * properties below can be specified for stream reconfiguration. + * Exactly the same mechanism is also used for stream reconfiguration using the + * @DRM_XE_OBSERVATION_IOCTL_CONFIG observation stream fd ioctl, though only a + * subset of properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 @@ -1571,11 +1572,11 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA - * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + * reports, previously added via @DRM_XE_OBSERVATION_OP_ADD_CONFIG. */ DRM_XE_OA_PROPERTY_OA_METRIC_SET, - /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: OA counter report format */ DRM_XE_OA_PROPERTY_OA_FORMAT, /* * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, @@ -1596,13 +1597,13 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA - * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + * stream in a DISABLED state (see @DRM_XE_OBSERVATION_IOCTL_ENABLE). */ DRM_XE_OA_PROPERTY_OA_DISABLED, /** * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific - * @exec_queue_id. Perf queries can be executed on this exec queue. + * @exec_queue_id. OA queries can be executed on this exec queue. */ DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, @@ -1622,7 +1623,7 @@ enum drm_xe_oa_property_id { /** * struct drm_xe_oa_config - OA metric configuration * - * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * Multiple OA configs can be added using @DRM_XE_OBSERVATION_OP_ADD_CONFIG. A * particular config can be specified when opening an OA stream using * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. */ @@ -1645,8 +1646,9 @@ struct drm_xe_oa_config { /** * struct drm_xe_oa_stream_status - OA stream status returned from - * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to - * query stream status in response to EIO errno from perf fd read(). + * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl. Userspace can + * call the ioctl to query stream status in response to EIO errno from + * observation fd read(). */ struct drm_xe_oa_stream_status { /** @extensions: Pointer to the first extension struct, if any */ @@ -1665,7 +1667,7 @@ struct drm_xe_oa_stream_status { /** * struct drm_xe_oa_stream_info - OA stream info returned from - * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + * @DRM_XE_OBSERVATION_IOCTL_INFO observation stream fd ioctl */ struct drm_xe_oa_stream_info { /** @extensions: Pointer to the first extension struct, if any */ -- cgit v1.2.3