From 3b9c181bcde8555ca81b2394c2dc2201cefc2dd4 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 11 Jun 2024 10:47:15 -0700 Subject: devcoredump: Add dev_coredumpm_timeout() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add function to set a custom coredump timeout. For Xe driver usage, current 5 minutes timeout may be too short for users to search and understand what needs to be done to capture coredump to report bugs. We have plans to automate(distribute a udev script) it but at the end will be up to distros and users to pack it so having a option to increase the timeout is a safer option. v2: - replace dev_coredump_timeout_set() by dev_coredumpm_timeout() (Mukesh) v3: - make dev_coredumpm() static inline (Johannes) v5: - rename DEVCOREDUMP_TIMEOUT -> DEVCD_TIMEOUT to avoid redefinition in include/net/bluetooth/coredump.h v6: - fix definition of dev_coredumpm_timeout() when CONFIG_DEV_COREDUMP is disabled Cc: Rodrigo Vivi Cc: Mukesh Ojha Cc: Johannes Berg Cc: Jonathan Cavitt Reviewed-by: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Signed-off-by: José Roberto de Souza Acked-by: Greg Kroah-Hartman Acked-by: Johannes Berg Link: https://patchwork.freedesktop.org/patch/msgid/20240611174716.72660-1-jose.souza@intel.com Signed-off-by: Rodrigo Vivi --- include/linux/devcoredump.h | 53 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c8f7eb6cc191..377892604ff4 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -12,6 +12,9 @@ #include #include +/* if data isn't read by userspace after 5 minutes then delete it */ +#define DEVCD_TIMEOUT (HZ * 60 * 5) + /* * _devcd_free_sgtable - free all the memory of the given scatterlist table * (i.e. 
both pages and scatterlist instances) @@ -50,16 +53,17 @@ static inline void _devcd_free_sgtable(struct scatterlist *table) kfree(delete_iter); } - #ifdef CONFIG_DEV_COREDUMP void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp); -void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)); +void dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout); void dev_coredumpsg(struct device *dev, struct scatterlist *table, size_t datalen, gfp_t gfp); @@ -73,11 +77,13 @@ static inline void dev_coredumpv(struct device *dev, void *data, } static inline void -dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)) +dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout) { free(data); } @@ -92,4 +98,29 @@ static inline void dev_coredump_put(struct device *dev) } #endif /* CONFIG_DEV_COREDUMP */ +/** + * dev_coredumpm - create device coredump with read/free methods + * @dev: the struct device for the crashed device + * @owner: the module that contains the read/free functions, use %THIS_MODULE + * @data: data cookie for the @read/@free functions + * @datalen: length of the data + * @gfp: allocation flags + * @read: function to read from the given buffer + * @free: function to free the given buffer + * + * Creates a new device coredump for the given 
device. If a previous one hasn't + * been read yet, the new coredump is discarded. The data lifetime is determined + * by the device coredump framework and when it is no longer needed the @free + * function will be called to free the data. + */ +static inline void dev_coredumpm(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen), + void (*free)(void *data)) +{ + dev_coredumpm_timeout(dev, owner, data, datalen, gfp, read, free, + DEVCD_TIMEOUT); +} + #endif /* __DEVCOREDUMP_H */ -- cgit v1.2.3 From e54700f7d6aa2ae0d0a0aeeebedcecd7ce1123fe Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 3 Jun 2024 20:24:30 +0530 Subject: drm/xe/bmg: Add PCI IDs Add the initial set of device IDs for Battlemage. Signed-off-by: Matt Roper Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20240603145430.1260817-1-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 3 ++- include/drm/xe_pciids.h | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index e84da0cbb8e9..08583fdd7643 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -340,7 +340,7 @@ static const struct xe_device_desc lnl_desc = { .require_force_probe = true, }; -static const struct xe_device_desc bmg_desc __maybe_unused = { +static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), .require_force_probe = true, @@ -389,6 +389,7 @@ static const struct pci_device_id pciidlist[] = { XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), + XE_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index 
adb37bc541e4..644872a35c35 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -192,4 +192,11 @@ MACRO__(0x64A0, ## __VA_ARGS__), \ MACRO__(0x64B0, ## __VA_ARGS__) +#define XE_BMG_IDS(MACRO__, ...) \ + MACRO__(0xE202, ## __VA_ARGS__), \ + MACRO__(0xE20B, ## __VA_ARGS__), \ + MACRO__(0xE20C, ## __VA_ARGS__), \ + MACRO__(0xE20D, ## __VA_ARGS__), \ + MACRO__(0xE212, ## __VA_ARGS__) + #endif -- cgit v1.2.3 From 52c2e956dcebecc8901911217a9647203ebcaf3c Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:53 -0700 Subject: drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the plan is to support multiple types of perf counter streams (OA is only one type of these streams). Rather than introduce NxM ioctls for these (N perf streams with M ioctl's per perf stream), we decide to multiplex these (N different stream types and the M ops for each of these stream types) through a single PERF ioctl. This multiplexing is the purpose of the PERF layer. In addition to PERF DRM ioctl's, another set of ioctl's on the PERF fd are defined. These are expected to be common to different PERF stream types and therefore defined at the PERF layer itself. 
v2: Add param_size to 'struct drm_xe_perf_param' (Umesh) v3: Rename 'enum drm_xe_perf_ops' to 'enum drm_xe_perf_ioctls' (Guy Zadicario) Add DRM_ prefix to ioctl names to indicate uapi names v4: Add 'enum drm_xe_perf_op' previously missed out (Guy Zadicario) v5: Squash the ops and PERF layer patches into a single patch (Umesh) Remove param_size from struct 'drm_xe_perf_param' (Umesh) v6: Add DRM_XE_PERF_IOCTL_STATUS v7: Add DRM_XE_PERF_IOCTL_INFO v8: Fix Copyright years, fix DRM_XE_PERF_TYPE_MAX, move '#include "xe_perf.h"' to xe_perf.c, add kernel doc (Michal) Acked-by: Rodrigo Vivi Acked-by: Guy Zadicario Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-2-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_perf.c | 34 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_perf.h | 14 +++++++++ include/uapi/drm/xe_drm.h | 66 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_perf.c create mode 100644 drivers/gpu/drm/xe/xe_perf.h (limited to 'include') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index cbf961b90237..f99492449e5d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -95,6 +95,7 @@ xe-y += xe_bb.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ + xe_perf.o \ xe_pm.o \ xe_preempt_fence.o \ xe_pt.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 64691a56d59c..a44093cbbb71 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -44,6 +44,7 @@ #include "xe_module.h" #include "xe_pat.h" #include "xe_pcode.h" +#include "xe_perf.h" #include "xe_pm.h" #include "xe_query.h" #include "xe_sriov.h" @@ -141,6 +142,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), 
DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_PERF, xe_perf_ioctl, DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c new file mode 100644 index 000000000000..2963174ecd0e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include + +#include "xe_perf.h" + +/** + * xe_perf_ioctl - The top level perf layer ioctl + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + * + * The function is called for different perf streams types and allows execution + * of different operations supported by those perf stream types. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_xe_perf_param *arg = data; + + if (arg->extensions) + return -EINVAL; + + switch (arg->perf_type) { + default: + return -EINVAL; + } +} diff --git a/drivers/gpu/drm/xe/xe_perf.h b/drivers/gpu/drm/xe/xe_perf.h new file mode 100644 index 000000000000..e7e258eaf0a9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_perf.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_PERF_H_ +#define _XE_PERF_H_ + +struct drm_device; +struct drm_file; + +int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index d7b0903c22b2..c1626027dc69 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,6 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE + * - &DRM_IOCTL_XE_PERF */ /* @@ -100,6 +101,8 @@ extern "C" { #define 
DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a +#define DRM_XE_PERF 0x0b + /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -113,6 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) +#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) /** * DOC: Xe IOCTL Extensions @@ -1370,6 +1374,68 @@ struct drm_xe_wait_user_fence { __u64 reserved[2]; }; +/** + * enum drm_xe_perf_type - Perf stream types + */ +enum drm_xe_perf_type { + __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ +}; + +/** + * enum drm_xe_perf_op - Perf stream ops + */ +enum drm_xe_perf_op { + /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ + DRM_XE_PERF_OP_STREAM_OPEN, + + /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ + DRM_XE_PERF_OP_ADD_CONFIG, + + /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ + DRM_XE_PERF_OP_REMOVE_CONFIG, +}; + +/** + * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * + * The perf layer enables multiplexing perf counter streams of multiple + * types. The actual params for a particular stream operation are supplied + * via the @param pointer (use __copy_from_user to get these params). 
+ */ +struct drm_xe_perf_param { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ + __u64 perf_type; + /** @perf_op: Perf op, of enum @drm_xe_perf_op */ + __u64 perf_op; + /** @param: Pointer to actual stream params */ + __u64 param; +}; + +/** + * enum drm_xe_perf_ioctls - Perf fd ioctl's + * + * Information exchanged between userspace and kernel for perf fd ioctl's + * is stream type specific + */ +enum drm_xe_perf_ioctls { + /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ + DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), + + /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ + DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + + /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ + DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + + /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ + DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + + /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ + DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 67977882a2f1339f0a7d32576ad61967828b2ca5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:55 -0700 Subject: drm/xe/oa/uapi: Add OA data formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add and initialize supported OA data formats for various platforms (including Xe2). User can request OA data in any supported format. 
Bspec: 52198, 60942, 61101 v2: Start 'xe_oa_format_name' enum from 0 (Umesh) Fix error rewind with OA (Umesh) v3: Use graphics versions rather than absolute platform names v4: Add missing kernel doc for struct memebers and enum and other minor changes (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-4-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 11 +++- drivers/gpu/drm/xe/xe_device_types.h | 4 ++ drivers/gpu/drm/xe/xe_oa.c | 111 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 16 +++++ drivers/gpu/drm/xe/xe_oa_types.h | 83 ++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 19 ++++++ 7 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_oa.c create mode 100644 drivers/gpu/drm/xe/xe_oa.h create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h (limited to 'include') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f99492449e5d..7039008be234 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -92,6 +92,7 @@ xe-y += xe_bb.o \ xe_mmio.o \ xe_mocs.o \ xe_module.o \ + xe_oa.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a44093cbbb71..1195c64a715a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -656,10 +656,14 @@ int xe_device_probe(struct xe_device *xe) xe_heci_gsc_init(xe); - err = xe_display_init(xe); + err = xe_oa_init(xe); if (err) goto err_fini_gt; + err = xe_display_init(xe); + if (err) + goto err_fini_oa; + err = drm_dev_register(&xe->drm, 0); if (err) goto err_fini_display; @@ -675,6 +679,9 @@ int xe_device_probe(struct xe_device *xe) err_fini_display: xe_display_driver_remove(xe); +err_fini_oa: + xe_oa_fini(xe); + err_fini_gt: for_each_gt(gt, xe, 
id) { if (id < last_gt) @@ -707,6 +714,8 @@ void xe_device_remove(struct xe_device *xe) xe_display_fini(xe); + xe_oa_fini(xe); + xe_heci_gsc_fini(xe); for_each_gt(gt, xe, id) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 52bc461171d5..185986e1d586 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -17,6 +17,7 @@ #include "xe_gt_types.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" +#include "xe_oa.h" #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" @@ -462,6 +463,9 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @oa: oa perf counter subsystem */ + struct xe_oa oa; + /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c new file mode 100644 index 000000000000..5c0179ff4f60 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_macros.h" +#include "xe_oa.h" + +#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x + +static const struct xe_oa_format oa_formats[] = { + [XE_OA_FORMAT_C4_B8] = { 7, 64, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A12] = { 0, 64, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A12_B8_C8] = { 2, 128, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, + [XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAR) }, + [XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, + [XE_OAC_FORMAT_A24u64_B8_C8] = { 1, 320, DRM_FMT(OAC), HDR_64_BIT }, + [XE_OAC_FORMAT_A22u32_R2u32_B8_C8] = { 2, 192, DRM_FMT(OAC), HDR_64_BIT }, + [XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT }, + [XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, DRM_FMT(OAM_MPEC), HDR_64_BIT }, + 
[XE_OA_FORMAT_PEC64u64] = { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC64u64_B8_C8] = { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 }, + [XE_OA_FORMAT_PEC64u32] = { 1, 320, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC32u64_G1] = { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC32u32_G1] = { 5, 192, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC32u64_G2] = { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC32u32_G2] = { 6, 192, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, +}; + +static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) +{ + __set_bit(format, oa->format_mask); +} + +static void xe_oa_init_supported_formats(struct xe_oa *oa) +{ + if (GRAPHICS_VER(oa->xe) >= 20) { + /* Xe2+ */ + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_PEC64u64); + oa_format_add(oa, XE_OA_FORMAT_PEC64u64_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_PEC64u32); + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G1); + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G1); + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G2); + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G2); + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_32_G2_4); + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_4_G2_32); + } else if (GRAPHICS_VERx100(oa->xe) >= 1270) { + /* XE_METEORLAKE */ + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); + } else if (GRAPHICS_VERx100(oa->xe) >= 1255) { + /* XE_DG2, XE_PVC */ + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, 
XE_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); + } else { + /* Gen12+ */ + xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12); + oa_format_add(oa, XE_OA_FORMAT_A12); + oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_C4_B8); + } +} + +/** + * xe_oa_init - OA initialization during device probe + * @xe: @xe_device + * + * Return: 0 on success or a negative error code on failure + */ +int xe_oa_init(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + /* Support OA only with GuC submission and Gen12+ */ + if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12)) + return 0; + + oa->xe = xe; + oa->oa_formats = oa_formats; + + xe_oa_init_supported_formats(oa); + return 0; +} + +/** + * xe_oa_fini - OA de-initialization during device remove + * @xe: @xe_device + */ +void xe_oa_fini(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + oa->xe = NULL; +} diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h new file mode 100644 index 000000000000..2647c1947746 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OA_H_ +#define _XE_OA_H_ + +#include "xe_oa_types.h" + +struct xe_device; + +int xe_oa_init(struct xe_device *xe); +void xe_oa_fini(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h new file mode 100644 index 000000000000..99940e25b1c6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OA_TYPES_H_ +#define _XE_OA_TYPES_H_ + +#include +#include + +enum xe_oa_report_header { + HDR_32_BIT = 0, + HDR_64_BIT, +}; + +enum xe_oa_format_name { + XE_OA_FORMAT_C4_B8, 
+ + /* Gen8+ */ + XE_OA_FORMAT_A12, + XE_OA_FORMAT_A12_B8_C8, + XE_OA_FORMAT_A32u40_A4u32_B8_C8, + + /* DG2 */ + XE_OAR_FORMAT_A32u40_A4u32_B8_C8, + XE_OA_FORMAT_A24u40_A14u32_B8_C8, + + /* DG2/MTL OAC */ + XE_OAC_FORMAT_A24u64_B8_C8, + XE_OAC_FORMAT_A22u32_R2u32_B8_C8, + + /* MTL OAM */ + XE_OAM_FORMAT_MPEC8u64_B8_C8, + XE_OAM_FORMAT_MPEC8u32_B8_C8, + + /* Xe2+ */ + XE_OA_FORMAT_PEC64u64, + XE_OA_FORMAT_PEC64u64_B8_C8, + XE_OA_FORMAT_PEC64u32, + XE_OA_FORMAT_PEC32u64_G1, + XE_OA_FORMAT_PEC32u32_G1, + XE_OA_FORMAT_PEC32u64_G2, + XE_OA_FORMAT_PEC32u32_G2, + XE_OA_FORMAT_PEC36u64_G1_32_G2_4, + XE_OA_FORMAT_PEC36u64_G1_4_G2_32, + + __XE_OA_FORMAT_MAX, +}; + +/** + * struct xe_oa_format - Format fields for supported OA formats. OA format + * properties are specified in PRM/Bspec 52198 and 60942 + */ +struct xe_oa_format { + /** @counter_select: counter select value (see Bspec 52198/60942) */ + u32 counter_select; + /** @size: record size as written by HW (multiple of 64 byte cachelines) */ + int size; + /** @type: of enum @drm_xe_oa_format_type */ + int type; + /** @header: 32 or 64 bit report headers */ + enum xe_oa_report_header header; + /** @counter_size: counter size value (see Bspec 60942) */ + u16 counter_size; + /** @bc_report: BC report value (see Bspec 60942) */ + u16 bc_report; +}; + +/** + * struct xe_oa - OA device level information + */ +struct xe_oa { + /** @xe: back pointer to xe device */ + struct xe_device *xe; + + /** @oa_formats: tracks all OA formats across platforms */ + const struct xe_oa_format *oa_formats; + + /** @format_mask: tracks valid OA formats for a platform */ + unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)]; +}; +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c1626027dc69..7e10874bfb33 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1436,6 +1436,25 @@ enum drm_xe_perf_ioctls { DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), }; +/** + * enum drm_xe_oa_format_type - OA 
format types as specified in PRM/Bspec + * 52198/60942 + */ +enum drm_xe_oa_format_type { + /** @DRM_XE_OA_FMT_TYPE_OAG: OAG report format */ + DRM_XE_OA_FMT_TYPE_OAG, + /** @DRM_XE_OA_FMT_TYPE_OAR: OAR report format */ + DRM_XE_OA_FMT_TYPE_OAR, + /** @DRM_XE_OA_FMT_TYPE_OAM: OAM report format */ + DRM_XE_OA_FMT_TYPE_OAM, + /** @DRM_XE_OA_FMT_TYPE_OAC: OAC report format */ + DRM_XE_OA_FMT_TYPE_OAC, + /** @DRM_XE_OA_FMT_TYPE_OAM_MPEC: OAM SAMEDIA or OAM MPEC report format */ + DRM_XE_OA_FMT_TYPE_OAM_MPEC, + /** @DRM_XE_OA_FMT_TYPE_PEC: PEC report format */ + DRM_XE_OA_FMT_TYPE_PEC, +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From a9f905ae7b6f29a337dda2ad773c08b92dafe9a5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:56 -0700 Subject: drm/xe/oa/uapi: Initialize OA units MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize OA unit data struct's for each gt during device probe. Also assign OA units for hardware engines. 
v2: Remove XE_OA_UNIT_OAG/XE_OA_UNIT_OAM_SAMEDIA_0 enum (Umesh) Change mtl_oa_base to 0x13000 (Umesh) v3: Switch to drmm_ functions and other cleanups (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-5-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/regs/xe_oa_regs.h | 92 +++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 4 + drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 + drivers/gpu/drm/xe/xe_oa.c | 156 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 54 +++++++++++ include/uapi/drm/xe_drm.h | 14 +++ 6 files changed, 322 insertions(+) create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h (limited to 'include') diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h new file mode 100644 index 000000000000..99bad563d51d --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __XE_OA_REGS__ +#define __XE_OA_REGS__ + +#define RPM_CONFIG1 XE_REG(0xd04) +#define GT_NOA_ENABLE REG_BIT(9) + +#define EU_PERF_CNTL0 XE_REG(0xe458) +#define EU_PERF_CNTL4 XE_REG(0xe45c) +#define EU_PERF_CNTL1 XE_REG(0xe558) +#define EU_PERF_CNTL5 XE_REG(0xe55c) +#define EU_PERF_CNTL2 XE_REG(0xe658) +#define EU_PERF_CNTL6 XE_REG(0xe65c) +#define EU_PERF_CNTL3 XE_REG(0xe758) + +#define OA_TLB_INV_CR XE_REG(0xceec) + +/* OAR unit */ +#define OAR_OACONTROL XE_REG(0x2960) +#define OAR_OACONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAR_OACONTROL_COUNTER_ENABLE REG_BIT(0) + +#define OACTXCONTROL(base) XE_REG((base) + 0x360) +#define OAR_OASTATUS XE_REG(0x2968) +#define OA_COUNTER_RESUME REG_BIT(0) + +/* OAG unit */ +#define OAG_OAGLBCTXCTRL XE_REG(0x2b28) +#define OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK REG_GENMASK(7, 2) +#define OAG_OAGLBCTXCTRL_TIMER_ENABLE REG_BIT(1) +#define 
OAG_OAGLBCTXCTRL_COUNTER_RESUME REG_BIT(0) + +#define OAG_OAHEADPTR XE_REG(0xdb00) +#define OAG_OAHEADPTR_MASK REG_GENMASK(31, 6) +#define OAG_OATAILPTR XE_REG(0xdb04) +#define OAG_OATAILPTR_MASK REG_GENMASK(31, 6) + +#define OAG_OABUFFER XE_REG(0xdb08) +#define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) +#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) +#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) +#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) +#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) +#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) +#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) +#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) +#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) +#define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ + +#define OAG_OACONTROL XE_REG(0xdaf4) +#define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16) +#define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2) +#define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0) +/* Common to all OA units */ +#define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9) +#define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) + +#define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) +#define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) +#define OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5) +#define OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1) + +#define OAG_OASTATUS XE_REG(0xdafc) +#define OASTATUS_MMIO_TRG_Q_FULL REG_BIT(6) +#define OASTATUS_COUNTER_OVERFLOW REG_BIT(2) +#define OASTATUS_BUFFER_OVERFLOW REG_BIT(1) +#define OASTATUS_REPORT_LOST REG_BIT(0) +/* OAM unit */ +#define OAM_HEAD_POINTER_OFFSET (0x1a0) +#define OAM_TAIL_POINTER_OFFSET (0x1a4) +#define OAM_BUFFER_OFFSET (0x1a8) +#define OAM_CONTEXT_CONTROL_OFFSET (0x1bc) +#define OAM_CONTROL_OFFSET (0x194) +#define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAM_DEBUG_OFFSET (0x198) 
+#define OAM_STATUS_OFFSET (0x19c) +#define OAM_MMIO_TRG_OFFSET (0x1d0) + +#define OAM_HEAD_POINTER(base) XE_REG((base) + OAM_HEAD_POINTER_OFFSET) +#define OAM_TAIL_POINTER(base) XE_REG((base) + OAM_TAIL_POINTER_OFFSET) +#define OAM_BUFFER(base) XE_REG((base) + OAM_BUFFER_OFFSET) +#define OAM_CONTEXT_CONTROL(base) XE_REG((base) + OAM_CONTEXT_CONTROL_OFFSET) +#define OAM_CONTROL(base) XE_REG((base) + OAM_CONTROL_OFFSET) +#define OAM_DEBUG(base) XE_REG((base) + OAM_DEBUG_OFFSET) +#define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET) +#define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET) + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 10a9a9529377..24bb95de920f 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -12,6 +12,7 @@ #include "xe_gt_sriov_vf_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" +#include "xe_oa.h" #include "xe_reg_sr_types.h" #include "xe_sa_types.h" #include "xe_uc_types.h" @@ -387,6 +388,9 @@ struct xe_gt { */ u8 instances_per_class[XE_ENGINE_CLASS_MAX]; } user_engines; + + /** @oa: oa perf counter subsystem per gt info */ + struct xe_oa_gt oa; }; #endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 580bbd7e83b2..70e6434f150d 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -148,6 +148,8 @@ struct xe_hw_engine { enum xe_hw_engine_id engine_id; /** @eclass: pointer to per hw engine class interface */ struct xe_hw_engine_class_intf *eclass; + /** @oa_unit: oa unit for this hw engine */ + struct xe_oa_unit *oa_unit; }; /** diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 5c0179ff4f60..e836fafa9fb3 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,13 +3,20 @@ * Copyright © 2023-2024 Intel Corporation */ +#include #include +#include "regs/xe_oa_regs.h" #include 
"xe_assert.h" #include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_macros.h" +#include "xe_mmio.h" #include "xe_oa.h" +#define XE_OA_UNIT_INVALID U32_MAX + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -34,6 +41,142 @@ static const struct xe_oa_format oa_formats[] = { [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, }; +static u32 num_oa_units_per_gt(struct xe_gt *gt) +{ + return 1; +} + +static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) +{ + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { + /* + * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices + * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA + */ + xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); + + return 0; + } + + return XE_OA_UNIT_INVALID; +} + +static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) +{ + switch (hwe->class) { + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + return 0; + + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return __hwe_oam_unit(hwe); + + default: + return XE_OA_UNIT_INVALID; + } +} + +static struct xe_oa_regs __oam_regs(u32 base) +{ + return (struct xe_oa_regs) { + base, + OAM_HEAD_POINTER(base), + OAM_TAIL_POINTER(base), + OAM_BUFFER(base), + OAM_CONTEXT_CONTROL(base), + OAM_CONTROL(base), + OAM_DEBUG(base), + OAM_STATUS(base), + OAM_CONTROL_COUNTER_SEL_MASK, + }; +} + +static struct xe_oa_regs __oag_regs(void) +{ + return (struct xe_oa_regs) { + 0, + OAG_OAHEADPTR, + OAG_OATAILPTR, + OAG_OABUFFER, + OAG_OAGLBCTXCTRL, + OAG_OACONTROL, + OAG_OA_DEBUG, + OAG_OASTATUS, + OAG_OACONTROL_OA_COUNTER_SEL_MASK, + }; +} + +static void __xe_oa_init_oa_units(struct xe_gt *gt) +{ + const u32 mtl_oa_base[] = { 0x13000 }; + int i, num_units = gt->oa.num_oa_units; + + for (i = 0; i < num_units; i++) { + struct xe_oa_unit *u = &gt->oa.oa_unit[i]; + + if (gt->info.type != XE_GT_TYPE_MEDIA) { + 
u->regs = __oag_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_OAG; + } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + u->regs = __oam_regs(mtl_oa_base[i]); + u->type = DRM_XE_OA_UNIT_TYPE_OAM; + } + + /* Set oa_unit_ids now to ensure ids remain contiguous */ + u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++; + } +} + +static int xe_oa_init_gt(struct xe_gt *gt) +{ + u32 num_oa_units = num_oa_units_per_gt(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_oa_unit *u; + + u = drmm_kcalloc(>_to_xe(gt)->drm, num_oa_units, sizeof(*u), GFP_KERNEL); + if (!u) + return -ENOMEM; + + for_each_hw_engine(hwe, gt, id) { + u32 index = __hwe_oa_unit(hwe); + + hwe->oa_unit = NULL; + if (index < num_oa_units) { + u[index].num_engines++; + hwe->oa_unit = &u[index]; + } + } + + /* + * Fused off engines can result in oa_unit's with num_engines == 0. These units + * will appear in OA unit query, but no perf streams can be opened on them. + */ + gt->oa.num_oa_units = num_oa_units; + gt->oa.oa_unit = u; + + __xe_oa_init_oa_units(gt); + + drmm_mutex_init(>_to_xe(gt)->drm, >->oa.gt_lock); + + return 0; +} + +static int xe_oa_init_oa_units(struct xe_oa *oa) +{ + struct xe_gt *gt; + int i, ret; + + for_each_gt(gt, oa->xe, i) { + ret = xe_oa_init_gt(gt); + if (ret) + return ret; + } + + return 0; +} + static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) { __set_bit(format, oa->format_mask); @@ -87,6 +230,7 @@ static void xe_oa_init_supported_formats(struct xe_oa *oa) int xe_oa_init(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; + int ret; /* Support OA only with GuC submission and Gen12+ */ if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12)) @@ -95,8 +239,17 @@ int xe_oa_init(struct xe_device *xe) oa->xe = xe; oa->oa_formats = oa_formats; + ret = xe_oa_init_oa_units(oa); + if (ret) { + drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); + goto exit; + } + xe_oa_init_supported_formats(oa); return 0; 
+exit: + oa->xe = NULL; + return ret; } /** @@ -107,5 +260,8 @@ void xe_oa_fini(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; + if (!oa->xe) + return; + oa->xe = NULL; } diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 99940e25b1c6..e7b91e31f0e8 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -7,8 +7,12 @@ #define _XE_OA_TYPES_H_ #include +#include #include +#include +#include "regs/xe_reg_defs.h" + enum xe_oa_report_header { HDR_32_BIT = 0, HDR_64_BIT, @@ -67,6 +71,53 @@ struct xe_oa_format { u16 bc_report; }; +/** struct xe_oa_regs - Registers for each OA unit */ +struct xe_oa_regs { + u32 base; + struct xe_reg oa_head_ptr; + struct xe_reg oa_tail_ptr; + struct xe_reg oa_buffer; + struct xe_reg oa_ctx_ctrl; + struct xe_reg oa_ctrl; + struct xe_reg oa_debug; + struct xe_reg oa_status; + u32 oa_ctrl_counter_select_mask; +}; + +/** + * struct xe_oa_unit - Hardware OA unit + */ +struct xe_oa_unit { + /** @oa_unit_id: identifier for the OA unit */ + u16 oa_unit_id; + + /** @type: Type of OA unit - OAM, OAG etc. 
*/ + enum drm_xe_oa_unit_type type; + + /** @regs: OA registers for programming the OA unit */ + struct xe_oa_regs regs; + + /** @num_engines: number of engines attached to this OA unit */ + u32 num_engines; + + /** @exclusive_stream: The stream currently using the OA unit */ + struct xe_oa_stream *exclusive_stream; +}; + +/** + * struct xe_oa_gt - OA per-gt information + */ +struct xe_oa_gt { + /** @gt_lock: lock protecting create/destroy OA streams */ + struct mutex gt_lock; + + /** @num_oa_units: number of oa units for each gt */ + u32 num_oa_units; + + /** @oa_unit: array of oa_units */ + struct xe_oa_unit *oa_unit; +}; + /** * struct xe_oa - OA device level information */ @@ -79,5 +130,8 @@ struct xe_oa { /** @format_mask: tracks valid OA formats for a platform */ unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)]; + + /** @oa_unit_ids: tracks oa unit ids assigned across gt's */ + u16 oa_unit_ids; }; #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 7e10874bfb33..323d899a276b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1436,6 +1436,20 @@ enum drm_xe_perf_ioctls { DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), }; +/** + * enum drm_xe_oa_unit_type - OA unit types + */ +enum drm_xe_oa_unit_type { + /** + * @DRM_XE_OA_UNIT_TYPE_OAG: OAG OA unit. OAR/OAC are considered + * sub-types of OAG. For OAR/OAC, use OAG. + */ + DRM_XE_OA_UNIT_TYPE_OAG, + + /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ + DRM_XE_OA_UNIT_TYPE_OAM, +}; + /** * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec * 52198/60942 -- cgit v1.2.3 From cdf02fe1a94a768cbcd20f5c4e1a1d805f4a06c0 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:57 -0700 Subject: drm/xe/oa/uapi: Add/remove OA config perf ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce add/remove config perf ops for OA. 
OA configurations consist of a set of event/counter select register address/value pairs. The add_config perf op validates and stores such configurations and also exposes them in the metrics sysfs. These configurations will be programmed to OA unit HW when an OA stream using a configuration is opened. The OA stream can also switch to other stored configurations. v2: Start config id's from 1 and other minor review comments (Umesh) v3: Add 32 bit build v4: Add kernel doc for non-static functions (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-6-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_device.c | 4 + drivers/gpu/drm/xe/xe_oa.c | 434 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 6 + drivers/gpu/drm/xe/xe_oa_types.h | 10 + drivers/gpu/drm/xe/xe_perf.c | 16 ++ include/uapi/drm/xe_drm.h | 25 +++ 6 files changed, 495 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1195c64a715a..31b549f5f03a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -670,6 +670,8 @@ int xe_device_probe(struct xe_device *xe) xe_display_register(xe); + xe_oa_register(xe); + xe_debugfs_register(xe); xe_hwmon_register(xe); @@ -710,6 +712,8 @@ void xe_device_remove(struct xe_device *xe) struct xe_gt *gt; u8 id; + xe_oa_unregister(xe); + xe_device_remove_display(xe); xe_display_fini(xe); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index e836fafa9fb3..4122785735d4 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -14,9 +14,32 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" +#include "xe_perf.h" #define XE_OA_UNIT_INVALID U32_MAX +struct xe_oa_reg { + struct xe_reg addr; + u32 value; +}; + +struct xe_oa_config { + struct xe_oa *oa; + + char 
uuid[UUID_STRING_LEN + 1]; + int id; + + const struct xe_oa_reg *regs; + u32 regs_len; + + struct attribute_group sysfs_metric; + struct attribute *attrs[2]; + struct kobj_attribute sysfs_metric_id; + + struct kref ref; + struct rcu_head rcu; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -41,6 +64,405 @@ static const struct xe_oa_format oa_formats[] = { [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, }; +static void xe_oa_config_release(struct kref *ref) +{ + struct xe_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); + + kfree(oa_config->regs); + + kfree_rcu(oa_config, rcu); +} + +static void xe_oa_config_put(struct xe_oa_config *oa_config) +{ + if (!oa_config) + return; + + kref_put(&oa_config->ref, xe_oa_config_release); +} + +static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) +{ + static const struct xe_reg flex_eu_regs[] = { + EU_PERF_CNTL0, + EU_PERF_CNTL1, + EU_PERF_CNTL2, + EU_PERF_CNTL3, + EU_PERF_CNTL4, + EU_PERF_CNTL5, + EU_PERF_CNTL6, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { + if (flex_eu_regs[i].addr == addr) + return true; + } + return false; +} + +static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table) +{ + while (table->start && table->end) { + if (addr >= table->start && addr <= table->end) + return true; + + table++; + } + + return false; +} + +static const struct xe_mmio_range xehp_oa_b_counters[] = { + { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ + { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ + {} +}; + +static const struct xe_mmio_range gen12_oa_b_counters[] = { + { .start = 0x2b2c, .end = 0x2b2c }, /* OAG_OA_PESS */ + { .start = 0xd900, .end = 0xd91c }, /* OAG_OASTARTTRIG[1-8] */ + { .start = 0xd920, .end = 0xd93c }, /* OAG_OAREPORTTRIG1[1-8] */ + { .start = 0xd940, .end = 0xd97c }, /* OAG_CEC[0-7][0-1] */ + { .start = 0xdc00, .end = 
0xdc3c }, /* OAG_SCEC[0-7][0-1] */ + { .start = 0xdc40, .end = 0xdc40 }, /* OAG_SPCTR_CNF */ + { .start = 0xdc44, .end = 0xdc44 }, /* OAA_DBG_REG */ + {} +}; + +static const struct xe_mmio_range mtl_oam_b_counters[] = { + { .start = 0x393000, .end = 0x39301c }, /* OAM_STARTTRIG1[1-8] */ + { .start = 0x393020, .end = 0x39303c }, /* OAM_REPORTTRIG1[1-8] */ + { .start = 0x393040, .end = 0x39307c }, /* OAM_CEC[0-7][0-1] */ + { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */ + {} +}; + +static const struct xe_mmio_range xe2_oa_b_counters[] = { + { .start = 0x393200, .end = 0x39323C }, /* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */ + { .start = 0x394200, .end = 0x39423C }, /* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */ + { .start = 0x394A00, .end = 0x394A3C }, /* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */ + {}, +}; + +static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr) +{ + return xe_oa_reg_in_range_table(addr, xehp_oa_b_counters) || + xe_oa_reg_in_range_table(addr, gen12_oa_b_counters) || + xe_oa_reg_in_range_table(addr, mtl_oam_b_counters) || + (GRAPHICS_VER(oa->xe) >= 20 && + xe_oa_reg_in_range_table(addr, xe2_oa_b_counters)); +} + +static const struct xe_mmio_range mtl_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */ + {} +}; + +static const struct xe_mmio_range gen12_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */ + {} +}; + +static const struct xe_mmio_range xe2_oa_mux_regs[] = { + { .start = 0x5194, .end = 0x5194 }, /* 
SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ + { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ + { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ + { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ + { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ + { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ + {}, +}; + +static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr) +{ + if (GRAPHICS_VER(oa->xe) >= 20) + return xe_oa_reg_in_range_table(addr, xe2_oa_mux_regs); + else if (GRAPHICS_VERx100(oa->xe) >= 1270) + return xe_oa_reg_in_range_table(addr, mtl_oa_mux_regs); + else + return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs); +} + +static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr) +{ + return xe_oa_is_valid_flex_addr(oa, addr) || + xe_oa_is_valid_b_counter_addr(oa, addr) || + xe_oa_is_valid_mux_addr(oa, addr); +} + +static struct xe_oa_reg * +xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr), + u32 __user *regs, u32 n_regs) +{ + struct xe_oa_reg *oa_regs; + int err; + u32 i; + + oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); + if (!oa_regs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < n_regs; i++) { + u32 addr, value; + + err = get_user(addr, regs); + if (err) + goto addr_err; + + if (!is_valid(oa, addr)) { + drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr); + err = -EINVAL; + goto addr_err; + } + + err = get_user(value, regs + 1); + if (err) + goto addr_err; + + oa_regs[i].addr = XE_REG(addr); + oa_regs[i].value = value; + + regs += 2; + } + + return oa_regs; + +addr_err: + kfree(oa_regs); + return ERR_PTR(err); +} + +static ssize_t show_dynamic_id(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_oa_config *oa_config = + container_of(attr, typeof(*oa_config), sysfs_metric_id); + + return sysfs_emit(buf, "%d\n", oa_config->id); +} + +static int 
create_dynamic_oa_sysfs_entry(struct xe_oa *oa, + struct xe_oa_config *oa_config) +{ + sysfs_attr_init(&oa_config->sysfs_metric_id.attr); + oa_config->sysfs_metric_id.attr.name = "id"; + oa_config->sysfs_metric_id.attr.mode = 0444; + oa_config->sysfs_metric_id.show = show_dynamic_id; + oa_config->sysfs_metric_id.store = NULL; + + oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; + oa_config->attrs[1] = NULL; + + oa_config->sysfs_metric.name = oa_config->uuid; + oa_config->sysfs_metric.attrs = oa_config->attrs; + + return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric); +} + +/** + * xe_oa_add_config_ioctl - Adds one OA config + * @dev: @drm_device + * @data: pointer to struct @drm_xe_oa_config + * @file: @drm_file + * + * The functions adds an OA config to the set of OA configs maintained in + * the kernel. The config determines which OA metrics are collected for an + * OA stream. + */ +int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct drm_xe_oa_config param; + struct drm_xe_oa_config *arg = &param; + struct xe_oa_config *oa_config, *tmp; + struct xe_oa_reg *regs; + int err, id; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); + return -EACCES; + } + + err = __copy_from_user(&param, u64_to_user_ptr(data), sizeof(param)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, arg->extensions) || + XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) || + XE_IOCTL_DBG(oa->xe, !arg->n_regs)) + return -EINVAL; + + oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); + if (!oa_config) + return -ENOMEM; + + oa_config->oa = oa; + kref_init(&oa_config->ref); + + if (!uuid_is_valid(arg->uuid)) { + drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n"); + err =
-EINVAL; + goto reg_err; + } + + /* Last character in oa_config->uuid will be 0 because oa_config is kzalloc */ + memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid)); + + oa_config->regs_len = arg->n_regs; + regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr, + u64_to_user_ptr(arg->regs_ptr), + arg->n_regs); + if (IS_ERR(regs)) { + drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n"); + err = PTR_ERR(regs); + goto reg_err; + } + oa_config->regs = regs; + + err = mutex_lock_interruptible(&oa->metrics_lock); + if (err) + goto reg_err; + + /* We shouldn't have too many configs, so this iteration shouldn't be too costly */ + idr_for_each_entry(&oa->metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, oa_config->uuid)) { + drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n"); + err = -EADDRINUSE; + goto sysfs_err; + } + } + + err = create_dynamic_oa_sysfs_entry(oa, oa_config); + if (err) { + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); + goto sysfs_err; + } + + oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL); + if (oa_config->id < 0) { + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); + err = oa_config->id; + goto sysfs_err; + } + + mutex_unlock(&oa->metrics_lock); + + drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); + + return oa_config->id; + +sysfs_err: + mutex_unlock(&oa->metrics_lock); +reg_err: + xe_oa_config_put(oa_config); + drm_dbg(&oa->xe->drm, "Failed to add new OA config\n"); + return err; +} + +/** + * xe_oa_remove_config_ioctl - Removes one OA config + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + */ +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_oa_config *oa_config; + u64 arg, *ptr = u64_to_user_ptr(data); + int ret; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa 
interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); + return -EACCES; + } + + ret = get_user(arg, ptr); + if (XE_IOCTL_DBG(oa->xe, ret)) + return ret; + + ret = mutex_lock_interruptible(&oa->metrics_lock); + if (ret) + return ret; + + oa_config = idr_find(&oa->metrics_idr, arg); + if (!oa_config) { + drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n"); + ret = -ENOENT; + goto err_unlock; + } + + WARN_ON(arg != oa_config->id); + + sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric); + idr_remove(&oa->metrics_idr, arg); + + mutex_unlock(&oa->metrics_lock); + + drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); + + xe_oa_config_put(oa_config); + + return 0; + +err_unlock: + mutex_unlock(&oa->metrics_lock); + return ret; +} + +/** + * xe_oa_register - Xe OA registration + * @xe: @xe_device + * + * Exposes the metrics sysfs directory upon completion of module initialization + */ +void xe_oa_register(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->xe) + return; + + oa->metrics_kobj = kobject_create_and_add("metrics", + &xe->drm.primary->kdev->kobj); +} + +/** + * xe_oa_unregister - Xe OA de-registration + * @xe: @xe_device + */ +void xe_oa_unregister(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->metrics_kobj) + return; + + kobject_put(oa->metrics_kobj); + oa->metrics_kobj = NULL; +} + static u32 num_oa_units_per_gt(struct xe_gt *gt) { return 1; @@ -239,6 +661,9 @@ int xe_oa_init(struct xe_device *xe) oa->xe = xe; oa->oa_formats = oa_formats; + drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock); + idr_init_base(&oa->metrics_idr, 1); + ret = xe_oa_init_oa_units(oa); if (ret) { drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); @@ -252,6 +677,12 @@ exit: return ret; } +static int destroy_config(int id, void *p, void 
*data) +{ + xe_oa_config_put(p); + return 0; +} + /** * xe_oa_fini - OA de-initialization during device remove * @xe: @xe_device @@ -263,5 +694,8 @@ void xe_oa_fini(struct xe_device *xe) if (!oa->xe) return; + idr_for_each(&oa->metrics_idr, destroy_config, oa); + idr_destroy(&oa->metrics_idr); + oa->xe = NULL; } diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index 2647c1947746..5ccc772e047a 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -8,9 +8,15 @@ #include "xe_oa_types.h" +struct drm_device; +struct drm_file; struct xe_device; int xe_oa_init(struct xe_device *xe); void xe_oa_fini(struct xe_device *xe); +void xe_oa_register(struct xe_device *xe); +void xe_oa_unregister(struct xe_device *xe); +int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); #endif diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index e7b91e31f0e8..f8a45015cf49 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -7,6 +7,7 @@ #define _XE_OA_TYPES_H_ #include +#include #include #include @@ -125,6 +126,15 @@ struct xe_oa { /** @xe: back pointer to xe device */ struct xe_device *xe; + /** @metrics_kobj: kobj for metrics sysfs */ + struct kobject *metrics_kobj; + + /** @metrics_lock: lock protecting add/remove configs */ + struct mutex metrics_lock; + + /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ + struct idr metrics_idr; + /** @oa_formats: tracks all OA formats across platforms */ const struct xe_oa_format *oa_formats; diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c index f619cf50b453..ca01042d75b1 100644 --- a/drivers/gpu/drm/xe/xe_perf.c +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -8,11 +8,25 @@ #include +#include "xe_oa.h" #include "xe_perf.h" u32 xe_perf_stream_paranoid = true; static struct ctl_table_header 
*sysctl_header; +static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, + struct drm_file *file) +{ + switch (arg->perf_op) { + case DRM_XE_PERF_OP_ADD_CONFIG: + return xe_oa_add_config_ioctl(dev, arg->param, file); + case DRM_XE_PERF_OP_REMOVE_CONFIG: + return xe_oa_remove_config_ioctl(dev, arg->param, file); + default: + return -EINVAL; + } +} + /** * xe_perf_ioctl - The top level perf layer ioctl * @dev: @drm_device @@ -32,6 +46,8 @@ int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return -EINVAL; switch (arg->perf_type) { + case DRM_XE_PERF_TYPE_OA: + return xe_oa_ioctl(dev, arg, file); default: return -EINVAL; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 323d899a276b..fd9a4bd9e3d4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1378,6 +1378,7 @@ struct drm_xe_wait_user_fence { * enum drm_xe_perf_type - Perf stream types */ enum drm_xe_perf_type { + DRM_XE_PERF_TYPE_OA, __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ }; @@ -1469,6 +1470,30 @@ enum drm_xe_oa_format_type { DRM_XE_OA_FMT_TYPE_PEC, }; +/** + * struct drm_xe_oa_config - OA metric configuration + * + * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * particular config can be specified when opening an OA stream using + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. + */ +struct drm_xe_oa_config { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @uuid: String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" */ + char uuid[36]; + + /** @n_regs: Number of regs in @regs_ptr */ + __u32 n_regs; + + /** + * @regs_ptr: Pointer to (register address, value) pairs for OA config + * registers. Expected length of buffer is: (2 * sizeof(u32) * @n_regs). 
+ */ + __u64 regs_ptr; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From b6fd51c6211910b1db072a3fa2a17ba85cb3dd51 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:58 -0700 Subject: drm/xe/oa/uapi: Define and parse OA stream properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properties for OA streams are specified by user space, when the stream is opened, as a chain of drm_xe_ext_set_property struct's. Parse and validate these stream properties. v2: Remove struct drm_xe_oa_open_param (Harish Chegondi) Drop DRM_XE_OA_PROPERTY_POLL_OA_PERIOD_US (Umesh) Eliminate comparison with xe_oa_max_sample_rate (Umesh) Drop 'struct drm_xe_oa_record_header' (Umesh) v3: s/DRM_XE_OA_PROPERTY_OA_EXPONENT/ \ DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT/ (Jose) v4: Fix 32 bit build v5: Add non-static function kernel doc (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-7-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 364 +++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 5 + drivers/gpu/drm/xe/xe_perf.c | 2 + include/uapi/drm/xe_drm.h | 72 +++++++++ 4 files changed, 443 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4122785735d4..9b23eadf56cd 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,18 +3,23 @@ * Copyright © 2023-2024 Intel Corporation */ +#include + #include #include +#include "regs/xe_gt_regs.h" #include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_device.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" #include "xe_perf.h" +#include "xe_pm.h" #define XE_OA_UNIT_INVALID U32_MAX @@ -40,6 +45,19 @@ struct xe_oa_config { 
struct rcu_head rcu; }; +struct xe_oa_open_param { + u32 oa_unit_id; + bool sample; + u32 metric_set; + enum xe_oa_format_name oa_format; + int period_exponent; + bool disabled; + int exec_queue_id; + int engine_instance; + struct xe_exec_queue *exec_q; + struct xe_hw_engine *hwe; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -82,6 +100,352 @@ static void xe_oa_config_put(struct xe_oa_config *oa_config) kref_put(&oa_config->ref, xe_oa_config_release); } +/** + * xe_oa_timestamp_frequency - Return OA timestamp frequency + * @gt: @xe_gt + * + * OA timestamp frequency = CS timestamp frequency in most platforms. On some + * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such + * cases, return the adjusted CS timestamp frequency to the user. + */ +u32 xe_oa_timestamp_frequency(struct xe_gt *gt) +{ + u32 reg, shift; + + /* + * Wa_18013179988:dg2 + * Wa_14015568240:pvc + * Wa_14015846243:mtl + */ + switch (gt_to_xe(gt)->info.platform) { + case XE_DG2: + case XE_PVC: + case XE_METEORLAKE: + xe_pm_runtime_get(gt_to_xe(gt)); + reg = xe_mmio_read32(gt, RPM_CONFIG0); + xe_pm_runtime_put(gt_to_xe(gt)); + + shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); + return gt->info.reference_clock << (3 - shift); + + default: + return gt->info.reference_clock; + } +} + +static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) +{ + u64 nom = (2ULL << exponent) * NSEC_PER_SEC; + u32 den = xe_oa_timestamp_frequency(gt); + + return div_u64(nom + den - 1, den); +} + +static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) +{ + switch (hwe->oa_unit->type) { + case DRM_XE_OA_UNIT_TYPE_OAG: + return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || + type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; + case DRM_XE_OA_UNIT_TYPE_OAM: + return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; + default: + return false; + } 
+} + +static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) +{ + u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); + u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); + u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); + u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); + int idx; + + for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { + const struct xe_oa_format *f = &oa->oa_formats[idx]; + + if (counter_size == f->counter_size && bc_report == f->bc_report && + type == f->type && counter_sel == f->counter_select) { + *name = idx; + return 0; + } + } + + return -EINVAL; +} + +/** + * xe_oa_unit_id - Return OA unit ID for a hardware engine + * @hwe: @xe_hw_engine + * + * Return OA unit ID for a hardware engine when available + */ +u16 xe_oa_unit_id(struct xe_hw_engine *hwe) +{ + return hwe->oa_unit && hwe->oa_unit->num_engines ? + hwe->oa_unit->oa_unit_id : U16_MAX; +} + +static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) +{ + struct xe_gt *gt; + int i, ret = 0; + + if (param->exec_q) { + /* When we have an exec_q, get hwe from the exec_q */ + param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, + param->engine_instance, true); + } else { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + /* Else just get the first hwe attached to the oa unit */ + for_each_gt(gt, oa->xe, i) { + for_each_hw_engine(hwe, gt, id) { + if (xe_oa_unit_id(hwe) == param->oa_unit_id) { + param->hwe = hwe; + goto out; + } + } + } + } +out: + if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { + drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", + param->exec_q ? 
param->exec_q->class : -1, + param->engine_instance, param->oa_unit_id); + ret = -EINVAL; + } + + return ret; +} + +static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (value >= oa->oa_unit_ids) { + drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); + return -EINVAL; + } + param->oa_unit_id = value; + return 0; +} + +static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->sample = value; + return 0; +} + +static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->metric_set = value; + return 0; +} + +static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + int ret = decode_oa_format(oa, value, &param->oa_format); + + if (ret) { + drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); + return ret; + } + return 0; +} + +static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ +#define OA_EXPONENT_MAX 31 + + if (value > OA_EXPONENT_MAX) { + drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); + return -EINVAL; + } + param->period_exponent = value; + return 0; +} + +static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->disabled = value; + return 0; +} + +static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->exec_queue_id = value; + return 0; +} + +static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->engine_instance = value; + return 0; +} + +typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param); +static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, +
[DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, +}; + +static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, + struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) || + XE_IOCTL_DBG(oa->xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); + return xe_oa_set_property_funcs[idx](oa, ext.value, param); +} + +typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, + struct xe_oa_open_param *param); +static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { + [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, +}; + +static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, + struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(oa->xe, ext_number >= DRM_XE_OA_PROPERTY_MAX)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.pad) || + XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); + err = 
xe_oa_user_extension_funcs[idx](oa, extension, param); + if (XE_IOCTL_DBG(oa->xe, err)) + return err; + + if (ext.next_extension) + return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param); + + return 0; +} + +/** + * xe_oa_stream_open_ioctl - Opens an OA stream + * @dev: @drm_device + * @data: pointer to struct @drm_xe_oa_config + * @file: @drm_file + * + * The function opens an OA stream. An OA stream, opened with specified + * properties, enables perf counter samples to be collected, either + * periodically (time based sampling), or on request (using perf queries) + */ +int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_file *xef = to_xe_file(file); + struct xe_oa_open_param param = {}; + const struct xe_oa_format *f; + bool privileged_op = true; + int ret; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + ret = xe_oa_user_extensions(oa, data, 0, &param); + if (ret) + return ret; + + if (param.exec_queue_id > 0) { + param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id); + if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) + return -ENOENT; + } + + /* + * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC, + * without global stream access, can be an unprivileged operation + */ + if (param.exec_q && !param.sample) + privileged_op = false; + + if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n"); + ret = -EACCES; + goto err_exec_q; + } + + if (!param.exec_q && !param.sample) { + drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n"); + ret = -EINVAL; + goto err_exec_q; + } + + ret = xe_oa_assign_hwe(oa, &param); + if (ret) + goto err_exec_q; + + f = &oa->oa_formats[param.oa_format]; + if (!param.oa_format || !f->size || + !engine_supports_oa_format(param.hwe, f->type)) { +
drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", + param.oa_format, f->type, f->size, param.hwe->class); + ret = -EINVAL; + goto err_exec_q; + } + + if (param.period_exponent > 0) { + u64 oa_period, oa_freq_hz; + + /* Requesting samples from OAG buffer is a privileged operation */ + if (!param.sample) { + drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n"); + ret = -EINVAL; + goto err_exec_q; + } + oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent); + oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period); + drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); + } +err_exec_q: + if (ret < 0 && param.exec_q) + xe_exec_queue_put(param.exec_q); + return ret; +} + static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) { static const struct xe_reg flex_eu_regs[] = { diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index 5ccc772e047a..87a38820c317 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -11,12 +11,17 @@ struct drm_device; struct drm_file; struct xe_device; +struct xe_gt; +struct xe_hw_engine; int xe_oa_init(struct xe_device *xe); void xe_oa_fini(struct xe_device *xe); void xe_oa_register(struct xe_device *xe); void xe_oa_unregister(struct xe_device *xe); +int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +u32 xe_oa_timestamp_frequency(struct xe_gt *gt); +u16 xe_oa_unit_id(struct xe_hw_engine *hwe); #endif diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c index ca01042d75b1..d6cd74cadf34 100644 --- a/drivers/gpu/drm/xe/xe_perf.c +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -18,6 +18,8 @@ static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, struct drm_file *file) { switch (arg->perf_op) 
{ + case DRM_XE_PERF_OP_STREAM_OPEN: + return xe_oa_stream_open_ioctl(dev, arg->param, file); case DRM_XE_PERF_OP_ADD_CONFIG: return xe_oa_add_config_ioctl(dev, arg->param, file); case DRM_XE_PERF_OP_REMOVE_CONFIG: diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index fd9a4bd9e3d4..307409f968e2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1470,6 +1470,78 @@ enum drm_xe_oa_format_type { DRM_XE_OA_FMT_TYPE_PEC, }; +/** + * enum drm_xe_oa_property_id - OA stream property id's + * + * Stream params are specified as a chain of @drm_xe_ext_set_property + * struct's, with @property values from enum @drm_xe_oa_property_id and + * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. + * @param field in struct @drm_xe_perf_param points to the first + * @drm_xe_ext_set_property struct. + */ +enum drm_xe_oa_property_id { +#define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open + * the OA stream, see @oa_unit_id in 'struct + * drm_xe_query_oa_units'. Defaults to 0 if not provided. + */ + DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1, + + /** + * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests inclusion of raw + * OA unit reports or stream samples in a global buffer attached to an + * OA unit. + */ + DRM_XE_OA_PROPERTY_SAMPLE_OA, + + /** + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA + * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + */ + DRM_XE_OA_PROPERTY_OA_METRIC_SET, + + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + DRM_XE_OA_PROPERTY_OA_FORMAT, + /* + * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, + * in terms of the following quantities: a. enum @drm_xe_oa_format_type + * b. Counter select c. Counter size and d. BC report. Also refer to the + * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. 
+ */ +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) + + /** + * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit + * sampling with sampling frequency proportional to 2^(period_exponent + 1) + */ + DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, + + /** + * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA + * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + */ + DRM_XE_OA_PROPERTY_OA_DISABLED, + + /** + * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific + * @exec_queue_id. Perf queries can be executed on this exec queue. + */ + DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, + + /** + * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to + * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. + */ + DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** @DRM_XE_OA_PROPERTY_MAX: non-ABI */ + DRM_XE_OA_PROPERTY_MAX +}; + /** * struct drm_xe_oa_config - OA metric configuration * -- cgit v1.2.3 From e936f885f1e96f59d9d05fb6cb5a02b9b9b88a05 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:00 -0700 Subject: drm/xe/oa/uapi: Expose OA stream fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OA stream open perf op returns an fd with its own file_operations for the newly initialized OA stream. These file_operations allow userspace to enable or disable the stream, as well as apply a different metric configuration for the OA stream. Userspace can also poll for data availability. OA stream initialization is completed in this commit by enabling the OA stream. When sampling is enabled this starts a hrtimer which periodically checks for data availability.
v2: Use stream properties for stream reconfiguration with DRM_XE_PERF_IOCTL_CONFIG v3: Hold runtime_pm reference across oa buffer alloc/free v4: Fix 32 bit build Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-9-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 380 +++++++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 4 + 2 files changed, 384 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index c2fd2d22677f..a71111859190 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,8 +3,10 @@ * Copyright © 2023-2024 Intel Corporation */ +#include #include #include +#include #include #include @@ -29,6 +31,7 @@ #include "xe_pm.h" #include "xe_sched_job.h" +#define OA_TAKEN(tail, head) (((tail) - (head)) & (XE_OA_BUFFER_SIZE - 1)) #define DEFAULT_POLL_FREQUENCY_HZ 200 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX @@ -147,6 +150,205 @@ static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) return &stream->hwe->oa_unit->regs; } +static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) +{ + return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) & + OAG_OATAILPTR_MASK; +} + +#define oa_report_header_64bit(__s) \ + ((__s)->oa_buffer.format->header == HDR_64_BIT) + +static u64 oa_report_id(struct xe_oa_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; +} + +static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? 
+ *((u64 *)report + 1) : + *((u32 *)report + 1); +} + +static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) +{ + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + int report_size = stream->oa_buffer.format->size; + u32 tail, hw_tail; + unsigned long flags; + bool pollin; + u32 partial_report_size; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + hw_tail = xe_oa_hw_tail_read(stream); + hw_tail -= gtt_offset; + + /* + * The tail pointer increases in 64 byte (cacheline size), not in report_size + * increments. Also report size may not be a power of 2. Compute potential + * partially landed report in OA buffer. + */ + partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); + partial_report_size %= report_size; + + /* Subtract partial amount off the tail */ + hw_tail = OA_TAKEN(hw_tail, partial_report_size); + + tail = hw_tail; + + /* + * Walk the stream backward until we find a report with report id and timestamp + * not 0. We can't tell whether a report has fully landed in memory before the + * report id and timestamp of the following report have landed. + * + * This is assuming that the writes of the OA unit land in memory in the order + * they were written. 
If not : (╯°□°)╯︵ ┻━┻ + */ + while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) { + void *report = stream->oa_buffer.vaddr + tail; + + if (oa_report_id(stream, report) || oa_timestamp(stream, report)) + break; + + tail = OA_TAKEN(tail, report_size); + } + + if (OA_TAKEN(hw_tail, tail) > report_size) + drm_dbg(&stream->oa->xe->drm, + "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", + stream->oa_buffer.head, tail, hw_tail); + + stream->oa_buffer.tail = tail; + + pollin = OA_TAKEN(stream->oa_buffer.tail, + stream->oa_buffer.head) >= report_size; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + return pollin; +} + +static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) +{ + struct xe_oa_stream *stream = + container_of(hrtimer, typeof(*stream), poll_check_timer); + + if (xe_oa_buffer_check_unlocked(stream)) { + stream->pollin = true; + wake_up(&stream->poll_wq); + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); + + return HRTIMER_RESTART; +} + +static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) +{ + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; + unsigned long flags; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0); + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, + gtt_offset & OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = 0; + + /* + * PRM says: "This MMIO must be set before the OATAILPTR register and after the + * OAHEADPTR register. This is to enable proper functionality of the overflow bit". 
+ */ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf); + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr, + gtt_offset & OAG_OATAILPTR_MASK); + + /* Mark that we need updated tail pointer to read from */ + stream->oa_buffer.tail = 0; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ + memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); +} + +static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) +{ + return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) | + REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) | + REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size); +} + +static void xe_oa_enable(struct xe_oa_stream *stream) +{ + const struct xe_oa_format *format = stream->oa_buffer.format; + const struct xe_oa_regs *regs; + u32 val; + + /* + * BSpec: 46822: Bit 0. 
Even if stream->sample is 0, for OAR to function, the OA + * buffer must be correctly initialized + */ + xe_oa_init_oa_buffer(stream); + + regs = __oa_regs(stream); + val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | + OAG_OACONTROL_OA_COUNTER_ENABLE; + + xe_mmio_write32(stream->gt, regs->oa_ctrl, val); +} + +static void xe_oa_disable(struct xe_oa_stream *stream) +{ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0); + if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl, + OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) + drm_err(&stream->oa->xe->drm, + "wait for OA to be disabled timed out\n"); + + if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { + /* <= XE_METEORLAKE except XE_PVC */ + xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1); + if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) + drm_err(&stream->oa->xe->drm, + "wait for OA tlb invalidate timed out\n"); + } +} + +static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, + struct file *file, poll_table *wait) +{ + __poll_t events = 0; + + poll_wait(file, &stream->poll_wq, wait); + + /* + * We don't explicitly check whether there's something to read here since this + * path may be hot depending on what else userspace is polling, or on the timeout + * in use. 
We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there + * are samples to read + */ + if (stream->pollin) + events |= EPOLLIN; + + return events; +} + +static __poll_t xe_oa_poll(struct file *file, poll_table *wait) +{ + struct xe_oa_stream *stream = file->private_data; + __poll_t ret; + + mutex_lock(&stream->stream_lock); + ret = xe_oa_poll_locked(stream, file, wait); + mutex_unlock(&stream->stream_lock); + + return ret; +} + static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) { struct xe_sched_job *job; @@ -246,6 +448,27 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0); } +static void xe_oa_stream_destroy(struct xe_oa_stream *stream) +{ + struct xe_oa_unit *u = stream->hwe->oa_unit; + struct xe_gt *gt = stream->hwe->gt; + + if (WARN_ON(stream != u->exclusive_stream)) + return; + + WRITE_ONCE(u->exclusive_stream, NULL); + + xe_oa_disable_metric_set(stream); + xe_exec_queue_put(stream->k_exec_q); + + xe_oa_free_oa_buffer(stream); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_pm_runtime_put(stream->oa->xe); + + xe_oa_free_configs(stream); +} + static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) { struct xe_bo *bo; @@ -383,6 +606,148 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) return xe_oa_emit_oa_config(stream); } +static void xe_oa_stream_enable(struct xe_oa_stream *stream) +{ + stream->pollin = false; + + xe_oa_enable(stream); + + if (stream->sample) + hrtimer_start(&stream->poll_check_timer, + ns_to_ktime(stream->poll_period_ns), + HRTIMER_MODE_REL_PINNED); +} + +static void xe_oa_stream_disable(struct xe_oa_stream *stream) +{ + xe_oa_disable(stream); + + if (stream->sample) + hrtimer_cancel(&stream->poll_check_timer); +} + +static void xe_oa_enable_locked(struct xe_oa_stream *stream) +{ + if (stream->enabled) + return; + + stream->enabled = true; + + xe_oa_stream_enable(stream); +} + +static void 
xe_oa_disable_locked(struct xe_oa_stream *stream) +{ + if (!stream->enabled) + return; + + stream->enabled = false; + + xe_oa_stream_disable(stream); +} + +static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) +{ + struct drm_xe_ext_set_property ext; + long ret = stream->oa_config->id; + struct xe_oa_config *config; + int err; + + err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); + if (XE_IOCTL_DBG(stream->oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || + XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || + XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || + XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) + return -EINVAL; + + config = xe_oa_get_oa_config(stream->oa, ext.value); + if (!config) + return -ENODEV; + + if (config != stream->oa_config) { + err = xe_oa_emit_oa_config(stream); + if (!err) + config = xchg(&stream->oa_config, config); + else + ret = err; + } + + xe_oa_config_put(config); + + return ret; +} + +static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, + unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case DRM_XE_PERF_IOCTL_ENABLE: + xe_oa_enable_locked(stream); + return 0; + case DRM_XE_PERF_IOCTL_DISABLE: + xe_oa_disable_locked(stream); + return 0; + case DRM_XE_PERF_IOCTL_CONFIG: + return xe_oa_config_locked(stream, arg); + } + + return -EINVAL; +} + +static long xe_oa_ioctl(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + struct xe_oa_stream *stream = file->private_data; + long ret; + + mutex_lock(&stream->stream_lock); + ret = xe_oa_ioctl_locked(stream, cmd, arg); + mutex_unlock(&stream->stream_lock); + + return ret; +} + +static void xe_oa_destroy_locked(struct xe_oa_stream *stream) +{ + if (stream->enabled) + xe_oa_disable_locked(stream); + + xe_oa_stream_destroy(stream); + + if (stream->exec_q) + xe_exec_queue_put(stream->exec_q); + + kfree(stream); +} + +static int 
xe_oa_release(struct inode *inode, struct file *file) +{ + struct xe_oa_stream *stream = file->private_data; + struct xe_gt *gt = stream->gt; + + mutex_lock(&gt->oa.gt_lock); + xe_oa_destroy_locked(stream); + mutex_unlock(&gt->oa.gt_lock); + + /* Release the reference the perf stream kept on the driver */ + drm_dev_put(&gt_to_xe(gt)->drm); + + return 0; +} + +static const struct file_operations xe_oa_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .release = xe_oa_release, + .poll = xe_oa_poll, + .unlocked_ioctl = xe_oa_ioctl, +}; + static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { @@ -436,6 +801,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, WRITE_ONCE(u->exclusive_stream, stream); + hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stream->poll_check_timer.function = xe_oa_poll_check_timer_cb; + init_waitqueue_head(&stream->poll_wq); + spin_lock_init(&stream->oa_buffer.ptr_lock); mutex_init(&stream->stream_lock); @@ -479,10 +848,21 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, if (ret) goto err_free; + stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0); + if (stream_fd < 0) { + ret = stream_fd; + goto err_destroy; + } + + if (!param->disabled) + xe_oa_enable_locked(stream); + /* Hold a reference on the drm device till stream_fd is released */ drm_dev_get(&stream->oa->xe->drm); return stream_fd; +err_destroy: + xe_oa_stream_destroy(stream); err_free: kfree(stream); exit: diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 307409f968e2..1e09f786b3e6 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1478,6 +1478,10 @@ enum drm_xe_oa_format_type { * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. * @param field in struct @drm_xe_perf_param points to the first * @drm_xe_ext_set_property struct.
+ * + * Exactly the same mechanism is also used for stream reconfiguration using + * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of + * properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 -- cgit v1.2.3 From efb315d0a013cdc8b1e49f5c07b1a2972bc624d4 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:01 -0700 Subject: drm/xe/oa/uapi: Read file_operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the OA stream read file_operation. Both blocking and non-blocking reads are supported. As part of read system call, the read copies OA perf data from the OA buffer to the user buffer, after appending packet headers for status and data packets. v2: Drop OA report headers, implement DRM_XE_PERF_IOCTL_STATUS (Umesh) v3: Introduce 'struct drm_xe_oa_stream_status' v4: Define oa_status register bitfields (Umesh) v5: Add extensions to 'struct drm_xe_oa_stream_status' v6: Minor cleanup, eliminate report32 variable v7: Use -EIO to signal to userspace to read OASTATUS using DRM_XE_PERF_IOCTL_STATUS, change previous sites returning -EIO to return -EINVAL Make drm_xe_oa_stream_status bits contiguous (Jose, Umesh) rmw oa_status bits (Umesh) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-10-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 201 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 3 + include/uapi/drm/xe_drm.h | 20 ++++ 3 files changed, 224 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a71111859190..86d56b080eff 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -164,6 +164,14 @@ static u64 oa_report_id(struct xe_oa_stream 
*stream, void *report) return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; } +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)report = 0; + else + *report = 0; +} + static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) { return oa_report_header_64bit(stream) ? @@ -171,6 +179,14 @@ static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) *((u32 *)report + 1); } +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)&report[2] = 0; + else + report[1] = 0; +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -245,6 +261,95 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) return HRTIMER_RESTART; } +static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset, const u8 *report) +{ + int report_size = stream->oa_buffer.format->size; + int report_size_partial; + u8 *oa_buf_end; + + if ((count - *offset) < report_size) + return -ENOSPC; + + buf += *offset; + + oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + report_size_partial = oa_buf_end - report; + + if (report_size_partial < report_size) { + if (copy_to_user(buf, report, report_size_partial)) + return -EFAULT; + buf += report_size_partial; + + if (copy_to_user(buf, stream->oa_buffer.vaddr, + report_size - report_size_partial)) + return -EFAULT; + } else if (copy_to_user(buf, report, report_size)) { + return -EFAULT; + } + + *offset += report_size; + + return 0; +} + +static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset) +{ + int report_size = stream->oa_buffer.format->size; + u8 *oa_buf_base = stream->oa_buffer.vaddr; + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 mask = 
(XE_OA_BUFFER_SIZE - 1); + size_t start_offset = *offset; + unsigned long flags; + u32 head, tail; + int ret = 0; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + head = stream->oa_buffer.head; + tail = stream->oa_buffer.tail; + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE); + + for (; OA_TAKEN(tail, head); head = (head + report_size) & mask) { + u8 *report = oa_buf_base + head; + + ret = xe_oa_append_report(stream, buf, count, offset, report); + if (ret) + break; + + if (is_power_of_2(report_size)) { + /* Clear out report id and timestamp to detect unlanded reports */ + oa_report_id_clear(stream, (void *)report); + oa_timestamp_clear(stream, (void *)report); + } else { + u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + u32 part = oa_buf_end - report; + + /* Zero out the entire report */ + if (report_size <= part) { + memset(report, 0, report_size); + } else { + memset(report, 0, part); + memset(oa_buf_base, 0, report_size - part); + } + } + } + + if (start_offset != *offset) { + struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + xe_mmio_write32(stream->gt, oaheadptr, + (head + gtt_offset) & OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = head; + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + } + + return ret; +} + static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -318,6 +423,78 @@ static void xe_oa_disable(struct xe_oa_stream *stream) } } +static int xe_oa_wait_unlocked(struct xe_oa_stream *stream) +{ + /* We might wait indefinitely if periodic sampling is not enabled */ + if (!stream->periodic) + return -EINVAL; + + return wait_event_interruptible(stream->poll_wq, + xe_oa_buffer_check_unlocked(stream)); +} + +#define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | 
OASTATUS_COUNTER_OVERFLOW | \ + OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST) + +static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset) +{ + /* Only clear our bits to avoid side-effects */ + stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status, + OASTATUS_RELEVANT_BITS, 0); + /* + * Signal to userspace that there is non-zero OA status to read via + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl + */ + if (stream->oa_status & OASTATUS_RELEVANT_BITS) + return -EIO; + + return xe_oa_append_reports(stream, buf, count, offset); +} + +static ssize_t xe_oa_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct xe_oa_stream *stream = file->private_data; + size_t offset = 0; + int ret; + + /* Can't read from disabled streams */ + if (!stream->enabled || !stream->sample) + return -EINVAL; + + if (!(file->f_flags & O_NONBLOCK)) { + do { + ret = xe_oa_wait_unlocked(stream); + if (ret) + return ret; + + mutex_lock(&stream->stream_lock); + ret = __xe_oa_read(stream, buf, count, &offset); + mutex_unlock(&stream->stream_lock); + } while (!offset && !ret); + } else { + mutex_lock(&stream->stream_lock); + ret = __xe_oa_read(stream, buf, count, &offset); + mutex_unlock(&stream->stream_lock); + } + + /* + * Typically we clear pollin here in order to wait for the new hrtimer callback + * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC, + * which means that more OA data is available than could fit in the user provided + * buffer. In this case we want the next poll() call to not block. + * + * Also in case of -EIO, we have already waited for data before returning + * -EIO, so no need to wait again + */ + if (ret != -ENOSPC && ret != -EIO) + stream->pollin = false; + + /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ...
*/ + return offset ?: (ret ?: -EAGAIN); +} + static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, struct file *file, poll_table *wait) { @@ -680,6 +857,27 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) return ret; } +static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) +{ + struct drm_xe_oa_stream_status status = {}; + void __user *uaddr = (void __user *)arg; + + /* Map from register to uapi bits */ + if (stream->oa_status & OASTATUS_REPORT_LOST) + status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST; + if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW) + status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW; + if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW) + status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW; + if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL) + status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL; + + if (copy_to_user(uaddr, &status, sizeof(status))) + return -EFAULT; + + return 0; +} + static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, unsigned int cmd, unsigned long arg) @@ -693,6 +891,8 @@ static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, return 0; case DRM_XE_PERF_IOCTL_CONFIG: return xe_oa_config_locked(stream, arg); + case DRM_XE_PERF_IOCTL_STATUS: + return xe_oa_status_locked(stream, arg); } return -EINVAL; @@ -745,6 +945,7 @@ static const struct file_operations xe_oa_fops = { .llseek = no_llseek, .release = xe_oa_release, .poll = xe_oa_poll, + .read = xe_oa_read, .unlocked_ioctl = xe_oa_ioctl, }; diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 6700383b1a52..5bb8ce0d71c9 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -222,5 +222,8 @@ struct xe_oa_stream { /** @poll_period_ns: hrtimer period for checking OA buffer for available data */ u64 poll_period_ns; + + /** @oa_status: temporary storage for oa_status register value */ + u32 oa_status; }; #endif diff --git 
a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1e09f786b3e6..03a6e479227a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1570,6 +1570,26 @@ struct drm_xe_oa_config { __u64 regs_ptr; }; +/** + * struct drm_xe_oa_stream_status - OA stream status returned from + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to + * query stream status in response to EIO errno from perf fd read(). + */ +struct drm_xe_oa_stream_status { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_status: OA stream status (see Bspec 46717/61226) */ + __u64 oa_status; +#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL (1 << 3) +#define DRM_XE_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define DRM_XE_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define DRM_XE_OASTATUS_REPORT_LOST (1 << 0) + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From dd6b4718c3bab611588922ae8a7736c58eafcc93 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:04 -0700 Subject: drm/xe/oa/uapi: Query OA unit properties Implement query for properties of OA units present on a device. 
v2: Clean up reserved/pad fields (Umesh) Follow the same scheme as other query structs v3: Skip reporting reserved engines attached to OA units v4: Expose oa_buf_size via DRM_XE_PERF_IOCTL_INFO (Umesh) v5: Don't expose capabilities as OR of properties (Umesh) v6: Add extensions to query output structs: drm_xe_oa_unit, drm_xe_query_oa_units and drm_xe_oa_stream_info v7: Change oa_units[] array to __u64 type Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-13-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 13 +++++++ drivers/gpu/drm/xe/xe_query.c | 77 +++++++++++++++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 85 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 42b0ba014e35..038caeb7c9e7 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1050,6 +1050,17 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) return 0; } +static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) +{ + struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; + void __user *uaddr = (void __user *)arg; + + if (copy_to_user(uaddr, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, unsigned int cmd, unsigned long arg) @@ -1065,6 +1076,8 @@ static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, return xe_oa_config_locked(stream, arg); case DRM_XE_PERF_IOCTL_STATUS: return xe_oa_status_locked(stream, arg); + case DRM_XE_PERF_IOCTL_INFO: + return xe_oa_info_locked(stream, arg); } return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 995effcb904b..4e01df6b1b7a 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ 
b/drivers/gpu/drm/xe/xe_query.c @@ -602,6 +602,82 @@ query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query) return 0; } +static size_t calc_oa_unit_query_size(struct xe_device *xe) +{ + size_t size = sizeof(struct drm_xe_query_oa_units); + struct xe_gt *gt; + int i, id; + + for_each_gt(gt, xe, id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + size += sizeof(struct drm_xe_oa_unit); + size += gt->oa.oa_unit[i].num_engines * + sizeof(struct drm_xe_engine_class_instance); + } + } + + return size; +} + +static int query_oa_units(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + void __user *query_ptr = u64_to_user_ptr(query->data); + size_t size = calc_oa_unit_query_size(xe); + struct drm_xe_query_oa_units *qoa; + enum xe_hw_engine_id hwe_id; + struct drm_xe_oa_unit *du; + struct xe_hw_engine *hwe; + struct xe_oa_unit *u; + int gt_id, i, j, ret; + struct xe_gt *gt; + u8 *pdu; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + qoa = kzalloc(size, GFP_KERNEL); + if (!qoa) + return -ENOMEM; + + pdu = (u8 *)&qoa->oa_units[0]; + for_each_gt(gt, xe, gt_id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + u = &gt->oa.oa_unit[i]; + du = (struct drm_xe_oa_unit *)pdu; + + du->oa_unit_id = u->oa_unit_id; + du->oa_unit_type = u->type; + du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); + du->capabilities = DRM_XE_OA_CAPS_BASE; + + j = 0; + for_each_hw_engine(hwe, gt, hwe_id) { + if (!xe_hw_engine_is_reserved(hwe) && + xe_oa_unit_id(hwe) == u->oa_unit_id) { + du->eci[j].engine_class = + xe_to_user_engine_class[hwe->class]; + du->eci[j].engine_instance = hwe->logical_instance; + du->eci[j].gt_id = gt->info.id; + j++; + } + } + du->num_engines = j; + pdu += sizeof(*du) + j * sizeof(du->eci[0]); + qoa->num_oa_units++; + } + } + + ret = copy_to_user(query_ptr, qoa, size); + kfree(qoa); + + return ret ? 
-EFAULT : 0; +} + static int (* const xe_query_funcs[])(struct xe_device *xe, struct drm_xe_device_query *query) = { query_engines, @@ -612,6 +688,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe, query_gt_topology, query_engine_cycles, query_uc_fw_version, + query_oa_units, }; int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 03a6e479227a..93e00be44b2d 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -689,6 +689,7 @@ struct drm_xe_device_query { #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7 +#define DRM_XE_DEVICE_QUERY_OA_UNITS 8 /** @query: The type of data to query */ __u32 query; @@ -1451,6 +1452,75 @@ enum drm_xe_oa_unit_type { DRM_XE_OA_UNIT_TYPE_OAM, }; +/** + * struct drm_xe_oa_unit - describe OA unit + */ +struct drm_xe_oa_unit { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_unit_id: OA unit ID */ + __u32 oa_unit_id; + + /** @oa_unit_type: OA unit type of @drm_xe_oa_unit_type */ + __u32 oa_unit_type; + + /** @capabilities: OA capabilities bit-mask */ + __u64 capabilities; +#define DRM_XE_OA_CAPS_BASE (1 << 0) + + /** @oa_timestamp_freq: OA timestamp freq */ + __u64 oa_timestamp_freq; + + /** @reserved: MBZ */ + __u64 reserved[4]; + + /** @num_engines: number of engines in @eci array */ + __u64 num_engines; + + /** @eci: engines attached to this OA unit */ + struct drm_xe_engine_class_instance eci[]; +}; + +/** + * struct drm_xe_query_oa_units - describe OA units + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_OA_UNITS, then the reply uses struct + * drm_xe_query_oa_units in .data. + * + * OA unit properties for all OA units can be accessed using a code block + * such as the one below: + * + * .. 
code-block:: C + * + * struct drm_xe_query_oa_units *qoa; + * struct drm_xe_oa_unit *oau; + * u8 *poau; + * + * // malloc qoa and issue DRM_XE_DEVICE_QUERY_OA_UNITS. Then: + * poau = (u8 *)&qoa->oa_units[0]; + * for (int i = 0; i < qoa->num_oa_units; i++) { + * oau = (struct drm_xe_oa_unit *)poau; + * // Access 'struct drm_xe_oa_unit' fields here + * poau += sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]); + * } + */ +struct drm_xe_query_oa_units { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @num_oa_units: number of OA units returned in oa_units[] */ + __u32 num_oa_units; + /** @pad: MBZ */ + __u32 pad; + /** + * @oa_units: struct @drm_xe_oa_unit array returned for this device. + * Written below as a u64 array to avoid problems with nested flexible + * arrays with some compilers + */ + __u64 oa_units[]; +}; + /** * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec * 52198/60942 @@ -1590,6 +1660,21 @@ struct drm_xe_oa_stream_status { __u64 reserved[3]; }; +/** + * struct drm_xe_oa_stream_info - OA stream info returned from + * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + */ +struct drm_xe_oa_stream_info { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_buf_size: OA buffer size */ + __u64 oa_buf_size; + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 7e5161da9d267957b726a29f3efe6cb50fdfed04 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Sun, 23 Jun 2024 13:31:19 -0700 Subject: drm/xe/oa: Fix kernel doc in xe_drm.h Fix kernel doc in xe_drm.h. Also eliminate private/non-abi enum definitions. 
v2: Remove __DRM_XE_PERF_TYPE_MAX since it is unused (Michal) v3: Also remove DRM_XE_OA_PROPERTY_MAX since it can also be eliminated (Michal) Suggested-by: Michal Wajdeczko Signed-off-by: Ashutosh Dixit Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240623203119.3840283-1-ashutosh.dixit@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 3 ++- include/uapi/drm/xe_drm.h | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4168b51cf7b5..9263ae9a864e 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1684,6 +1684,7 @@ static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, }; +#define MAX_USER_EXTENSIONS 16 static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, struct xe_oa_open_param *param) { @@ -1692,7 +1693,7 @@ static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number int err; u32 idx; - if (XE_IOCTL_DBG(oa->xe, ext_number >= DRM_XE_OA_PROPERTY_MAX)) + if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; err = __copy_from_user(&ext, address, sizeof(ext)); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 93e00be44b2d..b410553faa9b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1379,8 +1379,8 @@ struct drm_xe_wait_user_fence { * enum drm_xe_perf_type - Perf stream types */ enum drm_xe_perf_type { + /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ DRM_XE_PERF_TYPE_OA, - __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ }; /** @@ -1611,9 +1611,6 @@ enum drm_xe_oa_property_id { * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. 
*/ DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, - - /** @DRM_XE_OA_PROPERTY_MAX: non-ABI */ - DRM_XE_OA_PROPERTY_MAX }; /** -- cgit v1.2.3 From 406d058dc323ae152d380ac90153eb56a75850c1 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 26 Jun 2024 11:18:17 -0700 Subject: drm/xe/oa/uapi: Allow preemption to be disabled on the stream exec queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa VK_KHR_performance_query use case requires preemption and timeslicing to be disabled for the stream exec queue. Implement this functionality here. v2: Minor change to debug print to print both ret values (Umesh) Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240626181817.1516229-3-ashutosh.dixit@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 70 +++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_oa_types.h | 3 ++ include/uapi/drm/xe_drm.h | 6 ++++ 3 files changed, 78 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a68659fd5386..6cc3f0217341 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -80,6 +80,7 @@ struct xe_oa_open_param { int engine_instance; struct xe_exec_queue *exec_q; struct xe_hw_engine *hwe; + bool no_preempt; }; struct xe_oa_config_bo { @@ -1013,11 +1014,55 @@ static void xe_oa_stream_disable(struct xe_oa_stream *stream) hrtimer_cancel(&stream->poll_check_timer); } +static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream) +{ + struct xe_exec_queue *q = stream->exec_q; + int ret1, ret2; + + /* Best effort recovery: try to revert both to original, irrespective of error */ + ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us); + ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us); + if (ret1 || ret2) + 
goto err; + return 0; +err: + drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n", __func__, ret1, ret2); + return ret1 ?: ret2; +} + +static int xe_oa_disable_preempt_timeslice(struct xe_oa_stream *stream) +{ + struct xe_exec_queue *q = stream->exec_q; + int ret; + + /* Setting values to 0 will disable timeslice and preempt_timeout */ + ret = q->ops->set_timeslice(q, 0); + if (ret) + goto err; + + ret = q->ops->set_preempt_timeout(q, 0); + if (ret) + goto err; + + return 0; +err: + xe_oa_enable_preempt_timeslice(stream); + drm_dbg(&stream->oa->xe->drm, "%s failed %d\n", __func__, ret); + return ret; +} + static int xe_oa_enable_locked(struct xe_oa_stream *stream) { if (stream->enabled) return 0; + if (stream->no_preempt) { + int ret = xe_oa_disable_preempt_timeslice(stream); + + if (ret) + return ret; + } + xe_oa_stream_enable(stream); stream->enabled = true; @@ -1026,13 +1071,18 @@ static int xe_oa_enable_locked(struct xe_oa_stream *stream) static int xe_oa_disable_locked(struct xe_oa_stream *stream) { + int ret = 0; + if (!stream->enabled) return 0; xe_oa_stream_disable(stream); + if (stream->no_preempt) + ret = xe_oa_enable_preempt_timeslice(stream); + stream->enabled = false; - return 0; + return ret; } static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) @@ -1307,6 +1357,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->sample = param->sample; stream->periodic = param->period_exponent > 0; stream->period_exponent = param->period_exponent; + stream->no_preempt = param->no_preempt; /* * For Xe2+, when overrun mode is enabled, there are no partial reports at the end @@ -1651,6 +1702,13 @@ static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, return 0; } +static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->no_preempt = value; + return 0; +} + typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param); static 
const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { @@ -1662,6 +1720,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, }; static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, @@ -1766,6 +1825,15 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (param.exec_q && !param.sample) privileged_op = false; + if (param.no_preempt) { + if (!param.exec_q) { + drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n"); + ret = -EINVAL; + goto err_exec_q; + } + privileged_op = true; + } + if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) { drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n"); ret = -EACCES; diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 706d45577dae..540c3ec53a6d 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -235,5 +235,8 @@ struct xe_oa_stream { /** @oa_status: temporary storage for oa_status register value */ u32 oa_status; + + /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */ + u32 no_preempt; }; #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index b410553faa9b..12eaa8532b5c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1611,6 +1611,12 @@ enum drm_xe_oa_property_id { * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. */ DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** + * @DRM_XE_OA_PROPERTY_NO_PREEMPT: Allow preemption and timeslicing + * to be disabled for the stream exec queue. + */ + DRM_XE_OA_PROPERTY_NO_PREEMPT, }; /** -- cgit v1.2.3