summary refs log tree commit diff
diff options
context:
space:
mode:
author Shekhar Chauhan <shekhar.chauhan@intel.com> 2024-10-07 08:41:44 -0700
committer Matt Roper <matthew.d.roper@intel.com> 2024-10-08 06:56:51 -0700
commit 9ab440a9d0426cf7842240891cc457155db1a97e (patch)
tree f2c2e0c5f136700137a3882b4b5244f415068f7c
parent 691b5a6af36022648a0aa23899515a466eccbb61 (diff)
download lwn-9ab440a9d0426cf7842240891cc457155db1a97e.tar.gz
lwn-9ab440a9d0426cf7842240891cc457155db1a97e.zip
drm/xe/ptl: L3bank mask is not available on the media GT
On PTL platforms with media version 30.00, the fuse registers for reporting L3 bank availability to the GT just read out as ~0 and do not provide proper values. Xe does not use the L3 bank mask for anything internally; it only passes the mask through to userspace via the GT topology query.

Since we don't have any way to get the real L3 bank mask, we don't want to pass garbage to userspace. Passing a zeroed mask or a copy of the primary GT's L3 bank mask would also be inaccurate and likely to cause confusion for userspace. The best approach is to simply not include L3 in the list of masks returned by the topology query in cases where we aren't able to provide a meaningful value. This won't change the behavior for any existing platforms (where we can always obtain L3 masks successfully for all GTs), it will only prevent us from mis-reporting bad information on upcoming platform(s).

There's a good chance this will become a formal workaround in the future, but for now we don't have a lineage number so "no_media_l3" is used in place of a lineage as the OOB workaround descriptor.

v2:
 - Re-calculate query size to properly match data returned. (Gustavo)
 - Update kerneldoc to clarify that the L3bank mask may not be included in the query results if the hardware doesn't make it available. (Gustavo)

Cc: Matt Atwood <matthew.s.atwood@intel.com>
Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Co-developed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Acked-by: Francois Dugast <francois.dugast@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241007154143.2021124-2-matthew.d.roper@intel.com
-rw-r--r-- drivers/gpu/drm/xe/xe_gt_topology.c 14
-rw-r--r-- drivers/gpu/drm/xe/xe_query.c 42
-rw-r--r-- drivers/gpu/drm/xe/xe_wa_oob.rules 1
-rw-r--r-- include/uapi/drm/xe_drm.h 4
4 files changed, 49 insertions, 12 deletions
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 651ba53623e5..df2042db7ee6 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -5,6 +5,7 @@
#include "xe_gt_topology.h"
+#include <generated/xe_wa_oob.h>
#include <linux/bitmap.h>
#include <linux/compiler.h>
@@ -12,6 +13,7 @@
#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_mmio.h"
+#include "xe_wa.h"
static void
load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
@@ -129,6 +131,18 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
struct xe_device *xe = gt_to_xe(gt);
u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
+ /*
+ * PTL platforms with media version 30.00 do not provide proper values
+ * for the media GT's L3 bank registers. Skip the readout since we
+ * don't have any way to obtain real values.
+ *
+ * This may get re-described as an official workaround in the future,
+ * but there's no tracking number assigned yet so we use a custom
+ * OOB workaround descriptor.
+ */
+ if (XE_WA(gt, no_media_l3))
+ return;
+
if (GRAPHICS_VER(xe) >= 20) {
xe_l3_bank_mask_t per_node = {};
u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 5246a4a2740e..158629971eab 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -9,6 +9,7 @@
#include <linux/sched/clock.h>
#include <drm/ttm/ttm_placement.h>
+#include <generated/xe_wa_oob.h>
#include <uapi/drm/xe_drm.h>
#include "regs/xe_engine_regs.h"
@@ -23,6 +24,7 @@
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
static const u16 xe_to_user_engine_class[] = {
[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
@@ -455,12 +457,23 @@ static int query_hwconfig(struct xe_device *xe,
static size_t calc_topo_query_size(struct xe_device *xe)
{
- return xe->info.gt_count *
- (4 * sizeof(struct drm_xe_query_topology_mask) +
- sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
- sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
- sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) +
- sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
+ struct xe_gt *gt;
+ size_t query_size = 0;
+ int id;
+
+ for_each_gt(gt, xe, id) {
+ query_size += 3 * sizeof(struct drm_xe_query_topology_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss);
+
+ /* L3bank mask may not be available for some GTs */
+ if (!XE_WA(gt, no_media_l3))
+ query_size += sizeof(struct drm_xe_query_topology_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask);
+ }
+
+ return query_size;
}
static int copy_mask(void __user **ptr,
@@ -513,11 +526,18 @@ static int query_gt_topology(struct xe_device *xe,
if (err)
return err;
- topo.type = DRM_XE_TOPO_L3_BANK;
- err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
- sizeof(gt->fuse_topo.l3_bank_mask));
- if (err)
- return err;
+ /*
+ * If the kernel doesn't have a way to obtain a correct L3bank
+ * mask, then it's better to omit L3 from the query rather than
+ * reporting bogus or zeroed information to userspace.
+ */
+ if (!XE_WA(gt, no_media_l3)) {
+ topo.type = DRM_XE_TOPO_L3_BANK;
+ err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
+ sizeof(gt->fuse_topo.l3_bank_mask));
+ if (err)
+ return err;
+ }
topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ?
DRM_XE_TOPO_SIMD16_EU_PER_DSS :
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 920ca5060146..0154fbe154e9 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -37,3 +37,4 @@
16023588340 GRAPHICS_VERSION(2001)
14019789679 GRAPHICS_VERSION(1255)
GRAPHICS_VERSION_RANGE(1270, 2004)
+no_media_l3 MEDIA_VERSION(3000)
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index b6fbe4988f2e..c4182e95a619 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -512,7 +512,9 @@ struct drm_xe_query_gt_list {
* containing the following in mask:
* ``DSS_COMPUTE ff ff ff ff 00 00 00 00``
* means 32 DSS are available for compute.
- * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks
+ * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks. This type
+ * may be omitted if the driver is unable to query the mask from the
+ * hardware.
* - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU)
* available per Dual Sub Slices (DSS). For example a query response
* containing the following in mask: