Merge branch 'drm-xe-next' of https://gitlab.freedesktop.org/drm/xe/kernel.git

# Conflicts:
#	drivers/gpu/drm/xe/tests/xe_rtp_test.c
#	drivers/gpu/drm/xe/xe_hw_engine.c
#	drivers/gpu/drm/xe/xe_reg_whitelist.c
#	drivers/gpu/drm/xe/xe_tuning.c
#	drivers/gpu/drm/xe/xe_wa.c
author: Mark Brown <broonie@kernel.org> 2026-07-03 16:20:43 +0100
committer: Mark Brown <broonie@kernel.org> 2026-07-03 16:20:43 +0100
commit: 9dea607a7a2b140280f27f48fb068f8ac01ce701 (patch)
tree: fdbfb7b63d2fd8e14ff56a8cd050fa6f662201ae
parent: 10182b0e6181d81cbba867e2a098c42088285224 (diff)
parent: 820de07bba7b7c97e0f52e1d66bf6147a25ab67f (diff)
download: linux-next-9dea607a7a2b140280f27f48fb068f8ac01ce701.tar.gz
linux-next-9dea607a7a2b140280f27f48fb068f8ac01ce701.zip
65 files changed, 1432 insertions, 433 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
index 55ab45f669ac..0da739d9a816 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -251,6 +251,13 @@ Description:	RO. Fan 2 speed in RPM.
 
 		Only supported for particular Intel Xe graphics platforms.
 
+		On DG2 the driver always shows two fan channels, because the
+		FSC_READ_NUM_FANS command does not work on some cards. OEMs
+		decide how the fans map to tach channels, so two fans can share
+		one tach line. When that happens, the second channel
+		reads 0 RPM even though the fan is spinning. This is normal, not
+		a bug.
+
 What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan3_input
 Date:		March 2025
 KernelVersion:	6.16
diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile
index 7530df998148..e07517d120e0 100644
--- a/drivers/gpu/drm/xe/Kconfig.profile
+++ b/drivers/gpu/drm/xe/Kconfig.profile
@@ -1,50 +1,71 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_XE_JOB_TIMEOUT_MAX
-	int "Default max job timeout (ms)"
+	int "Hard upper limit for job timeout (ms)"
 	default 10000 # milliseconds
 	help
-	  Configures the default max job timeout after which job will
-	  be forcefully taken away from scheduler.
+	  Absolute upper bound (in milliseconds) for the per-engine-class job
+	  timeout. This is the maximum value that can be written to the sysfs
+	  job_timeout_ms knob, regardless of privileges. To raise this ceiling,
+	  increase this value and rebuild the kernel.
 config DRM_XE_JOB_TIMEOUT_MIN
-	int "Default min job timeout (ms)"
+	int "Hard lower limit for job timeout (ms)"
 	default 1 # milliseconds
 	help
-	  Configures the default min job timeout after which job will
-	  be forcefully taken away from scheduler.
+	  Absolute lower bound (in milliseconds) for the per-engine-class job
+	  timeout. This is the minimum value that can be written to the sysfs
+	  job_timeout_ms knob, regardless of privileges.
+
+	  Note: the job timeout default (5000 ms) is hardcoded in the driver
+	  and is not configurable here. Use the sysfs job_timeout_ms knob at
+	  runtime to change the engine-class default.
 config DRM_XE_TIMESLICE_MAX
-	int "Default max timeslice duration (us)"
+	int "Hard upper limit for timeslice duration (us)"
 	default 10000000 # microseconds
 	help
-	  Configures the default max timeslice duration between multiple
-	  contexts by guc scheduling.
+	  Absolute upper bound (in microseconds) for the timeslice duration.
+	  This caps both the sysfs timeslice_duration_us knob and the value
+	  accepted via the DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE UAPI for
+	  processes with CAP_SYS_NICE when DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT
+	  is enabled.
 config DRM_XE_TIMESLICE_MIN
-	int "Default min timeslice duration (us)"
+	int "Hard lower limit for timeslice duration (us)"
 	default 1 # microseconds
 	help
-	  Configures the default min timeslice duration between multiple
-	  contexts by guc scheduling.
+	  Absolute lower bound (in microseconds) for the timeslice duration.
+	  This limits both the sysfs timeslice_duration_us knob and the value
+	  accepted via the DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE UAPI for
+	  processes with CAP_SYS_NICE when DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT
+	  is enabled.
 config DRM_XE_PREEMPT_TIMEOUT
-	int "Preempt timeout (us, jiffy granularity)"
+	int "Default preempt timeout (us, jiffy granularity)"
 	default 640000 # microseconds
 	help
-	  How long to wait (in microseconds) for a preemption event to occur
-	  when submitting a new context. If the current context does not hit
-	  an arbitration point and yield to HW before the timer expires, the
-	  HW will be reset to allow the more important context to execute.
+	  Initial per-engine-class preemption timeout (in microseconds). This
+	  is the value the driver programs at boot; it can be changed at
+	  runtime via the sysfs preempt_timeout_us knob.
+
+	  This is how long the driver waits for the current context to reach
+	  an arbitration point and yield the GPU voluntarily when a
+	  higher-priority context becomes runnable. If the context does not
+	  yield before the timer expires, the HW is reset to allow the
+	  higher-priority context to execute.
+
+	  The range userspace may write via sysfs is bounded by
+	  DRM_XE_PREEMPT_TIMEOUT_MIN and DRM_XE_PREEMPT_TIMEOUT_MAX.
 config DRM_XE_PREEMPT_TIMEOUT_MAX
-	int "Default max preempt timeout (us)"
+	int "Hard upper limit for preempt timeout (us)"
 	default 10000000 # microseconds
 	help
-	  Configures the default max preempt timeout after which context
-	  will be forcefully taken away and higher priority context will
-	  run.
+	  Absolute upper bound (in microseconds) for the per-engine-class
+	  preemption timeout. This is the maximum value that can be written to
+	  the sysfs preempt_timeout_us knob, regardless of privileges.
 config DRM_XE_PREEMPT_TIMEOUT_MIN
-	int "Default min preempt timeout (us)"
+	int "Hard lower limit for preempt timeout (us)"
 	default 1 # microseconds
 	help
-	  Configures the default min preempt timeout after which context
-	  will be forcefully taken away and higher priority context will
-	  run.
+	  Absolute lower bound (in microseconds) for the per-engine-class
+	  preemption timeout. This is the minimum value that can be written to
+	  the sysfs preempt_timeout_us knob, regardless of privileges.
 config DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT
 	bool "Default configuration of limitation on scheduler timeout"
 	default y
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index e5a04253e73b..6d728f8c4c39 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -101,6 +101,7 @@ xe-y += xe_bb.o \
 	xe_page_reclaim.o \
 	xe_pat.o \
 	xe_pci.o \
+	xe_pci_error.o \
 	xe_pci_rebar.o \
 	xe_pcode.o \
 	xe_pm.o \
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index 0e3408f4952c..f7aa47f11a36 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -9,5 +9,6 @@ obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o
 xe_test-y = xe_test_mod.o \
 	xe_args_test.o \
 	xe_pci_test.o \
+	xe_rtp_tables_test.o \
 	xe_rtp_test.o \
 	xe_wa_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index 9240aff779da..8df9029afcd3 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -311,40 +311,44 @@ const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param);
 
-static void fake_init_devid(struct xe_device *xe)
+static int fake_probe_info(struct xe_device *xe,
+			   const struct xe_device_desc *desc,
+			   struct xe_pci_fake_data *data,
+			   struct xe_probed_info *probed_info)
 {
-	/* Nothing to do, just keep zero. */
-}
-
-static int fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type,
-			   u32 *ver, u32 *revid)
-{
-	struct kunit *test = kunit_get_current_test();
-	struct xe_pci_fake_data *data = test->priv;
+	probed_info->tile_count = 1 + desc->max_remote_tiles;
 
-	if (type == GMDID_MEDIA) {
-		*ver = data->media_verx100;
-		*revid = xe_step_to_gmdid(data->step.media);
+	if (!data || desc->pre_gmdid_graphics_ip) {
+		probed_info->graphics_ip = desc->pre_gmdid_graphics_ip;
+		probed_info->media_ip = desc->pre_gmdid_media_ip;
 	} else {
-		*ver = data->graphics_verx100;
-		*revid = xe_step_to_gmdid(data->step.graphics);
+		probed_info->graphics_ip = find_graphics_ip(data->graphics_verx100);
+
+		if (data->media_verx100) {
+			probed_info->media_ip = find_media_ip(data->media_verx100);
+			xe_assert(xe, probed_info->media_ip);
+		}
 	}
 
-	return 0;
-}
+	xe_assert(xe, probed_info->graphics_ip);
+	if (!probed_info->graphics_ip)
+		return -ENODEV;
 
-static void fake_xe_info_probe_tile_count(struct xe_device *xe)
-{
-	/* Nothing to do, just use the statically defined value. */
+	if (data)
+		probed_info->step = data->step;
+
+	return 0;
 }
 
 int xe_pci_fake_device_init(struct xe_device *xe)
 {
 	struct kunit *test = kunit_get_current_test();
 	struct xe_pci_fake_data *data = test->priv;
+	struct xe_probed_info probed_info = {};
 	const struct pci_device_id *ent = pciidlist;
 	const struct xe_device_desc *desc;
 	const struct xe_subplatform_desc *subplatform_desc;
+	int err;
 
 	if (!data) {
 		desc = (const void *)ent->driver_data;
@@ -374,13 +378,12 @@ done:
 	xe->sriov.__mode = data && data->sriov_mode ?
 			   data->sriov_mode : XE_SRIOV_MODE_NONE;
 
-	kunit_activate_static_stub(test, init_devid, fake_init_devid);
-	kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid);
-	kunit_activate_static_stub(test, xe_info_probe_tile_count,
-				   fake_xe_info_probe_tile_count);
+	err = fake_probe_info(xe, desc, data, &probed_info);
+	if (err)
+		return err;
 
-	xe_info_init_early(xe, desc, subplatform_desc);
-	xe_info_init(xe, desc);
+	xe_info_init_early(xe, desc, subplatform_desc, &probed_info);
+	xe_info_init(xe, desc, &probed_info);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_tables_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_tables_test.c
new file mode 100644
index 000000000000..7e2fc39ac62c
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_tables_test.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_reg_whitelist.h"
+#include "xe_rtp_types.h"
+#include "xe_tuning.h"
+#include "xe_wa.h"
+
+#define RTP_TABLE_PARAM(table)									\
+	static const void *table##_gen_params(struct kunit *test,				\
+					     const void *prev, char *desc)			\
+	{											\
+		typeof((table.entries)[0]) *__next = prev ?					\
+			((typeof(__next))prev) + 1 : (table.entries);				\
+		if (__next - table.entries < table.n_entries) {					\
+			scnprintf(desc, KUNIT_PARAM_DESC_SIZE, #table "/%s", __next->name);	\
+			return __next;								\
+		}										\
+		return NULL;									\
+	}
+
+static void xe_rtp_table_gt_test(struct kunit *test)
+{
+	const struct xe_rtp_entry_sr *entry = test->param_value;
+
+	for (int i = 0; i < entry->n_rules; i++) {
+		KUNIT_EXPECT_TRUE(test,
+				  entry->rules[i].match_type != XE_RTP_MATCH_ENGINE_CLASS ||
+				  entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE);
+		KUNIT_EXPECT_TRUE(test,
+				  entry->rules[i].match_type != XE_RTP_MATCH_NOT_ENGINE_CLASS ||
+				  entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE);
+	}
+}
+
+RTP_TABLE_PARAM(gt_was);
+RTP_TABLE_PARAM(gt_tunings);
+
+static void xe_rtp_table_oob_test(struct kunit *test)
+{
+	const struct xe_rtp_entry *entry = test->param_value;
+
+	for (int i = 0; i < entry->n_rules; i++) {
+		u8 match_type = entry->rules[i].match_type;
+
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_ENGINE_CLASS);
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_NOT_ENGINE_CLASS);
+	}
+}
+
+RTP_TABLE_PARAM(oob_was);
+
+static void xe_rtp_table_dev_oob_test(struct kunit *test)
+{
+	const struct xe_rtp_entry *entry = test->param_value;
+
+	for (int i = 0; i < entry->n_rules; i++) {
+		u8 match_type = entry->rules[i].match_type;
+
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_ENGINE_CLASS);
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_NOT_ENGINE_CLASS);
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_GRAPHICS_VERSION);
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_GRAPHICS_VERSION_RANGE);
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT);
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_GRAPHICS_STEP);
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_MEDIA_VERSION);
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_MEDIA_VERSION_RANGE);
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_MEDIA_VERSION_ANY_GT);
+		KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_MEDIA_STEP);
+	}
+}
+
+RTP_TABLE_PARAM(device_oob_was);
+
+static void xe_rtp_table_missing_upper_bound_test(struct kunit *test)
+{
+	const struct xe_rtp_entry_sr *entry = test->param_value;
+
+	for (int i = 0; i < entry->n_rules; i++) {
+		u8 match_type = entry->rules[i].match_type;
+
+		KUNIT_EXPECT_FALSE(test,
+				   match_type == XE_RTP_MATCH_GRAPHICS_VERSION_RANGE &&
+				   entry->rules[i].ver_end == XE_RTP_END_VERSION_UNDEFINED);
+		KUNIT_EXPECT_FALSE(test,
+				   match_type == XE_RTP_MATCH_MEDIA_VERSION_RANGE &&
+				   entry->rules[i].ver_end == XE_RTP_END_VERSION_UNDEFINED);
+	}
+}
+
+RTP_TABLE_PARAM(register_whitelist);
+
+static struct kunit_case xe_rtp_table_tests[] = {
+	KUNIT_CASE_PARAM(xe_rtp_table_gt_test, gt_was_gen_params),
+	KUNIT_CASE_PARAM(xe_rtp_table_gt_test, gt_tunings_gen_params),
+	KUNIT_CASE_PARAM(xe_rtp_table_oob_test, oob_was_gen_params),
+	KUNIT_CASE_PARAM(xe_rtp_table_dev_oob_test, device_oob_was_gen_params),
+	KUNIT_CASE_PARAM(xe_rtp_table_missing_upper_bound_test,
+			 register_whitelist_gen_params),
+	{}
+};
+
+static struct kunit_suite xe_rtp_tables_test_suite = {
+	.name = "xe_rtp_tables_test",
+	.test_cases = xe_rtp_table_tests,
+};
+
+kunit_test_suite(xe_rtp_tables_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index 3d0688d058d9..367811621880 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -280,6 +280,11 @@ static void xe_rtp_rules_tests(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, err, param->expected_err);
 }
 
+static u32 bits_2_3_set(struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+	return REG_BIT(2) | REG_BIT(3);
+}
+
 static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = {
 	{
 		.name = "coalesce-same-reg",
@@ -301,6 +306,29 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = {
 		),
 	},
 	{
+		.name = "coalesce-same-reg-literal-and-func",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2) | REG_BIT(3),
+		.expected_clr_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2) | REG_BIT(3),
+		.expected_active = BIT(0) | BIT(1),
+		.expected_count_sr_entries = 1,
+		/* Different bits on the same register: create a single entry */
+		.table = XE_RTP_TABLE_SR(
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1,
+						   REG_BIT(0) | REG_BIT(1),
+						   REG_BIT(0) | REG_BIT(1)))
+			},
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(FIELD_SET_FUNC(REGULAR_REG1,
+							REG_BIT(2) | REG_BIT(3),
+							bits_2_3_set))
+			},
+		),
+	},
+	{
 		.name = "no-match-no-add",
 		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = REG_BIT(0),
@@ -418,6 +446,30 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = {
 		),
 	},
 	{
+		.name = "conflict-not-disjoint-literal-and-func",
+		.expected_reg = REGULAR_REG1,
+		.expected_set_bits = REG_BIT(1) | REG_BIT(2),
+		.expected_clr_bits = REG_BIT(1) | REG_BIT(2),
+		.expected_active = BIT(0) | BIT(1),
+		.expected_count_sr_entries = 1,
+		.expected_sr_errors = 1,
+		.table = XE_RTP_TABLE_SR(
+			{ XE_RTP_NAME("basic-1"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1,
+						   REG_BIT(1) | REG_BIT(2),
+						   REG_BIT(1) | REG_BIT(2)))
+			},
+			/* drop: bits are not disjoint with previous entries */
+			{ XE_RTP_NAME("basic-2"),
+			  XE_RTP_RULES(FUNC(match_yes)),
+			  XE_RTP_ACTIONS(FIELD_SET_FUNC(REGULAR_REG1,
+							REG_BIT(2) | REG_BIT(3),
+							bits_2_3_set))
+			},
+		),
+	},
+	{
 		.name = "conflict-reg-type",
 		.expected_reg = REGULAR_REG1,
 		.expected_set_bits = REG_BIT(0),
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index ff0e2502b39f..21601e9df353 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -43,9 +43,6 @@ static int xe_wa_test_init(struct kunit *test)
 		xe_gt_mmio_init(gt);
 	}
 
-	if (!param->graphics_verx100)
-		xe->info.step = param->step;
-
 	/* TODO: init hw engines for engine/LRC WAs */
 	xe->drm.dev = dev;
 	test->priv = xe;
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 22b471303984..3c018dbccc07 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -117,7 +117,6 @@ static int info(struct seq_file *m, void *data)
 	drm_printf(&p, "revid %d\n", xe->info.revid);
 	drm_printf(&p, "tile_count %d\n", xe->info.tile_count);
 	drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level);
-	drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist));
 	drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs));
 	drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm));
 	drm_printf(&p, "skip_guc_pc %s\n", str_yes_no(xe->info.skip_guc_pc));
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index b60a651a3c9b..ad7f3e61d457 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -61,6 +61,7 @@
 #include "xe_psmi.h"
 #include "xe_pxp.h"
 #include "xe_query.h"
+#include "xe_ras.h"
 #include "xe_shrinker.h"
 #include "xe_soc_remapper.h"
 #include "xe_survivability_mode.h"
@@ -738,9 +739,11 @@ static void vf_update_device_info(struct xe_device *xe)
 	xe->info.probe_display = 0;
 	xe->info.has_heci_cscfi = 0;
 	xe->info.has_heci_gscfi = 0;
+	xe->info.has_i2c = 0;
 	xe->info.has_late_bind = 0;
 	xe->info.skip_guc_pc = 1;
 	xe->info.skip_pcode = 1;
+	xe->info.has_drm_ras = false;
 }
 
 static int xe_device_vram_alloc(struct xe_device *xe)
@@ -949,6 +952,15 @@ int xe_device_probe(struct xe_device *xe)
 			return err;
 	}
 
+	/*
+	 * Wa_16029380221: The affected GT will always use non-coherent
+	 * access to page tables, so we must do uncached writes from the
+	 * CPU.
+	 */
+	for_each_gt(gt, xe, id)
+		if (XE_GT_WA(gt, 16029380221))
+			xe->info.has_cached_pt = false;
+
 	for_each_tile(tile, xe, id) {
 		err = xe_ggtt_init_early(tile->mem.ggtt);
 		if (err)
@@ -989,6 +1001,16 @@ int xe_device_probe(struct xe_device *xe)
 	if (err)
 		return err;
 
+	err = xe_soc_remapper_init(xe);
+	if (err)
+		return err;
+
+	err = xe_sysctrl_init(xe);
+	if (err)
+		return err;
+
+	xe_ras_init(xe);
+
 	/*
 	 * Now that GT is initialized (TTM in particular),
 	 * we can try to init display, and inherit the initial fb.
@@ -1029,10 +1051,6 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_nvm_init(xe);
 
-	err = xe_soc_remapper_init(xe);
-	if (err)
-		return err;
-
 	err = xe_heci_gsc_init(xe);
 	if (err)
 		return err;
@@ -1071,10 +1089,6 @@ int xe_device_probe(struct xe_device *xe)
 	if (err)
 		goto err_unregister_display;
 
-	err = xe_sysctrl_init(xe);
-	if (err)
-		goto err_unregister_display;
-
 	err = xe_device_sysfs_init(xe);
 	if (err)
 		goto err_unregister_display;
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 975768a6a9c8..a03760d0ce38 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -116,7 +116,7 @@ static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe)
 
 static inline bool xe_device_uc_enabled(struct xe_device *xe)
 {
-	return !xe->info.force_execlist;
+	return true;
 }
 
 #define for_each_tile(tile__, xe__, id__) \
@@ -181,6 +181,21 @@ static inline bool xe_device_has_mert(const struct xe_device *xe)
 	return xe->info.has_mert;
 }
 
+static inline bool xe_device_is_in_reset(struct xe_device *xe)
+{
+	return atomic_read(&xe->in_reset);
+}
+
+static inline void xe_device_set_in_reset(struct xe_device *xe)
+{
+	atomic_set(&xe->in_reset, 1);
+}
+
+static inline void xe_device_clear_in_reset(struct xe_device *xe)
+{
+	atomic_set(&xe->in_reset, 0);
+}
+
 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
 
 void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 32dd2ffbc796..022e08205897 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -144,8 +144,6 @@ struct xe_device {
 		 * Keep all flags below alphabetically sorted
 		 */
 
-		/** @info.force_execlist: Forced execlist submission */
-		u8 force_execlist:1;
 		/** @info.has_access_counter: Device supports access counter */
 		u8 has_access_counter:1;
 		/** @info.has_asid: Has address space ID */
@@ -156,6 +154,8 @@ struct xe_device {
 		u8 has_cached_pt:1;
 		/** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
 		u8 has_device_atomics_on_smem:1;
+		/** @info.has_drm_ras: Device supports drm_ras (Reliability, Availability, Serviceability) */
+		u8 has_drm_ras:1;
 		/** @info.has_fan_control: Device supports fan control */
 		u8 has_fan_control:1;
 		/** @info.has_flat_ccs: Whether flat CCS metadata is used */
@@ -483,6 +483,9 @@ struct xe_device {
 	/** @needs_flr_on_fini: requests function-reset on fini */
 	bool needs_flr_on_fini;
 
+	/** @in_reset: Indicates if device is in reset */
+	atomic_t in_reset;
+
 	/** @wedged: Struct to control Wedged States and mode */
 	struct {
 		/** @wedged.flag: Xe device faced a critical error and is now blocked. */
@@ -495,6 +498,9 @@ struct xe_device {
 		bool inconsistent_reset;
 	} wedged;
 
+	/** @devres_group: devres group */
+	void *devres_group;
+
 	/** @bo_device: Struct to control async free of BOs */
 	struct xe_bo_dev {
 		/** @bo_device.async_free: Free worker */
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 81020b4b344e..e116fb562c4c 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -193,7 +193,7 @@ static void bo_meminfo(struct xe_bo *bo,
 		if (!dma_resv_test_signaled(bo->ttm.base.resv,
 					    DMA_RESV_USAGE_BOOKKEEP))
 			stats[mem_type].active += sz;
-		else if (mem_type == XE_PL_SYSTEM)
+		else if (mem_type == XE_PL_SYSTEM || xe_bo_madv_is_dontneed(bo))
 			stats[mem_type].purgeable += sz;
 	}
 }
@@ -273,8 +273,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
 					       &stats[mem_type],
 					       DRM_GEM_OBJECT_ACTIVE |
 					       DRM_GEM_OBJECT_RESIDENT |
-					       (mem_type != XE_PL_SYSTEM ? 0 :
-					       DRM_GEM_OBJECT_PURGEABLE),
+					       DRM_GEM_OBJECT_PURGEABLE,
 					       xe_mem_type_to_name[mem_type]);
 		}
 	}
diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
index cd236f53699e..7937d8ba0ed9 100644
--- a/drivers/gpu/drm/xe/xe_drm_ras.c
+++ b/drivers/gpu/drm/xe/xe_drm_ras.c
@@ -11,27 +11,46 @@
 
 #include "xe_device_types.h"
 #include "xe_drm_ras.h"
+#include "xe_ras.h"
 
 static const char * const error_components[] = DRM_XE_RAS_ERROR_COMPONENT_NAMES;
 static const char * const error_severity[] = DRM_XE_RAS_ERROR_SEVERITY_NAMES;
 
-static int hw_query_error_counter(struct xe_drm_ras_counter *info,
-				  u32 error_id, const char **name, u32 *val)
+static int query_error_counter(struct xe_device *xe,
+			       enum drm_xe_ras_error_severity severity,
+			       u32 error_id, const char **name, u32 *val)
 {
+	struct xe_drm_ras *ras = &xe->ras;
+	struct xe_drm_ras_counter *info = ras->info[severity];
+
 	if (!info || !info[error_id].name)
 		return -ENOENT;
 
 	*name = info[error_id].name;
+
+	/* Fetch counter from system controller if supported */
+	if (xe->info.has_sysctrl)
+		return xe_ras_get_counter(xe, severity, error_id, val);
+
 	*val = atomic_read(&info[error_id].counter);
 
 	return 0;
 }
 
-static int hw_clear_error_counter(struct xe_drm_ras_counter *info, u32 error_id)
+static int clear_error_counter(struct xe_device *xe,
+			       enum drm_xe_ras_error_severity severity,
+			       u32 error_id)
 {
+	struct xe_drm_ras *ras = &xe->ras;
+	struct xe_drm_ras_counter *info = ras->info[severity];
+
 	if (!info || !info[error_id].name)
 		return -ENOENT;
 
+	/* Clear counter from system controller if supported */
+	if (xe->info.has_sysctrl)
+		return xe_ras_clear_counter(xe, severity, error_id);
+
 	atomic_set(&info[error_id].counter, 0);
 
 	return 0;
@@ -41,38 +60,30 @@ static int query_uncorrectable_error_counter(struct drm_ras_node *ep, u32 error_
 					     const char **name, u32 *val)
 {
 	struct xe_device *xe = ep->priv;
-	struct xe_drm_ras *ras = &xe->ras;
-	struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_UNCORRECTABLE];
 
-	return hw_query_error_counter(info, error_id, name, val);
+	return query_error_counter(xe, DRM_XE_RAS_ERR_SEV_UNCORRECTABLE, error_id, name, val);
 }
 
 static int clear_uncorrectable_error_counter(struct drm_ras_node *node, u32 error_id)
 {
 	struct xe_device *xe = node->priv;
-	struct xe_drm_ras *ras = &xe->ras;
-	struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_UNCORRECTABLE];
 
-	return hw_clear_error_counter(info, error_id);
+	return clear_error_counter(xe, DRM_XE_RAS_ERR_SEV_UNCORRECTABLE, error_id);
 }
 
 static int query_correctable_error_counter(struct drm_ras_node *ep, u32 error_id,
 					   const char **name, u32 *val)
 {
 	struct xe_device *xe = ep->priv;
-	struct xe_drm_ras *ras = &xe->ras;
-	struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_CORRECTABLE];
 
-	return hw_query_error_counter(info, error_id, name, val);
+	return query_error_counter(xe, DRM_XE_RAS_ERR_SEV_CORRECTABLE, error_id, name, val);
 }
 
 static int clear_correctable_error_counter(struct drm_ras_node *node, u32 error_id)
 {
 	struct xe_device *xe = node->priv;
-	struct xe_drm_ras *ras = &xe->ras;
-	struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_CORRECTABLE];
 
-	return hw_clear_error_counter(info, error_id);
+	return clear_error_counter(xe, DRM_XE_RAS_ERR_SEV_CORRECTABLE, error_id);
 }
 
 static struct xe_drm_ras_counter *allocate_and_copy_counters(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index 297be3c42b20..d37770c58c5d 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -985,9 +985,10 @@ int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *f
 		return -ENODEV;
 	}
 
-	if (xe_observation_paranoid && !perfmon_capable()) {
+	ret = xe_observation_paranoid_check();
+	if (ret) {
 		drm_dbg(&xe->drm,  "Insufficient privileges for EU stall monitoring\n");
-		return -EACCES;
+		return ret;
 	}
 
 	/* Initialize and set default values */
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 2f5ccf294675..d27ce24daae5 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -318,8 +318,6 @@ struct xe_exec_queue_ops {
 	void (*resume)(struct xe_exec_queue *q);
 	/** @reset_status: check exec queue reset status */
 	bool (*reset_status)(struct xe_exec_queue *q);
-	/** @active: check exec queue is active */
-	bool (*active)(struct xe_exec_queue *q);
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 9fb99c038ea8..6b86b4f9cc1c 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -458,12 +458,6 @@ static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
 	return false;
 }
 
-static bool execlist_exec_queue_active(struct xe_exec_queue *q)
-{
-	/* NIY */
-	return false;
-}
-
 static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
 	.init = execlist_exec_queue_init,
 	.kill = execlist_exec_queue_kill,
@@ -476,7 +470,6 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
 	.suspend_wait = execlist_exec_queue_suspend_wait,
 	.resume = execlist_exec_queue_resume,
 	.reset_status = execlist_exec_queue_reset_status,
-	.active = execlist_exec_queue_active,
 };
 
 int xe_execlist_init(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index a351c578b170..8ec23862477f 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -111,14 +111,14 @@ struct xe_ggtt_pt_ops {
 struct xe_ggtt {
 	/** @tile: Back pointer to tile where this GGTT belongs */
 	struct xe_tile *tile;
-        /** @start: Start offset of GGTT */
+	/** @start: Start offset of GGTT */
 	u64 start;
 	/** @size: Total usable size of this GGTT */
 	u64 size;
-
 	/**
-	 * @flags: Flags for this GGTT
+	 * @flags: Flags for this GGTT.
 	 * Acceptable flags:
+	 *
 	 * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K.
 	 * - %XE_GGTT_FLAGS_ONLINE - is GGTT online, protected by ggtt->lock
 	 *   after init
@@ -129,7 +129,7 @@ struct xe_ggtt {
 	/** @lock: Mutex lock to protect GGTT data */
 	struct mutex lock;
 	/**
-	 *  @gsm: The iomem pointer to the actual location of the translation
+	 * @gsm: The iomem pointer to the actual location of the translation
 	 * table located in the GSM for easy PTE manipulation
 	 */
 	u64 __iomem *gsm;
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 783eb6d631b5..d904527a8898 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -917,6 +917,9 @@ static void gt_reset_worker(struct work_struct *w)
 	if (xe_device_wedged(gt_to_xe(gt)))
 		goto err_pm_put;
 
+	if (xe_device_is_in_reset(gt_to_xe(gt)))
+		goto err_pm_put;
+
 	/* We only support GT resets with GuC submission */
 	if (!xe_device_uc_enabled(gt_to_xe(gt)))
 		goto err_pm_put;
@@ -977,18 +980,21 @@ err_pm_put:
 
 void xe_gt_reset_async(struct xe_gt *gt)
 {
-	xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0));
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (xe_device_is_in_reset(xe))
+		return;
 
 	/* Don't do a reset while one is already in flight */
 	if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
 		return;
 
-	xe_gt_info(gt, "reset queued\n");
+	xe_gt_info(gt, "reset queued from %ps\n", __builtin_return_address(0));
 
 	/* Pair with put in gt_reset_worker() if work is enqueued */
-	xe_pm_runtime_get_noresume(gt_to_xe(gt));
+	xe_pm_runtime_get_noresume(xe);
 	if (!queue_work(gt->ordered_wq, &gt->reset.worker))
-		xe_pm_runtime_put(gt_to_xe(gt));
+		xe_pm_runtime_put(xe);
 }
 
 void xe_gt_suspend_prepare(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 04f0098070a4..a97b236dab7c 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -404,8 +404,7 @@ fallback:
 	 * Some older platforms don't have tables or don't have complete tables.
 	 * Newer platforms should always have the required info.
 	 */
-	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000 &&
-	    !gt_to_xe(gt)->info.force_execlist)
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000)
 		xe_gt_err(gt, "Slice/Subslice counts missing from hwconfig table; using typical fallback values\n");
 
 	if (gt_to_xe(gt)->info.platform == XE_PVC)
@@ -507,7 +506,7 @@ void xe_gt_mcr_init_early(struct xe_gt *gt)
 	spin_lock_init(&gt->mcr_lock);
 
 	if (gt->info.type == XE_GT_TYPE_MEDIA) {
-		drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
+		xe_gt_WARN_ON(gt, MEDIA_VER(xe) < 13);
 
 		if (MEDIA_VER(xe) >= 30) {
 			gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table;
@@ -662,9 +661,9 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 
 	for (int type = 0; type < IMPLICIT_STEERING; type++) {
 		if (reg_in_steering_type_ranges(gt, reg, type)) {
-			drm_WARN(&gt_to_xe(gt)->drm, !gt->steering[type].initialized,
-				 "Uninitialized usage of MCR register %s/%#x\n",
-				 xe_steering_types[type].name, reg.addr);
+			xe_gt_WARN(gt, !gt->steering[type].initialized,
+				   "Uninitialized usage of MCR register %s/%#x\n",
+				   xe_steering_types[type].name, reg.addr);
 
 			*group = gt->steering[type].group_target;
 			*instance = gt->steering[type].instance_target;
@@ -679,9 +678,9 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
 	 * Not found in a steering table and not a register with implicit
 	 * steering. Just steer to 0/0 as a guess and raise a warning.
 	 */
-	drm_WARN(&gt_to_xe(gt)->drm, true,
-		 "Did not find MCR register %#x in any MCR steering table\n",
-		 reg.addr);
+	xe_gt_WARN(gt, true,
+		   "Did not find MCR register %#x in any MCR steering table\n",
+		   reg.addr);
 	*group = 0;
 	*instance = 0;
 
@@ -710,7 +709,7 @@ static void mcr_lock(struct xe_gt *gt) __acquires(&gt->mcr_lock)
 		ret = xe_mmio_wait32(&gt->mmio, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL,
 				     true);
 
-	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
+	xe_gt_WARN_ON_ONCE(gt, ret == -ETIMEDOUT);
 }
 
 static void mcr_unlock(struct xe_gt *gt) __releases(&gt->mcr_lock)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index f5c3d8a97ec6..12416bfa3255 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1532,7 +1532,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 	 * If devcoredump not captured and GuC capture for the job is not ready
 	 * do manual capture first and decide later if we need to use it
 	 */
-	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
+	if (!xe_device_is_in_reset(xe) && !exec_queue_killed(q) && !xe->devcoredump.captured &&
 	    !xe_guc_capture_get_matching_and_lock(q)) {
 		/* take force wake before engine register manual capture */
 		CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
@@ -1554,8 +1554,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 	set_exec_queue_banned(q);
 
 	/* Kick job / queue off hardware */
-	if (!wedged && (exec_queue_enabled(primary) ||
-			exec_queue_pending_disable(primary))) {
+	if (!xe_device_is_in_reset(xe) && !wedged &&
+	    (exec_queue_enabled(primary) || exec_queue_pending_disable(primary))) {
 		int ret;
 
 		if (exec_queue_reset(primary))
@@ -1623,7 +1623,8 @@ trigger_reset:
 
 	trace_xe_sched_job_timedout(job);
 
-	if (!exec_queue_killed(q))
+	/* Do not access device if in reset */
+	if (!xe_device_is_in_reset(xe) && !exec_queue_killed(q))
 		xe_devcoredump(q, job,
 			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
 			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
@@ -2244,14 +2245,6 @@ static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
 	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
 }
 
-static bool guc_exec_queue_active(struct xe_exec_queue *q)
-{
-	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
-
-	return exec_queue_enabled(primary) &&
-		!exec_queue_pending_disable(primary);
-}
-
 /*
  * All of these functions are an abstraction layer which other parts of Xe can
  * use to trap into the GuC backend. All of these functions, aside from init,
@@ -2271,7 +2264,6 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
 	.suspend_wait = guc_exec_queue_suspend_wait,
 	.resume = guc_exec_queue_resume,
 	.reset_status = guc_exec_queue_reset_status,
-	.active = guc_exec_queue_active,
 };
 
 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index cf6d106e6036..046d0655122f 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -208,9 +208,6 @@ static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
 
 	lockdep_assert_held(&tlb_inval->seqno_lock);
 
-	if (guc_to_xe(guc)->info.force_execlist)
-		return -ECANCELED;
-
 	return send_tlb_inval_ppgtt(guc, seqno, start, end, asid,
 				    XE_GUC_TLB_INVAL_PAGE_SELECTIVE, prl_sa);
 }
@@ -228,9 +225,6 @@ static int send_tlb_inval_ctx_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
 
 	lockdep_assert_held(&tlb_inval->seqno_lock);
 
-	if (xe->info.force_execlist)
-		return -ECANCELED;
-
 	vm = xe_device_asid_to_vm(xe, asid);
 	if (IS_ERR(vm))
 		return PTR_ERR(vm);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 0b193c451a11..87d60c4117bd 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -337,39 +337,41 @@ static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_device *xe,
 	return xe_mmio_read32(&hwe->gt->mmio, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
 }
 
+static u32 blit_cctl_val(struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+	return REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, gt->mocs.uc_index) |
+		REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, gt->mocs.uc_index);
+}
+
+static const struct xe_rtp_table_sr lrc_setup = XE_RTP_TABLE_SR(
+	/*
+	 * Some blitter commands do not have a field for MOCS; those
+	 * commands will use the MOCS index pointed to by BLIT_CCTL.
+	 * BLIT_CCTL registers need to be programmed to un-cached.
+	 */
+	{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274),
+		       ENGINE_CLASS(COPY)),
+	  XE_RTP_ACTIONS(FIELD_SET_FUNC(BLIT_CCTL(0),
+					BLIT_CCTL_DST_MOCS_MASK |
+					BLIT_CCTL_SRC_MOCS_MASK,
+					blit_cctl_val,
+					XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	/* Disable WMTP if HW doesn't support it */
+	{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
+	  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
+	  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
+				   PREEMPT_GPGPU_LEVEL_MASK,
+				   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
+	},
+);
+
 static void
 hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 {
-	struct xe_gt *gt = hwe->gt;
-	const u8 mocs_write_idx = gt->mocs.uc_index;
-	const u8 mocs_read_idx = gt->mocs.uc_index;
-	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
-			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
-	const struct xe_rtp_table_sr lrc_setup = XE_RTP_TABLE_SR(
-		/*
-		 * Some blitter commands do not have a field for MOCS, those
-		 * commands will use MOCS index pointed by BLIT_CCTL.
-		 * BLIT_CCTL registers are needed to be programmed to un-cached.
-		 */
-		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
-		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274),
-			       ENGINE_CLASS(COPY)),
-		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
-				 BLIT_CCTL_DST_MOCS_MASK |
-				 BLIT_CCTL_SRC_MOCS_MASK,
-				 blit_cctl_val,
-				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
-		},
-		/* Disable WMTP if HW doesn't support it */
-		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
-		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
-		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
-					   PREEMPT_GPGPU_LEVEL_MASK,
-					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
-		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
-		},
-	);
 
 	xe_rtp_process_to_sr(&ctx, &lrc_setup, &hwe->reg_lrc, true);
 }
@@ -385,86 +387,92 @@ void xe_hw_engine_setup_reg_lrc(struct xe_hw_engine *hwe)
 	xe_tuning_process_lrc(hwe);
 }
 
-static void
-hw_engine_setup_default_state(struct xe_hw_engine *hwe)
+/*
+ * RING_CMD_CCTL specifies the default MOCS entry that will be
+ * used by the command streamer when executing commands that
+ * don't have a way to explicitly specify a MOCS setting.
+ * The default should usually reference whichever MOCS entry
+ * corresponds to uncached behavior, although use of a WB cached
+ * entry is recommended by the spec in certain circumstances on
+ * specific platforms.
+ * Bspec: 72161
+ */
+static u32 ring_cmd_cctl_val(struct xe_gt *gt, struct xe_hw_engine *hwe)
 {
-	struct xe_gt *gt = hwe->gt;
 	struct xe_device *xe = gt_to_xe(gt);
+	u8 mocs_read_idx = gt->mocs.uc_index;
+
+	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) &&
+	    (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC))
+		mocs_read_idx = gt->mocs.wb_index;
+
+	return REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, gt->mocs.uc_index) |
+		REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
+}
+
+static const struct xe_rtp_table_sr engine_sr = XE_RTP_TABLE_SR(
+	{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
+	  XE_RTP_RULES(FUNC(xe_rtp_match_always)),
+	  XE_RTP_ACTIONS(FIELD_SET_FUNC(RING_CMD_CCTL(0),
+					CMD_CCTL_WRITE_OVERRIDE_MASK |
+					CMD_CCTL_READ_OVERRIDE_MASK,
+					ring_cmd_cctl_val,
+					XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{ XE_RTP_NAME("Disable HW status page updates for interrupts"),
+	  XE_RTP_RULES(FUNC(xe_rtp_match_always)),
+	  XE_RTP_ACTIONS(SET(RING_HWSTAM(0), ~0x0,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{ XE_RTP_NAME("Disable engine 'legacy' mode"),
+	  XE_RTP_RULES(FUNC(xe_rtp_match_always)),
+	  XE_RTP_ACTIONS(SET(GFX_MODE(0), GFX_DISABLE_LEGACY_MODE,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
 	/*
-	 * RING_CMD_CCTL specifies the default MOCS entry that will be
-	 * used by the command streamer when executing commands that
-	 * don't have a way to explicitly specify a MOCS setting.
-	 * The default should usually reference whichever MOCS entry
-	 * corresponds to uncached behavior, although use of a WB cached
-	 * entry is recommended by the spec in certain circumstances on
-	 * specific platforms.
-	 * Bspec: 72161
+	 * To allow the GSC engine to go idle on MTL we need to enable
+	 * idle messaging and set the hysteresis value (we use 0xA=5us
+	 * as recommended in spec). On platforms after MTL this is
+	 * enabled by default.
 	 */
-	const u8 mocs_write_idx = gt->mocs.uc_index;
-	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) &&
-				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
-				 gt->mocs.wb_index : gt->mocs.uc_index;
-	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
-				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
+	{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
+	  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
+	  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
+			     IDLE_MSG_DISABLE,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
+			 FIELD_SET(RING_PWRCTX_MAXCNT(0),
+				   IDLE_WAIT_TIME,
+				   0xA,
+				   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	/* Enable Priority Mem Read */
+	{ XE_RTP_NAME("Priority_Mem_Read"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
+	  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{ XE_RTP_NAME("Enable CCS Engine(s)"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1255, XE_RTP_END_VERSION_UNDEFINED),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(RCU_MODE, RCU_MODE_CCS_ENABLE))
+	},
+	/* Use Fixed slice CCS mode */
+	{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
+	  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
+	  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
+				   RCU_MODE_FIXED_SLICE_CCS_MODE))
+	},
+	{ XE_RTP_NAME("Enable MSI-X interrupt support"),
+	  XE_RTP_RULES(FUNC(xe_rtp_match_has_msix)),
+	  XE_RTP_ACTIONS(SET(GFX_MODE(0), GFX_MSIX_INTERRUPT_ENABLE,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+);
+
+static void
+hw_engine_setup_default_state(struct xe_hw_engine *hwe)
+{
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
-	const struct xe_rtp_table_sr engine_sr = XE_RTP_TABLE_SR(
-		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
-		  XE_RTP_RULES(FUNC(xe_rtp_match_always)),
-		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
-					   CMD_CCTL_WRITE_OVERRIDE_MASK |
-					   CMD_CCTL_READ_OVERRIDE_MASK,
-					   ring_cmd_cctl_val,
-					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
-		},
-		{ XE_RTP_NAME("Disable HW status page updates for interrupts"),
-		  XE_RTP_RULES(FUNC(xe_rtp_match_always)),
-		  XE_RTP_ACTIONS(SET(RING_HWSTAM(0), ~0x0,
-				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
-		},
-		{ XE_RTP_NAME("Disable engine 'legacy' mode"),
-		  XE_RTP_RULES(FUNC(xe_rtp_match_always)),
-		  XE_RTP_ACTIONS(SET(GFX_MODE(0), GFX_DISABLE_LEGACY_MODE,
-				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
-		},
-		/*
-		 * To allow the GSC engine to go idle on MTL we need to enable
-		 * idle messaging and set the hysteresis value (we use 0xA=5us
-		 * as recommended in spec). On platforms after MTL this is
-		 * enabled by default.
-		 */
-		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
-		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
-		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
-				     IDLE_MSG_DISABLE,
-				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
-				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
-					   IDLE_WAIT_TIME,
-					   0xA,
-					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
-		},
-		/* Enable Priority Mem Read */
-		{ XE_RTP_NAME("Priority_Mem_Read"),
-		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
-		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
-				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
-		},
-		{ XE_RTP_NAME("Enable CCS Engine(s)"),
-		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1255, XE_RTP_END_VERSION_UNDEFINED),
-			       FUNC(xe_rtp_match_first_render_or_compute)),
-		  XE_RTP_ACTIONS(SET(RCU_MODE, RCU_MODE_CCS_ENABLE))
-		},
-		/* Use Fixed slice CCS mode */
-		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
-		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
-		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
-					   RCU_MODE_FIXED_SLICE_CCS_MODE))
-		},
-		{ XE_RTP_NAME("Enable MSI-X interrupt support"),
-		  XE_RTP_RULES(FUNC(xe_rtp_match_has_msix)),
-		  XE_RTP_ACTIONS(SET(GFX_MODE(0), GFX_MSIX_INTERRUPT_ENABLE,
-				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
-		},
-	);
 
 	xe_rtp_process_to_sr(&ctx, &engine_sr, &hwe->reg_sr, false);
 }
diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c
index 4b72959b2276..4a4b363fc844 100644
--- a/drivers/gpu/drm/xe/xe_hw_error.c
+++ b/drivers/gpu/drm/xe/xe_hw_error.c
@@ -437,6 +437,16 @@ static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_er
 	if (!IS_DGFX(xe))
 		return;
 
+	/*
+	 * Hardware errors are reported through System Controller on the platforms that
+	 * support it, and never routed as direct IRQ to SGUnit. So we should never be
+	 * here for those platforms.
+	 */
+	if (xe->info.has_sysctrl) {
+		drm_err_ratelimited(&xe->drm, HW_ERR "Invalid error routing\n");
+		return;
+	}
+
 	spin_lock_irqsave(&xe->irq.lock, flags);
 	err_src = xe_mmio_read32(&tile->mmio, DEV_ERR_STAT_REG(hw_err));
 	if (!err_src) {
@@ -516,14 +526,6 @@ void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl)
 	}
 }
 
-static int hw_error_info_init(struct xe_device *xe)
-{
-	if (xe->info.platform != XE_PVC)
-		return 0;
-
-	return xe_drm_ras_init(xe);
-}
-
 /*
  * Process hardware errors during boot
  */
@@ -550,16 +552,11 @@ static void process_hw_errors(struct xe_device *xe)
 void xe_hw_error_init(struct xe_device *xe)
 {
 	struct xe_tile *tile = xe_device_get_root_tile(xe);
-	int ret;
 
 	if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
 		return;
 
 	INIT_WORK(&tile->csc_hw_error_work, csc_hw_error_work);
 
-	ret = hw_error_info_init(xe);
-	if (ret)
-		drm_err(&xe->drm, "Failed to initialize XE DRM RAS (%pe)\n", ERR_PTR(ret));
-
 	process_hw_errors(xe);
 }
diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c
index 706783863d07..bd956776b10b 100644
--- a/drivers/gpu/drm/xe/xe_i2c.c
+++ b/drivers/gpu/drm/xe/xe_i2c.c
@@ -334,9 +334,6 @@ int xe_i2c_probe(struct xe_device *xe)
 	if (!xe->info.has_i2c)
 		return 0;
 
-	if (IS_SRIOV_VF(xe))
-		return 0;
-
 	xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep);
 	if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE)
 		return 0;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index a4292a11391d..3e7c995085d0 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -2618,13 +2618,19 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
 	kfree(snapshot);
 }
 
+static bool engine_valid_for_utilization(struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+	/* The USM-reserved copy engine runs kernel migrate contexts queried here */
+	return hwe && (!xe_hw_engine_is_reserved(hwe) || xe_gt_is_usm_hwe(gt, hwe));
+}
+
 static struct xe_hw_engine *engine_id_to_hwe(struct xe_gt *gt, u32 engine_id)
 {
 	u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
 	u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
 	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, class, instance, false);
 
-	if (xe_gt_WARN_ONCE(gt, !hwe || xe_hw_engine_is_reserved(hwe),
+	if (xe_gt_WARN_ONCE(gt, !engine_valid_for_utilization(gt, hwe),
 			    "Unexpected engine class:instance %d:%d for utilization\n",
 			    class, instance))
 		return NULL;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 78adb303b663..7fa18dfcb5a2 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -11,27 +11,18 @@
 #include <linux/pci.h>
 
 #include <drm/drm_managed.h>
-#include <drm/drm_print.h>
 
 #include "regs/xe_bars.h"
 #include "xe_device.h"
 #include "xe_gt_sriov_vf.h"
+#include "xe_printk.h"
 #include "xe_sriov.h"
+#include "xe_tile_printk.h"
 #include "xe_trace.h"
 #include "xe_wa.h"
 
 #include "generated/xe_device_wa_oob.h"
 
-static void tiles_fini(void *arg)
-{
-	struct xe_device *xe = arg;
-	struct xe_tile *tile;
-	int id;
-
-	for_each_remote_tile(tile, xe, id)
-		tile->mmio.regs = NULL;
-}
-
 /*
  * On multi-tile devices, partition the BAR space for MMIO on each tile,
  * possibly accounting for register override on the number of tiles available.
@@ -56,50 +47,71 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
 	struct xe_tile *tile;
 	u8 id;
 
-	/*
-	 * Nothing to be done as tile 0 has already been setup earlier with the
-	 * entire BAR mapped - see xe_mmio_probe_early()
-	 */
-	if (xe->info.tile_count == 1)
-		return;
-
 	for_each_remote_tile(tile, xe, id)
 		xe_mmio_init(&tile->mmio, tile, xe->mmio.regs + id * tile_mmio_size, SZ_4M);
 }
 
+/**
+ * xe_mmio_probe_tiles() - Initialize remote tiles' MMIO
+ * @xe: the &xe_device
+ *
+ * Initialize the remaining tiles' MMIO instances.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
 int xe_mmio_probe_tiles(struct xe_device *xe)
 {
 	size_t tile_mmio_size = SZ_16M;
 
-	mmio_multi_tile_setup(xe, tile_mmio_size);
+	/*
+	 * Nothing to be done as tile 0 has already been setup earlier with the
+	 * entire BAR mapped - see xe_mmio_probe_early()
+	 */
+	if (xe->info.tile_count == 1)
+		return 0;
 
-	return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe);
+	if (xe->mmio.size < xe->info.tile_count * tile_mmio_size) {
+		xe_err(xe, "GTTMMADR_BAR is too small for %d tiles: %zu\n",
+		       xe->info.tile_count, xe->mmio.size);
+		return -EIO;
+	}
+
+	mmio_multi_tile_setup(xe, tile_mmio_size);
+	return 0;
 }
 
 static void mmio_fini(void *arg)
 {
 	struct xe_device *xe = arg;
-	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
 
-	pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
 	xe->mmio.regs = NULL;
-	root_tile->mmio.regs = NULL;
 }
 
+/**
+ * xe_mmio_probe_early() - Probe and initialize device's MMIO
+ * @xe: the &xe_device
+ *
+ * Map the entire GTTMMADR_BAR and initialize the first tile's MMIO instance.
+ *
+ * The first 16MB of the GTTMMADR_BAR always belongs to the root tile, and
+ * includes: registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
 int xe_mmio_probe_early(struct xe_device *xe)
 {
 	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 
-	/*
-	 * Map the entire BAR.
-	 * The first 16MB of the BAR, belong to the root tile, and include:
-	 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
-	 */
-	xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR);
-	xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0);
+	xe->mmio.regs = pcim_iomap(pdev, GTTMMADR_BAR, 0);
 	if (!xe->mmio.regs) {
-		drm_err(&xe->drm, "failed to map registers\n");
+		xe_err(xe, "Failed to map GTTMMADR_BAR\n");
+		return -EIO;
+	}
+
+	xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR);
+	if (xe->mmio.size < SZ_16M) {
+		xe_err(xe, "GTTMMADR_BAR is too small: %zu\n", xe->mmio.size);
 		return -EIO;
 	}
 
@@ -128,6 +140,11 @@ void xe_mmio_init(struct xe_mmio *mmio, struct xe_tile *tile, void __iomem *ptr,
 	mmio->tile = tile;
 }
 
+static bool mmio_available(struct xe_mmio *mmio)
+{
+	return !xe_tile_WARN_ON_ONCE(mmio->tile, !mmio->tile->xe->mmio.regs);
+}
+
 static void mmio_flush_pending_writes(struct xe_mmio *mmio)
 {
 #define DUMMY_REG_OFFSET	0x130030
@@ -146,6 +163,9 @@ u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg)
 	u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
 	u8 val;
 
+	if (!mmio_available(mmio))
+		return 0;
+
 	mmio_flush_pending_writes(mmio);
 
 	val = readb(mmio->regs + addr);
@@ -158,6 +178,9 @@ void xe_mmio_write8(struct xe_mmio *mmio, struct xe_reg reg, u8 val)
 {
 	u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
 
+	if (!mmio_available(mmio))
+		return;
+
 	trace_xe_reg_rw(mmio, true, addr, val, sizeof(val));
 
 	writeb(val, mmio->regs + addr);
@@ -168,6 +191,9 @@ u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg)
 	u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
 	u16 val;
 
+	if (!mmio_available(mmio))
+		return 0;
+
 	mmio_flush_pending_writes(mmio);
 
 	val = readw(mmio->regs + addr);
@@ -180,6 +206,9 @@ void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val)
 {
 	u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
 
+	if (!mmio_available(mmio))
+		return;
+
 	trace_xe_reg_rw(mmio, true, addr, val, sizeof(val));
 
 	if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe))
@@ -194,6 +223,9 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg)
 	u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
 	u32 val;
 
+	if (!mmio_available(mmio))
+		return 0;
+
 	mmio_flush_pending_writes(mmio);
 
 	if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe))
@@ -282,8 +314,8 @@ u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg)
 		oldudw = udw;
 	}
 
-	drm_WARN(&mmio->tile->xe->drm, retries == 0,
-		 "64-bit read of %#x did not stabilize\n", reg.addr);
+	xe_tile_WARN(mmio->tile, retries == 0,
+		     "MMIO: 64-bit read of %#x did not stabilize\n", reg.addr);
 
 	return (u64)udw << 32 | ldw;
 }
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 4cb578182912..39e4fc85f019 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -36,9 +36,6 @@ module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600)
 MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 "
 		 "[default=" __stringify(XE_DEFAULT_SVM_NOTIFIER_SIZE) "]");
 
-module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
-MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
-
 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
 module_param_named(probe_display, xe_modparam.probe_display, bool, 0444);
 MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched "
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 79cb9639c0f3..c75153471248 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -10,7 +10,6 @@
 
 /* Module modprobe variables */
 struct xe_modparam {
-	bool force_execlist;
 	bool probe_display;
 	int force_vram_bar_size;
 	int guc_log_level;
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 2dce6a47202c..b3acbcd678b7 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -1702,11 +1702,12 @@ static int xe_oa_release(struct inode *inode, struct file *file)
 static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct xe_oa_stream *stream = file->private_data;
+	int ret = xe_observation_paranoid_check();
 	struct xe_bo *bo = stream->oa_buffer.bo;
 
-	if (xe_observation_paranoid && !perfmon_capable()) {
+	if (ret) {
 		drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n");
-		return -EACCES;
+		return ret;
 	}
 
 	/* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */
@@ -2080,10 +2081,12 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
 		privileged_op = true;
 	}
 
-	if (privileged_op && xe_observation_paranoid && !perfmon_capable()) {
-		drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n");
-		ret = -EACCES;
-		goto err_exec_q;
+	if (privileged_op) {
+		ret = xe_observation_paranoid_check();
+		if (ret) {
+			drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n");
+			goto err_exec_q;
+		}
 	}
 
 	if (!param.exec_q && !param.sample) {
@@ -2365,9 +2368,10 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi
 		return -ENODEV;
 	}
 
-	if (xe_observation_paranoid && !perfmon_capable()) {
+	err = xe_observation_paranoid_check();
+	if (err) {
 		drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n");
-		return -EACCES;
+		return err;
 	}
 
 	err = copy_from_user(&param, u64_to_user_ptr(data), sizeof(param));
@@ -2467,9 +2471,10 @@ int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file
 		return -ENODEV;
 	}
 
-	if (xe_observation_paranoid && !perfmon_capable()) {
+	ret = xe_observation_paranoid_check();
+	if (ret) {
 		drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n");
-		return -EACCES;
+		return ret;
 	}
 
 	ret = get_user(arg, ptr);
diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c
index e3f9b546207e..39e05b9131a7 100644
--- a/drivers/gpu/drm/xe/xe_observation.c
+++ b/drivers/gpu/drm/xe/xe_observation.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/errno.h>
+#include <linux/perf_event.h>
 #include <linux/sysctl.h>
 
 #include <uapi/drm/xe_drm.h>
@@ -12,9 +13,28 @@
 #include "xe_oa.h"
 #include "xe_observation.h"
 
-u32 xe_observation_paranoid = true;
+static u32 xe_observation_paranoid = true;
 static struct ctl_table_header *sysctl_header;
 
+/**
+ * xe_observation_paranoid_check - Gate access to xe observation streams.
+ *
+ * When the xe-specific observation_paranoid sysctl is enabled (the
+ * default), defer to perf_allow_cpu() so that access is governed by the
+ * same policy as system-wide perf CPU events: kernel.perf_event_paranoid
+ * plus the security_perf_event_open() LSM hook. When the sysctl has been
+ * cleared by a privileged user, observation is open to all callers.
+ *
+ * Return: 0 if access is permitted, a negative errno otherwise.
+ */
+int xe_observation_paranoid_check(void)
+{
+	if (!xe_observation_paranoid)
+		return 0;
+
+	return perf_allow_cpu();
+}
+
 static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_observation_param *arg,
 		       struct drm_file *file)
 {
@@ -83,11 +103,13 @@ static const struct ctl_table observation_ctl_table[] = {
 };
 
 /**
- * xe_observation_sysctl_register - Register xe_observation_paranoid sysctl
+ * xe_observation_sysctl_register - Register the observation_paranoid sysctl
  *
- * Normally only superuser/root can access observation stream
- * data. However, superuser can set xe_observation_paranoid sysctl to 0 to
- * allow non-privileged users to also access observation data.
+ * When dev.xe.observation_paranoid is set (the default), access to
+ * observation streams follows the system-wide perf_allow_cpu() policy:
+ * kernel.perf_event_paranoid plus the security_perf_event_open() LSM
+ * hook. A privileged user can clear the sysctl to bypass that gate and
+ * allow unprivileged access to observation data.
  *
  * Return: always returns 0
  */
diff --git a/drivers/gpu/drm/xe/xe_observation.h b/drivers/gpu/drm/xe/xe_observation.h
index 17816998e966..73a03e03c96a 100644
--- a/drivers/gpu/drm/xe/xe_observation.h
+++ b/drivers/gpu/drm/xe/xe_observation.h
@@ -11,8 +11,7 @@
 struct drm_device;
 struct drm_file;
 
-extern u32 xe_observation_paranoid;
-
+int xe_observation_paranoid_check(void);
 int xe_observation_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
 int xe_observation_sysctl_register(void);
 void xe_observation_sysctl_unregister(void);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 3165686e3e04..91af603e9431 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -26,6 +26,7 @@
 #include "xe_guc.h"
 #include "xe_mmio.h"
 #include "xe_module.h"
+#include "xe_pci_error.h"
 #include "xe_pci_rebar.h"
 #include "xe_pci_sriov.h"
 #include "xe_pci_types.h"
@@ -355,6 +356,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
 	PLATFORM(PVC),
 	.dma_mask_size = 52,
 	.has_display = false,
+	.has_drm_ras = true,
 	.has_gsc_nvm = 1,
 	.has_heci_gscfi = 1,
 	.max_gt_per_tile = 1,
@@ -457,6 +459,7 @@ static const struct xe_device_desc cri_desc = {
 	PLATFORM(CRESCENTISLAND),
 	.dma_mask_size = 52,
 	.has_display = false,
+	.has_drm_ras = true,
 	.has_flat_ccs = false,
 	.has_gsc_nvm = 1,
 	.has_i2c = true,
@@ -599,8 +602,6 @@ static int read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u
 	struct xe_reg gmdid_reg = GMD_ID;
 	u32 val;
 
-	KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid);
-
 	if (IS_SRIOV_VF(xe)) {
 		/*
 		 * To get the value of the GMDID register, VFs must obtain it
@@ -726,14 +727,30 @@ static int handle_gmdid(struct xe_device *xe,
 	return 0;
 }
 
-static void init_devid(struct xe_device *xe)
+struct xe_probed_info {
+	u16 devid;
+	u8 revid;
+	u8 tile_count;
+	struct xe_step_info step;
+	const struct xe_ip *graphics_ip;
+	const struct xe_ip *media_ip;
+};
+
+/*
+ * Probe from the hardware the info required by xe_info_init_early().
+ */
+static int xe_probe_info_early(struct xe_device *xe,
+			       const struct xe_device_desc *desc,
+			       struct xe_probed_info *probed_info)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 
-	KUNIT_STATIC_STUB_REDIRECT(init_devid, xe);
+	probed_info->devid = pdev->device;
+	probed_info->revid = pdev->revision;
 
-	xe->info.devid = pdev->device;
-	xe->info.revid = pdev->revision;
+	xe_step_platform_get(desc->platform, probed_info->revid, &probed_info->step);
+
+	return 0;
 }
 
 /*
@@ -742,17 +759,20 @@ static void init_devid(struct xe_device *xe)
  */
 static int xe_info_init_early(struct xe_device *xe,
 			      const struct xe_device_desc *desc,
-			      const struct xe_subplatform_desc *subplatform_desc)
+			      const struct xe_subplatform_desc *subplatform_desc,
+			      struct xe_probed_info *probed_info)
 {
 	int err;
 
+	xe->info.devid = probed_info->devid;
+	xe->info.revid = probed_info->revid;
+	xe->info.step.platform = probed_info->step.platform;
+
 	xe->info.platform_name = desc->platform_name;
 	xe->info.platform = desc->platform;
 	xe->info.subplatform = subplatform_desc ?
 		subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
 
-	init_devid(xe);
-
 	xe->info.dma_mask_size = desc->dma_mask_size;
 	xe->info.va_bits = desc->va_bits;
 	xe->info.vm_max_level = desc->vm_max_level;
@@ -760,6 +780,7 @@ static int xe_info_init_early(struct xe_device *xe,
 
 	xe->info.is_dgfx = desc->is_dgfx;
 	xe->info.has_cached_pt = desc->has_cached_pt;
+	xe->info.has_drm_ras = desc->has_drm_ras;
 	xe->info.has_fan_control = desc->has_fan_control;
 	/* runtime fusing may force flat_ccs to disabled later */
 	xe->info.has_flat_ccs = desc->has_flat_ccs;
@@ -789,14 +810,10 @@ static int xe_info_init_early(struct xe_device *xe,
 	xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
 				 xe_modparam.probe_display &&
 				 desc->has_display;
-	xe->info.force_execlist = xe_modparam.force_execlist;
 
 	xe_assert(xe, desc->max_gt_per_tile > 0);
 	xe_assert(xe, desc->max_gt_per_tile <= XE_MAX_GT_PER_TILE);
 	xe->info.max_gt_per_tile = desc->max_gt_per_tile;
-	xe->info.tile_count = 1 + desc->max_remote_tiles;
-
-	xe_step_platform_get(xe);
 
 	err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0);
 	if (err)
@@ -805,22 +822,21 @@ static int xe_info_init_early(struct xe_device *xe,
 	return 0;
 }
 
-/*
- * Possibly override number of tile based on configuration register.
- */
-static void xe_info_probe_tile_count(struct xe_device *xe)
+static void xe_probe_tile_count(struct xe_device *xe,
+				const struct xe_device_desc *desc,
+				struct xe_probed_info *probed_info)
 {
 	struct xe_mmio *mmio;
 	u8 tile_count;
 	u32 mtcfg;
 
-	KUNIT_STATIC_STUB_REDIRECT(xe_info_probe_tile_count, xe);
+	probed_info->tile_count = 1 + desc->max_remote_tiles;
 
 	/*
 	 * Probe for tile count only for platforms that support multiple
 	 * tiles.
 	 */
-	if (xe->info.tile_count == 1)
+	if (probed_info->tile_count == 1)
 		return;
 
 	mmio = xe_root_tile_mmio(xe);
@@ -833,10 +849,10 @@ static void xe_info_probe_tile_count(struct xe_device *xe)
 	mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR);
 	tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
 
-	if (tile_count < xe->info.tile_count) {
+	if (tile_count < probed_info->tile_count) {
 		drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
-			 xe->info.tile_count, tile_count);
-		xe->info.tile_count = tile_count;
+			 probed_info->tile_count, tile_count);
+		probed_info->tile_count = tile_count;
 	}
 }
 
@@ -909,25 +925,10 @@ static struct xe_gt *alloc_media_gt(struct xe_tile *tile,
 	return gt;
 }
 
-/*
- * Initialize device info content that does require knowledge about
- * graphics / media IP version.
- * Make sure that GT / tile structures allocated by the driver match the data
- * present in device info.
- */
-static int xe_info_init(struct xe_device *xe,
-			const struct xe_device_desc *desc)
+static int xe_probe_ips(struct xe_device *xe,
+			const struct xe_device_desc *desc,
+			struct xe_probed_info *probed_info)
 {
-	u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0;
-	const struct xe_ip *graphics_ip;
-	const struct xe_ip *media_ip;
-	const struct xe_graphics_desc *graphics_desc;
-	const struct xe_media_desc *media_desc;
-	struct xe_tile *tile;
-	struct xe_gt *gt;
-	int ret;
-	u8 id;
-
 	/*
 	 * If this platform supports GMD_ID, we'll detect the proper IP
 	 * descriptor to use from hardware registers.
@@ -936,17 +937,21 @@ static int xe_info_init(struct xe_device *xe,
 	 * versions are simply derived from that.
 	 */
 	if (desc->pre_gmdid_graphics_ip) {
-		graphics_ip = desc->pre_gmdid_graphics_ip;
-		media_ip = desc->pre_gmdid_media_ip;
-		xe_step_pre_gmdid_get(xe);
+		probed_info->graphics_ip = desc->pre_gmdid_graphics_ip;
+		probed_info->media_ip = desc->pre_gmdid_media_ip;
+		xe_step_pre_gmdid_get(xe, &probed_info->step);
 	} else {
+		int err;
+		u32 graphics_revid, media_revid;
+
 		xe_assert(xe, !desc->pre_gmdid_media_ip);
-		ret = handle_gmdid(xe, &graphics_ip, &media_ip,
-				   &graphics_gmdid_revid, &media_gmdid_revid);
-		if (ret)
-			return ret;
 
-		xe_step_gmdid_get(xe, graphics_gmdid_revid, media_gmdid_revid);
+		err = handle_gmdid(xe, &probed_info->graphics_ip, &probed_info->media_ip,
+				   &graphics_revid, &media_revid);
+		if (err)
+			return err;
+
+		xe_step_gmdid_get(xe, graphics_revid, media_revid, &probed_info->step);
 	}
 
 	/*
@@ -954,9 +959,56 @@ static int xe_info_init(struct xe_device *xe,
 	 * error and we should abort driver load.  Failing to detect media
 	 * IP is non-fatal; we'll just proceed without enabling media support.
 	 */
-	if (!graphics_ip)
+	if (!probed_info->graphics_ip)
 		return -ENODEV;
 
+	return 0;
+}
+
+/*
+ * Probe from the hardware the info required by xe_info_init().
+ */
+static int xe_probe_info(struct xe_device *xe,
+			 const struct xe_device_desc *desc,
+			 struct xe_probed_info *probed_info)
+{
+	int err;
+
+	xe_probe_tile_count(xe, desc, probed_info);
+
+	err = xe_probe_ips(xe, desc, probed_info);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/*
+ * Initialize device info content that does require knowledge about
+ * graphics / media IP version.
+ * Make sure that GT / tile structures allocated by the driver match the data
+ * present in device info.
+ */
+static int xe_info_init(struct xe_device *xe,
+			const struct xe_device_desc *desc,
+			struct xe_probed_info *probed_info)
+{
+	const struct xe_ip *graphics_ip;
+	const struct xe_ip *media_ip;
+	const struct xe_graphics_desc *graphics_desc;
+	const struct xe_media_desc *media_desc;
+	struct xe_tile *tile;
+	struct xe_gt *gt;
+	u8 id;
+
+	graphics_ip = probed_info->graphics_ip;
+	media_ip = probed_info->media_ip;
+
+	xe->info.tile_count = probed_info->tile_count;
+	xe->info.step.basedie = probed_info->step.basedie;
+	xe->info.step.graphics = probed_info->step.graphics;
+	xe->info.step.media = probed_info->step.media;
+
 	xe->info.graphics_verx100 = graphics_ip->verx100;
 	xe->info.graphics_name = graphics_ip->name;
 	graphics_desc = graphics_ip->desc;
@@ -988,8 +1040,6 @@ static int xe_info_init(struct xe_device *xe,
 		xe->info.has_soc_remapper_telem = 0;
 	}
 
-	xe_info_probe_tile_count(xe);
-
 	for_each_remote_tile(tile, xe, id) {
 		int err;
 
@@ -1072,9 +1122,11 @@ static void xe_pci_remove(struct pci_dev *pdev)
  */
 static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
+	struct xe_probed_info probed_info = {};
 	const struct xe_device_desc *desc = (const void *)ent->driver_data;
 	const struct xe_subplatform_desc *subplatform_desc;
 	struct xe_device *xe;
+	void *group;
 	int err;
 
 	subplatform_desc = find_subplatform(desc, pdev->device);
@@ -1102,6 +1154,11 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (xe_display_driver_probe_defer(pdev))
 		return -EPROBE_DEFER;
 
+	/* Group all devres so xe_pci_error_slot_reset() can release them as a unit. */
+	group = devres_open_group(&pdev->dev, NULL, GFP_KERNEL);
+	if (!group)
+		return -ENOMEM;
+
 	err = pcim_enable_device(pdev);
 	if (err)
 		return err;
@@ -1110,13 +1167,19 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (IS_ERR(xe))
 		return PTR_ERR(xe);
 
+	xe->devres_group = group;
+
 	pci_set_drvdata(pdev, &xe->drm);
 
 	xe_pm_assert_unbounded_bridge(xe);
 
 	pci_set_master(pdev);
 
-	err = xe_info_init_early(xe, desc, subplatform_desc);
+	err = xe_probe_info_early(xe, desc, &probed_info);
+	if (err)
+		return err;
+
+	err = xe_info_init_early(xe, desc, subplatform_desc, &probed_info);
 	if (err)
 		return err;
 
@@ -1135,7 +1198,11 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		return err;
 
-	err = xe_info_init(xe, desc);
+	err = xe_probe_info(xe, desc, &probed_info);
+	if (err)
+		return err;
+
+	err = xe_info_init(xe, desc, &probed_info);
 	if (err)
 		return err;
 
@@ -1348,6 +1415,7 @@ static struct pci_driver xe_pci_driver = {
 	.remove = xe_pci_remove,
 	.shutdown = xe_pci_shutdown,
 	.sriov_configure = xe_pci_sriov_configure,
+	.err_handler = &xe_pci_error_handlers,
 #ifdef CONFIG_PM_SLEEP
 	.driver.pm = &xe_pm_ops,
 #endif
diff --git a/drivers/gpu/drm/xe/xe_pci_error.c b/drivers/gpu/drm/xe/xe_pci_error.c
new file mode 100644
index 000000000000..9b78cc0d3293
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci_error.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <linux/pci.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_pci.h"
+#include "xe_pm.h"
+#include "xe_printk.h"
+#include "xe_survivability_mode.h"
+
+static void prepare_device_for_reset(struct pci_dev *pdev)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+	struct xe_gt *gt;
+	u8 id;
+
+	/*
+	 * Wedge the device to prevent userspace access but do not send the uevent.
+	 * xe_device_wedged_fini() releases runtime pm if wedged flag is set, so acquire a runtime
+	 * pm reference to avoid underflow.
+	 */
+	if (!atomic_xchg(&xe->wedged.flag, 1))
+		xe_pm_runtime_get_noresume(xe);
+
+	xe_device_set_in_reset(xe);
+
+	for_each_gt(gt, xe, id)
+		xe_gt_declare_wedged(gt);
+
+	pci_disable_device(pdev);
+}
+
+static pci_ers_result_t xe_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+
+	xe_info(xe, "PCI error: detected state = %d\n", state);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	/* If the device is already wedged or in survivability mode, do not attempt recovery */
+	if (xe_survivability_mode_is_boot_enabled(xe) || xe_device_wedged(xe))
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	switch (state) {
+	case pci_channel_io_normal:
+		return PCI_ERS_RESULT_CAN_RECOVER;
+	case pci_channel_io_frozen:
+		prepare_device_for_reset(pdev);
+		return PCI_ERS_RESULT_NEED_RESET;
+	default:
+		xe_info(xe, "PCI error: unknown state %d\n", state);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+}
+
+static pci_ers_result_t xe_pci_error_mmio_enabled(struct pci_dev *pdev)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+
+	xe_info(xe, "PCI error: MMIO enabled\n");
+
+	/* TODO: Query system controller for the type of error and take appropriate action */
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t xe_pci_error_slot_reset(struct pci_dev *pdev)
+{
+	const struct pci_device_id *ent = pci_match_id(pdev->driver->id_table, pdev);
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+
+	xe_info(xe, "PCI error: slot reset\n");
+
+	pci_restore_state(pdev);
+
+	if (pci_enable_device(pdev)) {
+		xe_err(xe, "Cannot re-enable PCI device after reset\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	/*
+	 * Secondary Bus Reset causes all VRAM state to be lost along with
+	 * hardware state. As an initial step, re-probe the device to
+	 * re-initialize the driver and hardware.
+	 * TODO: optimize by re-initializing only the hardware state and re-creating
+	 * kernel BOs.
+	 */
+	xe_device_clear_in_reset(xe);
+	pdev->driver->remove(pdev);
+	devres_release_group(&pdev->dev, xe->devres_group);
+
+	if (pdev->driver->probe(pdev, ent))
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	xe = pdev_to_xe_device(pdev);
+
+	/* Wedge the device to prevent I/O operations till the resume callback */
+	atomic_set(&xe->wedged.flag, 1);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void xe_pci_error_resume(struct pci_dev *pdev)
+{
+	struct xe_device *xe = pdev_to_xe_device(pdev);
+
+	xe_info(xe, "PCI error: resume\n");
+
+	atomic_set(&xe->wedged.flag, 0);
+}
+
+const struct pci_error_handlers xe_pci_error_handlers = {
+	.error_detected	= xe_pci_error_detected,
+	.mmio_enabled	= xe_pci_error_mmio_enabled,
+	.slot_reset	= xe_pci_error_slot_reset,
+	.resume		= xe_pci_error_resume,
+};
diff --git a/drivers/gpu/drm/xe/xe_pci_error.h b/drivers/gpu/drm/xe/xe_pci_error.h
new file mode 100644
index 000000000000..725ad0214e62
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci_error.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_PCI_ERROR_H_
+#define _XE_PCI_ERROR_H_
+
+struct pci_error_handlers;
+
+extern const struct pci_error_handlers xe_pci_error_handlers;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 5b85e2c24b7b..24d4a3d00517 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -40,6 +40,7 @@ struct xe_device_desc {
 
 	u8 has_cached_pt:1;
 	u8 has_display:1;
+	u8 has_drm_ras:1;
 	u8 has_fan_control:1;
 	u8 has_flat_ccs:1;
 	u8 has_gsc_nvm:1;
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index dc66d0c7ee06..866986694d9c 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -323,15 +323,17 @@ int xe_pcode_ready(struct xe_device *xe, bool locked)
 }
 
 /**
- * xe_pcode_init: initialize components of PCODE
+ * xe_pcode_init_early() - Initialize components of PCODE
  * @tile: tile instance
  *
  * This function initializes the xe_pcode component.
  * To be called once only during probe.
+ *
+ * Return: 0 on success or a negative error code on failure.
  */
-void xe_pcode_init(struct xe_tile *tile)
+int xe_pcode_init_early(struct xe_tile *tile)
 {
-	drmm_mutex_init(&tile_to_xe(tile)->drm, &tile->pcode.lock);
+	return drmm_mutex_init(&tile_to_xe(tile)->drm, &tile->pcode.lock);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
index 490e4f269607..18260c29e620 100644
--- a/drivers/gpu/drm/xe/xe_pcode.h
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -12,7 +12,7 @@ struct drm_device;
 struct xe_device;
 struct xe_tile;
 
-void xe_pcode_init(struct xe_tile *tile);
+int xe_pcode_init_early(struct xe_tile *tile);
 int xe_pcode_probe_early(struct xe_device *xe);
 int xe_pcode_ready(struct xe_device *xe, bool locked);
 int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq,
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 670bc2206fea..5fdad444009f 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1026,12 +1026,22 @@ xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *t
 	u64 *ptr = data;
 	u32 i;
 
+	/*
+	 * @qword_ofs is the absolute entry offset within the page table, while
+	 * @ptes is indexed relative to @update->ofs (its first entry). The GPU
+	 * path (write_pgtable) splits a single update into MAX_PTE_PER_SDI-sized
+	 * chunks, calling this with an advancing @qword_ofs but a fresh @data
+	 * pointer per chunk, so translate back into a @ptes index rather than
+	 * assuming the chunk starts at ptes[0].
+	 */
 	for (i = 0; i < num_qwords; i++) {
+		u32 idx = qword_ofs - update->ofs + i;
+
 		if (map)
 			xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
-				  sizeof(u64), u64, ptes[i].pte);
+				  sizeof(u64), u64, ptes[idx].pte);
 		else
-			ptr[i] = ptes[i].pte;
+			ptr[i] = ptes[idx].pte;
 	}
 }
 
@@ -2070,6 +2080,9 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
 		 * automatically when the context is re-enabled by the rebind worker,
 		 * or in fault mode it was invalidated on PTE zapping.
 		 *
+		 * If rebind, on LR VMs that use context-based TLB invalidation we have to
+		 * invalidate the TLB, as they cannot rely on context re-enable to do it.
+		 *
 		 * If !rebind, and scratch enabled VMs, there is a chance the scratch
 		 * PTE is already cached in the TLB so it needs to be invalidated.
 		 * On !LR VMs this is done in the ring ops preceding a batch, but on
@@ -2079,6 +2092,9 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
 		if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
 		     xe_vm_in_lr_mode(vm)))
 			pt_update_ops->needs_invalidation = true;
+		else if (pt_op->rebind && xe_vm_in_preempt_fence_mode(vm) &&
+			 vm->xe->info.has_ctx_tlb_inval)
+			pt_update_ops->needs_invalidation = true;
 		else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
 			/* We bump also if batch_invalidate_tlb is true */
 			vm->tlb_flush_seqno++;
diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
index 968b7e70b3f9..fea3d8ceeddb 100644
--- a/drivers/gpu/drm/xe/xe_pxp.c
+++ b/drivers/gpu/drm/xe/xe_pxp.c
@@ -59,6 +59,7 @@ bool xe_pxp_is_enabled(const struct xe_pxp *pxp)
 static bool pxp_prerequisites_done(const struct xe_pxp *pxp)
 {
 	struct xe_gt *gt = pxp->gt;
+	bool huc_ok;
 	bool ready;
 
 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
@@ -73,9 +74,14 @@ static bool pxp_prerequisites_done(const struct xe_pxp *pxp)
 	 */
 	XE_WARN_ON(!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL));
 
-	/* PXP requires both HuC authentication via GSC and GSC proxy initialized */
-	ready = xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC) &&
-		xe_gsc_proxy_init_done(&gt->uc.gsc);
+	/*
+	 * PXP requires GSC proxy to be initialized. On platforms where the HuC
+	 * is loaded by the kernel driver (i.e., pre media 35) PXP also requires
+	 * the HuC to be authenticated by GSC.
+	 */
+	huc_ok = MEDIA_VER(gt_to_xe(gt)) >= 35 ||
+		 xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);
+	ready = huc_ok && xe_gsc_proxy_init_done(&gt->uc.gsc);
 
 	return ready;
 }
@@ -97,9 +103,13 @@ int xe_pxp_get_readiness_status(struct xe_pxp *pxp)
 	if (!xe_pxp_is_enabled(pxp))
 		return -ENODEV;
 
-	/* if the GSC or HuC FW are in an error state, PXP will never work */
-	if (xe_uc_fw_status_to_error(pxp->gt->uc.huc.fw.status) ||
-	    xe_uc_fw_status_to_error(pxp->gt->uc.gsc.fw.status))
+	/* If the GSC FW is in an error state, PXP will never work */
+	if (xe_uc_fw_status_to_error(pxp->gt->uc.gsc.fw.status))
+		return -EIO;
+
+	/* Same for HuC FW, but only if the kernel owns HuC-loading (i.e. pre-NVL) */
+	if (MEDIA_VER(gt_to_xe(pxp->gt)) < 35 &&
+	    xe_uc_fw_status_to_error(pxp->gt->uc.huc.fw.status))
 		return -EIO;
 
 	guard(xe_pm_runtime)(pxp->xe);
@@ -361,6 +371,7 @@ static void pxp_fini(void *arg)
 int xe_pxp_init(struct xe_device *xe)
 {
 	struct xe_gt *gt = xe->tiles[0].media_gt;
+	bool gsc_ok, huc_ok;
 	struct xe_pxp *pxp;
 	int err;
 
@@ -375,10 +386,14 @@ int xe_pxp_init(struct xe_device *xe)
 	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
 		return 0;
 
-	/* PXP requires both GSC and HuC firmwares to be available */
-	if (!xe_uc_fw_is_loadable(&gt->uc.gsc.fw) ||
-	    !xe_uc_fw_is_loadable(&gt->uc.huc.fw)) {
-		drm_info(&xe->drm, "skipping PXP init due to missing FW dependencies");
+	/* PXP requires GSC FW to be available. Pre-NVL it also requires HuC FW */
+	gsc_ok = xe_uc_fw_is_loadable(&gt->uc.gsc.fw);
+	huc_ok = MEDIA_VER(xe) >= 35 || xe_uc_fw_is_loadable(&gt->uc.huc.fw);
+
+	if (!gsc_ok || !huc_ok) {
+		drm_info(&xe->drm, "Skipping PXP due to unsatisfied FW deps - GSC=%s, HuC=%s\n",
+			 str_yes_no(gsc_ok),
+			 MEDIA_VER(xe) >= 35 ? "not needed" : str_yes_no(huc_ok));
 		return 0;
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 8c7d54498f38..dc975f595368 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -119,6 +119,7 @@ query_engine_cycles(struct xe_device *xe,
 	struct drm_xe_engine_class_instance *eci;
 	struct drm_xe_query_engine_cycles resp;
 	size_t size = sizeof(resp);
+	enum xe_force_wake_domains fw_domain;
 	__ktime_func_t cpu_clock;
 	struct xe_hw_engine *hwe;
 	struct xe_gt *gt;
@@ -154,8 +155,10 @@ query_engine_cycles(struct xe_device *xe,
 	if (!hwe)
 		return -EINVAL;
 
-	xe_with_force_wake(fw_ref, gt_to_fw(gt), XE_FORCEWAKE_ALL) {
-		if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
+	fw_domain = xe_hw_engine_to_fw_domain(hwe);
+
+	xe_with_force_wake(fw_ref, gt_to_fw(gt), fw_domain) {
+		if (!xe_force_wake_ref_has_domain(fw_ref.domains, fw_domain))
 			return -EIO;
 
 		hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp,
diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c
index 4cb16b419b0c..74d5016d9ffe 100644
--- a/drivers/gpu/drm/xe/xe_ras.c
+++ b/drivers/gpu/drm/xe/xe_ras.c
@@ -4,11 +4,15 @@
  */
 
 #include "xe_device.h"
+#include "xe_drm_ras.h"
+#include "xe_pm.h"
 #include "xe_printk.h"
 #include "xe_ras.h"
 #include "xe_ras_types.h"
 #include "xe_sysctrl.h"
 #include "xe_sysctrl_event_types.h"
+#include "xe_sysctrl_mailbox.h"
+#include "xe_sysctrl_mailbox_types.h"
 
 /* Severity of detected errors  */
 enum xe_ras_severity {
@@ -31,6 +35,17 @@ enum xe_ras_component {
 	XE_RAS_COMP_MAX
 };
 
+/* RAS response status codes */
+enum xe_ras_response_status {
+	XE_RAS_STATUS_SUCCESS = 0,
+	XE_RAS_STATUS_INVALID_PARAM,
+	XE_RAS_STATUS_OP_NOT_SUPPORTED,
+	XE_RAS_STATUS_TIMEOUT,
+	XE_RAS_STATUS_HARDWARE_FAILURE,
+	XE_RAS_STATUS_INSUFFICIENT_RESOURCES,
+	XE_RAS_STATUS_MAX
+};
+
 static const char *const xe_ras_severities[] = {
 	[XE_RAS_SEV_NOT_SUPPORTED]		= "Not Supported",
 	[XE_RAS_SEV_CORRECTABLE]		= "Correctable Error",
@@ -50,6 +65,56 @@ static const char *const xe_ras_components[] = {
 };
 static_assert(ARRAY_SIZE(xe_ras_components) == XE_RAS_COMP_MAX);
 
+static u8 drm_to_xe_ras_severity(u8 severity)
+{
+	switch (severity) {
+	case DRM_XE_RAS_ERR_SEV_CORRECTABLE:
+		return XE_RAS_SEV_CORRECTABLE;
+	case DRM_XE_RAS_ERR_SEV_UNCORRECTABLE:
+		return XE_RAS_SEV_UNCORRECTABLE;
+	default:
+		return XE_RAS_SEV_NOT_SUPPORTED;
+	}
+}
+
+static u8 drm_to_xe_ras_component(u8 component)
+{
+	switch (component) {
+	case DRM_XE_RAS_ERR_COMP_CORE_COMPUTE:
+		return XE_RAS_COMP_CORE_COMPUTE;
+	case DRM_XE_RAS_ERR_COMP_SOC_INTERNAL:
+		return XE_RAS_COMP_SOC_INTERNAL;
+	case DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY:
+		return XE_RAS_COMP_DEVICE_MEMORY;
+	case DRM_XE_RAS_ERR_COMP_PCIE:
+		return XE_RAS_COMP_PCIE;
+	case DRM_XE_RAS_ERR_COMP_FABRIC:
+		return XE_RAS_COMP_FABRIC;
+	default:
+		return XE_RAS_COMP_NOT_SUPPORTED;
+	}
+}
+
+static int ras_status_to_errno(u32 status)
+{
+	switch (status) {
+	case XE_RAS_STATUS_SUCCESS:
+		return 0;
+	case XE_RAS_STATUS_INVALID_PARAM:
+		return -EINVAL;
+	case XE_RAS_STATUS_OP_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	case XE_RAS_STATUS_TIMEOUT:
+		return -ETIMEDOUT;
+	case XE_RAS_STATUS_HARDWARE_FAILURE:
+		return -EIO;
+	case XE_RAS_STATUS_INSUFFICIENT_RESOURCES:
+		return -ENOSPC;
+	default:
+		return -EPROTO;
+	}
+}
+
 static inline const char *sev_to_str(u8 severity)
 {
 	if (severity >= XE_RAS_SEV_MAX)
@@ -66,6 +131,68 @@ static inline const char *comp_to_str(u8 component)
 	return xe_ras_components[component];
 }
 
+static struct pci_dev *find_usp_dev(struct pci_dev *pdev)
+{
+	struct pci_dev *vsp;
+
+	/*
+	 * Device Hierarchy:
+	 *
+	 * Upstream Switch Port (USP) --> Virtual Switch Port (VSP) --> SGunit (GPU endpoint)
+	 */
+	vsp = pci_upstream_bridge(pdev);
+	if (!vsp)
+		return NULL;
+
+	return pci_upstream_bridge(vsp);
+}
+
+static void ras_usp_aer_init(struct xe_device *xe)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_dev *usp;
+	u16 aer_cap;
+	u32 status;
+
+	usp = find_usp_dev(pdev);
+	if (!usp)
+		return;
+
+	aer_cap = pci_find_ext_capability(usp, PCI_EXT_CAP_ID_ERR);
+	if (!aer_cap) {
+		dev_warn(&usp->dev, "AER capability unavailable\n");
+		return;
+	}
+
+	/*
+	 * Clear any stale Uncorrectable Internal Error Status event in Uncorrectable Error
+	 * Status Register.
+	 */
+	pci_read_config_dword(usp, aer_cap + PCI_ERR_UNCOR_STATUS, &status);
+	if (status & PCI_ERR_UNC_INTN)
+		pci_write_config_dword(usp, aer_cap + PCI_ERR_UNCOR_STATUS, PCI_ERR_UNC_INTN);
+
+	/*
+	 * All errors are steered to USP which is a PCIe AER Compliant device.
+	 * Downgrade all the errors to non-fatal to prevent PCIe bus driver
+	 * from triggering a Secondary Bus Reset (SBR). This allows error
+	 * detection, containment and recovery in the driver.
+	 *
+	 * The Uncorrectable Error Severity Register has the 'Uncorrectable
+	 * Internal Error Severity' set to fatal by default. Set this to
+	 * non-fatal and unmask the error.
+	 */
+
+	/* Downgrade Uncorrectable Internal Error to non-fatal */
+	pci_clear_and_set_config_dword(usp, aer_cap + PCI_ERR_UNCOR_SEVER, PCI_ERR_UNC_INTN, 0);
+
+	/* Unmask Uncorrectable Internal Error */
+	pci_clear_and_set_config_dword(usp, aer_cap + PCI_ERR_UNCOR_MASK, PCI_ERR_UNC_INTN, 0);
+
+	pci_save_state(usp);
+	dev_dbg(&usp->dev, "Uncorrectable Internal Errors downgraded and unmasked\n");
+}
+
 void xe_ras_counter_threshold_crossed(struct xe_device *xe,
 				      struct xe_sysctrl_event_response *response)
 {
@@ -91,3 +218,136 @@ void xe_ras_counter_threshold_crossed(struct xe_device *xe,
 			comp_to_str(component), sev_to_str(severity));
 	}
 }
+
+static int get_counter(struct xe_device *xe, struct xe_ras_error_class *counter, u32 *value)
+{
+	struct xe_ras_get_counter_response response = {0};
+	struct xe_ras_get_counter_request request = {0};
+	struct xe_sysctrl_mailbox_command command = {0};
+	struct xe_ras_error_common *common;
+	size_t rlen;
+	int ret;
+
+	request.counter = *counter;
+
+	xe_sysctrl_create_command(&command, XE_SYSCTRL_GROUP_GFSP, XE_SYSCTRL_CMD_GET_COUNTER,
+				  &request, sizeof(request), &response, sizeof(response));
+
+	ret = xe_sysctrl_send_command(&xe->sc, &command, &rlen);
+	if (ret) {
+		xe_err(xe, "sysctrl: failed to get counter %d\n", ret);
+		return ret;
+	}
+
+	if (rlen != sizeof(response)) {
+		xe_err(xe, "sysctrl: unexpected get counter response length %zu (expected %zu)\n",
+		       rlen, sizeof(response));
+		return -EIO;
+	}
+
+	common = &response.counter.common;
+	*value = response.value;
+
+	xe_dbg(xe, "[RAS]: get counter %u for %s %s\n", *value, comp_to_str(common->component),
+	       sev_to_str(common->severity));
+
+	return 0;
+}
+
+/**
+ * xe_ras_get_counter() - Get error counter value
+ * @xe: Xe device instance
+ * @severity: Error severity to be queried (&enum drm_xe_ras_error_severity)
+ * @component: Error component to be queried (&enum drm_xe_ras_error_component)
+ * @value: Counter value
+ *
+ * This function retrieves the value of a specific error counter based on
+ * the error severity and component.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_ras_get_counter(struct xe_device *xe, u8 severity, u8 component, u32 *value)
+{
+	struct xe_ras_error_class counter = {0};
+
+	counter.common.severity = drm_to_xe_ras_severity(severity);
+	counter.common.component = drm_to_xe_ras_component(component);
+
+	guard(xe_pm_runtime)(xe);
+	return get_counter(xe, &counter, value);
+}
+
+/**
+ * xe_ras_clear_counter() - Clear error counter value
+ * @xe: Xe device instance
+ * @severity: Error severity to be cleared (&enum drm_xe_ras_error_severity)
+ * @component: Error component to be cleared (&enum drm_xe_ras_error_component)
+ *
+ * This function clears the value of a specific error counter based on
+ * the error severity and component.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_ras_clear_counter(struct xe_device *xe, u8 severity, u8 component)
+{
+	struct xe_ras_clear_counter_response response = {0};
+	struct xe_ras_clear_counter_request request = {0};
+	struct xe_sysctrl_mailbox_command command = {0};
+	struct xe_ras_error_class *counter;
+	size_t rlen;
+	int ret;
+
+	counter = &request.counter;
+	counter->common.severity = drm_to_xe_ras_severity(severity);
+	counter->common.component = drm_to_xe_ras_component(component);
+
+	xe_sysctrl_create_command(&command, XE_SYSCTRL_GROUP_GFSP, XE_SYSCTRL_CMD_CLEAR_COUNTER,
+				  &request, sizeof(request), &response, sizeof(response));
+
+	guard(xe_pm_runtime)(xe);
+	ret = xe_sysctrl_send_command(&xe->sc, &command, &rlen);
+	if (ret) {
+		xe_err(xe, "sysctrl: failed to clear counter %d\n", ret);
+		return ret;
+	}
+
+	if (rlen != sizeof(response)) {
+		xe_err(xe, "sysctrl: unexpected clear counter response length %zu (expected %zu)\n",
+		       rlen, sizeof(response));
+		return -EIO;
+	}
+
+	ret = ras_status_to_errno(response.status);
+	if (ret) {
+		xe_err(xe, "sysctrl: clear counter command failed with status %#x\n",
+		       response.status);
+		return ret;
+	}
+
+	counter = &response.counter;
+
+	xe_dbg(xe, "[RAS]: clear counter for %s %s\n", comp_to_str(counter->common.component),
+	       sev_to_str(counter->common.severity));
+
+	return 0;
+}
+
+/**
+ * xe_ras_init() - Initialize Xe RAS
+ * @xe: xe device instance
+ *
+ * Initialize DRM RAS and, when sysctrl and PCIe AER are available, AER on the USP.
+ */
+void xe_ras_init(struct xe_device *xe)
+{
+	if (!xe->info.has_drm_ras)
+		return;
+
+	xe_drm_ras_init(xe);
+
+	if (!xe->info.has_sysctrl)
+		return;
+
+	if (IS_ENABLED(CONFIG_PCIEAER))
+		ras_usp_aer_init(xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_ras.h b/drivers/gpu/drm/xe/xe_ras.h
index ea90593b62dc..ba0b0224df23 100644
--- a/drivers/gpu/drm/xe/xe_ras.h
+++ b/drivers/gpu/drm/xe/xe_ras.h
@@ -6,10 +6,15 @@
 #ifndef _XE_RAS_H_
 #define _XE_RAS_H_
 
+#include <linux/types.h>
+
 struct xe_device;
 struct xe_sysctrl_event_response;
 
 void xe_ras_counter_threshold_crossed(struct xe_device *xe,
 				      struct xe_sysctrl_event_response *response);
+int xe_ras_get_counter(struct xe_device *xe, u8 severity, u8 component, u32 *value);
+int xe_ras_clear_counter(struct xe_device *xe, u8 severity, u8 component);
+void xe_ras_init(struct xe_device *xe);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ras_types.h b/drivers/gpu/drm/xe/xe_ras_types.h
index 4e63c67f806a..6688e11f57a8 100644
--- a/drivers/gpu/drm/xe/xe_ras_types.h
+++ b/drivers/gpu/drm/xe/xe_ras_types.h
@@ -70,4 +70,55 @@ struct xe_ras_threshold_crossed {
 	struct xe_ras_error_class counters[XE_RAS_NUM_COUNTERS];
 } __packed;
 
+/**
+ * struct xe_ras_get_counter_request - Request structure for get counter
+ */
+struct xe_ras_get_counter_request {
+	/** @counter: Error counter to be queried */
+	struct xe_ras_error_class counter;
+	/** @reserved: Reserved for future use */
+	u32 reserved;
+} __packed;
+
+/**
+ * struct xe_ras_get_counter_response - Response structure for get counter
+ */
+struct xe_ras_get_counter_response {
+	/** @counter: Error counter that was queried */
+	struct xe_ras_error_class counter;
+	/** @value: Current counter value */
+	u32 value;
+	/** @timestamp: Timestamp when counter was last updated */
+	u64 timestamp;
+	/** @threshold: Threshold value for the counter */
+	u32 threshold;
+	/** @reserved: Reserved */
+	u32 reserved[57];
+} __packed;
+
+/**
+ * struct xe_ras_clear_counter_request - Request structure for clear counter
+ */
+struct xe_ras_clear_counter_request {
+	/** @counter: Counter class to be cleared */
+	struct xe_ras_error_class counter;
+	/** @reserved: Reserved for future use */
+	u32 reserved;
+} __packed;
+
+/**
+ * struct xe_ras_clear_counter_response - Response structure for clear counter
+ */
+struct xe_ras_clear_counter_response {
+	/** @counter: Counter class that was cleared */
+	struct xe_ras_error_class counter;
+	/** @reserved: Reserved */
+	u32 reserved;
+	/** @timestamp: Timestamp when the counter was cleared */
+	u64 timestamp;
+	/** @status: Status of the clear operation */
+	u32 status;
+	/** @reserved1: Reserved for future use */
+	u32 reserved1[3];
+} __packed;
 #endif
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 526907d2d824..cab1b578ca0e 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -5,6 +5,8 @@
 
 #include "xe_reg_whitelist.h"
 
+#include <kunit/visibility.h>
+
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_oa_regs.h"
@@ -41,7 +43,7 @@ static bool match_multi_queue_class(const struct xe_device *xe,
 	return xe_gt_supports_multi_queue(gt, hwe->class);
 }
 
-static const struct xe_rtp_table_sr register_whitelist = XE_RTP_TABLE_SR(
+VISIBLE_IF_KUNIT const struct xe_rtp_table_sr register_whitelist = XE_RTP_TABLE_SR(
 	{ XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(WHITELIST(PS_INVOCATION_COUNT,
@@ -104,6 +106,7 @@ static const struct xe_rtp_table_sr register_whitelist = XE_RTP_TABLE_SR(
 				   RING_FORCE_TO_NONPRIV_ACCESS_RW))
 	},
 );
+EXPORT_SYMBOL_IF_KUNIT(register_whitelist);
 
 static const struct xe_rtp_table_sr oa_whitelist = XE_RTP_TABLE_SR(
 
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h
index e1eb1b7d5480..c0248063d515 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.h
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h
@@ -14,6 +14,10 @@ struct xe_hw_engine;
 struct xe_reg_sr;
 struct xe_reg_sr_entry;
 
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+extern const struct xe_rtp_table_sr register_whitelist;
+#endif
+
 void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
 
 void xe_reg_whitelist_oa_regs(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 83a40e1f9528..6a8d6ea68f25 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -227,17 +227,23 @@ static bool rule_matches(const struct xe_device *xe,
 
 static void rtp_add_sr_entry(const struct xe_rtp_action *action,
 			     struct xe_gt *gt,
+			     struct xe_hw_engine *hwe,
 			     u32 mmio_base,
 			     struct xe_reg_sr *sr)
 {
 	struct xe_reg_sr_entry sr_entry = {
 		.reg = action->reg,
 		.clr_bits = action->clr_bits,
-		.set_bits = action->set_bits,
 		.read_mask = action->read_mask,
 	};
 
+	if (action->use_func)
+		sr_entry.set_bits = action->set_func(gt, hwe);
+	else
+		sr_entry.set_bits = action->set_bits;
+
 	sr_entry.reg.addr += mmio_base;
+
 	xe_reg_sr_add(sr, &sr_entry, gt);
 }
 
@@ -259,7 +265,7 @@ static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry,
 		else
 			mmio_base = 0;
 
-		rtp_add_sr_entry(action, gt, mmio_base, sr);
+		rtp_add_sr_entry(action, gt, hwe, mmio_base, sr);
 	}
 
 	return true;
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 2cc65053cd07..0032f68ea187 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -323,6 +323,25 @@ struct xe_reg_sr;
 	  .read_mask = 0, ##__VA_ARGS__ }
 
 /**
+ * XE_RTP_ACTION_FIELD_SET_FUNC - Set a bit range to the value returned by a function
+ * @reg_: Register
+ * @mask_bits_: Mask of bits to be changed in the register, forming a field
+ * @func_: Function that returns value to set in the field denoted by @mask_bits_
+ * @...: Additional fields to override in the struct xe_rtp_action entry
+ *
+ * This macro works like XE_RTP_ACTION_FIELD_SET(), except that the
+ * field value is evaluated at the time the RTP table is processed.
+ *
+ * @func_ will only be called a single time, when the RTP table is being
+ * processed.  After processing, the value in the reg_sr entry is fixed and
+ * will not be re-evaluated.
+ */
+#define XE_RTP_ACTION_FIELD_SET_FUNC(reg_, mask_bits_, func_, ...)		\
+	{ .reg = XE_RTP_DROP_CAST(reg_),					\
+	  .clr_bits = mask_bits_, .set_func = func_, .use_func = 1,		\
+	  .read_mask = mask_bits_, ##__VA_ARGS__ }
+
+/**
  * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist
  * @reg_: Register
  * @val_: Whitelist-specific flags to set
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 58018ae4f8cc..b78092fa06e0 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -22,20 +22,37 @@ struct xe_gt;
  */
 struct xe_rtp_action {
 	/** @reg: Register */
-	struct xe_reg		reg;
+	struct xe_reg reg;
+
 	/**
 	 * @clr_bits: bits to clear when updating register. It's always a
 	 * superset of bits being modified
 	 */
-	u32			clr_bits;
-	/** @set_bits: bits to set when updating register */
-	u32			set_bits;
+	u32 clr_bits;
+
+	union {
+		/** @set_bits: bits to set when updating register */
+		u32 set_bits;
+
+		/** @set_func: function to provide bits to set when updating register */
+		u32 (*set_func)(struct xe_gt *gt,
+				struct xe_hw_engine *hwe);
+	};
+
 #define XE_RTP_NOCHECK		.read_mask = 0
 	/** @read_mask: mask for bits to consider when reading value back */
-	u32			read_mask;
+	u32 read_mask;
+
 #define XE_RTP_ACTION_FLAG_ENGINE_BASE		BIT(0)
 	/** @flags: flags to apply on rule evaluation or action */
-	u8			flags;
+	u8 flags;
+
+	/**
+	 * @use_func:
+	 *   Internal flag indicating @set_func should be called instead of
+	 *   using @set_bits.
+	 */
+	u8 use_func:1;
 };
 
 enum {
@@ -69,6 +86,7 @@ struct xe_rtp_rule {
 			u8 platform;
 			u8 subplatform;
 		};
+
 		/*
 		 * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE /
 		 * MATCH_MEDIA_VERSION  / XE_RTP_MATCH_MEDIA_VERSION_RANGE
@@ -78,15 +96,18 @@ struct xe_rtp_rule {
 #define XE_RTP_END_VERSION_UNDEFINED	U32_MAX
 			u32 ver_end;
 		};
+
 		/* MATCH_STEP */
 		struct {
 			u8 step_start;
 			u8 step_end;
 		};
+
 		/* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */
 		struct {
 			u8 engine_class;
 		};
+
 		/* MATCH_FUNC */
 		bool (*match_func)(const struct xe_device *xe,
 				   const struct xe_gt *gt,
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index fb9c31613ca7..55c1996f689e 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -110,12 +110,14 @@ __diag_pop();
 
 /**
  * xe_step_platform_get - Determine platform-level stepping from PCI revid
- * @xe: Xe device
+ * @platform: The Xe platform
+ * @revid: The PCI revid
+ * @step: Pointer to the step struct to update
  *
  * Convert the PCI revid into a platform-level stepping value and store that
- * in the device info.
+ * in @step->platform.
  */
-void xe_step_platform_get(struct xe_device *xe)
+void xe_step_platform_get(enum xe_platform platform, u8 revid, struct xe_step_info *step)
 {
 	/*
 	 * Not all platforms map PCI revid directly into our symbolic stepping
@@ -126,18 +128,21 @@ void xe_step_platform_get(struct xe_device *xe)
 	 * checks.
 	 */
 
-	if (xe->info.platform == XE_NOVALAKE_P)
-		xe->info.step.platform = STEP_A0 + xe->info.revid;
+	if (platform == XE_NOVALAKE_P)
+		step->platform = STEP_A0 + revid;
 }
 
 /**
  * xe_step_pre_gmdid_get - Determine IP steppings from PCI revid
  * @xe: Xe device
+ * @step: Pointer to the step struct to update
+ *
+ * Convert the PCI revid into proper IP steppings and update @step->basedie,
+ * @step->graphics and @step->media accordingly.
  *
- * Convert the PCI revid into proper IP steppings.  This should only be
- * used on platforms that do not have GMD_ID support.
+ * This should only be used on platforms that do not have GMD_ID support.
  */
-void xe_step_pre_gmdid_get(struct xe_device *xe)
+void xe_step_pre_gmdid_get(struct xe_device *xe, struct xe_step_info *step)
 {
 	const struct xe_step_info *revids = NULL;
 	u16 revid = xe->info.revid;
@@ -234,9 +239,9 @@ void xe_step_pre_gmdid_get(struct xe_device *xe)
 	}
 
 done:
-	xe->info.step.graphics = graphics;
-	xe->info.step.media = media;
-	xe->info.step.basedie = basedie;
+	step->graphics = graphics;
+	step->media = media;
+	step->basedie = basedie;
 }
 
 /**
@@ -244,8 +249,10 @@ done:
  * @xe: Xe device
  * @graphics_gmdid_revid: value of graphics GMD_ID register's revid field
  * @media_gmdid_revid: value of media GMD_ID register's revid field
+ * @step: Pointer to the step struct to update
  *
- * Convert the revid fields of the GMD_ID registers into proper IP steppings.
+ * Convert the revid fields of the GMD_ID registers into proper IP steppings
+ * and update @step->graphics and @step->media accordingly.
  *
  * GMD_ID revid values are currently expected to have consistent meanings on
  * all platforms:  major steppings (A0, B0, etc.) are 4 apart, with minor
@@ -253,7 +260,8 @@ done:
  */
 void xe_step_gmdid_get(struct xe_device *xe,
 		       u32 graphics_gmdid_revid,
-		       u32 media_gmdid_revid)
+		       u32 media_gmdid_revid,
+		       struct xe_step_info *step)
 {
 	u8 graphics = STEP_A0 + graphics_gmdid_revid;
 	u8 media = STEP_A0 + media_gmdid_revid;
@@ -270,8 +278,8 @@ void xe_step_gmdid_get(struct xe_device *xe,
 			media_gmdid_revid);
 	}
 
-	xe->info.step.graphics = graphics;
-	xe->info.step.media = media;
+	step->graphics = graphics;
+	step->media = media;
 }
 
 #define STEP_NAME_CASE(name)	\
diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h
index ea36b22cc297..5a5845335740 100644
--- a/drivers/gpu/drm/xe/xe_step.h
+++ b/drivers/gpu/drm/xe/xe_step.h
@@ -10,14 +10,16 @@
 
 #include "xe_step_types.h"
 
+enum xe_platform;
 struct xe_device;
 
-void xe_step_platform_get(struct xe_device *xe);
+void xe_step_platform_get(enum xe_platform platform, u8 revid, struct xe_step_info *step);
 
-void xe_step_pre_gmdid_get(struct xe_device *xe);
+void xe_step_pre_gmdid_get(struct xe_device *xe, struct xe_step_info *step);
 void xe_step_gmdid_get(struct xe_device *xe,
 		       u32 graphics_gmdid_revid,
-		       u32 media_gmdid_revid);
+		       u32 media_gmdid_revid,
+		       struct xe_step_info *step);
 static inline u32 xe_step_to_gmdid(enum intel_step step) { return step - STEP_A0; }
 
 const char *xe_step_name(enum intel_step step);
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox.c b/drivers/gpu/drm/xe/xe_sysctrl_mailbox.c
index 3caa9f15875f..e13eebaac1d0 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox.c
+++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox.c
@@ -294,6 +294,34 @@ static int sysctrl_send_command(struct xe_sysctrl *sc,
 }
 
 /**
+ * xe_sysctrl_create_command() - Create system controller command
+ * @command: Sysctrl command structure
+ * @group_id: Command group ID
+ * @cmd_id: Command ID
+ * @request: Pointer to request buffer (can be NULL)
+ * @request_len: Size of request buffer
+ * @response: Pointer to response buffer
+ * @response_len: Size of response buffer
+ *
+ * Helper function to create a sysctrl command to be sent via xe_sysctrl_send_command().
+ */
+void xe_sysctrl_create_command(struct xe_sysctrl_mailbox_command *command, u8 group_id, u8 cmd_id,
+			       void *request, size_t request_len, void *response,
+			       size_t response_len)
+{
+	struct xe_sysctrl_app_msg_hdr header = {0};
+
+	header.data = FIELD_PREP(APP_HDR_GROUP_ID_MASK, group_id) |
+		      FIELD_PREP(APP_HDR_COMMAND_MASK, cmd_id);
+
+	command->header = header;
+	command->data_in = request;
+	command->data_in_len = request_len;
+	command->data_out = response;
+	command->data_out_len = response_len;
+}
+
+/**
  * xe_sysctrl_mailbox_init - Initialize System Controller mailbox interface
  * @sc: System controller structure
  *
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox.h b/drivers/gpu/drm/xe/xe_sysctrl_mailbox.h
index f67e9234de48..fb434cc165b2 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox.h
+++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox.h
@@ -23,6 +23,9 @@ struct xe_sysctrl_mailbox_command;
 #define XE_SYSCTRL_APP_HDR_VERSION(hdr) \
 	FIELD_GET(APP_HDR_VERSION_MASK, (hdr)->data)
 
+void xe_sysctrl_create_command(struct xe_sysctrl_mailbox_command *command, u8 group_id, u8 cmd_id,
+			       void *request, size_t request_len, void *response,
+			       size_t response_len);
 void xe_sysctrl_mailbox_init(struct xe_sysctrl *sc);
 int xe_sysctrl_send_command(struct xe_sysctrl *sc,
 			    struct xe_sysctrl_mailbox_command *cmd,
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
index 84d7c647e743..6e3753554510 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
+++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
@@ -22,9 +22,13 @@ enum xe_sysctrl_group {
 /**
  * enum xe_sysctrl_gfsp_cmd - Commands supported by GFSP group
  *
+ * @XE_SYSCTRL_CMD_GET_COUNTER: Get error counter value
+ * @XE_SYSCTRL_CMD_CLEAR_COUNTER: Clear error counter value
  * @XE_SYSCTRL_CMD_GET_PENDING_EVENT: Retrieve pending event
  */
 enum xe_sysctrl_gfsp_cmd {
+	XE_SYSCTRL_CMD_GET_COUNTER		= 0x03,
+	XE_SYSCTRL_CMD_CLEAR_COUNTER		= 0x04,
 	XE_SYSCTRL_CMD_GET_PENDING_EVENT	= 0x07,
 };
 
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index c465aae7883c..74d925a337b7 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -157,7 +157,9 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id)
 	if (err)
 		return err;
 
-	xe_pcode_init(tile);
+	err = xe_pcode_init_early(tile);
+	if (err)
+		return err;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index bf3fad9cdbef..bcec40ca2d35 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -20,7 +20,7 @@
 #undef XE_REG_MCR
 #define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
 
-static const struct xe_rtp_table_sr gt_tunings = XE_RTP_TABLE_SR(
+VISIBLE_IF_KUNIT const struct xe_rtp_table_sr gt_tunings = XE_RTP_TABLE_SR(
 	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
@@ -101,6 +101,7 @@ static const struct xe_rtp_table_sr gt_tunings = XE_RTP_TABLE_SR(
 				   BANK_HASH_4KB_MODE))
 	},
 );
+EXPORT_SYMBOL_IF_KUNIT(gt_tunings);
 
 static const struct xe_rtp_table_sr engine_tunings = XE_RTP_TABLE_SR(
 	{ XE_RTP_NAME("Tuning: L3 Hashing Mask"),
diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h
index d18e187debf6..869564e3e992 100644
--- a/drivers/gpu/drm/xe/xe_tuning.h
+++ b/drivers/gpu/drm/xe/xe_tuning.h
@@ -6,6 +6,8 @@
 #ifndef _XE_TUNING_H_
 #define _XE_TUNING_H_
 
+#include <kunit/visibility.h>
+
 struct drm_printer;
 struct xe_gt;
 struct xe_hw_engine;
@@ -16,4 +18,8 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe);
 void xe_tuning_process_lrc(struct xe_hw_engine *hwe);
 int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p);
 
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+extern const struct xe_rtp_table_sr gt_tunings;
+#endif
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index c4fb29004195..9e343f9aa44d 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -332,6 +332,20 @@ static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
 	return err;
 }
 
+/**
+ * madvise_range_needs_invalidation() - Check whether madvise needs invalidation
+ * @args: madvise ioctl arguments
+ *
+ * Purgeable state updates only touch VMA/BO metadata. PTEs stay valid and are
+ * zapped only if the BO is later purged.
+ *
+ * Return: true when the update needs PTE invalidation.
+ */
+static bool madvise_range_needs_invalidation(const struct drm_xe_madvise *args)
+{
+	return args->type != DRM_XE_VMA_ATTR_PURGEABLE_STATE;
+}
+
 static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
 {
 	if (XE_IOCTL_DBG(xe, !args))
@@ -708,8 +722,9 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
 	madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
 				 &details);
 
-	err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr,
-					     madvise_range.addr + args->range);
+	if (madvise_range_needs_invalidation(args))
+		err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr,
+						     madvise_range.addr + args->range);
 
 	if (madvise_range.has_svm_userptr_vmas)
 		xe_svm_notifier_unlock(vm);
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index b9d9fe0801aa..139434946f8f 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -130,7 +130,7 @@
 __diag_push();
 __diag_ignore_all("-Woverride-init", "Allow field overrides in table");
 
-static const struct xe_rtp_table_sr gt_was = XE_RTP_TABLE_SR(
+VISIBLE_IF_KUNIT const struct xe_rtp_table_sr gt_was = XE_RTP_TABLE_SR(
 	/* Workarounds applying over a range of IPs */
 
 	{ XE_RTP_NAME("14011060649"),
@@ -293,7 +293,7 @@ static const struct xe_rtp_table_sr gt_was = XE_RTP_TABLE_SR(
 	  XE_RTP_ACTIONS(SET(MMIOATSREQLIMIT_GAM_WALK_3D,
 			     DIS_ATS_WRONLY_PG))
 	},
-	{ XE_RTP_NAME("14026144927, 16029437861"),
+	{ XE_RTP_NAME("14026144927, 16029437861, 14026127056"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)),
 	  XE_RTP_ACTIONS(SET(L3SQCREG2, L3_SQ_DISABLE_COAMA_2WAY_COH |
 			     L3_SQ_DISABLE_COAMA))
@@ -307,6 +307,7 @@ static const struct xe_rtp_table_sr gt_was = XE_RTP_TABLE_SR(
 	  XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES))
 	},
 );
+EXPORT_SYMBOL_IF_KUNIT(gt_was);
 
 static const struct xe_rtp_table_sr engine_was = XE_RTP_TABLE_SR(
 	/* Workarounds applying over a range of IPs */
@@ -586,12 +587,12 @@ static const struct xe_rtp_table_sr engine_was = XE_RTP_TABLE_SR(
 
 	/* Xe3p_LPG*/
 
-	{ XE_RTP_NAME("22021149932"),
+	{ XE_RTP_NAME("22021149932, 14026290593"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, SAMPLER_LD_LSC_DISABLE))
 	},
-	{ XE_RTP_NAME("14025676848"),
+	{ XE_RTP_NAME("14025676848, 14026270459"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE))
@@ -802,10 +803,11 @@ static const struct xe_rtp_entry oob_was_entries[] = {
 
 static_assert(ARRAY_SIZE(oob_was_entries)  == _XE_WA_OOB_COUNT);
 
-static __maybe_unused const struct xe_rtp_table oob_was = {
+VISIBLE_IF_KUNIT __maybe_unused const struct xe_rtp_table oob_was = {
 	.entries = oob_was_entries,
 	.n_entries = ARRAY_SIZE(oob_was_entries),
 };
+EXPORT_SYMBOL_IF_KUNIT(oob_was);
 
 static const struct xe_rtp_entry device_oob_was_entries[] = {
 #include <generated/xe_device_wa_oob.c>
@@ -813,10 +815,11 @@ static const struct xe_rtp_entry device_oob_was_entries[] = {
 
 static_assert(ARRAY_SIZE(device_oob_was_entries) == _XE_DEVICE_WA_OOB_COUNT);
 
-static __maybe_unused const struct xe_rtp_table device_oob_was = {
+VISIBLE_IF_KUNIT __maybe_unused const struct xe_rtp_table device_oob_was = {
 	.entries = device_oob_was_entries,
 	.n_entries = ARRAY_SIZE(device_oob_was_entries),
 };
+EXPORT_SYMBOL_IF_KUNIT(device_oob_was);
 
 __diag_pop();
 
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index a5f7d33c1b32..f4da2b271396 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -6,6 +6,7 @@
 #ifndef _XE_WA_H_
 #define _XE_WA_H_
 
+#include <kunit/visibility.h>
 #include "xe_assert.h"
 
 struct drm_printer;
@@ -24,6 +25,12 @@ void xe_wa_apply_tile_workarounds(struct xe_tile *tile);
 void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p);
 int xe_wa_gt_dump(struct xe_gt *gt, struct drm_printer *p);
 
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+extern const struct xe_rtp_table_sr gt_was;
+extern __maybe_unused const struct xe_rtp_table oob_was;
+extern __maybe_unused const struct xe_rtp_table device_oob_was;
+#endif
+
 /**
  * XE_GT_WA - Out-of-band GT workarounds, to be queried and called as needed.
  * @gt__: gt instance
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index f8a185103b80..9027365f0043 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -65,3 +65,4 @@
 
 14025883347	MEDIA_VERSION_RANGE(1301, 3503)
 		GRAPHICS_VERSION_RANGE(2004, 3005)
+16029380221	MEDIA_VERSION(3500)
diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h
index e32ef763427c..dff389b56eb3 100644
--- a/include/drm/intel/pciids.h
+++ b/include/drm/intel/pciids.h
@@ -893,8 +893,9 @@
 	MACRO__(0xD741, ## __VA_ARGS__), \
 	MACRO__(0xD742, ## __VA_ARGS__), \
 	MACRO__(0xD743, ## __VA_ARGS__), \
-	MACRO__(0xD744, ## __VA_ARGS__), \
-	MACRO__(0xD745, ## __VA_ARGS__)
+	MACRO__(0xD745, ## __VA_ARGS__), \
+	MACRO__(0xD74A, ## __VA_ARGS__), \
+	MACRO__(0xD74B, ## __VA_ARGS__)
 
 /* CRI */
 #define INTEL_CRI_IDS(MACRO__, ...) \
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 48d851fbd8ea..5842552294c1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1791,22 +1791,8 @@ static inline int perf_is_paranoid(void)
 }
 
 extern int perf_allow_kernel(void);
-
-static inline int perf_allow_cpu(void)
-{
-	if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
-		return -EACCES;
-
-	return security_perf_event_open(PERF_SECURITY_CPU);
-}
-
-static inline int perf_allow_tracepoint(void)
-{
-	if (sysctl_perf_event_paranoid > -1 && !perfmon_capable())
-		return -EPERM;
-
-	return security_perf_event_open(PERF_SECURITY_TRACEPOINT);
-}
+extern int perf_allow_cpu(void);
+extern int perf_allow_tracepoint(void);
 
 extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs);
 
@@ -2023,6 +2009,19 @@ perf_event_pause(struct perf_event *event, bool reset)			{ return 0; }
 static inline int
 perf_exclude_event(struct perf_event *event, struct pt_regs *regs)	{ return 0; }
 
+static inline int perf_allow_kernel(void)
+{
+	return perfmon_capable() ? 0 : -EACCES;
+}
+static inline int perf_allow_cpu(void)
+{
+	return perfmon_capable() ? 0 : -EACCES;
+}
+static inline int perf_allow_tracepoint(void)
+{
+	return perfmon_capable() ? 0 : -EPERM;
+}
+
 #endif /* !CONFIG_PERF_EVENTS */
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 48e9f1fdb78d..50c80af4ad4e 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -2589,6 +2589,12 @@ enum drm_xe_ras_error_component {
 	DRM_XE_RAS_ERR_COMP_CORE_COMPUTE = 1,
 	/** @DRM_XE_RAS_ERR_COMP_SOC_INTERNAL: SoC Internal Error */
 	DRM_XE_RAS_ERR_COMP_SOC_INTERNAL,
+	/** @DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY: Device Memory Error */
+	DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY,
+	/** @DRM_XE_RAS_ERR_COMP_PCIE: PCIe Subsystem Error */
+	DRM_XE_RAS_ERR_COMP_PCIE,
+	/** @DRM_XE_RAS_ERR_COMP_FABRIC: Fabric Subsystem Error */
+	DRM_XE_RAS_ERR_COMP_FABRIC,
 	/** @DRM_XE_RAS_ERR_COMP_MAX: Max Error */
 	DRM_XE_RAS_ERR_COMP_MAX	/* non-ABI */
 };
@@ -2606,7 +2612,10 @@ enum drm_xe_ras_error_component {
  */
 #define DRM_XE_RAS_ERROR_COMPONENT_NAMES {				\
 	[DRM_XE_RAS_ERR_COMP_CORE_COMPUTE] = "core-compute",		\
-	[DRM_XE_RAS_ERR_COMP_SOC_INTERNAL] = "soc-internal"		\
+	[DRM_XE_RAS_ERR_COMP_SOC_INTERNAL] = "soc-internal",		\
+	[DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY] = "device-memory",		\
+	[DRM_XE_RAS_ERR_COMP_PCIE] = "pcie",				\
+	[DRM_XE_RAS_ERR_COMP_FABRIC] = "fabric",			\
 }
 
 #if defined(__cplusplus)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 954c36e28101..38c35123f23c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -14768,6 +14768,24 @@ int perf_allow_kernel(void)
 }
 EXPORT_SYMBOL_GPL(perf_allow_kernel);
 
+int perf_allow_cpu(void)
+{
+	if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
+		return -EACCES;
+
+	return security_perf_event_open(PERF_SECURITY_CPU);
+}
+EXPORT_SYMBOL_GPL(perf_allow_cpu);
+
+int perf_allow_tracepoint(void)
+{
+	if (sysctl_perf_event_paranoid > -1 && !perfmon_capable())
+		return -EPERM;
+
+	return security_perf_event_open(PERF_SECURITY_TRACEPOINT);
+}
+EXPORT_SYMBOL_GPL(perf_allow_tracepoint);
+
 /*
  * Inherit an event from parent task to child task.
  *
author	Mark Brown <broonie@kernel.org>	2026-07-03 16:20:43 +0100
committer	Mark Brown <broonie@kernel.org>	2026-07-03 16:20:43 +0100
commit	9dea607a7a2b140280f27f48fb068f8ac01ce701 (patch)
tree	fdbfb7b63d2fd8e14ff56a8cd050fa6f662201ae
parent	10182b0e6181d81cbba867e2a098c42088285224 (diff)
parent	820de07bba7b7c97e0f52e1d66bf6147a25ab67f (diff)
download	linux-next-9dea607a7a2b140280f27f48fb068f8ac01ce701.tar.gz linux-next-9dea607a7a2b140280f27f48fb068f8ac01ce701.zip