diff options
| author | Mark Brown <broonie@kernel.org> | 2026-07-03 16:20:43 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-07-03 16:20:43 +0100 |
| commit | 9dea607a7a2b140280f27f48fb068f8ac01ce701 (patch) | |
| tree | fdbfb7b63d2fd8e14ff56a8cd050fa6f662201ae | |
| parent | 10182b0e6181d81cbba867e2a098c42088285224 (diff) | |
| parent | 820de07bba7b7c97e0f52e1d66bf6147a25ab67f (diff) | |
| download | linux-next-9dea607a7a2b140280f27f48fb068f8ac01ce701.tar.gz linux-next-9dea607a7a2b140280f27f48fb068f8ac01ce701.zip | |
Merge branch 'drm-xe-next' of https://gitlab.freedesktop.org/drm/xe/kernel.git
# Conflicts:
# drivers/gpu/drm/xe/tests/xe_rtp_test.c
# drivers/gpu/drm/xe/xe_hw_engine.c
# drivers/gpu/drm/xe/xe_reg_whitelist.c
# drivers/gpu/drm/xe/xe_tuning.c
# drivers/gpu/drm/xe/xe_wa.c
65 files changed, 1432 insertions, 433 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 55ab45f669ac..0da739d9a816 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -251,6 +251,13 @@ Description: RO. Fan 2 speed in RPM. Only supported for particular Intel Xe graphics platforms. + On DG2 the driver always shows two fan channels, because the + FSC_READ_NUM_FANS command does not work on some cards. OEMs + decide how the fans map to tach channels, so two fans can share + one tach line. When that happens, the second channel + reads 0 RPM even though the fan is spinning. This is normal, not + a bug. + What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan3_input Date: March 2025 KernelVersion: 6.16 diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile index 7530df998148..e07517d120e0 100644 --- a/drivers/gpu/drm/xe/Kconfig.profile +++ b/drivers/gpu/drm/xe/Kconfig.profile @@ -1,50 +1,71 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE_JOB_TIMEOUT_MAX - int "Default max job timeout (ms)" + int "Hard upper limit for job timeout (ms)" default 10000 # milliseconds help - Configures the default max job timeout after which job will - be forcefully taken away from scheduler. + Absolute upper bound (in milliseconds) for the per-engine-class job + timeout. This is the maximum value that can be written to the sysfs + job_timeout_ms knob, regardless of privileges. To raise this ceiling, + increase this value and rebuild the kernel. config DRM_XE_JOB_TIMEOUT_MIN - int "Default min job timeout (ms)" + int "Hard lower limit for job timeout (ms)" default 1 # milliseconds help - Configures the default min job timeout after which job will - be forcefully taken away from scheduler. + Absolute lower bound (in milliseconds) for the per-engine-class job + timeout. 
This is the minimum value that can be written to the sysfs + job_timeout_ms knob, regardless of privileges. + + Note: the job timeout default (5000 ms) is hardcoded in the driver + and is not configurable here. Use the sysfs job_timeout_ms knob at + runtime to change the engine-class default. config DRM_XE_TIMESLICE_MAX - int "Default max timeslice duration (us)" + int "Hard upper limit for timeslice duration (us)" default 10000000 # microseconds help - Configures the default max timeslice duration between multiple - contexts by guc scheduling. + Absolute upper bound (in microseconds) for the timeslice duration. + This caps both the sysfs timeslice_duration_us knob and the value + accepted via the DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE UAPI for + processes with CAP_SYS_NICE when DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT + is enabled. config DRM_XE_TIMESLICE_MIN - int "Default min timeslice duration (us)" + int "Hard lower limit for timeslice duration (us)" default 1 # microseconds help - Configures the default min timeslice duration between multiple - contexts by guc scheduling. + Absolute lower bound (in microseconds) for the timeslice duration. + This caps both the sysfs timeslice_duration_us knob and the value + accepted via the DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE UAPI for + processes with CAP_SYS_NICE when DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT + is enabled. config DRM_XE_PREEMPT_TIMEOUT - int "Preempt timeout (us, jiffy granularity)" + int "Default preempt timeout (us, jiffy granularity)" default 640000 # microseconds help - How long to wait (in microseconds) for a preemption event to occur - when submitting a new context. If the current context does not hit - an arbitration point and yield to HW before the timer expires, the - HW will be reset to allow the more important context to execute. + Initial per-engine-class preemption timeout (in microseconds). 
This + is the value the driver programs at boot; it can be changed at + runtime via the sysfs preempt_timeout_us knob. + + This is how long the driver waits for the current context to reach + an arbitration point and yield the GPU voluntarily when a + higher-priority context becomes runnable. If the context does not + yield before the timer expires, the HW is reset to allow the + higher-priority context to execute. + + The range userspace may write via sysfs is bounded by + DRM_XE_PREEMPT_TIMEOUT_MIN and DRM_XE_PREEMPT_TIMEOUT_MAX. config DRM_XE_PREEMPT_TIMEOUT_MAX - int "Default max preempt timeout (us)" + int "Hard upper limit for preempt timeout (us)" default 10000000 # microseconds help - Configures the default max preempt timeout after which context - will be forcefully taken away and higher priority context will - run. + Absolute upper bound (in microseconds) for the per-engine-class + preemption timeout. This is the maximum value that can be written to + the sysfs preempt_timeout_us knob, regardless of privileges. config DRM_XE_PREEMPT_TIMEOUT_MIN - int "Default min preempt timeout (us)" + int "Hard lower limit for preempt timeout (us)" default 1 # microseconds help - Configures the default min preempt timeout after which context - will be forcefully taken away and higher priority context will - run. + Absolute lower bound (in microseconds) for the per-engine-class + preemption timeout. This is the minimum value that can be written to + the sysfs preempt_timeout_us knob, regardless of privileges. 
config DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT bool "Default configuration of limitation on scheduler timeout" default y diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index e5a04253e73b..6d728f8c4c39 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -101,6 +101,7 @@ xe-y += xe_bb.o \ xe_page_reclaim.o \ xe_pat.o \ xe_pci.o \ + xe_pci_error.o \ xe_pci_rebar.o \ xe_pcode.o \ xe_pm.o \ diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 0e3408f4952c..f7aa47f11a36 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -9,5 +9,6 @@ obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o xe_test-y = xe_test_mod.o \ xe_args_test.o \ xe_pci_test.o \ + xe_rtp_tables_test.o \ xe_rtp_test.o \ xe_wa_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 9240aff779da..8df9029afcd3 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -311,40 +311,44 @@ const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc } EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param); -static void fake_init_devid(struct xe_device *xe) +static int fake_probe_info(struct xe_device *xe, + const struct xe_device_desc *desc, + struct xe_pci_fake_data *data, + struct xe_probed_info *probed_info) { - /* Nothing to do, just keep zero. 
*/ -} - -static int fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, - u32 *ver, u32 *revid) -{ - struct kunit *test = kunit_get_current_test(); - struct xe_pci_fake_data *data = test->priv; + probed_info->tile_count = 1 + desc->max_remote_tiles; - if (type == GMDID_MEDIA) { - *ver = data->media_verx100; - *revid = xe_step_to_gmdid(data->step.media); + if (!data || desc->pre_gmdid_graphics_ip) { + probed_info->graphics_ip = desc->pre_gmdid_graphics_ip; + probed_info->media_ip = desc->pre_gmdid_media_ip; } else { - *ver = data->graphics_verx100; - *revid = xe_step_to_gmdid(data->step.graphics); + probed_info->graphics_ip = find_graphics_ip(data->graphics_verx100); + + if (data->media_verx100) { + probed_info->media_ip = find_media_ip(data->media_verx100); + xe_assert(xe, probed_info->media_ip); + } } - return 0; -} + xe_assert(xe, probed_info->graphics_ip); + if (!probed_info->graphics_ip) + return -ENODEV; -static void fake_xe_info_probe_tile_count(struct xe_device *xe) -{ - /* Nothing to do, just use the statically defined value. */ + if (data) + probed_info->step = data->step; + + return 0; } int xe_pci_fake_device_init(struct xe_device *xe) { struct kunit *test = kunit_get_current_test(); struct xe_pci_fake_data *data = test->priv; + struct xe_probed_info probed_info = {}; const struct pci_device_id *ent = pciidlist; const struct xe_device_desc *desc; const struct xe_subplatform_desc *subplatform_desc; + int err; if (!data) { desc = (const void *)ent->driver_data; @@ -374,13 +378,12 @@ done: xe->sriov.__mode = data && data->sriov_mode ? 
data->sriov_mode : XE_SRIOV_MODE_NONE; - kunit_activate_static_stub(test, init_devid, fake_init_devid); - kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid); - kunit_activate_static_stub(test, xe_info_probe_tile_count, - fake_xe_info_probe_tile_count); + err = fake_probe_info(xe, desc, data, &probed_info); + if (err) + return err; - xe_info_init_early(xe, desc, subplatform_desc); - xe_info_init(xe, desc); + xe_info_init_early(xe, desc, subplatform_desc, &probed_info); + xe_info_init(xe, desc, &probed_info); return 0; } diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_tables_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_tables_test.c new file mode 100644 index 000000000000..7e2fc39ac62c --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_rtp_tables_test.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2026 Intel Corporation + */ + +#include <kunit/test.h> + +#include "xe_reg_whitelist.h" +#include "xe_rtp_types.h" +#include "xe_tuning.h" +#include "xe_wa.h" + +#define RTP_TABLE_PARAM(table) \ + static const void *table##_gen_params(struct kunit *test, \ + const void *prev, char *desc) \ + { \ + typeof((table.entries)[0]) *__next = prev ? 
\ + ((typeof(__next))prev) + 1 : (table.entries); \ + if (__next - table.entries < table.n_entries) { \ + scnprintf(desc, KUNIT_PARAM_DESC_SIZE, #table "/%s", __next->name); \ + return __next; \ + } \ + return NULL; \ + } + +static void xe_rtp_table_gt_test(struct kunit *test) +{ + const struct xe_rtp_entry_sr *entry = test->param_value; + + for (int i = 0; i < entry->n_rules; i++) { + KUNIT_EXPECT_TRUE(test, + entry->rules[i].match_type != XE_RTP_MATCH_ENGINE_CLASS || + entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE); + KUNIT_EXPECT_TRUE(test, + entry->rules[i].match_type != XE_RTP_MATCH_NOT_ENGINE_CLASS || + entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE); + } +} + +RTP_TABLE_PARAM(gt_was); +RTP_TABLE_PARAM(gt_tunings); + +static void xe_rtp_table_oob_test(struct kunit *test) +{ + const struct xe_rtp_entry *entry = test->param_value; + + for (int i = 0; i < entry->n_rules; i++) { + u8 match_type = entry->rules[i].match_type; + + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_ENGINE_CLASS); + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_NOT_ENGINE_CLASS); + } +} + +RTP_TABLE_PARAM(oob_was); + +static void xe_rtp_table_dev_oob_test(struct kunit *test) +{ + const struct xe_rtp_entry *entry = test->param_value; + + for (int i = 0; i < entry->n_rules; i++) { + u8 match_type = entry->rules[i].match_type; + + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_ENGINE_CLASS); + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_NOT_ENGINE_CLASS); + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_GRAPHICS_VERSION); + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_GRAPHICS_VERSION_RANGE); + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT); + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_GRAPHICS_STEP); + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_MEDIA_VERSION); + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_MEDIA_VERSION_RANGE); + KUNIT_EXPECT_NE(test, match_type, XE_RTP_MATCH_MEDIA_VERSION_ANY_GT); + KUNIT_EXPECT_NE(test, match_type, 
XE_RTP_MATCH_MEDIA_STEP); + } +} + +RTP_TABLE_PARAM(device_oob_was); + +static void xe_rtp_table_missing_upper_bound_test(struct kunit *test) +{ + const struct xe_rtp_entry_sr *entry = test->param_value; + + for (int i = 0; i < entry->n_rules; i++) { + u8 match_type = entry->rules[i].match_type; + + KUNIT_EXPECT_FALSE(test, + match_type == XE_RTP_MATCH_GRAPHICS_VERSION_RANGE && + entry->rules[i].ver_end == XE_RTP_END_VERSION_UNDEFINED); + KUNIT_EXPECT_FALSE(test, + match_type == XE_RTP_MATCH_MEDIA_VERSION_RANGE && + entry->rules[i].ver_end == XE_RTP_END_VERSION_UNDEFINED); + } +} + +RTP_TABLE_PARAM(register_whitelist); + +static struct kunit_case xe_rtp_table_tests[] = { + KUNIT_CASE_PARAM(xe_rtp_table_gt_test, gt_was_gen_params), + KUNIT_CASE_PARAM(xe_rtp_table_gt_test, gt_tunings_gen_params), + KUNIT_CASE_PARAM(xe_rtp_table_oob_test, oob_was_gen_params), + KUNIT_CASE_PARAM(xe_rtp_table_dev_oob_test, device_oob_was_gen_params), + KUNIT_CASE_PARAM(xe_rtp_table_missing_upper_bound_test, + register_whitelist_gen_params), + {} +}; + +static struct kunit_suite xe_rtp_tables_test_suite = { + .name = "xe_rtp_tables_test", + .test_cases = xe_rtp_table_tests, +}; + +kunit_test_suite(xe_rtp_tables_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 3d0688d058d9..367811621880 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -280,6 +280,11 @@ static void xe_rtp_rules_tests(struct kunit *test) KUNIT_EXPECT_EQ(test, err, param->expected_err); } +static u32 bits_2_3_set(struct xe_gt *gt, struct xe_hw_engine *hwe) +{ + return REG_BIT(2) | REG_BIT(3); +} + static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { { .name = "coalesce-same-reg", @@ -301,6 +306,29 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { ), }, { + .name = "coalesce-same-reg-literal-and-func", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(0) | REG_BIT(1) | 
REG_BIT(2) | REG_BIT(3), + .expected_clr_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2) | REG_BIT(3), + .expected_active = BIT(0) | BIT(1), + .expected_count_sr_entries = 1, + /* Different bits on the same register: create a single entry */ + .table = XE_RTP_TABLE_SR( + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1, + REG_BIT(0) | REG_BIT(1), + REG_BIT(0) | REG_BIT(1))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(FIELD_SET_FUNC(REGULAR_REG1, + REG_BIT(2) | REG_BIT(3), + bits_2_3_set)) + }, + ), + }, + { .name = "no-match-no-add", .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), @@ -418,6 +446,30 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { ), }, { + .name = "conflict-not-disjoint-literal-and-func", + .expected_reg = REGULAR_REG1, + .expected_set_bits = REG_BIT(1) | REG_BIT(2), + .expected_clr_bits = REG_BIT(1) | REG_BIT(2), + .expected_active = BIT(0) | BIT(1), + .expected_count_sr_entries = 1, + .expected_sr_errors = 1, + .table = XE_RTP_TABLE_SR( + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1, + REG_BIT(1) | REG_BIT(2), + REG_BIT(1) | REG_BIT(2))) + }, + /* drop: bits are not disjoint with previous entries */ + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(FIELD_SET_FUNC(REGULAR_REG1, + REG_BIT(2) | REG_BIT(3), + bits_2_3_set)) + }, + ), + }, + { .name = "conflict-reg-type", .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index ff0e2502b39f..21601e9df353 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -43,9 +43,6 @@ static int xe_wa_test_init(struct kunit *test) xe_gt_mmio_init(gt); } - if (!param->graphics_verx100) - xe->info.step = param->step; - /* TODO: init hw engines for engine/LRC WAs */ xe->drm.dev 
= dev; test->priv = xe; diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 22b471303984..3c018dbccc07 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -117,7 +117,6 @@ static int info(struct seq_file *m, void *data) drm_printf(&p, "revid %d\n", xe->info.revid); drm_printf(&p, "tile_count %d\n", xe->info.tile_count); drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level); - drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist)); drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs)); drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm)); drm_printf(&p, "skip_guc_pc %s\n", str_yes_no(xe->info.skip_guc_pc)); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index b60a651a3c9b..ad7f3e61d457 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -61,6 +61,7 @@ #include "xe_psmi.h" #include "xe_pxp.h" #include "xe_query.h" +#include "xe_ras.h" #include "xe_shrinker.h" #include "xe_soc_remapper.h" #include "xe_survivability_mode.h" @@ -738,9 +739,11 @@ static void vf_update_device_info(struct xe_device *xe) xe->info.probe_display = 0; xe->info.has_heci_cscfi = 0; xe->info.has_heci_gscfi = 0; + xe->info.has_i2c = 0; xe->info.has_late_bind = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; + xe->info.has_drm_ras = false; } static int xe_device_vram_alloc(struct xe_device *xe) @@ -949,6 +952,15 @@ int xe_device_probe(struct xe_device *xe) return err; } + /* + * Wa_16029380221: The affected GT will always use non-coherent + * access to page tables, so we must do uncached writes from the + * CPU. 
+ */ + for_each_gt(gt, xe, id) + if (XE_GT_WA(gt, 16029380221)) + xe->info.has_cached_pt = false; + for_each_tile(tile, xe, id) { err = xe_ggtt_init_early(tile->mem.ggtt); if (err) @@ -989,6 +1001,16 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; + err = xe_soc_remapper_init(xe); + if (err) + return err; + + err = xe_sysctrl_init(xe); + if (err) + return err; + + xe_ras_init(xe); + /* * Now that GT is initialized (TTM in particular), * we can try to init display, and inherit the initial fb. @@ -1029,10 +1051,6 @@ int xe_device_probe(struct xe_device *xe) xe_nvm_init(xe); - err = xe_soc_remapper_init(xe); - if (err) - return err; - err = xe_heci_gsc_init(xe); if (err) return err; @@ -1071,10 +1089,6 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; - err = xe_sysctrl_init(xe); - if (err) - goto err_unregister_display; - err = xe_device_sysfs_init(xe); if (err) goto err_unregister_display; diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 975768a6a9c8..a03760d0ce38 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -116,7 +116,7 @@ static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe) static inline bool xe_device_uc_enabled(struct xe_device *xe) { - return !xe->info.force_execlist; + return true; } #define for_each_tile(tile__, xe__, id__) \ @@ -181,6 +181,21 @@ static inline bool xe_device_has_mert(const struct xe_device *xe) return xe->info.has_mert; } +static inline bool xe_device_is_in_reset(struct xe_device *xe) +{ + return atomic_read(&xe->in_reset); +} + +static inline void xe_device_set_in_reset(struct xe_device *xe) +{ + atomic_set(&xe->in_reset, 1); +} + +static inline void xe_device_clear_in_reset(struct xe_device *xe) +{ + atomic_set(&xe->in_reset, 0); +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p); diff --git 
a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 32dd2ffbc796..022e08205897 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -144,8 +144,6 @@ struct xe_device { * Keep all flags below alphabetically sorted */ - /** @info.force_execlist: Forced execlist submission */ - u8 force_execlist:1; /** @info.has_access_counter: Device supports access counter */ u8 has_access_counter:1; /** @info.has_asid: Has address space ID */ @@ -156,6 +154,8 @@ struct xe_device { u8 has_cached_pt:1; /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ u8 has_device_atomics_on_smem:1; + /** @info.has_drm_ras: Device supports drm_ras (Reliability, Availability, Serviceability) */ + u8 has_drm_ras:1; /** @info.has_fan_control: Device supports fan control */ u8 has_fan_control:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ @@ -483,6 +483,9 @@ struct xe_device { /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; + /** @in_reset: Indicates if device is in reset */ + atomic_t in_reset; + /** @wedged: Struct to control Wedged States and mode */ struct { /** @wedged.flag: Xe device faced a critical error and is now blocked. 
*/ @@ -495,6 +498,9 @@ struct xe_device { bool inconsistent_reset; } wedged; + /** @devres_group: devres group */ + void *devres_group; + /** @bo_device: Struct to control async free of BOs */ struct xe_bo_dev { /** @bo_device.async_free: Free worker */ diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 81020b4b344e..e116fb562c4c 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -193,7 +193,7 @@ static void bo_meminfo(struct xe_bo *bo, if (!dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP)) stats[mem_type].active += sz; - else if (mem_type == XE_PL_SYSTEM) + else if (mem_type == XE_PL_SYSTEM || xe_bo_madv_is_dontneed(bo)) stats[mem_type].purgeable += sz; } } @@ -273,8 +273,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) &stats[mem_type], DRM_GEM_OBJECT_ACTIVE | DRM_GEM_OBJECT_RESIDENT | - (mem_type != XE_PL_SYSTEM ? 0 : - DRM_GEM_OBJECT_PURGEABLE), + DRM_GEM_OBJECT_PURGEABLE, xe_mem_type_to_name[mem_type]); } } diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c index cd236f53699e..7937d8ba0ed9 100644 --- a/drivers/gpu/drm/xe/xe_drm_ras.c +++ b/drivers/gpu/drm/xe/xe_drm_ras.c @@ -11,27 +11,46 @@ #include "xe_device_types.h" #include "xe_drm_ras.h" +#include "xe_ras.h" static const char * const error_components[] = DRM_XE_RAS_ERROR_COMPONENT_NAMES; static const char * const error_severity[] = DRM_XE_RAS_ERROR_SEVERITY_NAMES; -static int hw_query_error_counter(struct xe_drm_ras_counter *info, - u32 error_id, const char **name, u32 *val) +static int query_error_counter(struct xe_device *xe, + enum drm_xe_ras_error_severity severity, + u32 error_id, const char **name, u32 *val) { + struct xe_drm_ras *ras = &xe->ras; + struct xe_drm_ras_counter *info = ras->info[severity]; + if (!info || !info[error_id].name) return -ENOENT; *name = info[error_id].name; + + /* Fetch counter from system controller if supported */ + if 
(xe->info.has_sysctrl) + return xe_ras_get_counter(xe, severity, error_id, val); + *val = atomic_read(&info[error_id].counter); return 0; } -static int hw_clear_error_counter(struct xe_drm_ras_counter *info, u32 error_id) +static int clear_error_counter(struct xe_device *xe, + enum drm_xe_ras_error_severity severity, + u32 error_id) { + struct xe_drm_ras *ras = &xe->ras; + struct xe_drm_ras_counter *info = ras->info[severity]; + if (!info || !info[error_id].name) return -ENOENT; + /* Clear counter from system controller if supported */ + if (xe->info.has_sysctrl) + return xe_ras_clear_counter(xe, severity, error_id); + atomic_set(&info[error_id].counter, 0); return 0; @@ -41,38 +60,30 @@ static int query_uncorrectable_error_counter(struct drm_ras_node *ep, u32 error_ const char **name, u32 *val) { struct xe_device *xe = ep->priv; - struct xe_drm_ras *ras = &xe->ras; - struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_UNCORRECTABLE]; - return hw_query_error_counter(info, error_id, name, val); + return query_error_counter(xe, DRM_XE_RAS_ERR_SEV_UNCORRECTABLE, error_id, name, val); } static int clear_uncorrectable_error_counter(struct drm_ras_node *node, u32 error_id) { struct xe_device *xe = node->priv; - struct xe_drm_ras *ras = &xe->ras; - struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_UNCORRECTABLE]; - return hw_clear_error_counter(info, error_id); + return clear_error_counter(xe, DRM_XE_RAS_ERR_SEV_UNCORRECTABLE, error_id); } static int query_correctable_error_counter(struct drm_ras_node *ep, u32 error_id, const char **name, u32 *val) { struct xe_device *xe = ep->priv; - struct xe_drm_ras *ras = &xe->ras; - struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_CORRECTABLE]; - return hw_query_error_counter(info, error_id, name, val); + return query_error_counter(xe, DRM_XE_RAS_ERR_SEV_CORRECTABLE, error_id, name, val); } static int clear_correctable_error_counter(struct drm_ras_node *node, u32 error_id) { struct xe_device *xe 
= node->priv; - struct xe_drm_ras *ras = &xe->ras; - struct xe_drm_ras_counter *info = ras->info[DRM_XE_RAS_ERR_SEV_CORRECTABLE]; - return hw_clear_error_counter(info, error_id); + return clear_error_counter(xe, DRM_XE_RAS_ERR_SEV_CORRECTABLE, error_id); } static struct xe_drm_ras_counter *allocate_and_copy_counters(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 297be3c42b20..d37770c58c5d 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -985,9 +985,10 @@ int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *f return -ENODEV; } - if (xe_observation_paranoid && !perfmon_capable()) { + ret = xe_observation_paranoid_check(); + if (ret) { drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n"); - return -EACCES; + return ret; } /* Initialize and set default values */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 2f5ccf294675..d27ce24daae5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -318,8 +318,6 @@ struct xe_exec_queue_ops { void (*resume)(struct xe_exec_queue *q); /** @reset_status: check exec queue reset status */ bool (*reset_status)(struct xe_exec_queue *q); - /** @active: check exec queue is active */ - bool (*active)(struct xe_exec_queue *q); }; #endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 9fb99c038ea8..6b86b4f9cc1c 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -458,12 +458,6 @@ static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q) return false; } -static bool execlist_exec_queue_active(struct xe_exec_queue *q) -{ - /* NIY */ - return false; -} - static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .init = execlist_exec_queue_init, .kill = execlist_exec_queue_kill, @@ -476,7 +470,6 @@ static 
const struct xe_exec_queue_ops execlist_exec_queue_ops = { .suspend_wait = execlist_exec_queue_suspend_wait, .resume = execlist_exec_queue_resume, .reset_status = execlist_exec_queue_reset_status, - .active = execlist_exec_queue_active, }; int xe_execlist_init(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index a351c578b170..8ec23862477f 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -111,14 +111,14 @@ struct xe_ggtt_pt_ops { struct xe_ggtt { /** @tile: Back pointer to tile where this GGTT belongs */ struct xe_tile *tile; - /** @start: Start offset of GGTT */ + /** @start: Start offset of GGTT */ u64 start; /** @size: Total usable size of this GGTT */ u64 size; - /** - * @flags: Flags for this GGTT + * @flags: Flags for this GGTT. * Acceptable flags: + * * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. * - %XE_GGTT_FLAGS_ONLINE - is GGTT online, protected by ggtt->lock * after init @@ -129,7 +129,7 @@ struct xe_ggtt { /** @lock: Mutex lock to protect GGTT data */ struct mutex lock; /** - * @gsm: The iomem pointer to the actual location of the translation + * @gsm: The iomem pointer to the actual location of the translation * table located in the GSM for easy PTE manipulation */ u64 __iomem *gsm; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 783eb6d631b5..d904527a8898 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -917,6 +917,9 @@ static void gt_reset_worker(struct work_struct *w) if (xe_device_wedged(gt_to_xe(gt))) goto err_pm_put; + if (xe_device_is_in_reset(gt_to_xe(gt))) + goto err_pm_put; + /* We only support GT resets with GuC submission */ if (!xe_device_uc_enabled(gt_to_xe(gt))) goto err_pm_put; @@ -977,18 +980,21 @@ err_pm_put: void xe_gt_reset_async(struct xe_gt *gt) { - xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0)); + struct xe_device *xe = gt_to_xe(gt); + + if 
(xe_device_is_in_reset(xe)) + return; /* Don't do a reset while one is already in flight */ if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc)) return; - xe_gt_info(gt, "reset queued\n"); + xe_gt_info(gt, "reset queued from %ps\n", __builtin_return_address(0)); /* Pair with put in gt_reset_worker() if work is enqueued */ - xe_pm_runtime_get_noresume(gt_to_xe(gt)); + xe_pm_runtime_get_noresume(xe); if (!queue_work(gt->ordered_wq, &gt->reset.worker)) - xe_pm_runtime_put(gt_to_xe(gt)); + xe_pm_runtime_put(xe); } void xe_gt_suspend_prepare(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 04f0098070a4..a97b236dab7c 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -404,8 +404,7 @@ fallback: * Some older platforms don't have tables or don't have complete tables. * Newer platforms should always have the required info. */ - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000 && - !gt_to_xe(gt)->info.force_execlist) + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000) xe_gt_err(gt, "Slice/Subslice counts missing from hwconfig table; using typical fallback values\n"); if (gt_to_xe(gt)->info.platform == XE_PVC) @@ -507,7 +506,7 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) spin_lock_init(&gt->mcr_lock); if (gt->info.type == XE_GT_TYPE_MEDIA) { - drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); + xe_gt_WARN_ON(gt, MEDIA_VER(xe) < 13); if (MEDIA_VER(xe) >= 30) { gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table; @@ -662,9 +661,9 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, for (int type = 0; type < IMPLICIT_STEERING; type++) { if (reg_in_steering_type_ranges(gt, reg, type)) { - drm_WARN(&gt_to_xe(gt)->drm, !gt->steering[type].initialized, - "Uninitialized usage of MCR register %s/%#x\n", - xe_steering_types[type].name, reg.addr); + xe_gt_WARN(gt, !gt->steering[type].initialized, + "Uninitialized usage of MCR register %s/%#x\n", + xe_steering_types[type].name, reg.addr); *group = 
gt->steering[type].group_target; *instance = gt->steering[type].instance_target; @@ -679,9 +678,9 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, * Not found in a steering table and not a register with implicit * steering. Just steer to 0/0 as a guess and raise a warning. */ - drm_WARN(&gt_to_xe(gt)->drm, true, - "Did not find MCR register %#x in any MCR steering table\n", - reg.addr); + xe_gt_WARN(gt, true, + "Did not find MCR register %#x in any MCR steering table\n", + reg.addr); *group = 0; *instance = 0; @@ -710,7 +709,7 @@ static void mcr_lock(struct xe_gt *gt) __acquires(&gt->mcr_lock) ret = xe_mmio_wait32(&gt->mmio, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL, true); - drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); + xe_gt_WARN_ON_ONCE(gt, ret == -ETIMEDOUT); } static void mcr_unlock(struct xe_gt *gt) __releases(&gt->mcr_lock) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index f5c3d8a97ec6..12416bfa3255 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1532,7 +1532,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * If devcoredump not captured and GuC capture for the job is not ready * do manual capture first and decide later if we need to use it */ - if (!exec_queue_killed(q) && !xe->devcoredump.captured && + if (!xe_device_is_in_reset(xe) && !exec_queue_killed(q) && !xe->devcoredump.captured && !xe_guc_capture_get_matching_and_lock(q)) { /* take force wake before engine register manual capture */ CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); @@ -1554,8 +1554,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) set_exec_queue_banned(q); /* Kick job / queue off hardware */ - if (!wedged && (exec_queue_enabled(primary) || - exec_queue_pending_disable(primary))) { + if (!xe_device_is_in_reset(xe) && !wedged && + (exec_queue_enabled(primary) || exec_queue_pending_disable(primary))) { int ret; if (exec_queue_reset(primary)) @@ -1623,7 
+1623,8 @@ trigger_reset: trace_xe_sched_job_timedout(job); - if (!exec_queue_killed(q)) + /* Do not access device if in reset */ + if (!xe_device_is_in_reset(xe) && !exec_queue_killed(q)) xe_devcoredump(q, job, "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), @@ -2244,14 +2245,6 @@ static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); } -static bool guc_exec_queue_active(struct xe_exec_queue *q) -{ - struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); - - return exec_queue_enabled(primary) && - !exec_queue_pending_disable(primary); -} - /* * All of these functions are an abstraction layer which other parts of Xe can * use to trap into the GuC backend. All of these functions, aside from init, @@ -2271,7 +2264,6 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .suspend_wait = guc_exec_queue_suspend_wait, .resume = guc_exec_queue_resume, .reset_status = guc_exec_queue_reset_status, - .active = guc_exec_queue_active, }; static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c index cf6d106e6036..046d0655122f 100644 --- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -208,9 +208,6 @@ static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, lockdep_assert_held(&tlb_inval->seqno_lock); - if (guc_to_xe(guc)->info.force_execlist) - return -ECANCELED; - return send_tlb_inval_ppgtt(guc, seqno, start, end, asid, XE_GUC_TLB_INVAL_PAGE_SELECTIVE, prl_sa); } @@ -228,9 +225,6 @@ static int send_tlb_inval_ctx_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, lockdep_assert_held(&tlb_inval->seqno_lock); - if (xe->info.force_execlist) - return -ECANCELED; - vm = xe_device_asid_to_vm(xe, asid); if (IS_ERR(vm)) return PTR_ERR(vm); 
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 0b193c451a11..87d60c4117bd 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -337,39 +337,41 @@ static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_device *xe, return xe_mmio_read32(&hwe->gt->mmio, XEHP_FUSE4) & CFEG_WMTP_DISABLE; } +static u32 blit_cctl_val(struct xe_gt *gt, struct xe_hw_engine *hwe) +{ + return REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, gt->mocs.uc_index) | + REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, gt->mocs.uc_index); +} + +static const struct xe_rtp_table_sr lrc_setup = XE_RTP_TABLE_SR( + /* + * Some blitter commands do not have a field for MOCS, those + * commands will use MOCS index pointed by BLIT_CCTL. + * BLIT_CCTL registers are needed to be programmed to un-cached. + */ + { XE_RTP_NAME("BLIT_CCTL_default_MOCS"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274), + ENGINE_CLASS(COPY)), + XE_RTP_ACTIONS(FIELD_SET_FUNC(BLIT_CCTL(0), + BLIT_CCTL_DST_MOCS_MASK | + BLIT_CCTL_SRC_MOCS_MASK, + blit_cctl_val, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + /* Disable WMTP if HW doesn't support it */ + { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), + XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), + XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0), + PREEMPT_GPGPU_LEVEL_MASK, + PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) + }, +); + static void hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) { - struct xe_gt *gt = hwe->gt; - const u8 mocs_write_idx = gt->mocs.uc_index; - const u8 mocs_read_idx = gt->mocs.uc_index; - u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) | - REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx); struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - const struct xe_rtp_table_sr lrc_setup = XE_RTP_TABLE_SR( - /* - * Some blitter commands do not have a field for MOCS, those - * commands will use MOCS index pointed by BLIT_CCTL. 
- * BLIT_CCTL registers are needed to be programmed to un-cached. - */ - { XE_RTP_NAME("BLIT_CCTL_default_MOCS"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274), - ENGINE_CLASS(COPY)), - XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0), - BLIT_CCTL_DST_MOCS_MASK | - BLIT_CCTL_SRC_MOCS_MASK, - blit_cctl_val, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - /* Disable WMTP if HW doesn't support it */ - { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), - XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), - XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0), - PREEMPT_GPGPU_LEVEL_MASK, - PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) - }, - ); xe_rtp_process_to_sr(&ctx, &lrc_setup, &hwe->reg_lrc, true); } @@ -385,86 +387,92 @@ void xe_hw_engine_setup_reg_lrc(struct xe_hw_engine *hwe) xe_tuning_process_lrc(hwe); } -static void -hw_engine_setup_default_state(struct xe_hw_engine *hwe) +/* + * RING_CMD_CCTL specifies the default MOCS entry that will be + * used by the command streamer when executing commands that + * don't have a way to explicitly specify a MOCS setting. + * The default should usually reference whichever MOCS entry + * corresponds to uncached behavior, although use of a WB cached + * entry is recommended by the spec in certain circumstances on + * specific platforms. 
+ * Bspec: 72161 + */ +static u32 ring_cmd_cctl_val(struct xe_gt *gt, struct xe_hw_engine *hwe) { - struct xe_gt *gt = hwe->gt; struct xe_device *xe = gt_to_xe(gt); + u8 mocs_read_idx = gt->mocs.uc_index; + + if (hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) && + (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC)) + mocs_read_idx = gt->mocs.wb_index; + + return REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, gt->mocs.uc_index) | + REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); +} + +static const struct xe_rtp_table_sr engine_sr = XE_RTP_TABLE_SR( + { XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"), + XE_RTP_RULES(FUNC(xe_rtp_match_always)), + XE_RTP_ACTIONS(FIELD_SET_FUNC(RING_CMD_CCTL(0), + CMD_CCTL_WRITE_OVERRIDE_MASK | + CMD_CCTL_READ_OVERRIDE_MASK, + ring_cmd_cctl_val, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + { XE_RTP_NAME("Disable HW status page updates for interrupts"), + XE_RTP_RULES(FUNC(xe_rtp_match_always)), + XE_RTP_ACTIONS(SET(RING_HWSTAM(0), ~0x0, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + { XE_RTP_NAME("Disable engine 'legacy' mode"), + XE_RTP_RULES(FUNC(xe_rtp_match_always)), + XE_RTP_ACTIONS(SET(GFX_MODE(0), GFX_DISABLE_LEGACY_MODE, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, /* - * RING_CMD_CCTL specifies the default MOCS entry that will be - * used by the command streamer when executing commands that - * don't have a way to explicitly specify a MOCS setting. - * The default should usually reference whichever MOCS entry - * corresponds to uncached behavior, although use of a WB cached - * entry is recommended by the spec in certain circumstances on - * specific platforms. - * Bspec: 72161 + * To allow the GSC engine to go idle on MTL we need to enable + * idle messaging and set the hysteresis value (we use 0xA=5us + * as recommended in spec). On platforms after MTL this is + * enabled by default. 
*/ - const u8 mocs_write_idx = gt->mocs.uc_index; - const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) && - (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ? - gt->mocs.wb_index : gt->mocs.uc_index; - u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | - REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); + { XE_RTP_NAME("MTL GSCCS IDLE MSG enable"), + XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)), + XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0), + IDLE_MSG_DISABLE, + XE_RTP_ACTION_FLAG(ENGINE_BASE)), + FIELD_SET(RING_PWRCTX_MAXCNT(0), + IDLE_WAIT_TIME, + 0xA, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + /* Enable Priority Mem Read */ + { XE_RTP_NAME("Priority_Mem_Read"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + { XE_RTP_NAME("Enable CCS Engine(s)"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1255, XE_RTP_END_VERSION_UNDEFINED), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(RCU_MODE, RCU_MODE_CCS_ENABLE)) + }, + /* Use Fixed slice CCS mode */ + { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), + XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), + XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, + RCU_MODE_FIXED_SLICE_CCS_MODE)) + }, + { XE_RTP_NAME("Enable MSI-X interrupt support"), + XE_RTP_RULES(FUNC(xe_rtp_match_has_msix)), + XE_RTP_ACTIONS(SET(GFX_MODE(0), GFX_MSIX_INTERRUPT_ENABLE, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, +); + +static void +hw_engine_setup_default_state(struct xe_hw_engine *hwe) +{ struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - const struct xe_rtp_table_sr engine_sr = XE_RTP_TABLE_SR( - { XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"), - XE_RTP_RULES(FUNC(xe_rtp_match_always)), - XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0), - CMD_CCTL_WRITE_OVERRIDE_MASK | - 
CMD_CCTL_READ_OVERRIDE_MASK, - ring_cmd_cctl_val, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - { XE_RTP_NAME("Disable HW status page updates for interrupts"), - XE_RTP_RULES(FUNC(xe_rtp_match_always)), - XE_RTP_ACTIONS(SET(RING_HWSTAM(0), ~0x0, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - { XE_RTP_NAME("Disable engine 'legacy' mode"), - XE_RTP_RULES(FUNC(xe_rtp_match_always)), - XE_RTP_ACTIONS(SET(GFX_MODE(0), GFX_DISABLE_LEGACY_MODE, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - /* - * To allow the GSC engine to go idle on MTL we need to enable - * idle messaging and set the hysteresis value (we use 0xA=5us - * as recommended in spec). On platforms after MTL this is - * enabled by default. - */ - { XE_RTP_NAME("MTL GSCCS IDLE MSG enable"), - XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)), - XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0), - IDLE_MSG_DISABLE, - XE_RTP_ACTION_FLAG(ENGINE_BASE)), - FIELD_SET(RING_PWRCTX_MAXCNT(0), - IDLE_WAIT_TIME, - 0xA, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - /* Enable Priority Mem Read */ - { XE_RTP_NAME("Priority_Mem_Read"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - { XE_RTP_NAME("Enable CCS Engine(s)"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1255, XE_RTP_END_VERSION_UNDEFINED), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(RCU_MODE, RCU_MODE_CCS_ENABLE)) - }, - /* Use Fixed slice CCS mode */ - { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), - XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), - XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, - RCU_MODE_FIXED_SLICE_CCS_MODE)) - }, - { XE_RTP_NAME("Enable MSI-X interrupt support"), - XE_RTP_RULES(FUNC(xe_rtp_match_has_msix)), - XE_RTP_ACTIONS(SET(GFX_MODE(0), GFX_MSIX_INTERRUPT_ENABLE, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - ); xe_rtp_process_to_sr(&ctx, &engine_sr, &hwe->reg_sr, false); } diff --git 
a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c index 4b72959b2276..4a4b363fc844 100644 --- a/drivers/gpu/drm/xe/xe_hw_error.c +++ b/drivers/gpu/drm/xe/xe_hw_error.c @@ -437,6 +437,16 @@ static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_er if (!IS_DGFX(xe)) return; + /* + * Hardware errors are reported through System Controller on the platforms that + * support it, and never routed as direct IRQ to SGUnit. So we should never be + * here for those platforms. + */ + if (xe->info.has_sysctrl) { + drm_err_ratelimited(&xe->drm, HW_ERR "Invalid error routing\n"); + return; + } + spin_lock_irqsave(&xe->irq.lock, flags); err_src = xe_mmio_read32(&tile->mmio, DEV_ERR_STAT_REG(hw_err)); if (!err_src) { @@ -516,14 +526,6 @@ void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl) } } -static int hw_error_info_init(struct xe_device *xe) -{ - if (xe->info.platform != XE_PVC) - return 0; - - return xe_drm_ras_init(xe); -} - /* * Process hardware errors during boot */ @@ -550,16 +552,11 @@ static void process_hw_errors(struct xe_device *xe) void xe_hw_error_init(struct xe_device *xe) { struct xe_tile *tile = xe_device_get_root_tile(xe); - int ret; if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) return; INIT_WORK(&tile->csc_hw_error_work, csc_hw_error_work); - ret = hw_error_info_init(xe); - if (ret) - drm_err(&xe->drm, "Failed to initialize XE DRM RAS (%pe)\n", ERR_PTR(ret)); - process_hw_errors(xe); } diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 706783863d07..bd956776b10b 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -334,9 +334,6 @@ int xe_i2c_probe(struct xe_device *xe) if (!xe->info.has_i2c) return 0; - if (IS_SRIOV_VF(xe)) - return 0; - xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) return 0; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index a4292a11391d..3e7c995085d0 100644 --- 
a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -2618,13 +2618,19 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) kfree(snapshot); } +static bool engine_valid_for_utilization(struct xe_gt *gt, struct xe_hw_engine *hwe) +{ + /* The USM-reserved copy engine runs kernel migrate contexts queried here */ + return hwe && (!xe_hw_engine_is_reserved(hwe) || xe_gt_is_usm_hwe(gt, hwe)); +} + static struct xe_hw_engine *engine_id_to_hwe(struct xe_gt *gt, u32 engine_id) { u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id); u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id); struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, class, instance, false); - if (xe_gt_WARN_ONCE(gt, !hwe || xe_hw_engine_is_reserved(hwe), + if (xe_gt_WARN_ONCE(gt, !engine_valid_for_utilization(gt, hwe), "Unexpected engine class:instance %d:%d for utilization\n", class, instance)) return NULL; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 78adb303b663..7fa18dfcb5a2 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -11,27 +11,18 @@ #include <linux/pci.h> #include <drm/drm_managed.h> -#include <drm/drm_print.h> #include "regs/xe_bars.h" #include "xe_device.h" #include "xe_gt_sriov_vf.h" +#include "xe_printk.h" #include "xe_sriov.h" +#include "xe_tile_printk.h" #include "xe_trace.h" #include "xe_wa.h" #include "generated/xe_device_wa_oob.h" -static void tiles_fini(void *arg) -{ - struct xe_device *xe = arg; - struct xe_tile *tile; - int id; - - for_each_remote_tile(tile, xe, id) - tile->mmio.regs = NULL; -} - /* * On multi-tile devices, partition the BAR space for MMIO on each tile, * possibly accounting for register override on the number of tiles available. 
@@ -56,50 +47,71 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) struct xe_tile *tile; u8 id; - /* - * Nothing to be done as tile 0 has already been setup earlier with the - * entire BAR mapped - see xe_mmio_probe_early() - */ - if (xe->info.tile_count == 1) - return; - for_each_remote_tile(tile, xe, id) xe_mmio_init(&tile->mmio, tile, xe->mmio.regs + id * tile_mmio_size, SZ_4M); } +/** + * xe_mmio_probe_tiles() - Initialize all tiles' MMIO + * @xe: the &xe_device + * + * Initialize the remaining tiles' MMIO instances. + * + * Return: 0 on success or a negative error code on failure. + */ int xe_mmio_probe_tiles(struct xe_device *xe) { size_t tile_mmio_size = SZ_16M; - mmio_multi_tile_setup(xe, tile_mmio_size); + /* + * Nothing to be done as tile 0 has already been setup earlier with the + * entire BAR mapped - see xe_mmio_probe_early() + */ + if (xe->info.tile_count == 1) + return 0; - return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe); + if (xe->mmio.size < xe->info.tile_count * tile_mmio_size) { + xe_err(xe, "GTTMMADR_BAR is too small for %d tiles: %zu\n", + xe->info.tile_count, xe->mmio.size); + return -EIO; + } + + mmio_multi_tile_setup(xe, tile_mmio_size); + return 0; } static void mmio_fini(void *arg) { struct xe_device *xe = arg; - struct xe_tile *root_tile = xe_device_get_root_tile(xe); - pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); xe->mmio.regs = NULL; - root_tile->mmio.regs = NULL; } +/** + * xe_mmio_probe_early() - Probe and initialize device's MMIO + * @xe: the &xe_device + * + * Map the entire GTTMMADR_BAR and initialize the first tile's MMIO instance. + * + * The first 16MB of the GTTMMADR_BAR always belongs to the root tile, and + * includes: registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB). + * + * Return: 0 on success or a negative error code on failure. 
+ */ int xe_mmio_probe_early(struct xe_device *xe) { struct xe_tile *root_tile = xe_device_get_root_tile(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - /* - * Map the entire BAR. - * The first 16MB of the BAR, belong to the root tile, and include: - * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB). - */ - xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR); - xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0); + xe->mmio.regs = pcim_iomap(pdev, GTTMMADR_BAR, 0); if (!xe->mmio.regs) { - drm_err(&xe->drm, "failed to map registers\n"); + xe_err(xe, "Failed to map GTTMMADR_BAR\n"); + return -EIO; + } + + xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR); + if (xe->mmio.size < SZ_16M) { + xe_err(xe, "GTTMMADR_BAR is too small: %zu\n", xe->mmio.size); return -EIO; } @@ -128,6 +140,11 @@ void xe_mmio_init(struct xe_mmio *mmio, struct xe_tile *tile, void __iomem *ptr, mmio->tile = tile; } +static bool mmio_available(struct xe_mmio *mmio) +{ + return !xe_tile_WARN_ON_ONCE(mmio->tile, !mmio->tile->xe->mmio.regs); +} + static void mmio_flush_pending_writes(struct xe_mmio *mmio) { #define DUMMY_REG_OFFSET 0x130030 @@ -146,6 +163,9 @@ u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u8 val; + if (!mmio_available(mmio)) + return 0; + mmio_flush_pending_writes(mmio); val = readb(mmio->regs + addr); @@ -158,6 +178,9 @@ void xe_mmio_write8(struct xe_mmio *mmio, struct xe_reg reg, u8 val) { u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); + if (!mmio_available(mmio)) + return; + trace_xe_reg_rw(mmio, true, addr, val, sizeof(val)); writeb(val, mmio->regs + addr); @@ -168,6 +191,9 @@ u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u16 val; + if (!mmio_available(mmio)) + return 0; + mmio_flush_pending_writes(mmio); val = readw(mmio->regs + addr); @@ -180,6 +206,9 @@ void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val) { 
u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); + if (!mmio_available(mmio)) + return; + trace_xe_reg_rw(mmio, true, addr, val, sizeof(val)); if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) @@ -194,6 +223,9 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u32 val; + if (!mmio_available(mmio)) + return 0; + mmio_flush_pending_writes(mmio); if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) @@ -282,8 +314,8 @@ u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg) oldudw = udw; } - drm_WARN(&mmio->tile->xe->drm, retries == 0, - "64-bit read of %#x did not stabilize\n", reg.addr); + xe_tile_WARN(mmio->tile, retries == 0, + "MMIO: 64-bit read of %#x did not stabilize\n", reg.addr); return (u64)udw << 32 | ldw; } diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 4cb578182912..39e4fc85f019 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -36,9 +36,6 @@ module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600) MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 " "[default=" __stringify(XE_DEFAULT_SVM_NOTIFIER_SIZE) "]"); -module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); -MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); - #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched " diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 79cb9639c0f3..c75153471248 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -10,7 +10,6 @@ /* Module modprobe variables */ struct xe_modparam { - bool force_execlist; bool probe_display; int force_vram_bar_size; int guc_log_level; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 
2dce6a47202c..b3acbcd678b7 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1702,11 +1702,12 @@ static int xe_oa_release(struct inode *inode, struct file *file) static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) { struct xe_oa_stream *stream = file->private_data; + int ret = xe_observation_paranoid_check(); struct xe_bo *bo = stream->oa_buffer.bo; - if (xe_observation_paranoid && !perfmon_capable()) { + if (ret) { drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n"); - return -EACCES; + return ret; } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ @@ -2080,10 +2081,12 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f privileged_op = true; } - if (privileged_op && xe_observation_paranoid && !perfmon_capable()) { - drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n"); - ret = -EACCES; - goto err_exec_q; + if (privileged_op) { + ret = xe_observation_paranoid_check(); + if (ret) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n"); + goto err_exec_q; + } } if (!param.exec_q && !param.sample) { @@ -2365,9 +2368,10 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi return -ENODEV; } - if (xe_observation_paranoid && !perfmon_capable()) { + err = xe_observation_paranoid_check(); + if (err) { drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); - return -EACCES; + return err; } err = copy_from_user(&param, u64_to_user_ptr(data), sizeof(param)); @@ -2467,9 +2471,10 @@ int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file return -ENODEV; } - if (xe_observation_paranoid && !perfmon_capable()) { + ret = xe_observation_paranoid_check(); + if (ret) { drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); - return -EACCES; + return ret; } ret = get_user(arg, ptr); diff --git a/drivers/gpu/drm/xe/xe_observation.c 
b/drivers/gpu/drm/xe/xe_observation.c index e3f9b546207e..39e05b9131a7 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -4,6 +4,7 @@ */ #include <linux/errno.h> +#include <linux/perf_event.h> #include <linux/sysctl.h> #include <uapi/drm/xe_drm.h> @@ -12,9 +13,28 @@ #include "xe_oa.h" #include "xe_observation.h" -u32 xe_observation_paranoid = true; +static u32 xe_observation_paranoid = true; static struct ctl_table_header *sysctl_header; +/** + * xe_observation_paranoid_check - Gate access to xe observation streams. + * + * When the xe-specific observation_paranoid sysctl is enabled (the + * default), defer to perf_allow_cpu() so that access is governed by the + * same policy as system-wide perf CPU events: kernel.perf_event_paranoid + * plus the security_perf_event_open() LSM hook. When the sysctl has been + * cleared by a privileged user, observation is open to all callers. + * + * Return: 0 if access is permitted, a negative errno otherwise. + */ +int xe_observation_paranoid_check(void) +{ + if (!xe_observation_paranoid) + return 0; + + return perf_allow_cpu(); +} + static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_observation_param *arg, struct drm_file *file) { @@ -83,11 +103,13 @@ static const struct ctl_table observation_ctl_table[] = { }; /** - * xe_observation_sysctl_register - Register xe_observation_paranoid sysctl + * xe_observation_sysctl_register - Register the observation_paranoid sysctl * - * Normally only superuser/root can access observation stream - * data. However, superuser can set xe_observation_paranoid sysctl to 0 to - * allow non-privileged users to also access observation data. + * When dev.xe.observation_paranoid is set (the default), access to + * observation streams follows the system-wide perf_allow_cpu() policy: + * kernel.perf_event_paranoid plus the security_perf_event_open() LSM + * hook. 
A privileged user can clear the sysctl to bypass that gate and + * allow unprivileged access to observation data. * * Return: always returns 0 */ diff --git a/drivers/gpu/drm/xe/xe_observation.h b/drivers/gpu/drm/xe/xe_observation.h index 17816998e966..73a03e03c96a 100644 --- a/drivers/gpu/drm/xe/xe_observation.h +++ b/drivers/gpu/drm/xe/xe_observation.h @@ -11,8 +11,7 @@ struct drm_device; struct drm_file; -extern u32 xe_observation_paranoid; - +int xe_observation_paranoid_check(void); int xe_observation_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_observation_sysctl_register(void); void xe_observation_sysctl_unregister(void); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3165686e3e04..91af603e9431 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -26,6 +26,7 @@ #include "xe_guc.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_pci_error.h" #include "xe_pci_rebar.h" #include "xe_pci_sriov.h" #include "xe_pci_types.h" @@ -355,6 +356,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { PLATFORM(PVC), .dma_mask_size = 52, .has_display = false, + .has_drm_ras = true, .has_gsc_nvm = 1, .has_heci_gscfi = 1, .max_gt_per_tile = 1, @@ -457,6 +459,7 @@ static const struct xe_device_desc cri_desc = { PLATFORM(CRESCENTISLAND), .dma_mask_size = 52, .has_display = false, + .has_drm_ras = true, .has_flat_ccs = false, .has_gsc_nvm = 1, .has_i2c = true, @@ -599,8 +602,6 @@ static int read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u struct xe_reg gmdid_reg = GMD_ID; u32 val; - KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid); - if (IS_SRIOV_VF(xe)) { /* * To get the value of the GMDID register, VFs must obtain it @@ -726,14 +727,30 @@ static int handle_gmdid(struct xe_device *xe, return 0; } -static void init_devid(struct xe_device *xe) +struct xe_probed_info { + u16 devid; + u8 revid; + u8 tile_count; + struct xe_step_info step; + const 
struct xe_ip *graphics_ip; + const struct xe_ip *media_ip; +}; + +/* + * Probe from the hardware the info required by xe_info_init_early(). + */ +static int xe_probe_info_early(struct xe_device *xe, + const struct xe_device_desc *desc, + struct xe_probed_info *probed_info) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - KUNIT_STATIC_STUB_REDIRECT(init_devid, xe); + probed_info->devid = pdev->device; + probed_info->revid = pdev->revision; - xe->info.devid = pdev->device; - xe->info.revid = pdev->revision; + xe_step_platform_get(desc->platform, probed_info->revid, &probed_info->step); + + return 0; } /* @@ -742,17 +759,20 @@ static void init_devid(struct xe_device *xe) */ static int xe_info_init_early(struct xe_device *xe, const struct xe_device_desc *desc, - const struct xe_subplatform_desc *subplatform_desc) + const struct xe_subplatform_desc *subplatform_desc, + struct xe_probed_info *probed_info) { int err; + xe->info.devid = probed_info->devid; + xe->info.revid = probed_info->revid; + xe->info.step.platform = probed_info->step.platform; + xe->info.platform_name = desc->platform_name; xe->info.platform = desc->platform; xe->info.subplatform = subplatform_desc ? 
subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; - init_devid(xe); - xe->info.dma_mask_size = desc->dma_mask_size; xe->info.va_bits = desc->va_bits; xe->info.vm_max_level = desc->vm_max_level; @@ -760,6 +780,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.is_dgfx = desc->is_dgfx; xe->info.has_cached_pt = desc->has_cached_pt; + xe->info.has_drm_ras = desc->has_drm_ras; xe->info.has_fan_control = desc->has_fan_control; /* runtime fusing may force flat_ccs to disabled later */ xe->info.has_flat_ccs = desc->has_flat_ccs; @@ -789,14 +810,10 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && desc->has_display; - xe->info.force_execlist = xe_modparam.force_execlist; xe_assert(xe, desc->max_gt_per_tile > 0); xe_assert(xe, desc->max_gt_per_tile <= XE_MAX_GT_PER_TILE); xe->info.max_gt_per_tile = desc->max_gt_per_tile; - xe->info.tile_count = 1 + desc->max_remote_tiles; - - xe_step_platform_get(xe); err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); if (err) @@ -805,22 +822,21 @@ static int xe_info_init_early(struct xe_device *xe, return 0; } -/* - * Possibly override number of tile based on configuration register. - */ -static void xe_info_probe_tile_count(struct xe_device *xe) +static void xe_probe_tile_count(struct xe_device *xe, + const struct xe_device_desc *desc, + struct xe_probed_info *probed_info) { struct xe_mmio *mmio; u8 tile_count; u32 mtcfg; - KUNIT_STATIC_STUB_REDIRECT(xe_info_probe_tile_count, xe); + probed_info->tile_count = 1 + desc->max_remote_tiles; /* * Probe for tile count only for platforms that support multiple * tiles. 
*/ - if (xe->info.tile_count == 1) + if (probed_info->tile_count == 1) return; mmio = xe_root_tile_mmio(xe); @@ -833,10 +849,10 @@ static void xe_info_probe_tile_count(struct xe_device *xe) mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; - if (tile_count < xe->info.tile_count) { + if (tile_count < probed_info->tile_count) { drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", - xe->info.tile_count, tile_count); - xe->info.tile_count = tile_count; + probed_info->tile_count, tile_count); + probed_info->tile_count = tile_count; } } @@ -909,25 +925,10 @@ static struct xe_gt *alloc_media_gt(struct xe_tile *tile, return gt; } -/* - * Initialize device info content that does require knowledge about - * graphics / media IP version. - * Make sure that GT / tile structures allocated by the driver match the data - * present in device info. - */ -static int xe_info_init(struct xe_device *xe, - const struct xe_device_desc *desc) +static int xe_probe_ips(struct xe_device *xe, + const struct xe_device_desc *desc, + struct xe_probed_info *probed_info) { - u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0; - const struct xe_ip *graphics_ip; - const struct xe_ip *media_ip; - const struct xe_graphics_desc *graphics_desc; - const struct xe_media_desc *media_desc; - struct xe_tile *tile; - struct xe_gt *gt; - int ret; - u8 id; - /* * If this platform supports GMD_ID, we'll detect the proper IP * descriptor to use from hardware registers. @@ -936,17 +937,21 @@ static int xe_info_init(struct xe_device *xe, * versions are simply derived from that. 
*/ if (desc->pre_gmdid_graphics_ip) { - graphics_ip = desc->pre_gmdid_graphics_ip; - media_ip = desc->pre_gmdid_media_ip; - xe_step_pre_gmdid_get(xe); + probed_info->graphics_ip = desc->pre_gmdid_graphics_ip; + probed_info->media_ip = desc->pre_gmdid_media_ip; + xe_step_pre_gmdid_get(xe, &probed_info->step); } else { + int err; + u32 graphics_revid, media_revid; + xe_assert(xe, !desc->pre_gmdid_media_ip); - ret = handle_gmdid(xe, &graphics_ip, &media_ip, - &graphics_gmdid_revid, &media_gmdid_revid); - if (ret) - return ret; - xe_step_gmdid_get(xe, graphics_gmdid_revid, media_gmdid_revid); + err = handle_gmdid(xe, &probed_info->graphics_ip, &probed_info->media_ip, + &graphics_revid, &media_revid); + if (err) + return err; + + xe_step_gmdid_get(xe, graphics_revid, media_revid, &probed_info->step); } /* @@ -954,9 +959,56 @@ static int xe_info_init(struct xe_device *xe, * error and we should abort driver load. Failing to detect media * IP is non-fatal; we'll just proceed without enabling media support. */ - if (!graphics_ip) + if (!probed_info->graphics_ip) return -ENODEV; + return 0; +} + +/* + * Probe from the hardware the info required by xe_info_init(). + */ +static int xe_probe_info(struct xe_device *xe, + const struct xe_device_desc *desc, + struct xe_probed_info *probed_info) +{ + int err; + + xe_probe_tile_count(xe, desc, probed_info); + + err = xe_probe_ips(xe, desc, probed_info); + if (err) + return err; + + return 0; +} + +/* + * Initialize device info content that does require knowledge about + * graphics / media IP version. + * Make sure that GT / tile structures allocated by the driver match the data + * present in device info. 
+ */ +static int xe_info_init(struct xe_device *xe, + const struct xe_device_desc *desc, + struct xe_probed_info *probed_info) +{ + const struct xe_ip *graphics_ip; + const struct xe_ip *media_ip; + const struct xe_graphics_desc *graphics_desc; + const struct xe_media_desc *media_desc; + struct xe_tile *tile; + struct xe_gt *gt; + u8 id; + + graphics_ip = probed_info->graphics_ip; + media_ip = probed_info->media_ip; + + xe->info.tile_count = probed_info->tile_count; + xe->info.step.basedie = probed_info->step.basedie; + xe->info.step.graphics = probed_info->step.graphics; + xe->info.step.media = probed_info->step.media; + xe->info.graphics_verx100 = graphics_ip->verx100; xe->info.graphics_name = graphics_ip->name; graphics_desc = graphics_ip->desc; @@ -988,8 +1040,6 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_soc_remapper_telem = 0; } - xe_info_probe_tile_count(xe); - for_each_remote_tile(tile, xe, id) { int err; @@ -1072,9 +1122,11 @@ static void xe_pci_remove(struct pci_dev *pdev) */ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { + struct xe_probed_info probed_info = {}; const struct xe_device_desc *desc = (const void *)ent->driver_data; const struct xe_subplatform_desc *subplatform_desc; struct xe_device *xe; + void *group; int err; subplatform_desc = find_subplatform(desc, pdev->device); @@ -1102,6 +1154,11 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (xe_display_driver_probe_defer(pdev)) return -EPROBE_DEFER; + /* Group all devres so xe_pci_error_slot_reset() can release them as a unit. 
*/ + group = devres_open_group(&pdev->dev, NULL, GFP_KERNEL); + if (!group) + return -ENOMEM; + err = pcim_enable_device(pdev); if (err) return err; @@ -1110,13 +1167,19 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (IS_ERR(xe)) return PTR_ERR(xe); + xe->devres_group = group; + pci_set_drvdata(pdev, &xe->drm); xe_pm_assert_unbounded_bridge(xe); pci_set_master(pdev); - err = xe_info_init_early(xe, desc, subplatform_desc); + err = xe_probe_info_early(xe, desc, &probed_info); + if (err) + return err; + + err = xe_info_init_early(xe, desc, subplatform_desc, &probed_info); if (err) return err; @@ -1135,7 +1198,11 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - err = xe_info_init(xe, desc); + err = xe_probe_info(xe, desc, &probed_info); + if (err) + return err; + + err = xe_info_init(xe, desc, &probed_info); if (err) return err; @@ -1348,6 +1415,7 @@ static struct pci_driver xe_pci_driver = { .remove = xe_pci_remove, .shutdown = xe_pci_shutdown, .sriov_configure = xe_pci_sriov_configure, + .err_handler = &xe_pci_error_handlers, #ifdef CONFIG_PM_SLEEP .driver.pm = &xe_pm_ops, #endif diff --git a/drivers/gpu/drm/xe/xe_pci_error.c b/drivers/gpu/drm/xe/xe_pci_error.c new file mode 100644 index 000000000000..9b78cc0d3293 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci_error.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2026 Intel Corporation + */ + +#include <linux/pci.h> + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_pci.h" +#include "xe_pm.h" +#include "xe_printk.h" +#include "xe_survivability_mode.h" + +static void prepare_device_for_reset(struct pci_dev *pdev) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_gt *gt; + u8 id; + + /* + * Wedge the device to prevent userspace access but do not send the uevent. 
+ * xe_device_wedged_fini() releases runtime pm if wedged flag is set, so acquire a runtime + * pm reference to avoid underflow. + */ + if (!atomic_xchg(&xe->wedged.flag, 1)) + xe_pm_runtime_get_noresume(xe); + + xe_device_set_in_reset(xe); + + for_each_gt(gt, xe, id) + xe_gt_declare_wedged(gt); + + pci_disable_device(pdev); +} + +static pci_ers_result_t xe_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + + xe_info(xe, "PCI error: detected state = %d\n", state); + + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + /* If the device is already wedged or in survivability mode, do not attempt recovery */ + if (xe_survivability_mode_is_boot_enabled(xe) || xe_device_wedged(xe)) + return PCI_ERS_RESULT_DISCONNECT; + + switch (state) { + case pci_channel_io_normal: + return PCI_ERS_RESULT_CAN_RECOVER; + case pci_channel_io_frozen: + prepare_device_for_reset(pdev); + return PCI_ERS_RESULT_NEED_RESET; + default: + xe_info(xe, "PCI error: unknown state %d\n", state); + return PCI_ERS_RESULT_DISCONNECT; + } +} + +static pci_ers_result_t xe_pci_error_mmio_enabled(struct pci_dev *pdev) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + + xe_info(xe, "PCI error: MMIO enabled\n"); + + /* TODO: Query system controller for the type of error and take appropriate action */ + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t xe_pci_error_slot_reset(struct pci_dev *pdev) +{ + const struct pci_device_id *ent = pci_match_id(pdev->driver->id_table, pdev); + struct xe_device *xe = pdev_to_xe_device(pdev); + + xe_info(xe, "PCI error: slot reset\n"); + + pci_restore_state(pdev); + + if (pci_enable_device(pdev)) { + xe_err(xe, "Cannot re-enable PCI device after reset\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + + /* + * Secondary Bus Reset causes all VRAM state to be lost along with + * hardware state. 
As an initial step, re-probe the device to + * re-initialize the driver and hardware. + * TODO: optimize by re-initializing only the hardware state and re-creating + * kernel BOs. + */ + xe_device_clear_in_reset(xe); + pdev->driver->remove(pdev); + devres_release_group(&pdev->dev, xe->devres_group); + + if (pdev->driver->probe(pdev, ent)) + return PCI_ERS_RESULT_DISCONNECT; + + xe = pdev_to_xe_device(pdev); + + /* Wedge the device to prevent I/O operations till the resume callback */ + atomic_set(&xe->wedged.flag, 1); + + return PCI_ERS_RESULT_RECOVERED; +} + +static void xe_pci_error_resume(struct pci_dev *pdev) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + + xe_info(xe, "PCI error: resume\n"); + + atomic_set(&xe->wedged.flag, 0); +} + +const struct pci_error_handlers xe_pci_error_handlers = { + .error_detected = xe_pci_error_detected, + .mmio_enabled = xe_pci_error_mmio_enabled, + .slot_reset = xe_pci_error_slot_reset, + .resume = xe_pci_error_resume, +}; diff --git a/drivers/gpu/drm/xe/xe_pci_error.h b/drivers/gpu/drm/xe/xe_pci_error.h new file mode 100644 index 000000000000..725ad0214e62 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci_error.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef _XE_PCI_ERROR_H_ +#define _XE_PCI_ERROR_H_ + +struct pci_error_handlers; + +extern const struct pci_error_handlers xe_pci_error_handlers; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 5b85e2c24b7b..24d4a3d00517 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -40,6 +40,7 @@ struct xe_device_desc { u8 has_cached_pt:1; u8 has_display:1; + u8 has_drm_ras:1; u8 has_fan_control:1; u8 has_flat_ccs:1; u8 has_gsc_nvm:1; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index dc66d0c7ee06..866986694d9c 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -323,15 +323,17 
@@ int xe_pcode_ready(struct xe_device *xe, bool locked) } /** - * xe_pcode_init: initialize components of PCODE + * xe_pcode_init_early() - Initialize components of PCODE * @tile: tile instance * * This function initializes the xe_pcode component. * To be called once only during probe. + * + * Return: 0 on success or a negative error code on failure. */ -void xe_pcode_init(struct xe_tile *tile) +int xe_pcode_init_early(struct xe_tile *tile) { - drmm_mutex_init(&tile_to_xe(tile)->drm, &tile->pcode.lock); + return drmm_mutex_init(&tile_to_xe(tile)->drm, &tile->pcode.lock); } /** diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index 490e4f269607..18260c29e620 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -12,7 +12,7 @@ struct drm_device; struct xe_device; struct xe_tile; -void xe_pcode_init(struct xe_tile *tile); +int xe_pcode_init_early(struct xe_tile *tile); int xe_pcode_probe_early(struct xe_device *xe); int xe_pcode_ready(struct xe_device *xe, bool locked); int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq, diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 670bc2206fea..5fdad444009f 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1026,12 +1026,22 @@ xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *t u64 *ptr = data; u32 i; + /* + * @qword_ofs is the absolute entry offset within the page table, while + * @ptes is indexed relative to @update->ofs (its first entry). The GPU + * path (write_pgtable) splits a single update into MAX_PTE_PER_SDI-sized + * chunks, calling this with an advancing @qword_ofs but a fresh @data + * pointer per chunk, so translate back into a @ptes index rather than + * assuming the chunk starts at ptes[0]. 
+ */ for (i = 0; i < num_qwords; i++) { + u32 idx = qword_ofs - update->ofs + i; + if (map) xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); + sizeof(u64), u64, ptes[idx].pte); else - ptr[i] = ptes[i].pte; + ptr[i] = ptes[idx].pte; } } @@ -2070,6 +2080,9 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, * automatically when the context is re-enabled by the rebind worker, * or in fault mode it was invalidated on PTE zapping. * + * If rebind, we have to invalidate TLB on context based TLB invalidation + * LR vms, as they cannot be relied on context re-enable. + * * If !rebind, and scratch enabled VMs, there is a chance the scratch * PTE is already cached in the TLB so it needs to be invalidated. * On !LR VMs this is done in the ring ops preceding a batch, but on @@ -2079,6 +2092,9 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, if ((!pt_op->rebind && xe_vm_has_scratch(vm) && xe_vm_in_lr_mode(vm))) pt_update_ops->needs_invalidation = true; + else if (pt_op->rebind && xe_vm_in_preempt_fence_mode(vm) && + vm->xe->info.has_ctx_tlb_inval) + pt_update_ops->needs_invalidation = true; else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) /* We bump also if batch_invalidate_tlb is true */ vm->tlb_flush_seqno++; diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index 968b7e70b3f9..fea3d8ceeddb 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -59,6 +59,7 @@ bool xe_pxp_is_enabled(const struct xe_pxp *pxp) static bool pxp_prerequisites_done(const struct xe_pxp *pxp) { struct xe_gt *gt = pxp->gt; + bool huc_ok; bool ready; CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); @@ -73,9 +74,14 @@ static bool pxp_prerequisites_done(const struct xe_pxp *pxp) */ XE_WARN_ON(!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)); - /* PXP requires both HuC authentication via GSC and GSC proxy initialized */ - ready = 
xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC) && - xe_gsc_proxy_init_done(&gt->uc.gsc); + /* + * PXP requires GSC proxy to be initialized. On platforms where the HuC + * is loaded by the kernel driver (i.e., pre media 35) PXP also requires + * the HuC to be authenticated by GSC. + */ + huc_ok = MEDIA_VER(gt_to_xe(gt)) >= 35 || + xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC); + ready = huc_ok && xe_gsc_proxy_init_done(&gt->uc.gsc); return ready; } @@ -97,9 +103,13 @@ int xe_pxp_get_readiness_status(struct xe_pxp *pxp) if (!xe_pxp_is_enabled(pxp)) return -ENODEV; - /* if the GSC or HuC FW are in an error state, PXP will never work */ - if (xe_uc_fw_status_to_error(pxp->gt->uc.huc.fw.status) || - xe_uc_fw_status_to_error(pxp->gt->uc.gsc.fw.status)) + /* If the GSC FW is in an error state, PXP will never work */ + if (xe_uc_fw_status_to_error(pxp->gt->uc.gsc.fw.status)) + return -EIO; + + /* Same for HuC FW, but only if the kernel owns HuC-loading (i.e. pre-NVL) */ + if (MEDIA_VER(gt_to_xe(pxp->gt)) < 35 && + xe_uc_fw_status_to_error(pxp->gt->uc.huc.fw.status)) return -EIO; guard(xe_pm_runtime)(pxp->xe); @@ -361,6 +371,7 @@ static void pxp_fini(void *arg) int xe_pxp_init(struct xe_device *xe) { struct xe_gt *gt = xe->tiles[0].media_gt; + bool gsc_ok, huc_ok; struct xe_pxp *pxp; int err; @@ -375,10 +386,14 @@ int xe_pxp_init(struct xe_device *xe) if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))) return 0; - /* PXP requires both GSC and HuC firmwares to be available */ - if (!xe_uc_fw_is_loadable(&gt->uc.gsc.fw) || - !xe_uc_fw_is_loadable(&gt->uc.huc.fw)) { - drm_info(&xe->drm, "skipping PXP init due to missing FW dependencies"); + /* PXP requires GSC FW to be available. 
Pre-NVL it also requires HuC FW */ + gsc_ok = xe_uc_fw_is_loadable(&gt->uc.gsc.fw); + huc_ok = MEDIA_VER(xe) >= 35 || xe_uc_fw_is_loadable(&gt->uc.huc.fw); + + if (!gsc_ok || !huc_ok) { + drm_info(&xe->drm, "Skipping PXP due to unsatisfied FW deps - GSC=%s, HuC=%s\n", + str_yes_no(gsc_ok), + MEDIA_VER(xe) >= 35 ? "not needed" : str_yes_no(huc_ok)); return 0; } diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 8c7d54498f38..dc975f595368 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -119,6 +119,7 @@ query_engine_cycles(struct xe_device *xe, struct drm_xe_engine_class_instance *eci; struct drm_xe_query_engine_cycles resp; size_t size = sizeof(resp); + enum xe_force_wake_domains fw_domain; __ktime_func_t cpu_clock; struct xe_hw_engine *hwe; struct xe_gt *gt; @@ -154,8 +155,10 @@ query_engine_cycles(struct xe_device *xe, if (!hwe) return -EINVAL; - xe_with_force_wake(fw_ref, gt_to_fw(gt), XE_FORCEWAKE_ALL) { - if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) + fw_domain = xe_hw_engine_to_fw_domain(hwe); + + xe_with_force_wake(fw_ref, gt_to_fw(gt), fw_domain) { + if (!xe_force_wake_ref_has_domain(fw_ref.domains, fw_domain)) return -EIO; hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp, diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c index 4cb16b419b0c..74d5016d9ffe 100644 --- a/drivers/gpu/drm/xe/xe_ras.c +++ b/drivers/gpu/drm/xe/xe_ras.c @@ -4,11 +4,15 @@ */ #include "xe_device.h" +#include "xe_drm_ras.h" +#include "xe_pm.h" #include "xe_printk.h" #include "xe_ras.h" #include "xe_ras_types.h" #include "xe_sysctrl.h" #include "xe_sysctrl_event_types.h" +#include "xe_sysctrl_mailbox.h" +#include "xe_sysctrl_mailbox_types.h" /* Severity of detected errors */ enum xe_ras_severity { @@ -31,6 +35,17 @@ enum xe_ras_component { XE_RAS_COMP_MAX }; +/* RAS response status codes */ +enum xe_ras_response_status { + XE_RAS_STATUS_SUCCESS = 0, + 
XE_RAS_STATUS_INVALID_PARAM, + XE_RAS_STATUS_OP_NOT_SUPPORTED, + XE_RAS_STATUS_TIMEOUT, + XE_RAS_STATUS_HARDWARE_FAILURE, + XE_RAS_STATUS_INSUFFICIENT_RESOURCES, + XE_RAS_STATUS_MAX +}; + static const char *const xe_ras_severities[] = { [XE_RAS_SEV_NOT_SUPPORTED] = "Not Supported", [XE_RAS_SEV_CORRECTABLE] = "Correctable Error", @@ -50,6 +65,56 @@ static const char *const xe_ras_components[] = { }; static_assert(ARRAY_SIZE(xe_ras_components) == XE_RAS_COMP_MAX); +static u8 drm_to_xe_ras_severity(u8 severity) +{ + switch (severity) { + case DRM_XE_RAS_ERR_SEV_CORRECTABLE: + return XE_RAS_SEV_CORRECTABLE; + case DRM_XE_RAS_ERR_SEV_UNCORRECTABLE: + return XE_RAS_SEV_UNCORRECTABLE; + default: + return XE_RAS_SEV_NOT_SUPPORTED; + } +} + +static u8 drm_to_xe_ras_component(u8 component) +{ + switch (component) { + case DRM_XE_RAS_ERR_COMP_CORE_COMPUTE: + return XE_RAS_COMP_CORE_COMPUTE; + case DRM_XE_RAS_ERR_COMP_SOC_INTERNAL: + return XE_RAS_COMP_SOC_INTERNAL; + case DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY: + return XE_RAS_COMP_DEVICE_MEMORY; + case DRM_XE_RAS_ERR_COMP_PCIE: + return XE_RAS_COMP_PCIE; + case DRM_XE_RAS_ERR_COMP_FABRIC: + return XE_RAS_COMP_FABRIC; + default: + return XE_RAS_COMP_NOT_SUPPORTED; + } +} + +static int ras_status_to_errno(u32 status) +{ + switch (status) { + case XE_RAS_STATUS_SUCCESS: + return 0; + case XE_RAS_STATUS_INVALID_PARAM: + return -EINVAL; + case XE_RAS_STATUS_OP_NOT_SUPPORTED: + return -EOPNOTSUPP; + case XE_RAS_STATUS_TIMEOUT: + return -ETIMEDOUT; + case XE_RAS_STATUS_HARDWARE_FAILURE: + return -EIO; + case XE_RAS_STATUS_INSUFFICIENT_RESOURCES: + return -ENOSPC; + default: + return -EPROTO; + } +} + static inline const char *sev_to_str(u8 severity) { if (severity >= XE_RAS_SEV_MAX) @@ -66,6 +131,68 @@ static inline const char *comp_to_str(u8 component) return xe_ras_components[component]; } +static struct pci_dev *find_usp_dev(struct pci_dev *pdev) +{ + struct pci_dev *vsp; + + /* + * Device Hierarchy: + * + * Upstream Switch Port 
(USP) --> Virtual Switch Port (VSP) --> SGunit (GPU endpoint) + */ + vsp = pci_upstream_bridge(pdev); + if (!vsp) + return NULL; + + return pci_upstream_bridge(vsp); +} + +static void ras_usp_aer_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct pci_dev *usp; + u16 aer_cap; + u32 status; + + usp = find_usp_dev(pdev); + if (!usp) + return; + + aer_cap = pci_find_ext_capability(usp, PCI_EXT_CAP_ID_ERR); + if (!aer_cap) { + dev_warn(&usp->dev, "AER capability unavailable\n"); + return; + } + + /* + * Clear any stale Uncorrectable Internal Error Status event in Uncorrectable Error + * Status Register. + */ + pci_read_config_dword(usp, aer_cap + PCI_ERR_UNCOR_STATUS, &status); + if (status & PCI_ERR_UNC_INTN) + pci_write_config_dword(usp, aer_cap + PCI_ERR_UNCOR_STATUS, PCI_ERR_UNC_INTN); + + /* + * All errors are steered to USP which is a PCIe AER Compliant device. + * Downgrade all the errors to non-fatal to prevent PCIe bus driver + * from triggering a Secondary Bus Reset (SBR). This allows error + * detection, containment and recovery in the driver. + * + * The Uncorrectable Error Severity Register has the 'Uncorrectable + * Internal Error Severity' set to fatal by default. Set this to + * non-fatal and unmask the error. 
+ */ + + /* Downgrade Uncorrectable Internal Error to non-fatal */ + pci_clear_and_set_config_dword(usp, aer_cap + PCI_ERR_UNCOR_SEVER, PCI_ERR_UNC_INTN, 0); + + /* Unmask Uncorrectable Internal Error */ + pci_clear_and_set_config_dword(usp, aer_cap + PCI_ERR_UNCOR_MASK, PCI_ERR_UNC_INTN, 0); + + pci_save_state(usp); + dev_dbg(&usp->dev, "Uncorrectable Internal Errors downgraded and unmasked\n"); +} + void xe_ras_counter_threshold_crossed(struct xe_device *xe, struct xe_sysctrl_event_response *response) { @@ -91,3 +218,136 @@ void xe_ras_counter_threshold_crossed(struct xe_device *xe, comp_to_str(component), sev_to_str(severity)); } } + +static int get_counter(struct xe_device *xe, struct xe_ras_error_class *counter, u32 *value) +{ + struct xe_ras_get_counter_response response = {0}; + struct xe_ras_get_counter_request request = {0}; + struct xe_sysctrl_mailbox_command command = {0}; + struct xe_ras_error_common *common; + size_t rlen; + int ret; + + request.counter = *counter; + + xe_sysctrl_create_command(&command, XE_SYSCTRL_GROUP_GFSP, XE_SYSCTRL_CMD_GET_COUNTER, + &request, sizeof(request), &response, sizeof(response)); + + ret = xe_sysctrl_send_command(&xe->sc, &command, &rlen); + if (ret) { + xe_err(xe, "sysctrl: failed to get counter %d\n", ret); + return ret; + } + + if (rlen != sizeof(response)) { + xe_err(xe, "sysctrl: unexpected get counter response length %zu (expected %zu)\n", + rlen, sizeof(response)); + return -EIO; + } + + common = &response.counter.common; + *value = response.value; + + xe_dbg(xe, "[RAS]: get counter %u for %s %s\n", *value, comp_to_str(common->component), + sev_to_str(common->severity)); + + return 0; +} + +/** + * xe_ras_get_counter() - Get error counter value + * @xe: Xe device instance + * @severity: Error severity to be queried (&enum drm_xe_ras_error_severity) + * @component: Error component to be queried (&enum drm_xe_ras_error_component) + * @value: Counter value + * + * This function retrieves the value of a specific 
error counter based on + * the error severity and component. + * + * Return: 0 on success, negative error code on failure. + */ +int xe_ras_get_counter(struct xe_device *xe, u8 severity, u8 component, u32 *value) +{ + struct xe_ras_error_class counter = {0}; + + counter.common.severity = drm_to_xe_ras_severity(severity); + counter.common.component = drm_to_xe_ras_component(component); + + guard(xe_pm_runtime)(xe); + return get_counter(xe, &counter, value); +} + +/** + * xe_ras_clear_counter() - Clear error counter value + * @xe: Xe device instance + * @severity: Error severity to be cleared (&enum drm_xe_ras_error_severity) + * @component: Error component to be cleared (&enum drm_xe_ras_error_component) + * + * This function clears the value of a specific error counter based on + * the error severity and component. + * + * Return: 0 on success, negative error code on failure. + */ +int xe_ras_clear_counter(struct xe_device *xe, u8 severity, u8 component) +{ + struct xe_ras_clear_counter_response response = {0}; + struct xe_ras_clear_counter_request request = {0}; + struct xe_sysctrl_mailbox_command command = {0}; + struct xe_ras_error_class *counter; + size_t rlen; + int ret; + + counter = &request.counter; + counter->common.severity = drm_to_xe_ras_severity(severity); + counter->common.component = drm_to_xe_ras_component(component); + + xe_sysctrl_create_command(&command, XE_SYSCTRL_GROUP_GFSP, XE_SYSCTRL_CMD_CLEAR_COUNTER, + &request, sizeof(request), &response, sizeof(response)); + + guard(xe_pm_runtime)(xe); + ret = xe_sysctrl_send_command(&xe->sc, &command, &rlen); + if (ret) { + xe_err(xe, "sysctrl: failed to clear counter %d\n", ret); + return ret; + } + + if (rlen != sizeof(response)) { + xe_err(xe, "sysctrl: unexpected clear counter response length %zu (expected %zu)\n", + rlen, sizeof(response)); + return -EIO; + } + + ret = ras_status_to_errno(response.status); + if (ret) { + xe_err(xe, "sysctrl: clear counter command failed with status %#x\n", + 
response.status); + return ret; + } + + counter = &response.counter; + + xe_dbg(xe, "[RAS]: clear counter for %s %s\n", comp_to_str(counter->common.component), + sev_to_str(counter->common.severity)); + + return 0; +} + +/** + * xe_ras_init - Initialize Xe RAS + * @xe: xe device instance + * + * Initialize Xe RAS + */ +void xe_ras_init(struct xe_device *xe) +{ + if (!xe->info.has_drm_ras) + return; + + xe_drm_ras_init(xe); + + if (!xe->info.has_sysctrl) + return; + + if (IS_ENABLED(CONFIG_PCIEAER)) + ras_usp_aer_init(xe); +} diff --git a/drivers/gpu/drm/xe/xe_ras.h b/drivers/gpu/drm/xe/xe_ras.h index ea90593b62dc..ba0b0224df23 100644 --- a/drivers/gpu/drm/xe/xe_ras.h +++ b/drivers/gpu/drm/xe/xe_ras.h @@ -6,10 +6,15 @@ #ifndef _XE_RAS_H_ #define _XE_RAS_H_ +#include <linux/types.h> + struct xe_device; struct xe_sysctrl_event_response; void xe_ras_counter_threshold_crossed(struct xe_device *xe, struct xe_sysctrl_event_response *response); +int xe_ras_get_counter(struct xe_device *xe, u8 severity, u8 component, u32 *value); +int xe_ras_clear_counter(struct xe_device *xe, u8 severity, u8 component); +void xe_ras_init(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_ras_types.h b/drivers/gpu/drm/xe/xe_ras_types.h index 4e63c67f806a..6688e11f57a8 100644 --- a/drivers/gpu/drm/xe/xe_ras_types.h +++ b/drivers/gpu/drm/xe/xe_ras_types.h @@ -70,4 +70,55 @@ struct xe_ras_threshold_crossed { struct xe_ras_error_class counters[XE_RAS_NUM_COUNTERS]; } __packed; +/** + * struct xe_ras_get_counter_request - Request structure for get counter + */ +struct xe_ras_get_counter_request { + /** @counter: Error counter to be queried */ + struct xe_ras_error_class counter; + /** @reserved: Reserved for future use */ + u32 reserved; +} __packed; + +/** + * struct xe_ras_get_counter_response - Response structure for get counter + */ +struct xe_ras_get_counter_response { + /** @counter: Error counter that was queried */ + struct xe_ras_error_class counter; + /** @value: Current 
counter value */ + u32 value; + /** @timestamp: Timestamp when counter was last updated */ + u64 timestamp; + /** @threshold: Threshold value for the counter */ + u32 threshold; + /** @reserved: Reserved */ + u32 reserved[57]; +} __packed; + +/** + * struct xe_ras_clear_counter_request - Request structure for clear counter + */ +struct xe_ras_clear_counter_request { + /** @counter: Counter class to be cleared */ + struct xe_ras_error_class counter; + /** @reserved: Reserved for future use */ + u32 reserved; +} __packed; + +/** + * struct xe_ras_clear_counter_response - Response structure for clear counter + */ +struct xe_ras_clear_counter_response { + /** @counter: Counter class that was cleared */ + struct xe_ras_error_class counter; + /** @reserved: Reserved */ + u32 reserved; + /** @timestamp: Timestamp when the counter was cleared */ + u64 timestamp; + /** @status: Status of the clear operation */ + u32 status; + /** @reserved1: Reserved for future use */ + u32 reserved1[3]; +} __packed; #endif diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 526907d2d824..cab1b578ca0e 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -5,6 +5,8 @@ #include "xe_reg_whitelist.h" +#include <kunit/visibility.h> + #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_oa_regs.h" @@ -41,7 +43,7 @@ static bool match_multi_queue_class(const struct xe_device *xe, return xe_gt_supports_multi_queue(gt, hwe->class); } -static const struct xe_rtp_table_sr register_whitelist = XE_RTP_TABLE_SR( +VISIBLE_IF_KUNIT const struct xe_rtp_table_sr register_whitelist = XE_RTP_TABLE_SR( { XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(WHITELIST(PS_INVOCATION_COUNT, @@ -104,6 +106,7 @@ static const struct xe_rtp_table_sr register_whitelist = XE_RTP_TABLE_SR( 
RING_FORCE_TO_NONPRIV_ACCESS_RW)) }, ); +EXPORT_SYMBOL_IF_KUNIT(register_whitelist); static const struct xe_rtp_table_sr oa_whitelist = XE_RTP_TABLE_SR( diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h index e1eb1b7d5480..c0248063d515 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.h +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h @@ -14,6 +14,10 @@ struct xe_hw_engine; struct xe_reg_sr; struct xe_reg_sr_entry; +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +extern const struct xe_rtp_table_sr register_whitelist; +#endif + void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); void xe_reg_whitelist_oa_regs(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 83a40e1f9528..6a8d6ea68f25 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -227,17 +227,23 @@ static bool rule_matches(const struct xe_device *xe, static void rtp_add_sr_entry(const struct xe_rtp_action *action, struct xe_gt *gt, + struct xe_hw_engine *hwe, u32 mmio_base, struct xe_reg_sr *sr) { struct xe_reg_sr_entry sr_entry = { .reg = action->reg, .clr_bits = action->clr_bits, - .set_bits = action->set_bits, .read_mask = action->read_mask, }; + if (action->use_func) + sr_entry.set_bits = action->set_func(gt, hwe); + else + sr_entry.set_bits = action->set_bits; + sr_entry.reg.addr += mmio_base; + xe_reg_sr_add(sr, &sr_entry, gt); } @@ -259,7 +265,7 @@ static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry, else mmio_base = 0; - rtp_add_sr_entry(action, gt, mmio_base, sr); + rtp_add_sr_entry(action, gt, hwe, mmio_base, sr); } return true; diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 2cc65053cd07..0032f68ea187 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -323,6 +323,25 @@ struct xe_reg_sr; .read_mask = 0, ##__VA_ARGS__ } /** + * XE_RTP_ACTION_FIELD_SET_FUNC: Set a bit range to the value returned by a function + * @reg_: 
Register + * @mask_bits_: Mask of bits to be changed in the register, forming a field + * @func_: Function that returns value to set in the field denoted by @mask_bits_ + * @...: Additional fields to override in the struct xe_rtp_action entry + * + * This macro works like XE_RTP_ACTION_FIELD_SET(), except that the + * field value is evaluated at the time the RTP table is processed. + * + * @func_ will only be called a single time, when the RTP table is being + * processed. After processing, the value in the reg_sr entry is fixed and + * will not be re-evaluated. + */ +#define XE_RTP_ACTION_FIELD_SET_FUNC(reg_, mask_bits_, func_, ...) \ + { .reg = XE_RTP_DROP_CAST(reg_), \ + .clr_bits = mask_bits_, .set_func = func_, .use_func = 1, \ + .read_mask = mask_bits_, ##__VA_ARGS__ } + +/** * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist * @reg_: Register * @val_: Whitelist-specific flags to set diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 58018ae4f8cc..b78092fa06e0 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -22,20 +22,37 @@ struct xe_gt; */ struct xe_rtp_action { /** @reg: Register */ - struct xe_reg reg; + struct xe_reg reg; + /** * @clr_bits: bits to clear when updating register. 
It's always a * superset of bits being modified */ - u32 clr_bits; - /** @set_bits: bits to set when updating register */ - u32 set_bits; + u32 clr_bits; + + union { + /** @set_bits: bits to set when updating register */ + u32 set_bits; + + /** @set_func: function to provide bits to set when updating register */ + u32 (*set_func)(struct xe_gt *gt, + struct xe_hw_engine *hwe); + }; + #define XE_RTP_NOCHECK .read_mask = 0 /** @read_mask: mask for bits to consider when reading value back */ - u32 read_mask; + u32 read_mask; + #define XE_RTP_ACTION_FLAG_ENGINE_BASE BIT(0) /** @flags: flags to apply on rule evaluation or action */ - u8 flags; + u8 flags; + + /** + * @use_func: + * Internal flag indicating @set_func should be called instead of + * using @set_bits. + */ + u8 use_func:1; }; enum { @@ -69,6 +86,7 @@ struct xe_rtp_rule { u8 platform; u8 subplatform; }; + /* * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE / * MATCH_MEDIA_VERSION / XE_RTP_MATCH_MEDIA_VERSION_RANGE @@ -78,15 +96,18 @@ struct xe_rtp_rule { #define XE_RTP_END_VERSION_UNDEFINED U32_MAX u32 ver_end; }; + /* MATCH_STEP */ struct { u8 step_start; u8 step_end; }; + /* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */ struct { u8 engine_class; }; + /* MATCH_FUNC */ bool (*match_func)(const struct xe_device *xe, const struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index fb9c31613ca7..55c1996f689e 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -110,12 +110,14 @@ __diag_pop(); /** * xe_step_platform_get - Determine platform-level stepping from PCI revid - * @xe: Xe device + * @platform: The Xe platform + * @revid: The PCI revid + * @step: Pointer to the step struct to update * * Convert the PCI revid into a platform-level stepping value and store that - * in the device info. + * in @step->platform. 
*/ -void xe_step_platform_get(struct xe_device *xe) +void xe_step_platform_get(enum xe_platform platform, u8 revid, struct xe_step_info *step) { /* * Not all platforms map PCI revid directly into our symbolic stepping @@ -126,18 +128,21 @@ void xe_step_platform_get(struct xe_device *xe) * checks. */ - if (xe->info.platform == XE_NOVALAKE_P) - xe->info.step.platform = STEP_A0 + xe->info.revid; + if (platform == XE_NOVALAKE_P) + step->platform = STEP_A0 + revid; } /** * xe_step_pre_gmdid_get - Determine IP steppings from PCI revid * @xe: Xe device + * @step: Pointer to the step struct to update + * + * Convert the PCI revid into proper IP steppings and update @step->basedie, + * @step->graphics and @step->media accordingly. * - * Convert the PCI revid into proper IP steppings. This should only be - * used on platforms that do not have GMD_ID support. + * This should only be used on platforms that do not have GMD_ID support. */ -void xe_step_pre_gmdid_get(struct xe_device *xe) +void xe_step_pre_gmdid_get(struct xe_device *xe, struct xe_step_info *step) { const struct xe_step_info *revids = NULL; u16 revid = xe->info.revid; @@ -234,9 +239,9 @@ void xe_step_pre_gmdid_get(struct xe_device *xe) } done: - xe->info.step.graphics = graphics; - xe->info.step.media = media; - xe->info.step.basedie = basedie; + step->graphics = graphics; + step->media = media; + step->basedie = basedie; } /** @@ -244,8 +249,10 @@ done: * @xe: Xe device * @graphics_gmdid_revid: value of graphics GMD_ID register's revid field * @media_gmdid_revid: value of media GMD_ID register's revid field + * @step: Pointer to the step struct to update. * - * Convert the revid fields of the GMD_ID registers into proper IP steppings. + * Convert the revid fields of the GMD_ID registers into proper IP steppings + * and update @step->graphics and @step->media accordingly. * * GMD_ID revid values are currently expected to have consistent meanings on * all platforms: major steppings (A0, B0, etc.) 
are 4 apart, with minor @@ -253,7 +260,8 @@ done: */ void xe_step_gmdid_get(struct xe_device *xe, u32 graphics_gmdid_revid, - u32 media_gmdid_revid) + u32 media_gmdid_revid, + struct xe_step_info *step) { u8 graphics = STEP_A0 + graphics_gmdid_revid; u8 media = STEP_A0 + media_gmdid_revid; @@ -270,8 +278,8 @@ void xe_step_gmdid_get(struct xe_device *xe, media_gmdid_revid); } - xe->info.step.graphics = graphics; - xe->info.step.media = media; + step->graphics = graphics; + step->media = media; } #define STEP_NAME_CASE(name) \ diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h index ea36b22cc297..5a5845335740 100644 --- a/drivers/gpu/drm/xe/xe_step.h +++ b/drivers/gpu/drm/xe/xe_step.h @@ -10,14 +10,16 @@ #include "xe_step_types.h" +enum xe_platform; struct xe_device; -void xe_step_platform_get(struct xe_device *xe); +void xe_step_platform_get(enum xe_platform platform, u8 revid, struct xe_step_info *step); -void xe_step_pre_gmdid_get(struct xe_device *xe); +void xe_step_pre_gmdid_get(struct xe_device *xe, struct xe_step_info *step); void xe_step_gmdid_get(struct xe_device *xe, u32 graphics_gmdid_revid, - u32 media_gmdid_revid); + u32 media_gmdid_revid, + struct xe_step_info *step); static inline u32 xe_step_to_gmdid(enum intel_step step) { return step - STEP_A0; } const char *xe_step_name(enum intel_step step); diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox.c b/drivers/gpu/drm/xe/xe_sysctrl_mailbox.c index 3caa9f15875f..e13eebaac1d0 100644 --- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox.c +++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox.c @@ -294,6 +294,34 @@ static int sysctrl_send_command(struct xe_sysctrl *sc, } /** + * xe_sysctrl_create_command() - Create system controller command + * @command: Sysctrl command structure + * @group_id: Command group ID + * @cmd_id: Command ID + * @request: Pointer to request buffer (can be NULL) + * @request_len: Size of request buffer + * @response: Pointer to response buffer + * @response_len: Size of response 
buffer + * + * Helper function to create sysctrl command to be sent via %xe_sysctrl_send_command() + */ +void xe_sysctrl_create_command(struct xe_sysctrl_mailbox_command *command, u8 group_id, u8 cmd_id, + void *request, size_t request_len, void *response, + size_t response_len) +{ + struct xe_sysctrl_app_msg_hdr header = {0}; + + header.data = FIELD_PREP(APP_HDR_GROUP_ID_MASK, group_id) | + FIELD_PREP(APP_HDR_COMMAND_MASK, cmd_id); + + command->header = header; + command->data_in = request; + command->data_in_len = request_len; + command->data_out = response; + command->data_out_len = response_len; +} + +/** * xe_sysctrl_mailbox_init - Initialize System Controller mailbox interface * @sc: System controller structure * diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox.h b/drivers/gpu/drm/xe/xe_sysctrl_mailbox.h index f67e9234de48..fb434cc165b2 100644 --- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox.h +++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox.h @@ -23,6 +23,9 @@ struct xe_sysctrl_mailbox_command; #define XE_SYSCTRL_APP_HDR_VERSION(hdr) \ FIELD_GET(APP_HDR_VERSION_MASK, (hdr)->data) +void xe_sysctrl_create_command(struct xe_sysctrl_mailbox_command *command, u8 group_id, u8 cmd_id, + void *request, size_t request_len, void *response, + size_t response_len); void xe_sysctrl_mailbox_init(struct xe_sysctrl *sc); int xe_sysctrl_send_command(struct xe_sysctrl *sc, struct xe_sysctrl_mailbox_command *cmd, diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h index 84d7c647e743..6e3753554510 100644 --- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h +++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h @@ -22,9 +22,13 @@ enum xe_sysctrl_group { /** * enum xe_sysctrl_gfsp_cmd - Commands supported by GFSP group * + * @XE_SYSCTRL_CMD_GET_COUNTER: Get error counter value + * @XE_SYSCTRL_CMD_CLEAR_COUNTER: Clear error counter value * @XE_SYSCTRL_CMD_GET_PENDING_EVENT: Retrieve pending event */ enum xe_sysctrl_gfsp_cmd { + 
XE_SYSCTRL_CMD_GET_COUNTER = 0x03, + XE_SYSCTRL_CMD_CLEAR_COUNTER = 0x04, XE_SYSCTRL_CMD_GET_PENDING_EVENT = 0x07, }; diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index c465aae7883c..74d925a337b7 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -157,7 +157,9 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) if (err) return err; - xe_pcode_init(tile); + err = xe_pcode_init_early(tile); + if (err) + return err; return 0; } diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index bf3fad9cdbef..bcec40ca2d35 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -20,7 +20,7 @@ #undef XE_REG_MCR #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) -static const struct xe_rtp_table_sr gt_tunings = XE_RTP_TABLE_SR( +VISIBLE_IF_KUNIT const struct xe_rtp_table_sr gt_tunings = XE_RTP_TABLE_SR( { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS)) @@ -101,6 +101,7 @@ static const struct xe_rtp_table_sr gt_tunings = XE_RTP_TABLE_SR( BANK_HASH_4KB_MODE)) }, ); +EXPORT_SYMBOL_IF_KUNIT(gt_tunings); static const struct xe_rtp_table_sr engine_tunings = XE_RTP_TABLE_SR( { XE_RTP_NAME("Tuning: L3 Hashing Mask"), diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h index d18e187debf6..869564e3e992 100644 --- a/drivers/gpu/drm/xe/xe_tuning.h +++ b/drivers/gpu/drm/xe/xe_tuning.h @@ -6,6 +6,8 @@ #ifndef _XE_TUNING_H_ #define _XE_TUNING_H_ +#include <kunit/visibility.h> + struct drm_printer; struct xe_gt; struct xe_hw_engine; @@ -16,4 +18,8 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe); void xe_tuning_process_lrc(struct xe_hw_engine *hwe); int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p); +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +extern const struct xe_rtp_table_sr gt_tunings; +#endif + #endif diff 
--git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index c4fb29004195..9e343f9aa44d 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -332,6 +332,20 @@ static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end) return err; } +/** + * madvise_range_needs_invalidation() - Check whether madvise needs invalidation + * @args: madvise ioctl arguments + * + * Purgeable state updates only touch VMA/BO metadata. PTEs stay valid and are + * zapped only if the BO is later purged. + * + * Return: true when the update needs PTE invalidation. + */ +static bool madvise_range_needs_invalidation(const struct drm_xe_madvise *args) +{ + return args->type != DRM_XE_VMA_ATTR_PURGEABLE_STATE; +} + static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args) { if (XE_IOCTL_DBG(xe, !args)) @@ -708,8 +722,9 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args, &details); - err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr, - madvise_range.addr + args->range); + if (madvise_range_needs_invalidation(args)) + err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr, + madvise_range.addr + args->range); if (madvise_range.has_svm_userptr_vmas) xe_svm_notifier_unlock(vm); diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index b9d9fe0801aa..139434946f8f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -130,7 +130,7 @@ __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); -static const struct xe_rtp_table_sr gt_was = XE_RTP_TABLE_SR( +VISIBLE_IF_KUNIT const struct xe_rtp_table_sr gt_was = XE_RTP_TABLE_SR( /* Workarounds applying over a range of IPs */ { XE_RTP_NAME("14011060649"), @@ -293,7 +293,7 @@ static const struct xe_rtp_table_sr gt_was = XE_RTP_TABLE_SR( 
XE_RTP_ACTIONS(SET(MMIOATSREQLIMIT_GAM_WALK_3D, DIS_ATS_WRONLY_PG)) }, - { XE_RTP_NAME("14026144927, 16029437861"), + { XE_RTP_NAME("14026144927, 16029437861, 14026127056"), XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(L3SQCREG2, L3_SQ_DISABLE_COAMA_2WAY_COH | L3_SQ_DISABLE_COAMA)) @@ -307,6 +307,7 @@ static const struct xe_rtp_table_sr gt_was = XE_RTP_TABLE_SR( XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) }, ); +EXPORT_SYMBOL_IF_KUNIT(gt_was); static const struct xe_rtp_table_sr engine_was = XE_RTP_TABLE_SR( /* Workarounds applying over a range of IPs */ @@ -586,12 +587,12 @@ static const struct xe_rtp_table_sr engine_was = XE_RTP_TABLE_SR( /* Xe3p_LPG*/ - { XE_RTP_NAME("22021149932"), + { XE_RTP_NAME("22021149932, 14026290593"), XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, SAMPLER_LD_LSC_DISABLE)) }, - { XE_RTP_NAME("14025676848"), + { XE_RTP_NAME("14025676848, 14026270459"), XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE)) @@ -802,10 +803,11 @@ static const struct xe_rtp_entry oob_was_entries[] = { static_assert(ARRAY_SIZE(oob_was_entries) == _XE_WA_OOB_COUNT); -static __maybe_unused const struct xe_rtp_table oob_was = { +VISIBLE_IF_KUNIT __maybe_unused const struct xe_rtp_table oob_was = { .entries = oob_was_entries, .n_entries = ARRAY_SIZE(oob_was_entries), }; +EXPORT_SYMBOL_IF_KUNIT(oob_was); static const struct xe_rtp_entry device_oob_was_entries[] = { #include <generated/xe_device_wa_oob.c> @@ -813,10 +815,11 @@ static const struct xe_rtp_entry device_oob_was_entries[] = { static_assert(ARRAY_SIZE(device_oob_was_entries) == _XE_DEVICE_WA_OOB_COUNT); -static __maybe_unused const struct xe_rtp_table device_oob_was = { +VISIBLE_IF_KUNIT __maybe_unused const 
struct xe_rtp_table device_oob_was = { .entries = device_oob_was_entries, .n_entries = ARRAY_SIZE(device_oob_was_entries), }; +EXPORT_SYMBOL_IF_KUNIT(device_oob_was); __diag_pop(); diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index a5f7d33c1b32..f4da2b271396 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -6,6 +6,7 @@ #ifndef _XE_WA_H_ #define _XE_WA_H_ +#include <kunit/visibility.h> #include "xe_assert.h" struct drm_printer; @@ -24,6 +25,12 @@ void xe_wa_apply_tile_workarounds(struct xe_tile *tile); void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p); int xe_wa_gt_dump(struct xe_gt *gt, struct drm_printer *p); +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +extern const struct xe_rtp_table_sr gt_was; +extern __maybe_unused const struct xe_rtp_table oob_was; +extern __maybe_unused const struct xe_rtp_table device_oob_was; +#endif + /** * XE_GT_WA - Out-of-band GT workarounds, to be queried and called as needed. * @gt__: gt instance diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f8a185103b80..9027365f0043 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -65,3 +65,4 @@ 14025883347 MEDIA_VERSION_RANGE(1301, 3503) GRAPHICS_VERSION_RANGE(2004, 3005) +16029380221 MEDIA_VERSION(3500) diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index e32ef763427c..dff389b56eb3 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -893,8 +893,9 @@ MACRO__(0xD741, ## __VA_ARGS__), \ MACRO__(0xD742, ## __VA_ARGS__), \ MACRO__(0xD743, ## __VA_ARGS__), \ - MACRO__(0xD744, ## __VA_ARGS__), \ - MACRO__(0xD745, ## __VA_ARGS__) + MACRO__(0xD745, ## __VA_ARGS__), \ + MACRO__(0xD74A, ## __VA_ARGS__), \ + MACRO__(0xD74B, ## __VA_ARGS__) /* CRI */ #define INTEL_CRI_IDS(MACRO__, ...) 
\ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 48d851fbd8ea..5842552294c1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1791,22 +1791,8 @@ static inline int perf_is_paranoid(void) } extern int perf_allow_kernel(void); - -static inline int perf_allow_cpu(void) -{ - if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) - return -EACCES; - - return security_perf_event_open(PERF_SECURITY_CPU); -} - -static inline int perf_allow_tracepoint(void) -{ - if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) - return -EPERM; - - return security_perf_event_open(PERF_SECURITY_TRACEPOINT); -} +extern int perf_allow_cpu(void); +extern int perf_allow_tracepoint(void); extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs); @@ -2023,6 +2009,19 @@ perf_event_pause(struct perf_event *event, bool reset) { return 0; } static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { return 0; } +static inline int perf_allow_kernel(void) +{ + return perfmon_capable() ? 0 : -EACCES; +} +static inline int perf_allow_cpu(void) +{ + return perfmon_capable() ? 0 : -EACCES; +} +static inline int perf_allow_tracepoint(void) +{ + return perfmon_capable() ? 
0 : -EPERM; +} + #endif /* !CONFIG_PERF_EVENTS */ #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 48e9f1fdb78d..50c80af4ad4e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -2589,6 +2589,12 @@ enum drm_xe_ras_error_component { DRM_XE_RAS_ERR_COMP_CORE_COMPUTE = 1, /** @DRM_XE_RAS_ERR_COMP_SOC_INTERNAL: SoC Internal Error */ DRM_XE_RAS_ERR_COMP_SOC_INTERNAL, + /** @DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY: Device Memory Error */ + DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY, + /** @DRM_XE_RAS_ERR_COMP_PCIE: PCIe Subsystem Error */ + DRM_XE_RAS_ERR_COMP_PCIE, + /** @DRM_XE_RAS_ERR_COMP_FABRIC: Fabric Subsystem Error */ + DRM_XE_RAS_ERR_COMP_FABRIC, /** @DRM_XE_RAS_ERR_COMP_MAX: Max Error */ DRM_XE_RAS_ERR_COMP_MAX /* non-ABI */ }; @@ -2606,7 +2612,10 @@ enum drm_xe_ras_error_component { */ #define DRM_XE_RAS_ERROR_COMPONENT_NAMES { \ [DRM_XE_RAS_ERR_COMP_CORE_COMPUTE] = "core-compute", \ - [DRM_XE_RAS_ERR_COMP_SOC_INTERNAL] = "soc-internal" \ + [DRM_XE_RAS_ERR_COMP_SOC_INTERNAL] = "soc-internal", \ + [DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY] = "device-memory", \ + [DRM_XE_RAS_ERR_COMP_PCIE] = "pcie", \ + [DRM_XE_RAS_ERR_COMP_FABRIC] = "fabric", \ } #if defined(__cplusplus) diff --git a/kernel/events/core.c b/kernel/events/core.c index 954c36e28101..38c35123f23c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -14768,6 +14768,24 @@ int perf_allow_kernel(void) } EXPORT_SYMBOL_GPL(perf_allow_kernel); +int perf_allow_cpu(void) +{ + if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) + return -EACCES; + + return security_perf_event_open(PERF_SECURITY_CPU); +} +EXPORT_SYMBOL_GPL(perf_allow_cpu); + +int perf_allow_tracepoint(void) +{ + if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) + return -EPERM; + + return security_perf_event_open(PERF_SECURITY_TRACEPOINT); +} +EXPORT_SYMBOL_GPL(perf_allow_tracepoint); + /* * Inherit an event from parent task to 
child task. * |
