diff options
Diffstat (limited to 'drivers/gpu')
| -rw-r--r-- | drivers/gpu/drm/xe/display/xe_display_bo.c | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/display/xe_fb_pin.c | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/tests/xe_rtp_test.c | 103 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_drm_client.c | 12 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_gt_debugfs.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_guc_relay.c | 13 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_guc_submit.c | 9 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_hw_engine.c | 20 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_hw_engine_types.h | 8 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_oa.c | 7 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_oa_types.h | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_pt.c | 67 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_reg_whitelist.c | 102 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_reg_whitelist.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_rtp.c | 31 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_rtp.h | 24 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_rtp_types.h | 10 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_svm.c | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_svm.h | 15 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_tuning.c | 45 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_userptr.c | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_wa.c | 89 |
22 files changed, 376 insertions, 204 deletions
diff --git a/drivers/gpu/drm/xe/display/xe_display_bo.c b/drivers/gpu/drm/xe/display/xe_display_bo.c index 7fbac223b097..8953da0136dc 100644 --- a/drivers/gpu/drm/xe/display/xe_display_bo.c +++ b/drivers/gpu/drm/xe/display/xe_display_bo.c @@ -48,7 +48,8 @@ static int xe_display_bo_framebuffer_init(struct drm_gem_object *obj, if (ret) goto err; - if (!(bo->flags & XE_BO_FLAG_FORCE_WC)) { + if (!(bo->flags & XE_BO_FLAG_FORCE_WC) && + bo->ttm.type != ttm_bo_type_sg) { /* * XE_BO_FLAG_FORCE_WC should ideally be set at creation, or is * automatically set when creating FB. We cannot change caching diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index f93c98bec5b5..5f4a0cd8deca 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -331,7 +331,8 @@ static struct i915_vma *__xe_pin_fb_vma(struct drm_gem_object *obj, bool is_dpt, int ret = 0; /* We reject creating !SCANOUT fb's, so this is weird.. */ - drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_FORCE_WC)); + drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_FORCE_WC) && + bo->ttm.type != ttm_bo_type_sg); if (!vma) return ERR_PTR(-ENODEV); diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 642f6e090ad0..3d0688d058d9 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -54,13 +54,13 @@ struct rtp_to_sr_test_case { unsigned long expected_count_sr_entries; unsigned int expected_sr_errors; unsigned long expected_active; - const struct xe_rtp_entry_sr *entries; + const struct xe_rtp_table_sr table; }; struct rtp_test_case { const char *name; unsigned long expected_active; - const struct xe_rtp_entry *entries; + const struct xe_rtp_table table; }; static bool fake_xe_gt_mcr_check_reg(struct xe_gt *gt, struct xe_reg reg) @@ -289,7 +289,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { .expected_active = BIT(0) | 
BIT(1), .expected_count_sr_entries = 1, /* Different bits on the same register: create a single entry */ - .entries = (const struct xe_rtp_entry_sr[]) { + .table = XE_RTP_TABLE_SR( { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -298,8 +298,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) }, - {} - }, + ), }, { .name = "no-match-no-add", @@ -309,7 +308,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { .expected_active = BIT(0), .expected_count_sr_entries = 1, /* Don't coalesce second entry since rules don't match */ - .entries = (const struct xe_rtp_entry_sr[]) { + .table = XE_RTP_TABLE_SR( { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -318,8 +317,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { XE_RTP_RULES(FUNC(match_no)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) }, - {} - }, + ), }, { .name = "two-regs-two-entries", @@ -329,7 +327,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { .expected_active = BIT(0) | BIT(1), .expected_count_sr_entries = 2, /* Same bits on different registers are not coalesced */ - .entries = (const struct xe_rtp_entry_sr[]) { + .table = XE_RTP_TABLE_SR( { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -338,8 +336,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG2, REG_BIT(0))) }, - {} - }, + ), }, { .name = "clr-one-set-other", @@ -349,7 +346,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { .expected_active = BIT(0) | BIT(1), .expected_count_sr_entries = 1, /* Check clr vs set actions on different bits */ - .entries = (const struct xe_rtp_entry_sr[]) { + .table = XE_RTP_TABLE_SR( { XE_RTP_NAME("basic-1"), 
XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -358,8 +355,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_BIT(1))) }, - {} - }, + ), }, { #define TEMP_MASK REG_GENMASK(10, 8) @@ -371,14 +367,13 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { .expected_active = BIT(0), .expected_count_sr_entries = 1, /* Check FIELD_SET works */ - .entries = (const struct xe_rtp_entry_sr[]) { + .table = XE_RTP_TABLE_SR( { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1, TEMP_MASK, TEMP_FIELD)) }, - {} - }, + ), #undef TEMP_MASK #undef TEMP_FIELD }, @@ -390,7 +385,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { .expected_active = BIT(0) | BIT(1), .expected_count_sr_entries = 1, .expected_sr_errors = 1, - .entries = (const struct xe_rtp_entry_sr[]) { + .table = XE_RTP_TABLE_SR( { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -400,8 +395,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) }, - {} - }, + ), }, { .name = "conflict-not-disjoint", @@ -411,7 +405,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { .expected_active = BIT(0) | BIT(1), .expected_count_sr_entries = 1, .expected_sr_errors = 1, - .entries = (const struct xe_rtp_entry_sr[]) { + .table = XE_RTP_TABLE_SR( { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -421,8 +415,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_GENMASK(1, 0))) }, - {} - }, + ), }, { .name = "conflict-reg-type", @@ -432,7 +425,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { .expected_active = BIT(0) | BIT(1) | BIT(2), 
.expected_count_sr_entries = 1, .expected_sr_errors = 2, - .entries = (const struct xe_rtp_entry_sr[]) { + .table = XE_RTP_TABLE_SR( { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -447,8 +440,7 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(MASKED_REG1, REG_BIT(0))) }, - {} - }, + ), }, { .name = "bad-mcr-reg-forced-to-regular", @@ -458,13 +450,12 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { .expected_active = BIT(0), .expected_count_sr_entries = 1, .expected_sr_errors = 1, - .entries = (const struct xe_rtp_entry_sr[]) { + .table = XE_RTP_TABLE_SR( { XE_RTP_NAME("bad-mcr-regular-reg"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(BAD_MCR_REG4, REG_BIT(0))) }, - {} - }, + ), }, { .name = "bad-regular-reg-forced-to-mcr", @@ -474,13 +465,12 @@ static const struct rtp_to_sr_test_case rtp_to_sr_cases[] = { .expected_active = BIT(0), .expected_count_sr_entries = 1, .expected_sr_errors = 1, - .entries = (const struct xe_rtp_entry_sr[]) { + .table = XE_RTP_TABLE_SR( { XE_RTP_NAME("bad-regular-reg"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(BAD_REGULAR_REG5, REG_BIT(0))) }, - {} - }, + ), }, }; @@ -492,16 +482,12 @@ static void xe_rtp_process_to_sr_tests(struct kunit *test) struct xe_reg_sr *reg_sr = &gt->reg_sr; const struct xe_reg_sr_entry *sre, *sr_entry = NULL; struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); - unsigned long idx, count_sr_entries = 0, count_rtp_entries = 0, active = 0; + unsigned long idx, count_sr_entries = 0, active = 0; xe_reg_sr_init(reg_sr, "xe_rtp_to_sr_tests", xe); - while (param->entries[count_rtp_entries].rules) - count_rtp_entries++; - - xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries); - xe_rtp_process_to_sr(&ctx, param->entries, count_rtp_entries, - reg_sr, false); + xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, 
param->table.n_entries); + xe_rtp_process_to_sr(&ctx, &param->table, reg_sr, false); xa_for_each(&reg_sr->xa, idx, sre) { if (idx == param->expected_reg.addr) @@ -534,56 +520,52 @@ static const struct rtp_test_case rtp_cases[] = { { .name = "active1", .expected_active = BIT(0), - .entries = (const struct xe_rtp_entry[]) { + .table = XE_RTP_TABLE( { XE_RTP_NAME("r1"), XE_RTP_RULES(FUNC(match_yes)), }, - {} - }, + ), }, { .name = "active2", .expected_active = BIT(0) | BIT(1), - .entries = (const struct xe_rtp_entry[]) { + .table = XE_RTP_TABLE( { XE_RTP_NAME("r1"), XE_RTP_RULES(FUNC(match_yes)), }, { XE_RTP_NAME("r2"), XE_RTP_RULES(FUNC(match_yes)), }, - {} - }, + ), }, { .name = "active-inactive", .expected_active = BIT(0), - .entries = (const struct xe_rtp_entry[]) { + .table = XE_RTP_TABLE( { XE_RTP_NAME("r1"), XE_RTP_RULES(FUNC(match_yes)), }, { XE_RTP_NAME("r2"), XE_RTP_RULES(FUNC(match_no)), }, - {} - }, + ), }, { .name = "inactive-active", .expected_active = BIT(1), - .entries = (const struct xe_rtp_entry[]) { + .table = XE_RTP_TABLE( { XE_RTP_NAME("r1"), XE_RTP_RULES(FUNC(match_no)), }, { XE_RTP_NAME("r2"), XE_RTP_RULES(FUNC(match_yes)), }, - {} - }, + ), }, { .name = "inactive-active-inactive", .expected_active = BIT(1), - .entries = (const struct xe_rtp_entry[]) { + .table = XE_RTP_TABLE( { XE_RTP_NAME("r1"), XE_RTP_RULES(FUNC(match_no)), }, @@ -593,13 +575,12 @@ static const struct rtp_test_case rtp_cases[] = { { XE_RTP_NAME("r3"), XE_RTP_RULES(FUNC(match_no)), }, - {} - }, + ), }, { .name = "inactive-inactive-inactive", .expected_active = 0, - .entries = (const struct xe_rtp_entry[]) { + .table = XE_RTP_TABLE( { XE_RTP_NAME("r1"), XE_RTP_RULES(FUNC(match_no)), }, @@ -609,8 +590,7 @@ static const struct rtp_test_case rtp_cases[] = { { XE_RTP_NAME("r3"), XE_RTP_RULES(FUNC(match_no)), }, - {} - }, + ), }, }; @@ -620,13 +600,10 @@ static void xe_rtp_process_tests(struct kunit *test) struct xe_device *xe = test->priv; struct xe_gt *gt = 
xe_device_get_root_tile(xe)->primary_gt; struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); - unsigned long count_rtp_entries = 0, active = 0; - - while (param->entries[count_rtp_entries].rules) - count_rtp_entries++; + unsigned long active = 0; - xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries); - xe_rtp_process(&ctx, param->entries); + xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, param->table.n_entries); + xe_rtp_process(&ctx, &param->table); KUNIT_EXPECT_EQ(test, active, param->expected_active); } diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 84b66147bf49..81020b4b344e 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -168,10 +168,20 @@ static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { u64 sz = xe_bo_size(bo); - u32 mem_type = bo->ttm.resource->mem_type; + u32 mem_type; xe_bo_assert_held(bo); + /* + * The resource can be NULL if the BO has been purged, plus maybe some + * other cases. Either way there shouldn't be any memory to account for, + * or a current resource to account this against, so skip for now. 
+ */ + if (!bo->ttm.resource) + return; + + mem_type = bo->ttm.resource->mem_type; + if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base)) stats[mem_type].shared += sz; else diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index f45306308cd6..c38bcacb27e4 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -149,8 +149,10 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "\n"); drm_printf(p, "Whitelist\n"); - for_each_hw_engine(hwe, gt, id) + for_each_hw_engine(hwe, gt, id) { xe_reg_whitelist_dump(&hwe->reg_whitelist, p); + xe_reg_whitelist_dump(&hwe->oa_whitelist, p); + } return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c index 577a315854af..eed0a750d2eb 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay.c +++ b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -689,12 +689,17 @@ static int relay_action_handler(struct xe_guc_relay *relay, u32 origin, return relay_testloop_action_handler(relay, origin, msg, len, response, size); type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); + relay_assert(relay, guc_hxg_type_is_action(type)); - if (IS_SRIOV_PF(relay_to_xe(relay))) - ret = xe_gt_sriov_pf_service_process_request(gt, origin, msg, len, response, size); - else + if (IS_SRIOV_PF(relay_to_xe(relay))) { + if (type == GUC_HXG_TYPE_REQUEST) + ret = xe_gt_sriov_pf_service_process_request(gt, origin, msg, len, + response, size); + else + ret = -EOPNOTSUPP; + } else { ret = -EOPNOTSUPP; - + } if (type == GUC_HXG_TYPE_EVENT) relay_assert(relay, ret <= 0); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 12a410458df6..f5c3d8a97ec6 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1163,7 +1163,7 @@ static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) if (exec_queue_suspended(q)) return; - if (!exec_queue_enabled(q) && 
!exec_queue_suspended(q)) { + if (!exec_queue_enabled(q)) { action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; action[len++] = q->guc->id; action[len++] = GUC_CONTEXT_ENABLE; @@ -1493,7 +1493,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_device *xe = guc_to_xe(guc); int err = -ETIME; pid_t pid = -1; - bool wedged = false, skip_timeout_check; + bool wedged = false, wedge_device = false, skip_timeout_check; xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); @@ -1638,7 +1638,7 @@ trigger_reset: } if (q->flags & EXEC_QUEUE_FLAG_KERNEL) { xe_gt_WARN(q->gt, true, "Kernel-submitted job timed out\n"); - xe_device_declare_wedged(gt_to_xe(q->gt)); + wedge_device = true; } } else if (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)) { xe_gt_WARN(q->gt, true, "VM job timed out on non-killed execqueue\n"); @@ -1658,6 +1658,9 @@ trigger_reset: xe_guc_exec_queue_trigger_cleanup(q); } + if (wedge_device) + xe_device_declare_wedged(gt_to_xe(q->gt)); + /* * We want the job added back to the pending list so it gets freed; this * is what DRM_GPU_SCHED_STAT_NO_HANG does. diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 8c66ff6f3d3c..0b193c451a11 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -346,7 +346,7 @@ hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) | REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx); struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - const struct xe_rtp_entry_sr lrc_setup[] = { + const struct xe_rtp_table_sr lrc_setup = XE_RTP_TABLE_SR( /* * Some blitter commands do not have a field for MOCS, those * commands will use MOCS index pointed by BLIT_CCTL. 
@@ -369,10 +369,9 @@ hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) }, - }; + ); - xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), - &hwe->reg_lrc, true); + xe_rtp_process_to_sr(&ctx, &lrc_setup, &hwe->reg_lrc, true); } void xe_hw_engine_setup_reg_lrc(struct xe_hw_engine *hwe) @@ -408,7 +407,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - const struct xe_rtp_entry_sr engine_entries[] = { + const struct xe_rtp_table_sr engine_sr = XE_RTP_TABLE_SR( { XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"), XE_RTP_RULES(FUNC(xe_rtp_match_always)), XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0), @@ -465,10 +464,9 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(SET(GFX_MODE(0), GFX_MSIX_INTERRUPT_ENABLE, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - }; + ); - xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), - &hwe->reg_sr, false); + xe_rtp_process_to_sr(&ctx, &engine_sr, &hwe->reg_sr, false); } static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance) @@ -574,6 +572,8 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hw_engine_setup_default_state(hwe); xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt)); + xe_reg_sr_init(&hwe->oa_whitelist, hwe->name, gt_to_xe(gt)); + xe_reg_sr_init(&hwe->oa_sr, hwe->name, gt_to_xe(gt)); xe_reg_whitelist_process_engine(hwe); } @@ -628,7 +628,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->exl_port = xe_execlist_port_create(xe, hwe); if (IS_ERR(hwe->exl_port)) { err = PTR_ERR(hwe->exl_port); - goto err_hwsp; + goto err_name; } } else { /* GSCCS has a special interrupt for reset */ @@ -648,8 
+648,6 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe); -err_hwsp: - xe_bo_unpin_map_no_vm(hwe->hwsp); err_name: hwe->name = NULL; diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 2cf898e682f5..84c097da9b6f 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -131,6 +131,14 @@ struct xe_hw_engine { */ struct xe_reg_sr reg_whitelist; /** + * @oa_whitelist: oa registers to be whitelisted + */ + struct xe_reg_sr oa_whitelist; + /** + * @oa_sr: oa nonpriv whitelist registers, changed on oa stream open/close + */ + struct xe_reg_sr oa_sr; + /** * @reg_lrc: LRC workaround registers */ struct xe_reg_sr reg_lrc; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4bf4b1f65929..2dce6a47202c 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -37,6 +37,7 @@ #include "xe_oa.h" #include "xe_observation.h" #include "xe_pm.h" +#include "xe_reg_whitelist.h" #include "xe_sched_job.h" #include "xe_sriov.h" #include "xe_sync.h" @@ -885,6 +886,9 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) mutex_destroy(&stream->stream_lock); + if (stream->sample) + xe_reg_dewhitelist_oa_regs(stream->gt); + xe_oa_disable_metric_set(stream); xe_exec_queue_put(stream->k_exec_q); @@ -1885,6 +1889,9 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, goto err_disable; } + if (stream->sample) + xe_reg_whitelist_oa_regs(stream->gt); + /* Hold a reference on the drm device till stream_fd is released */ drm_dev_get(&stream->oa->xe->drm); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 3d9ec8490899..e876e9be92ba 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -126,6 +126,9 @@ struct xe_oa_gt { /** @oa_unit: array of oa_units */ struct xe_oa_unit *oa_unit; + + /** 
@whitelist_count: number of open streams for which oa registers are whitelisted */ + u32 whitelist_count; }; /** diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 18a98667c0e6..670bc2206fea 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -433,6 +433,7 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, struct xe_pt_stage_bind_walk *xe_walk) { + struct xe_bo *bo = xe_vma_bo(xe_walk->vma); u64 size, dma; if (level > MAX_HUGEPTE_LEVEL) @@ -446,8 +447,8 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, if (next - xe_walk->va_curs_start > xe_walk->curs->size) return false; - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) + /* null VMA's and purged BO's do not have dma addresses */ + if (xe_vma_is_null(xe_walk->vma) || (bo && xe_bo_is_purged(bo))) return true; /* if we are clearing page table, no dma addresses*/ @@ -468,6 +469,7 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, static bool xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) { + struct xe_bo *bo = xe_vma_bo(xe_walk->vma); struct xe_res_cursor curs = *xe_walk->curs; if (!IS_ALIGNED(addr, SZ_64K)) @@ -476,8 +478,8 @@ xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) if (next > xe_walk->l0_end_addr) return false; - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) + /* null VMA's and purged BO's do not have dma addresses */ + if (xe_vma_is_null(xe_walk->vma) || (bo && xe_bo_is_purged(bo))) return true; xe_res_next(&curs, addr - xe_walk->va_curs_start); @@ -708,7 +710,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, { struct xe_device *xe = tile_to_xe(tile); struct xe_bo *bo = xe_vma_bo(vma); - struct xe_res_cursor curs; + struct xe_res_cursor curs = {}; struct xe_vm *vm = 
xe_vma_vm(vma); struct xe_pt_stage_bind_walk xe_walk = { .base = { @@ -885,13 +887,21 @@ static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, { struct xe_pt_zap_ptes_walk *xe_walk = container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); + struct xe_pt *xe_child; pgoff_t end_offset; - XE_WARN_ON(!*child); XE_WARN_ON(!level); /* + * Below would be unexpected behavior that needs to be root caused + * but better warn and bail than crash the driver. + */ + if (XE_WARN_ON(!*child)) + return 0; + + xe_child = container_of(*child, typeof(*xe_child), base); + + /* * Note that we're called from an entry callback, and we're dealing * with the child of that entry rather than the parent, so need to * adjust level down. @@ -1078,7 +1088,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) xe_pt_commit_prepare_locks_assert(vma); if (xe_vma_is_userptr(vma)) - xe_svm_assert_held_read(vm); + xe_svm_assert_held_read_or_inject_write(vm); } static void xe_pt_commit(struct xe_vma *vma, @@ -1398,6 +1408,33 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) pt_update_ops, rftree); } +/* + * Acquire/release the svm notifier_lock around xe_pt_svm_userptr_pre_commit() + * and the matching late release in xe_pt_update_ops_run(). Read mode by + * default; write mode when CONFIG_DRM_XE_USERPTR_INVAL_INJECT is on, + * because a userptr op in this critical section may invoke the injected + * xe_vma_userptr_force_invalidate() path that calls + * drm_gpusvm_unmap_pages() with ctx->in_notifier=true, which requires the + * lock held for write. 
+ */ +static void xe_pt_svm_userptr_notifier_lock(struct xe_vm *vm) +{ +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) + down_write(&vm->svm.gpusvm.notifier_lock); +#else + xe_svm_notifier_lock(vm); +#endif +} + +static void xe_pt_svm_userptr_notifier_unlock(struct xe_vm *vm) +{ +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) + up_write(&vm->svm.gpusvm.notifier_lock); +#else + xe_svm_notifier_unlock(vm); +#endif +} + #if IS_ENABLED(CONFIG_DRM_GPUSVM) #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT @@ -1429,7 +1466,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, struct xe_userptr_vma *uvma; unsigned long notifier_seq; - xe_svm_assert_held_read(vm); + xe_svm_assert_held_read_or_inject_write(vm); if (!xe_vma_is_userptr(vma)) return 0; @@ -1459,7 +1496,7 @@ static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op, { int err = 0; - xe_svm_assert_held_read(vm); + xe_svm_assert_held_read_or_inject_write(vm); switch (op->base.op) { case DRM_GPUVA_OP_MAP: @@ -1531,12 +1568,12 @@ static int xe_pt_svm_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) if (err) return err; - xe_svm_notifier_lock(vm); + xe_pt_svm_userptr_notifier_lock(vm); list_for_each_entry(op, &vops->list, link) { err = op_check_svm_userptr(vm, op, pt_update_ops); if (err) { - xe_svm_notifier_unlock(vm); + xe_pt_svm_userptr_notifier_unlock(vm); break; } } @@ -2395,7 +2432,7 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, vma->tile_invalidated & ~BIT(tile->id)); vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { - xe_svm_assert_held_read(vm); + xe_svm_assert_held_read_or_inject_write(vm); to_userptr_vma(vma)->userptr.initial_bind = true; } @@ -2431,7 +2468,7 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, if (!vma->tile_present) { list_del_init(&vma->combined_links.rebind); if (xe_vma_is_userptr(vma)) { - xe_svm_assert_held_read(vm); + xe_svm_assert_held_read_or_inject_write(vm); 
spin_lock(&vm->userptr.invalidated_lock); list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); @@ -2707,7 +2744,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) } if (pt_update_ops->needs_svm_lock) - xe_svm_notifier_unlock(vm); + xe_pt_svm_userptr_notifier_unlock(vm); /* * The last fence is only used for zero bind queue idling; migrate diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index fb65940848d7..526907d2d824 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -41,7 +41,7 @@ static bool match_multi_queue_class(const struct xe_device *xe, return xe_gt_supports_multi_queue(gt, hwe->class); } -static const struct xe_rtp_entry_sr register_whitelist[] = { +static const struct xe_rtp_table_sr register_whitelist = XE_RTP_TABLE_SR( { XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(WHITELIST(PS_INVOCATION_COUNT, @@ -103,11 +103,16 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { WHITELIST(VFLSKPD, RING_FORCE_TO_NONPRIV_ACCESS_RW)) }, +); + +static const struct xe_rtp_table_sr oa_whitelist = XE_RTP_TABLE_SR( + +#define WHITELIST_DENY(r, f) WHITELIST(r, (f) | RING_FORCE_TO_NONPRIV_DENY) #define WHITELIST_OA_MMIO_TRG(trg, status, head) \ - WHITELIST(trg, RING_FORCE_TO_NONPRIV_ACCESS_RW), \ - WHITELIST(status, RING_FORCE_TO_NONPRIV_ACCESS_RD), \ - WHITELIST(head, RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4) + WHITELIST_DENY(trg, RING_FORCE_TO_NONPRIV_ACCESS_RW), \ + WHITELIST_DENY(status, RING_FORCE_TO_NONPRIV_ACCESS_RD), \ + WHITELIST_DENY(head, RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4) #define WHITELIST_OAG_MMIO_TRG \ WHITELIST_OA_MMIO_TRG(OAG_MMIOTRIGGER, OAG_OASTATUS, OAG_OAHEADPTR) @@ -124,7 +129,7 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { 
OAM_HEAD_POINTER(XE_OAM_SCMI_1_BASE_ADJ)) #define WHITELIST_OA_MERT_MMIO_TRG \ - WHITELIST_OA_MMIO_TRG(OAMERT_MMIO_TRG, OAMERT_STATUS, OAMERT_HEAD_POINTER) + WHITELIST_OA_MMIO_TRG(OAMERT_MMIO_TRG, OAMERT_STATUS, OAMERT_HEAD_POINTER) { XE_RTP_NAME("oag_mmio_trg_rcs"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), @@ -154,11 +159,12 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { XE_RTP_RULES(FUNC(match_has_mert), ENGINE_CLASS(COPY)), XE_RTP_ACTIONS(WHITELIST_OA_MERT_MMIO_TRG) }, -}; +); -static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) +static int whitelist_apply_to_hwe(struct xe_hw_engine *hwe, struct xe_reg_sr *in, + struct xe_reg_sr *out, int first_slot) { - struct xe_reg_sr *sr = &hwe->reg_whitelist; + struct xe_reg_sr *sr = in; struct xe_reg_sr_entry *entry; struct drm_printer p; unsigned long reg; @@ -167,7 +173,7 @@ static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) xe_gt_dbg(hwe->gt, "Add %s whitelist to engine\n", sr->name); p = xe_gt_dbg_printer(hwe->gt); - slot = 0; + slot = first_slot; xa_for_each(&sr->xa, reg, entry) { struct xe_reg_sr_entry hwe_entry = { .reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot), @@ -184,10 +190,12 @@ static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) } xe_reg_whitelist_print_entry(&p, 0, reg, entry); - xe_reg_sr_add(&hwe->reg_sr, &hwe_entry, hwe->gt); + xe_reg_sr_add(out, &hwe_entry, hwe->gt); slot++; } + + return slot; } /** @@ -201,10 +209,78 @@ static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + int first_oa_slot; + + xe_rtp_process_to_sr(&ctx, &register_whitelist, &hwe->reg_whitelist, false); + first_oa_slot = whitelist_apply_to_hwe(hwe, &hwe->reg_whitelist, &hwe->reg_sr, 0); + + xe_rtp_process_to_sr(&ctx, &oa_whitelist, &hwe->oa_whitelist, false); + + /* + * Save oa nonpriv registers to hwe->oa_sr, from 
which oa registers are whitelisted + * or de-whitelisted, by toggling the 'deny' bit on oa stream open/close + */ + whitelist_apply_to_hwe(hwe, &hwe->oa_whitelist, &hwe->oa_sr, first_oa_slot); + + /* + * Also save oa nonpriv registers to hwe->reg_sr, to ensure oa registers are not + * whitelisted by default after probe, gt reset, resume and engine reset + */ + whitelist_apply_to_hwe(hwe, &hwe->oa_whitelist, &hwe->reg_sr, first_oa_slot); +} + +static void __whitelist_oa_regs(struct xe_hw_engine *hwe, bool whitelist) +{ + struct xe_reg_sr_entry *entry; + unsigned long reg; + + xa_for_each(&hwe->oa_sr.xa, reg, entry) { + if (whitelist) + entry->set_bits &= ~RING_FORCE_TO_NONPRIV_DENY; + else + entry->set_bits |= RING_FORCE_TO_NONPRIV_DENY; + } + + xe_reg_sr_apply_mmio(&hwe->oa_sr, hwe->gt); +} + +/** + * xe_reg_whitelist_oa_regs - whitelist oa registers for gt + * @gt: gt to whitelist oa registers for + * + * Whitelist OA registers by resetting RING_FORCE_TO_NONPRIV_DENY + */ +void xe_reg_whitelist_oa_regs(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + lockdep_assert_held(&gt->oa.gt_lock); + if (gt->oa.whitelist_count++) + return; + + for_each_hw_engine(hwe, gt, id) + __whitelist_oa_regs(hwe, true); +} + +/** + * xe_reg_dewhitelist_oa_regs - dewhitelist oa registers for gt + * @gt: gt to dewhitelist oa registers for + * + * Dewhitelist OA registers by setting RING_FORCE_TO_NONPRIV_DENY + */ +void xe_reg_dewhitelist_oa_regs(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + lockdep_assert_held(&gt->oa.gt_lock); + xe_assert(gt_to_xe(gt), gt->oa.whitelist_count); + if (--gt->oa.whitelist_count) + return; - xe_rtp_process_to_sr(&ctx, register_whitelist, ARRAY_SIZE(register_whitelist), - &hwe->reg_whitelist, false); - whitelist_apply_to_hwe(hwe); + for_each_hw_engine(hwe, gt, id) + __whitelist_oa_regs(hwe, false); } /** diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h index 
3b64b42fe96e..e1eb1b7d5480 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.h +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h @@ -9,12 +9,16 @@ #include <linux/types.h> struct drm_printer; +struct xe_gt; struct xe_hw_engine; struct xe_reg_sr; struct xe_reg_sr_entry; void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); +void xe_reg_whitelist_oa_regs(struct xe_gt *gt); +void xe_reg_dewhitelist_oa_regs(struct xe_gt *gt); + void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, u32 reg, struct xe_reg_sr_entry *entry); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index dec9d94e6fb0..83a40e1f9528 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -326,8 +326,7 @@ static void rtp_mark_active(struct xe_device *xe, * xe_rtp_process_to_sr - Process all rtp @entries, adding the matching ones to * the save-restore argument. * @ctx: The context for processing the table, with one of device, gt or hwe - * @entries: Table with RTP definitions - * @n_entries: Number of entries to process, usually ARRAY_SIZE(entries) + * @table: Table with RTP definitions * @sr: Save-restore struct where matching rules execute the action. This can be * viewed as the "coalesced view" of multiple the tables. 
The bits for each * register set are expected not to collide with previously added entries @@ -339,12 +338,10 @@ static void rtp_mark_active(struct xe_device *xe, * used to calculate the right register offset */ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, - const struct xe_rtp_entry_sr *entries, - size_t n_entries, + const struct xe_rtp_table_sr *table, struct xe_reg_sr *sr, bool process_in_vf) { - const struct xe_rtp_entry_sr *entry; struct xe_hw_engine *hwe = NULL; struct xe_gt *gt = NULL; struct xe_device *xe = NULL; @@ -354,9 +351,10 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, if (!process_in_vf && IS_SRIOV_VF(xe)) return; - xe_assert(xe, entries); + xe_assert(xe, table->entries); - for (entry = entries; entry - entries < n_entries; entry++) { + for (size_t i = 0; i < table->n_entries; i++) { + const struct xe_rtp_entry_sr *entry = &table->entries[i]; bool match = false; if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) { @@ -371,37 +369,40 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, } if (match) - rtp_mark_active(xe, ctx, entry - entries); + rtp_mark_active(xe, ctx, i); } } EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr); /** - * xe_rtp_process - Process all rtp @entries, without running any action + * xe_rtp_process - Process all entries in rtp @table, without running any action * @ctx: The context for processing the table, with one of device, gt or hwe - * @entries: Table with RTP definitions + * @table: Table with RTP definitions * - * Walk the table pointed by @entries (with an empty sentinel), executing the + * Walk the table pointed by @table, executing the * rules. One difference from xe_rtp_process_to_sr(): there is no action * associated with each entry since this uses struct xe_rtp_entry. Its main use * is for marking active workarounds via * xe_rtp_process_ctx_enable_active_tracking(). 
*/ void xe_rtp_process(struct xe_rtp_process_ctx *ctx, - const struct xe_rtp_entry *entries) + const struct xe_rtp_table *table) { - const struct xe_rtp_entry *entry; struct xe_hw_engine *hwe; struct xe_gt *gt; struct xe_device *xe; rtp_get_context(ctx, &hwe, &gt, &xe); - for (entry = entries; entry && entry->rules; entry++) { + xe_assert(xe, table->entries); + + for (size_t i = 0; i < table->n_entries; i++) { + const struct xe_rtp_entry *entry = &table->entries[i]; + if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules)) continue; - rtp_mark_active(xe, ctx, entry - entries); + rtp_mark_active(xe, ctx, i); } } EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process); diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index e4f1930ca1c3..2cc65053cd07 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -461,6 +461,24 @@ struct xe_reg_sr; XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__)) \ } +/* + * Note: ARRAY_SIZE() cannot be used here because it expands through + * __must_be_array() -> __BUILD_BUG_ON_ZERO_MSG() -> _Static_assert inside + * sizeof(struct{}), which clang < 21 rejects when the compound literal + * contains non-compile-time-constant initializers. + */ +#define XE_RTP_TABLE_SR(...) { \ + .entries = (const struct xe_rtp_entry_sr[]){__VA_ARGS__}, \ + .n_entries = sizeof((const struct xe_rtp_entry_sr[]){__VA_ARGS__}) / \ + sizeof(struct xe_rtp_entry_sr), \ +} + +#define XE_RTP_TABLE(...) 
{ \ + .entries = (const struct xe_rtp_entry[]){__VA_ARGS__}, \ + .n_entries = sizeof((const struct xe_rtp_entry[]){__VA_ARGS__}) / \ + sizeof(struct xe_rtp_entry), \ +} + #define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \ struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \ @@ -471,12 +489,12 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, size_t n_entries); void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, - const struct xe_rtp_entry_sr *entries, - size_t n_entries, struct xe_reg_sr *sr, + const struct xe_rtp_table_sr *table, + struct xe_reg_sr *sr, bool process_in_vf); void xe_rtp_process(struct xe_rtp_process_ctx *ctx, - const struct xe_rtp_entry *entries); + const struct xe_rtp_table *table); /* Match functions to be used with XE_RTP_MATCH_FUNC */ diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 0265c16d2762..58018ae4f8cc 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -112,6 +112,16 @@ struct xe_rtp_entry { u8 n_rules; }; +struct xe_rtp_table_sr { + const struct xe_rtp_entry_sr *entries; + size_t n_entries; +}; + +struct xe_rtp_table { + const struct xe_rtp_entry *entries; + size_t n_entries; +}; + enum xe_rtp_process_type { XE_RTP_PROCESS_TYPE_DEVICE, XE_RTP_PROCESS_TYPE_GT, diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index e1651e70c8f0..b1e1ac26c66d 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -1248,10 +1248,8 @@ retry: xe_svm_range_fault_count_stats_incr(gt, range); - if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) { - err = -EACCES; - goto out; - } + if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) + return -EACCES; if (xe_svm_range_is_valid(range, tile, ctx.devmem_only, dpagemap)) { 
xe_svm_range_valid_fault_count_stats_incr(gt, range); diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index b7b8eeacf196..3ca46a6f98c7 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -394,8 +394,19 @@ static inline struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_inst #define xe_svm_assert_in_notifier(vm__) \ lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) -#define xe_svm_assert_held_read(vm__) \ +/* + * Assert the svm notifier_lock is held. Read mode by default; write mode + * when CONFIG_DRM_XE_USERPTR_INVAL_INJECT is on, because that path forces + * a userptr invalidation that ends in drm_gpusvm_unmap_pages() with + * ctx->in_notifier=true, which requires the lock held for write. + */ +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +#define xe_svm_assert_held_read_or_inject_write(vm__) \ + lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) +#else +#define xe_svm_assert_held_read_or_inject_write(vm__) \ lockdep_assert_held_read(&(vm__)->svm.gpusvm.notifier_lock) +#endif #define xe_svm_notifier_lock(vm__) \ drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) @@ -409,7 +420,7 @@ static inline struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_inst #else #define xe_svm_assert_in_notifier(...) do {} while (0) -static inline void xe_svm_assert_held_read(struct xe_vm *vm) +static inline void xe_svm_assert_held_read_or_inject_write(struct xe_vm *vm) { } diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 9a1b3862e192..bf3fad9cdbef 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -20,7 +20,7 @@ #undef XE_REG_MCR #define XE_REG_MCR(...) 
XE_REG(__VA_ARGS__, .mcr = 1) -static const struct xe_rtp_entry_sr gt_tunings[] = { +static const struct xe_rtp_table_sr gt_tunings = XE_RTP_TABLE_SR( { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS)) @@ -100,9 +100,9 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE, BANK_HASH_4KB_MODE)) }, -}; +); -static const struct xe_rtp_entry_sr engine_tunings[] = { +static const struct xe_rtp_table_sr engine_tunings = XE_RTP_TABLE_SR( { XE_RTP_NAME("Tuning: L3 Hashing Mask"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), FUNC(xe_rtp_match_first_render_or_compute)), @@ -129,9 +129,9 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN2, TILEY_LOCALID)) }, -}; +); -static const struct xe_rtp_entry_sr lrc_tunings[] = { +static const struct xe_rtp_table_sr lrc_tunings = XE_RTP_TABLE_SR( { XE_RTP_NAME("Tuning: Windower HW Filtering"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3599), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, HW_FILTERING)) @@ -171,7 +171,7 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = { XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK, REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f))) }, -}; +); /** * xe_tuning_init - initialize gt with tunings bookkeeping @@ -185,9 +185,9 @@ int xe_tuning_init(struct xe_gt *gt) size_t n_lrc, n_engine, n_gt, total; unsigned long *p; - n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings)); - n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings)); - n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings)); + n_gt = BITS_TO_LONGS(gt_tunings.n_entries); + n_engine = BITS_TO_LONGS(engine_tunings.n_entries); + n_lrc = BITS_TO_LONGS(lrc_tunings.n_entries); total = n_gt + n_engine + n_lrc; p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL); @@ -210,9 +210,8 
@@ void xe_tuning_process_gt(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->tuning_active.gt, - ARRAY_SIZE(gt_tunings)); - xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), - &gt->reg_sr, false); + gt_tunings.n_entries); + xe_rtp_process_to_sr(&ctx, &gt_tunings, &gt->reg_sr, false); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); @@ -222,9 +221,8 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->tuning_active.engine, - ARRAY_SIZE(engine_tunings)); - xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings), - &hwe->reg_sr, false); + engine_tunings.n_entries); + xe_rtp_process_to_sr(&ctx, &engine_tunings, &hwe->reg_sr, false); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); @@ -242,9 +240,8 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->tuning_active.lrc, - ARRAY_SIZE(lrc_tunings)); - xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), - &hwe->reg_lrc, true); + lrc_tunings.n_entries); + xe_rtp_process_to_sr(&ctx, &lrc_tunings, &hwe->reg_lrc, true); } /** @@ -259,18 +256,18 @@ int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) size_t idx; drm_printf(p, "GT Tunings\n"); - for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)) - drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name); + for_each_set_bit(idx, gt->tuning_active.gt, gt_tunings.n_entries) + drm_printf_indent(p, 1, "%s\n", gt_tunings.entries[idx].name); drm_puts(p, "\n"); drm_printf(p, "Engine Tunings\n"); - for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)) - drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name); + for_each_set_bit(idx, gt->tuning_active.engine, engine_tunings.n_entries) + drm_printf_indent(p, 1, "%s\n", engine_tunings.entries[idx].name); drm_puts(p, "\n"); drm_printf(p, "LRC Tunings\n"); - for_each_set_bit(idx, gt->tuning_active.lrc, 
ARRAY_SIZE(lrc_tunings)) - drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name); + for_each_set_bit(idx, gt->tuning_active.lrc, lrc_tunings.n_entries) + drm_printf_indent(p, 1, "%s\n", lrc_tunings.entries[idx].name); return 0; } diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c index 6761005c0b90..6f71bc66b14e 100644 --- a/drivers/gpu/drm/xe/xe_userptr.c +++ b/drivers/gpu/drm/xe/xe_userptr.c @@ -269,7 +269,7 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { */ void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) { - static struct mmu_interval_notifier_finish *finish; + struct mmu_interval_notifier_finish *finish; struct xe_vm *vm = xe_vma_vm(&uvma->vma); /* Protect against concurrent userptr pinning */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index cb811f8a7781..b9d9fe0801aa 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -130,7 +130,7 @@ __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); -static const struct xe_rtp_entry_sr gt_was[] = { +static const struct xe_rtp_table_sr gt_was = XE_RTP_TABLE_SR( /* Workarounds applying over a range of IPs */ { XE_RTP_NAME("14011060649"), @@ -306,9 +306,9 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) }, -}; +); -static const struct xe_rtp_entry_sr engine_was[] = { +static const struct xe_rtp_table_sr engine_was = XE_RTP_TABLE_SR( /* Workarounds applying over a range of IPs */ { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), @@ -614,9 +614,9 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, BIT_APQ_OPT_DIS)) }, -}; +); -static const struct xe_rtp_entry_sr lrc_was[] = { +static const struct xe_rtp_table_sr lrc_was = XE_RTP_TABLE_SR( { 
XE_RTP_NAME("16011163337"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), /* read verification is ignored due to 1608008084. */ @@ -794,21 +794,29 @@ static const struct xe_rtp_entry_sr lrc_was[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, -}; +); -static __maybe_unused const struct xe_rtp_entry oob_was[] = { +static const struct xe_rtp_entry oob_was_entries[] = { #include <generated/xe_wa_oob.c> - {} }; -static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT); +static_assert(ARRAY_SIZE(oob_was_entries) == _XE_WA_OOB_COUNT); -static __maybe_unused const struct xe_rtp_entry device_oob_was[] = { +static __maybe_unused const struct xe_rtp_table oob_was = { + .entries = oob_was_entries, + .n_entries = ARRAY_SIZE(oob_was_entries), +}; + +static const struct xe_rtp_entry device_oob_was_entries[] = { #include <generated/xe_device_wa_oob.c> - {} }; -static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT); +static_assert(ARRAY_SIZE(device_oob_was_entries) == _XE_DEVICE_WA_OOB_COUNT); + +static __maybe_unused const struct xe_rtp_table device_oob_was = { + .entries = device_oob_was_entries, + .n_entries = ARRAY_SIZE(device_oob_was_entries), +}; __diag_pop(); @@ -824,10 +832,10 @@ void xe_wa_process_device_oob(struct xe_device *xe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe); - xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)); + xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->wa_active.oob, device_oob_was.n_entries); xe->wa_active.oob_initialized = true; - xe_rtp_process(&ctx, device_oob_was); + xe_rtp_process(&ctx, &device_oob_was); } /** @@ -842,9 +850,9 @@ void xe_wa_process_gt_oob(struct xe_gt *gt) struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.oob, - ARRAY_SIZE(oob_was)); + oob_was.n_entries); 
gt->wa_active.oob_initialized = true; - xe_rtp_process(&ctx, oob_was); + xe_rtp_process(&ctx, &oob_was); } /** @@ -859,9 +867,8 @@ void xe_wa_process_gt(struct xe_gt *gt) struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt, - ARRAY_SIZE(gt_was)); - xe_rtp_process_to_sr(&ctx, gt_was, ARRAY_SIZE(gt_was), - &gt->reg_sr, false); + gt_was.n_entries); + xe_rtp_process_to_sr(&ctx, &gt_was, &gt->reg_sr, false); } EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); @@ -878,9 +885,8 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe) struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine, - ARRAY_SIZE(engine_was)); - xe_rtp_process_to_sr(&ctx, engine_was, ARRAY_SIZE(engine_was), - &hwe->reg_sr, false); + engine_was.n_entries); + xe_rtp_process_to_sr(&ctx, &engine_was, &hwe->reg_sr, false); } /** @@ -896,9 +902,8 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc, - ARRAY_SIZE(lrc_was)); - xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), - &hwe->reg_lrc, true); + lrc_was.n_entries); + xe_rtp_process_to_sr(&ctx, &lrc_was, &hwe->reg_lrc, true); } /** @@ -912,7 +917,7 @@ int xe_wa_device_init(struct xe_device *xe) unsigned long *p; p = drmm_kzalloc(&xe->drm, - sizeof(*p) * BITS_TO_LONGS(ARRAY_SIZE(device_oob_was)), + sizeof(*p) * BITS_TO_LONGS(device_oob_was.n_entries), GFP_KERNEL); if (!p) @@ -935,10 +940,10 @@ int xe_wa_gt_init(struct xe_gt *gt) size_t n_oob, n_lrc, n_engine, n_gt, total; unsigned long *p; - n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_was)); - n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_was)); - n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_was)); - n_oob = BITS_TO_LONGS(ARRAY_SIZE(oob_was)); + n_gt = BITS_TO_LONGS(gt_was.n_entries); + n_engine = 
BITS_TO_LONGS(engine_was.n_entries); + n_lrc = BITS_TO_LONGS(lrc_was.n_entries); + n_oob = BITS_TO_LONGS(oob_was.n_entries); total = n_gt + n_engine + n_lrc + n_oob; p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL); @@ -962,9 +967,9 @@ void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p) size_t idx; drm_printf(p, "Device OOB Workarounds\n"); - for_each_set_bit(idx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)) - if (device_oob_was[idx].name) - drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name); + for_each_set_bit(idx, xe->wa_active.oob, device_oob_was.n_entries) + if (device_oob_was.entries[idx].name) + drm_printf_indent(p, 1, "%s\n", device_oob_was.entries[idx].name); } /** @@ -979,24 +984,24 @@ int xe_wa_gt_dump(struct xe_gt *gt, struct drm_printer *p) size_t idx; drm_printf(p, "GT Workarounds\n"); - for_each_set_bit(idx, gt->wa_active.gt, ARRAY_SIZE(gt_was)) - drm_printf_indent(p, 1, "%s\n", gt_was[idx].name); + for_each_set_bit(idx, gt->wa_active.gt, gt_was.n_entries) + drm_printf_indent(p, 1, "%s\n", gt_was.entries[idx].name); drm_puts(p, "\n"); drm_printf(p, "Engine Workarounds\n"); - for_each_set_bit(idx, gt->wa_active.engine, ARRAY_SIZE(engine_was)) - drm_printf_indent(p, 1, "%s\n", engine_was[idx].name); + for_each_set_bit(idx, gt->wa_active.engine, engine_was.n_entries) + drm_printf_indent(p, 1, "%s\n", engine_was.entries[idx].name); drm_puts(p, "\n"); drm_printf(p, "LRC Workarounds\n"); - for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was)) - drm_printf_indent(p, 1, "%s\n", lrc_was[idx].name); + for_each_set_bit(idx, gt->wa_active.lrc, lrc_was.n_entries) + drm_printf_indent(p, 1, "%s\n", lrc_was.entries[idx].name); drm_puts(p, "\n"); drm_printf(p, "OOB Workarounds\n"); - for_each_set_bit(idx, gt->wa_active.oob, ARRAY_SIZE(oob_was)) - if (oob_was[idx].name) - drm_printf_indent(p, 1, "%s\n", oob_was[idx].name); + for_each_set_bit(idx, gt->wa_active.oob, oob_was.n_entries) + if (oob_was.entries[idx].name) + 
drm_printf_indent(p, 1, "%s\n", oob_was.entries[idx].name); return 0; } |
