diff options
author | Dave Airlie <airlied@redhat.com> | 2024-11-05 11:47:52 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2024-11-05 11:48:14 +1000 |
commit | fb6c5b1fdc03a61bcd0ac716dc8597fc97d00da5 (patch) | |
tree | 0b569d6a5fadc72be65f389859bf7f2acb288778 /drivers/gpu/drm/xe | |
parent | ffd99396c630781b4142b2b8c27eb2d69e344f7c (diff) | |
parent | 35d25a4a0012e690ef0cc4c5440231176db595cc (diff) | |
download | lwn-fb6c5b1fdc03a61bcd0ac716dc8597fc97d00da5.tar.gz lwn-fb6c5b1fdc03a61bcd0ac716dc8597fc97d00da5.zip |
Merge tag 'drm-xe-next-2024-10-31' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
UAPI Changes:
- Define and parse OA sync properties (Ashutosh)
Driver Changes:
- Add caller info to xe_gt_reset_async (Nirmoy)
- A large forcewake rework / cleanup (Himal)
- A g2h response timeout fix (Badal)
- A PTL workaround (Vinay)
- Handle unreliable MMIO reads during forcewake (Shuicheng)
- Ufence user-space access fixes (Nirmoy)
- Annotate flexible arrays (Matthew Brost)
- Enable GuC lite restore (Fei)
- Prevent GuC register capture on VF (Zhanjun)
- Show VFs VRAM / LMEM provisioning summary over debugfs (Michal)
- Parallel queues fix on GT reset (Nirmoy)
- Move reference grabbing to a job's dma-fence (Matt Brost)
- Mark a number of local workqueues WQ_MEM_RECLAIM (Matt Brost)
- OA synchronization support (Ashutosh)
- Capture all available bits of GuC timestamp to GuC log (John)
- Increase readability of guc_info debugfs (John)
- Add a mmio barrier before GGTT invalidate (Matt Brost)
- Don't short-circuit TDR on jobs not started (Matt Brost)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZyNvA_vZZYR-1eWE@fedora
Diffstat (limited to 'drivers/gpu/drm/xe')
46 files changed, 995 insertions, 518 deletions
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 6b30743a2f6c..37606cf8cc5e 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -352,6 +352,7 @@ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, + GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a, }; #endif diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 231677129a35..6d34b55d72bb 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -42,6 +42,7 @@ bool intel_hdcp_gsc_check_status(struct intel_display *display) struct xe_gt *gt = tile->media_gt; struct xe_gsc *gsc = &gt->uc.gsc; bool ret = true; + unsigned int fw_ref; if (!gsc && !xe_uc_fw_is_enabled(&gsc->fw)) { drm_dbg_kms(&xe->drm, @@ -50,7 +51,8 @@ bool intel_hdcp_gsc_check_status(struct intel_display *display) } xe_pm_runtime_get(xe); - if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + if (!fw_ref) { drm_dbg_kms(&xe->drm, "failed to get forcewake to check proxy status\n"); ret = false; @@ -60,7 +62,7 @@ bool intel_hdcp_gsc_check_status(struct intel_display *display) if (!xe_gsc_proxy_init_done(gsc)) ret = false; - xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); + xe_force_wake_put(gt_to_fw(gt), fw_ref); out: xe_pm_runtime_put(xe); return ret; diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index b27b73680c12..2118f7dec287 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -84,7 +84,8 @@ #define HUC_LOADING_AGENT_GUC REG_BIT(1) #define GUC_WOPCM_OFFSET_VALID REG_BIT(0) #define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4) -#define GUC_PMTIMESTAMP XE_REG(0xc3e8) +#define 
GUC_PMTIMESTAMP_LO XE_REG(0xc3e8) +#define GUC_PMTIMESTAMP_HI XE_REG(0xc3ec) #define GUC_SEND_INTERRUPT XE_REG(0xc4c8) #define GUC_SEND_TRIGGER REG_BIT(0) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index ea932c051cc7..6f9b7a266b41 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -43,12 +43,11 @@ static void read_l3cc_table(struct xe_gt *gt, { struct kunit *test = kunit_get_current_test(); u32 l3cc, l3cc_expected; - unsigned int i; + unsigned int fw_ref, i; u32 reg_val; - u32 ret; - ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); for (i = 0; i < info->num_mocs_regs; i++) { if (!(i & 1)) { @@ -72,7 +71,7 @@ static void read_l3cc_table(struct xe_gt *gt, KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc, "l3cc idx=%u has incorrect val.\n", i); } - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } static void read_mocs_table(struct xe_gt *gt, @@ -80,15 +79,14 @@ static void read_mocs_table(struct xe_gt *gt, { struct kunit *test = kunit_get_current_test(); u32 mocs, mocs_expected; - unsigned int i; + unsigned int fw_ref, i; u32 reg_val; - u32 ret; KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index, "Unused entries index should have been defined\n"); - ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); for (i = 0; i < info->num_mocs_regs; i++) { if (regs_are_mcr(gt)) @@ -106,7 +104,7 @@ static void read_mocs_table(struct xe_gt *gt, "mocs reg 0x%x has incorrect val.\n", i); } - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } static int 
mocs_kernel_test_run_device(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index fe4319eb13fd..492b4877433f 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -90,13 +90,32 @@ static int forcewake_open(struct inode *inode, struct file *file) { struct xe_device *xe = inode->i_private; struct xe_gt *gt; - u8 id; + u8 id, last_gt; + unsigned int fw_ref; xe_pm_runtime_get(xe); - for_each_gt(gt, xe, id) - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + for_each_gt(gt, xe, id) { + last_gt = id; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) + goto err_fw_get; + } return 0; + +err_fw_get: + for_each_gt(gt, xe, id) { + if (id < last_gt) + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + else if (id == last_gt) + xe_force_wake_put(gt_to_fw(gt), fw_ref); + else + break; + } + + xe_pm_runtime_put(xe); + return -ETIMEDOUT; } static int forcewake_release(struct inode *inode, struct file *file) @@ -106,7 +125,7 @@ static int forcewake_release(struct inode *inode, struct file *file) u8 id; for_each_gt(gt, xe, id) - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); xe_pm_runtime_put(xe); return 0; diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 99842a35dbf0..d2679c5d976b 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -158,13 +158,15 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) { struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); + unsigned int fw_ref; /* keep going if fw fails as we still want to save the memory and SW data */ - if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) + fw_ref = 
xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); xe_vm_snapshot_capture_delayed(ss->vm); xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); - xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); /* Calculate devcoredump size */ ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); @@ -236,8 +238,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; - int i; + unsigned int fw_ref; bool cookie; + int i; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); @@ -261,11 +264,10 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, } /* keep going if fw fails as we still want to save the memory and SW data */ - if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL)) - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); + fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true); - ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); + ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct); ss->ge = xe_guc_exec_queue_snapshot_capture(q); ss->job = xe_sched_job_snapshot_capture(job); ss->vm = xe_vm_snapshot_capture(q->vm); @@ -274,7 +276,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, queue_work(system_unbound_wq, &ss->work); - xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(q->gt), fw_ref); dma_fence_end_signalling(cookie); } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index f6c87d14f78c..51bb9d875268 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -604,8 +604,8 @@ int xe_device_probe_early(struct xe_device *xe) static int 
probe_has_flat_ccs(struct xe_device *xe) { struct xe_gt *gt; + unsigned int fw_ref; u32 reg; - int err; /* Always enabled/disabled, no runtime check to do */ if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs) @@ -613,9 +613,9 @@ static int probe_has_flat_ccs(struct xe_device *xe) gt = xe_root_mmio_gt(xe); - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - return err; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER); xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE); @@ -624,7 +624,8 @@ static int probe_has_flat_ccs(struct xe_device *xe) drm_dbg(&xe->drm, "Flat CCS has been disabled in bios, May lead to performance impact"); - return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } int xe_device_probe(struct xe_device *xe) @@ -875,6 +876,7 @@ void xe_device_wmb(struct xe_device *xe) void xe_device_td_flush(struct xe_device *xe) { struct xe_gt *gt; + unsigned int fw_ref; u8 id; if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) @@ -889,7 +891,8 @@ void xe_device_td_flush(struct xe_device *xe) if (xe_gt_is_media_type(gt)) continue; - if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) return; xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); @@ -904,22 +907,22 @@ void xe_device_td_flush(struct xe_device *xe) 150, NULL, false)) xe_gt_err_once(gt, "TD flush timeout\n"); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } } void xe_device_l2_flush(struct xe_device *xe) { struct xe_gt *gt; - int err; + unsigned int fw_ref; gt = xe_root_mmio_gt(xe); if (!XE_WA(gt, 16023588340)) return; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) return; spin_lock(&gt->global_invl_lock); @@ -929,7 +932,7 @@ void 
xe_device_l2_flush(struct xe_device *xe) xe_gt_err_once(gt, "Global invalidation timeout\n"); spin_unlock(&gt->global_invl_lock); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index fb52a23e28f8..22f0f1a6dfd5 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -278,6 +278,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) struct xe_hw_engine *hwe; struct xe_exec_queue *q; u64 gpu_timestamp; + unsigned int fw_ref; xe_pm_runtime_get(xe); @@ -303,13 +304,16 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) continue; fw = xe_hw_engine_to_fw_domain(hwe); - if (xe_force_wake_get(gt_to_fw(gt), fw)) { + + fw_ref = xe_force_wake_get(gt_to_fw(gt), fw); + if (!xe_force_wake_ref_has_domain(fw_ref, fw)) { hwe = NULL; + xe_force_wake_put(gt_to_fw(gt), fw_ref); break; } gpu_timestamp = xe_hw_engine_read_timestamp(hwe); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), fw)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); break; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 7deb480e26af..1158b6062a6c 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -143,7 +143,7 @@ struct xe_exec_queue { /** @hw_engine_group_link: link into exec queues in the same hw engine group */ struct list_head hw_engine_group_link; /** @lrc: logical ring context for this exec queue */ - struct xe_lrc *lrc[]; + struct xe_lrc *lrc[] __counted_by(width); }; /** diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index f3b71fe7a96d..a8c416a48812 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -313,7 +313,7 @@ execlist_run_job(struct drm_sched_job *drm_job) 
q->ring_ops->emit_job(job); xe_execlist_make_active(exl); - return dma_fence_get(job->fence); + return job->fence; } static void execlist_job_free(struct drm_sched_job *drm_job) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 08621717b14b..4f6784e5abf8 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -21,15 +21,25 @@ static const char *str_wake_sleep(bool wake) return wake ? "wake" : "sleep"; } -static void domain_init(struct xe_force_wake_domain *domain, +static void mark_domain_initialized(struct xe_force_wake *fw, + enum xe_force_wake_domain_id id) +{ + fw->initialized_domains |= BIT(id); +} + +static void init_domain(struct xe_force_wake *fw, enum xe_force_wake_domain_id id, struct xe_reg reg, struct xe_reg ack) { + struct xe_force_wake_domain *domain = &fw->domains[id]; + domain->id = id; domain->reg_ctl = reg; domain->reg_ack = ack; domain->val = FORCEWAKE_MT(FORCEWAKE_KERNEL); domain->mask = FORCEWAKE_MT_MASK(FORCEWAKE_KERNEL); + + mark_domain_initialized(fw, id); } void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) @@ -43,13 +53,11 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); if (xe->info.graphics_verx100 >= 1270) { - domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], - XE_FW_DOMAIN_ID_GT, + init_domain(fw, XE_FW_DOMAIN_ID_GT, FORCEWAKE_GT, FORCEWAKE_ACK_GT_MTL); } else { - domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], - XE_FW_DOMAIN_ID_GT, + init_domain(fw, XE_FW_DOMAIN_ID_GT, FORCEWAKE_GT, FORCEWAKE_ACK_GT); } @@ -63,8 +71,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); if (!xe_gt_is_media_type(gt)) - domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], - XE_FW_DOMAIN_ID_RENDER, + init_domain(fw, XE_FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER, FORCEWAKE_ACK_RENDER); @@ -72,8 +79,7 @@ void 
xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) if (!(gt->info.engine_mask & BIT(i))) continue; - domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], - XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, + init_domain(fw, XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, FORCEWAKE_MEDIA_VDBOX(j), FORCEWAKE_ACK_MEDIA_VDBOX(j)); } @@ -82,15 +88,13 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) if (!(gt->info.engine_mask & BIT(i))) continue; - domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], - XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, + init_domain(fw, XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, FORCEWAKE_MEDIA_VEBOX(j), FORCEWAKE_ACK_MEDIA_VEBOX(j)); } if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)) - domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC], - XE_FW_DOMAIN_ID_GSC, + init_domain(fw, XE_FW_DOMAIN_ID_GSC, FORCEWAKE_GSC, FORCEWAKE_ACK_GSC); } @@ -156,52 +160,108 @@ static int domain_sleep_wait(struct xe_gt *gt, (ffs(tmp__) - 1))) && \ domain__->reg_ctl.addr) -int xe_force_wake_get(struct xe_force_wake *fw, - enum xe_force_wake_domains domains) +/** + * xe_force_wake_get() : Increase the domain refcount + * @fw: struct xe_force_wake + * @domains: forcewake domains to get refcount on + * + * This function wakes up @domains if they are asleep and takes references. + * If requested domain is XE_FORCEWAKE_ALL then only applicable/initialized + * domains will be considered for refcount and it is a caller responsibility + * to check returned ref if it includes any specific domain by using + * xe_force_wake_ref_has_domain() function. Caller must call + * xe_force_wake_put() function to decrease incremented refcounts. + * + * Return: opaque reference to woken domains or zero if none of requested + * domains were awake. 
+ */ +unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw, + enum xe_force_wake_domains domains) { struct xe_gt *gt = fw->gt; struct xe_force_wake_domain *domain; - enum xe_force_wake_domains tmp, woken = 0; + unsigned int ref_incr = 0, awake_rqst = 0, awake_failed = 0; + unsigned int tmp, ref_rqst; unsigned long flags; - int ret = 0; + xe_gt_assert(gt, is_power_of_2(domains)); + xe_gt_assert(gt, domains <= XE_FORCEWAKE_ALL); + xe_gt_assert(gt, domains == XE_FORCEWAKE_ALL || fw->initialized_domains & domains); + + ref_rqst = (domains == XE_FORCEWAKE_ALL) ? fw->initialized_domains : domains; spin_lock_irqsave(&fw->lock, flags); - for_each_fw_domain_masked(domain, domains, fw, tmp) { + for_each_fw_domain_masked(domain, ref_rqst, fw, tmp) { if (!domain->ref++) { - woken |= BIT(domain->id); + awake_rqst |= BIT(domain->id); domain_wake(gt, domain); } + ref_incr |= BIT(domain->id); } - for_each_fw_domain_masked(domain, woken, fw, tmp) { - ret |= domain_wake_wait(gt, domain); + for_each_fw_domain_masked(domain, awake_rqst, fw, tmp) { + if (domain_wake_wait(gt, domain) == 0) { + fw->awake_domains |= BIT(domain->id); + } else { + awake_failed |= BIT(domain->id); + --domain->ref; + } } - fw->awake_domains |= woken; + ref_incr &= ~awake_failed; spin_unlock_irqrestore(&fw->lock, flags); - return ret; + xe_gt_WARN(gt, awake_failed, "Forcewake domain%s %#x failed to acknowledge awake request\n", + str_plural(hweight_long(awake_failed)), awake_failed); + + if (domains == XE_FORCEWAKE_ALL && ref_incr == fw->initialized_domains) + ref_incr |= XE_FORCEWAKE_ALL; + + return ref_incr; } -int xe_force_wake_put(struct xe_force_wake *fw, - enum xe_force_wake_domains domains) +/** + * xe_force_wake_put - Decrement the refcount and put domain to sleep if refcount becomes 0 + * @fw: Pointer to the force wake structure + * @fw_ref: return of xe_force_wake_get() + * + * This function reduces the reference counts for domains in fw_ref. 
If + * refcount for any of the specified domain reaches 0, it puts the domain to sleep + * and waits for acknowledgment for domain to sleep within 50 milisec timeout. + * Warns in case of timeout of ack from domain. + */ +void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref) { struct xe_gt *gt = fw->gt; struct xe_force_wake_domain *domain; - enum xe_force_wake_domains tmp, sleep = 0; + unsigned int tmp, sleep = 0; unsigned long flags; - int ret = 0; + int ack_fail = 0; + + /* + * Avoid unnecessary lock and unlock when the function is called + * in error path of individual domains. + */ + if (!fw_ref) + return; + + if (xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) + fw_ref = fw->initialized_domains; spin_lock_irqsave(&fw->lock, flags); - for_each_fw_domain_masked(domain, domains, fw, tmp) { + for_each_fw_domain_masked(domain, fw_ref, fw, tmp) { + xe_gt_assert(gt, domain->ref); + if (!--domain->ref) { sleep |= BIT(domain->id); domain_sleep(gt, domain); } } for_each_fw_domain_masked(domain, sleep, fw, tmp) { - ret |= domain_sleep_wait(gt, domain); + if (domain_sleep_wait(gt, domain) == 0) + fw->awake_domains &= ~BIT(domain->id); + else + ack_fail |= BIT(domain->id); } - fw->awake_domains &= ~sleep; spin_unlock_irqrestore(&fw->lock, flags); - return ret; + xe_gt_WARN(gt, ack_fail, "Forcewake domain%s %#x failed to acknowledge sleep request\n", + str_plural(hweight_long(ack_fail)), ack_fail); } diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index a2577672f4e3..0e3e84bfa51c 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -15,10 +15,9 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw); void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw); -int xe_force_wake_get(struct xe_force_wake *fw, - enum xe_force_wake_domains domains); -int xe_force_wake_put(struct xe_force_wake *fw, - enum xe_force_wake_domains domains); +unsigned 
int __must_check xe_force_wake_get(struct xe_force_wake *fw, + enum xe_force_wake_domains domains); +void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref); static inline int xe_force_wake_ref(struct xe_force_wake *fw, @@ -46,4 +45,20 @@ xe_force_wake_assert_held(struct xe_force_wake *fw, xe_gt_assert(fw->gt, fw->awake_domains & domain); } +/** + * xe_force_wake_ref_has_domain - verifies if the domains are in fw_ref + * @fw_ref : the force_wake reference + * @domain : forcewake domain to verify + * + * This function confirms whether the @fw_ref includes a reference to the + * specified @domain. + * + * Return: true if domain is refcounted. + */ +static inline bool +xe_force_wake_ref_has_domain(unsigned int fw_ref, enum xe_force_wake_domains domain) +{ + return fw_ref & domain; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h index ed0edc2cdf9f..899fbbcb3ea9 100644 --- a/drivers/gpu/drm/xe/xe_force_wake_types.h +++ b/drivers/gpu/drm/xe/xe_force_wake_types.h @@ -48,7 +48,7 @@ enum xe_force_wake_domains { XE_FW_MEDIA_VEBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2), XE_FW_MEDIA_VEBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3), XE_FW_GSC = BIT(XE_FW_DOMAIN_ID_GSC), - XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) - 1 + XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) }; /** @@ -78,7 +78,9 @@ struct xe_force_wake { /** @lock: protects everything force wake struct */ spinlock_t lock; /** @awake_domains: mask of all domains awake */ - enum xe_force_wake_domains awake_domains; + unsigned int awake_domains; + /** @initialized_domains: mask of all initialized domains */ + unsigned int initialized_domains; /** @domains: force wake domains */ struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT]; }; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 65bc41d2c867..558fac8bb6fb 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -246,7 +246,7 @@ int 
xe_ggtt_init_early(struct xe_ggtt *ggtt) else ggtt->pt_ops = &xelp_pt_ops; - ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0); + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), ggtt->size - xe_wopcm_size(xe)); @@ -409,7 +409,7 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) * vs. correct GGTT page. Not particularly a hot code path so blindly * do a mmio read here which results in GuC reading correct GGTT page. */ - xe_mmio_read32(&xe_root_mmio_gt(xe)->mmio, VF_CAP_REG); + xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG); /* Each GT in a tile has its own TLB to cache GGTT lookups */ ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt); diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 783b09bf3681..1eb791ddc375 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -261,19 +261,17 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); struct xe_tile *tile = gt_to_tile(gt); + unsigned int fw_ref; int ret; if (XE_WA(tile->primary_gt, 14018094691)) { - ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); + fw_ref = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* * If the forcewake fails we want to keep going, because the worst * case outcome in failing to apply the WA is that PXP won't work, - * which is not fatal. We still throw a warning so the issue is - * seen if it happens. + * which is not fatal. 
Forcewake get warns implicitly in case of failure */ - xe_gt_WARN_ON(tile->primary_gt, ret); - xe_gt_mcr_multicast_write(tile->primary_gt, EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK, EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT); @@ -282,7 +280,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); if (XE_WA(tile->primary_gt, 14018094691)) - xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(tile->primary_gt), fw_ref); if (ret) return ret; @@ -352,6 +350,7 @@ static void gsc_work(struct work_struct *work) struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); struct xe_gt *gt = gsc_to_gt(gsc); struct xe_device *xe = gt_to_xe(gt); + unsigned int fw_ref; u32 actions; int ret; @@ -361,7 +360,7 @@ static void gsc_work(struct work_struct *work) spin_unlock_irq(&gsc->lock); xe_pm_runtime_get(xe); - xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); if (actions & GSC_ACTION_ER_COMPLETE) { ret = gsc_er_complete(gt); @@ -381,7 +380,7 @@ static void gsc_work(struct work_struct *work) xe_gsc_proxy_request_handler(gsc); out: - xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); } @@ -601,7 +600,7 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p) { struct xe_gt *gt = gsc_to_gt(gsc); struct xe_mmio *mmio = &gt->mmio; - int err; + unsigned int fw_ref; xe_uc_fw_print(&gsc->fw, p); @@ -610,8 +609,8 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p) if (!xe_uc_fw_is_enabled(&gsc->fw)) return; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + if (!fw_ref) return; drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", @@ -622,5 +621,5 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p) xe_mmio_read32(mmio, HECI_FWSTS5(MTL_GSC_HECI1_BASE)), xe_mmio_read32(mmio, 
HECI_FWSTS6(MTL_GSC_HECI1_BASE))); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 6d89c22ae811..fc64b45d324b 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -450,22 +450,21 @@ void xe_gsc_proxy_remove(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); struct xe_device *xe = gt_to_xe(gt); - int err = 0; + unsigned int fw_ref = 0; if (!gsc->proxy.component_added) return; /* disable HECI2 IRQs */ xe_pm_runtime_get(xe); - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + if (!fw_ref) xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n"); /* try do disable irq even if forcewake failed */ gsc_proxy_irq_toggle(gsc, false); - if (!err) - xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); xe_gsc_wait_for_worker_completion(gsc); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 1c79660fb086..d6744be01a68 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -77,7 +77,8 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile) return ERR_PTR(-ENOMEM); gt->tile = tile; - gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); + gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", + WQ_MEM_RECLAIM); err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt); if (err) @@ -97,14 +98,14 @@ void xe_gt_sanitize(struct xe_gt *gt) static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) { + unsigned int fw_ref; u32 reg; - int err; if (!XE_WA(gt, 16023588340)) return; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (WARN_ON(err)) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) return; if (!xe_gt_is_media_type(gt)) { @@ -114,13 +115,13 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt 
*gt) } xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) { + unsigned int fw_ref; u32 reg; - int err; if (!XE_WA(gt, 16023588340)) return; @@ -128,15 +129,15 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) if (xe_gt_is_media_type(gt)) return; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (WARN_ON(err)) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) return; reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg &= ~CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } /** @@ -402,11 +403,14 @@ static void dump_pat_on_error(struct xe_gt *gt) static int gt_fw_domain_init(struct xe_gt *gt) { + unsigned int fw_ref; int err, i; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) { + err = -ETIMEDOUT; goto err_hw_fence_irq; + } if (!xe_gt_is_media_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); @@ -441,14 +445,12 @@ static int gt_fw_domain_init(struct xe_gt *gt) */ gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID); - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - XE_WARN_ON(err); - + xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; err_force_wake: dump_pat_on_error(gt); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); err_hw_fence_irq: for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(&gt->fence_irq[i]); @@ -458,11 +460,14 @@ err_hw_fence_irq: static int all_fw_domain_init(struct xe_gt *gt) { + unsigned int fw_ref; int err, i; - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_hw_fence_irq; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if 
(!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + err = -ETIMEDOUT; + goto err_force_wake; + } xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(&gt->reg_sr, gt); @@ -526,14 +531,12 @@ static int all_fw_domain_init(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt))) xe_gt_sriov_pf_init_hw(gt); - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; err_force_wake: - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); -err_hw_fence_irq: + xe_force_wake_put(gt_to_fw(gt), fw_ref); for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(&gt->fence_irq[i]); @@ -546,11 +549,12 @@ err_hw_fence_irq: */ int xe_gt_init_hwconfig(struct xe_gt *gt) { + unsigned int fw_ref; int err; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto out; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; xe_gt_mcr_init_early(gt); xe_pat_init(gt); @@ -568,8 +572,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_enable_host_l2_vram(gt); out_fw: - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -out: + xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } @@ -764,6 +767,7 @@ static int do_gt_restart(struct xe_gt *gt) static int gt_reset(struct xe_gt *gt) { + unsigned int fw_ref; int err; if (xe_device_wedged(gt_to_xe(gt))) @@ -784,9 +788,11 @@ static int gt_reset(struct xe_gt *gt) xe_gt_sanitize(gt); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_msg; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + err = -ETIMEDOUT; + goto err_out; + } xe_uc_gucrc_disable(&gt->uc); xe_uc_stop_prepare(&gt->uc); @@ -804,8 +810,7 @@ static int gt_reset(struct xe_gt *gt) if (err) goto err_out; - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(gt_to_xe(gt)); xe_gt_info(gt, 
"reset done\n"); @@ -813,8 +818,7 @@ static int gt_reset(struct xe_gt *gt) return 0; err_out: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -err_msg: + xe_force_wake_put(gt_to_fw(gt), fw_ref); XE_WARN_ON(xe_uc_start(&gt->uc)); err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); @@ -834,7 +838,7 @@ static void gt_reset_worker(struct work_struct *w) void xe_gt_reset_async(struct xe_gt *gt) { - xe_gt_info(gt, "trying reset\n"); + xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0)); /* Don't do a reset while one is already in flight */ if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc)) @@ -846,22 +850,25 @@ void xe_gt_reset_async(struct xe_gt *gt) void xe_gt_suspend_prepare(struct xe_gt *gt) { - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + unsigned int fw_ref; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); xe_uc_stop_prepare(&gt->uc); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } int xe_gt_suspend(struct xe_gt *gt) { + unsigned int fw_ref; int err; xe_gt_dbg(gt, "suspending\n"); xe_gt_sanitize(gt); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) goto err_msg; err = xe_uc_suspend(&gt->uc); @@ -872,14 +879,15 @@ int xe_gt_suspend(struct xe_gt *gt) xe_gt_disable_host_l2_vram(gt); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_dbg(gt, "suspended\n"); return 0; -err_force_wake: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: + err = -ETIMEDOUT; +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); return err; @@ -887,9 +895,11 @@ err_msg: void xe_gt_shutdown(struct xe_gt *gt) { - xe_force_wake_get(gt_to_fw(gt), 
XE_FORCEWAKE_ALL); + unsigned int fw_ref; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); do_gt_reset(gt); - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } /** @@ -914,11 +924,12 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) int xe_gt_resume(struct xe_gt *gt) { + unsigned int fw_ref; int err; xe_gt_dbg(gt, "resuming\n"); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) goto err_msg; err = do_gt_restart(gt); @@ -927,14 +938,15 @@ int xe_gt_resume(struct xe_gt *gt) xe_gt_idle_enable_pg(gt); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_dbg(gt, "resumed\n"); return 0; -err_force_wake: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: + err = -ETIMEDOUT; +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); return err; diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index cbc43973ff7e..3e8c351a0eab 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -90,22 +90,21 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; - int err; + unsigned int fw_ref; xe_pm_runtime_get(xe); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { xe_pm_runtime_put(xe); - return err; + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return -ETIMEDOUT; } for_each_hw_engine(hwe, gt, id) xe_hw_engine_print(hwe, p); - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(gt), fw_ref); 
xe_pm_runtime_put(xe); - if (err) - return err; return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 746812aee8ff..fd80afeef56a 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -101,6 +101,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) struct xe_gt_idle *gtidle = &gt->gtidle; struct xe_mmio *mmio = &gt->mmio; u32 vcs_mask, vecs_mask; + unsigned int fw_ref; int i, j; if (IS_SRIOV_VF(xe)) @@ -127,7 +128,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) VDN_MFXVDENC_POWERGATE_ENABLE(j)); } - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (xe->info.skip_guc_pc) { /* * GuC sets the hysteresis value when GuC PC is enabled @@ -138,12 +139,13 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) } xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } void xe_gt_idle_disable_pg(struct xe_gt *gt) { struct xe_gt_idle *gtidle = &gt->gtidle; + unsigned int fw_ref; if (IS_SRIOV_VF(gt_to_xe(gt))) return; @@ -151,9 +153,9 @@ void xe_gt_idle_disable_pg(struct xe_gt *gt) xe_device_assert_mem_access(gt_to_xe(gt)); gtidle->powergate_enable = 0; - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); xe_mmio_write32(&gt->mmio, POWERGATE_ENABLE, gtidle->powergate_enable); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } /** @@ -172,7 +174,8 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) enum xe_gt_idle_state state; u32 pg_enabled, pg_status = 0; u32 vcs_mask, vecs_mask; - int err, n; + unsigned int fw_ref; + int n; /* * Media Slices * @@ -208,14 +211,14 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) /* Do not wake the GT to read powergating status */ if (state != GT_IDLE_C6) { - err =
xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - return err; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; pg_enabled = xe_mmio_read32(&gt->mmio, POWERGATE_ENABLE); pg_status = xe_mmio_read32(&gt->mmio, POWERGATE_DOMAIN_STATUS); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } if (gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK) { @@ -298,13 +301,14 @@ static void gt_idle_fini(void *arg) { struct kobject *kobj = arg; struct xe_gt *gt = kobj_to_gt(kobj->parent); + unsigned int fw_ref; xe_gt_idle_disable_pg(gt); if (gt_to_xe(gt)->info.skip_guc_pc) { - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); xe_gt_idle_disable_c6(gt); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } sysfs_remove_files(kobj, gt_idle_attrs); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index a863e50b756e..062a0c2fd2cd 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -2377,6 +2377,41 @@ int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p) } /** + * xe_gt_sriov_pf_config_print_lmem - Print LMEM configurations. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Print LMEM allocations across all VFs. + * VFs without LMEM allocation are skipped. + * + * This function can only be called on PF. + * Return: 0 on success or a negative error code on failure.
+ */ +int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); + const struct xe_gt_sriov_config *config; + char buf[10]; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 1; n <= total_vfs; n++) { + config = &gt->sriov.pf.vfs[n].config; + if (!config->lmem_obj) + continue; + + string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2, + buf, sizeof(buf)); + drm_printf(p, "VF%u:\t%zu\t(%s)\n", + n, config->lmem_obj->size, buf); + } + + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return 0; +} + +/** * xe_gt_sriov_pf_config_print_available_ggtt - Print available GGTT ranges. * @gt: the &xe_gt * @p: the &drm_printer diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index b74ec38baa18..0c55aa40a1a7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -65,6 +65,7 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt); int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 91fc42e386d8..05df4ab3514b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -82,6 +82,11 @@ static const struct drm_info_list pf_info[] = { .data = xe_gt_sriov_pf_config_print_dbs, }, { + "lmem_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_lmem, + }, + { "runtime_registers", .show =
xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_service_print_runtime, diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index a530a933eedc..773de1f08db9 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -268,6 +268,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + unsigned int fw_ref; if (xe_guc_ct_enabled(&gt->uc.guc.ct) && gt->uc.guc.submission_state.enabled) { @@ -286,7 +287,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) if (IS_SRIOV_VF(xe)) return 0; - xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, PVC_GUC_TLB_INV_DESC1_INVALIDATE); @@ -296,7 +297,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_mmio_write32(mmio, GUC_TLB_INV_CR, GUC_TLB_INV_CR_INVALIDATE); } - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } return 0; diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 8570b1218287..7f704346a8f4 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -70,7 +70,7 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc) static u32 guc_ctl_feature_flags(struct xe_guc *guc) { - u32 flags = 0; + u32 flags = GUC_CTL_ENABLE_LITE_RESTORE; if (!guc_to_xe(guc)->info.skip_guc_pc) flags |= GUC_CTL_ENABLE_SLPC; @@ -248,10 +248,11 @@ static void guc_fini_hw(void *arg) { struct xe_guc *guc = arg; struct xe_gt *gt = guc_to_gt(guc); + unsigned int fw_ref; - xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); xe_uc_fini_hw(&guc_to_gt(guc)->uc); - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); +
xe_force_wake_put(gt_to_fw(gt), fw_ref); } /** @@ -1155,14 +1156,14 @@ int xe_guc_start(struct xe_guc *guc) void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) { struct xe_gt *gt = guc_to_gt(guc); + unsigned int fw_ref; u32 status; - int err; int i; xe_uc_fw_print(&guc->fw, p); - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) return; status = xe_mmio_read32(&gt->mmio, GUC_STATUS); @@ -1183,9 +1184,12 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i))); } - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + + drm_puts(p, "\n"); + xe_guc_ct_print(&guc->ct, p, false); - xe_guc_ct_print(&guc->ct, p); + drm_puts(p, "\n"); xe_guc_submit_print(guc, p); } diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 25292997c7f3..4e746ae98888 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -359,6 +359,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads) GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE, &offset, &remain); + if (XE_WA(gt, 14022866841)) + guc_waklv_enable_simple(ads, + GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO, + &offset, &remain); + /* * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now, * the default value for this register is determined to be 0xC40.
This could change in the diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 41262bda20ed..8b6cb786a2aa 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1590,6 +1590,9 @@ xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot u16 guc_id = 0; u32 lrca = 0; + if (IS_SRIOV_VF(xe)) + return; + new = guc_capture_get_prealloc_node(guc); if (!new) return; @@ -1820,7 +1823,7 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) return NULL; xe = gt_to_xe(q->gt); - if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe)) + if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe) || IS_SRIOV_VF(xe)) return NULL; ss = &xe->devcoredump.snapshot; @@ -1876,6 +1879,9 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job) enum xe_hw_engine_id id; u32 adj_logical_mask = q->logical_mask; + if (IS_SRIOV_VF(xe)) + return; + for_each_hw_engine(hwe, q->gt, id) { if (hwe->class != q->hwe->class || !(BIT(hwe->logical_instance) & adj_logical_mask)) { diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index c260d8840990..4870af1c5a90 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -213,7 +213,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); - ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0); + ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", WQ_MEM_RECLAIM); if (!ct->g2h_wq) return -ENOMEM; @@ -1607,7 +1607,8 @@ static void g2h_worker_func(struct work_struct *w) receive_g2h(ct); } -struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic) +static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic, + bool want_ctb) { struct xe_guc_ct_snapshot *snapshot; @@ -1615,7 +1616,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool a if (!snapshot) return NULL; - if (ct->bo) 
{ + if (ct->bo && want_ctb) { snapshot->ctb_size = ct->bo->size; snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL); } @@ -1645,25 +1646,13 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot, drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status); } -/** - * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state. - * @ct: GuC CT object. - * @atomic: Boolean to indicate if this is called from atomic context like - * reset or CTB handler or from some regular path like debugfs. - * - * This can be printed out in a later stage like during dev_coredump - * analysis. - * - * Returns: a GuC CT snapshot object that must be freed by the caller - * by using `xe_guc_ct_snapshot_free`. - */ -struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, - bool atomic) +static struct xe_guc_ct_snapshot *guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic, + bool want_ctb) { struct xe_device *xe = ct_to_xe(ct); struct xe_guc_ct_snapshot *snapshot; - snapshot = xe_guc_ct_snapshot_alloc(ct, atomic); + snapshot = guc_ct_snapshot_alloc(ct, atomic, want_ctb); if (!snapshot) { xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n"); return NULL; @@ -1683,6 +1672,21 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, } /** + * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state. + * @ct: GuC CT object. + * + * This can be printed out in a later stage like during dev_coredump + * analysis. This is safe to be called during atomic context. + * + * Returns: a GuC CT snapshot object that must be freed by the caller + * by using `xe_guc_ct_snapshot_free`. + */ +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct) +{ + return guc_ct_snapshot_capture(ct, true, true); +} + +/** * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot. * @snapshot: GuC CT snapshot object. * @p: drm_printer where it will be printed out. 
@@ -1704,12 +1708,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, drm_printf(p, "\tg2h outstanding: %d\n", snapshot->g2h_outstanding); - if (snapshot->ctb) { + if (snapshot->ctb) xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size); - } else { - drm_printf(p, "CTB snapshot missing!\n"); - return; - } } else { drm_puts(p, "CT disabled\n"); } @@ -1735,14 +1735,16 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot) * xe_guc_ct_print - GuC CT Print. * @ct: GuC CT. * @p: drm_printer where it will be printed out. + * @want_ctb: Should the full CTB content be dumped (vs just the headers) * - * This function quickly capture a snapshot and immediately print it out. + * This function will quickly capture a snapshot of the CT state + * and immediately print it out. */ -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb) { struct xe_guc_ct_snapshot *snapshot; - snapshot = xe_guc_ct_snapshot_capture(ct, false); + snapshot = guc_ct_snapshot_capture(ct, false, want_ctb); xe_guc_ct_snapshot_print(snapshot, p); xe_guc_ct_snapshot_free(snapshot); } @@ -1776,7 +1778,7 @@ static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reaso return; snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true); - snapshot_ct = xe_guc_ct_snapshot_capture((ct), true); + snapshot_ct = xe_guc_ct_snapshot_capture((ct)); spin_lock_irqsave(&ct->dead.lock, flags); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 338f0b75d29f..82c4ae458dda 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -17,11 +17,10 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct); void xe_guc_ct_stop(struct xe_guc_ct *ct); void xe_guc_ct_fast_path(struct xe_guc_ct *ct); -struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic); -struct xe_guc_ct_snapshot 
*xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic); +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct); void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p); void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p); +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb); static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) { diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index d3822cbea273..995b306aced7 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -47,9 +47,23 @@ static int guc_log(struct seq_file *m, void *data) return 0; } +static int guc_ctb(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_pm_runtime_get(xe); + xe_guc_ct_print(&guc->ct, &p, true); + xe_pm_runtime_put(xe); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"guc_info", guc_info, 0}, {"guc_log", guc_log, 0}, + {"guc_ctb", guc_ctb, 0}, }; void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 01e3ab590c3a..08ffe59f22fa 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -105,6 +105,7 @@ struct guc_update_exec_queue_policy { #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2) +#define GUC_CTL_ENABLE_LITE_RESTORE BIT(4) #define GUC_CTL_DISABLE_SCHEDULER BIT(14) #define GUC_CTL_DEBUG 3 diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index cc70f448d879..df4cfb698cdb 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -145,8 +145,9 @@ struct xe_guc_log_snapshot 
*xe_guc_log_snapshot_capture(struct xe_guc_log *log, struct xe_device *xe = log_to_xe(log); struct xe_guc *guc = log_to_guc(log); struct xe_gt *gt = log_to_gt(log); + unsigned int fw_ref; size_t remain; - int i, err; + int i; if (!log->bo) { xe_gt_err(gt, "GuC log buffer not allocated\n"); @@ -168,12 +169,12 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, remain -= size; } - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) { - snapshot->stamp = ~0; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) { + snapshot->stamp = ~0ULL; } else { - snapshot->stamp = xe_mmio_read32(&gt->mmio, GUC_PMTIMESTAMP); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + snapshot->stamp = xe_mmio_read64_2x32(&gt->mmio, GUC_PMTIMESTAMP_LO); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } snapshot->ktime = ktime_get_boottime_ns(); snapshot->level = log->level; @@ -204,7 +205,7 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ snapshot->ver_found.major, snapshot->ver_found.minor, snapshot->ver_found.patch, snapshot->ver_want.major, snapshot->ver_want.minor, snapshot->ver_want.patch); drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", snapshot->ktime, snapshot->ktime); - drm_printf(p, "GuC timestamp: 0x%08X [%u]\n", snapshot->stamp, snapshot->stamp); + drm_printf(p, "GuC timestamp: 0x%08llX [%llu]\n", snapshot->stamp, snapshot->stamp); drm_printf(p, "Log level: %u\n", snapshot->level); remain = snapshot->size; diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h index 4d57f8322efc..b3d5c72ac752 100644 --- a/drivers/gpu/drm/xe/xe_guc_log_types.h +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h @@ -27,7 +27,7 @@ struct xe_guc_log_snapshot { /** @ktime: Kernel time the snapshot was taken */ u64 ktime; /** @stamp: GuC timestamp at which the snapshot was taken */ - u32 stamp; + u64 stamp; /** @level: GuC log verbosity level */ u32 level; /** @ver_found: GuC firmware version */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 2b654f820ae2..e8b9faeaef64 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -415,22 +415,24 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) { struct xe_gt *gt = pc_to_gt(pc); - int ret; + unsigned int fw_ref; /* * GuC SLPC plays with cur freq request when GuCRC is enabled * Block RC6 for a more reliable read. */ - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (ret) - return ret; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return -ETIMEDOUT; + } *freq = xe_mmio_read32(&gt->mmio, RPNSWREQ); *freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq); *freq = decode_freq(*freq); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -480,6 +482,7 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { struct xe_gt *gt = pc_to_gt(pc); + unsigned int fw_ref; int ret; mutex_lock(&pc->freq_lock); @@ -493,9 +496,11 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) * GuC SLPC plays with min freq request when GuCRC is enabled * Block RC6 for a more reliable read.
*/ - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (ret) - goto out; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + ret = -ETIMEDOUT; + goto fw; + } ret = pc_action_query_task_state(pc); if (ret) @@ -504,7 +509,7 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) *freq = pc_get_min_freq(pc); fw: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); out: mutex_unlock(&pc->freq_lock); return ret; @@ -855,6 +860,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) { struct xe_device *xe = pc_to_xe(pc); struct xe_gt *gt = pc_to_gt(pc); + unsigned int fw_ref; int ret = 0; if (xe->info.skip_guc_pc) @@ -864,13 +870,15 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (ret) return ret; - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (ret) - return ret; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return -ETIMEDOUT; + } xe_gt_idle_disable_c6(gt); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -956,13 +964,16 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) struct xe_device *xe = pc_to_xe(pc); struct xe_gt *gt = pc_to_gt(pc); u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + unsigned int fw_ref; int ret; xe_gt_assert(gt, xe_device_uc_enabled(xe)); - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (ret) - return ret; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return -ETIMEDOUT; + } if (xe->info.skip_guc_pc) { if (xe->info.platform != XE_PVC) @@ -1005,7 +1016,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) ret = pc_action_setup_gucrc(pc, 
GUCRC_FIRMWARE_CONTROL); out: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return ret; } @@ -1037,18 +1048,19 @@ static void xe_guc_pc_fini_hw(void *arg) { struct xe_guc_pc *pc = arg; struct xe_device *xe = pc_to_xe(pc); + unsigned int fw_ref; if (xe_device_wedged(xe)) return; - XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); + fw_ref = xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); xe_guc_pc_gucrc_disable(pc); XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq)); - xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref); } /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 5f8c7a45b028..7afcc243037c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -717,6 +717,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); + struct dma_fence *fence = NULL; bool lr = xe_exec_queue_is_lr(q); xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || @@ -734,12 +735,12 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) if (lr) { xe_sched_job_set_error(job, -EOPNOTSUPP); - return NULL; - } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { - return job->fence; + dma_fence_put(job->fence); /* Drop ref from xe_sched_job_arm */ } else { - return dma_fence_get(job->fence); + fence = job->fence; } + + return fence; } static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) @@ -1035,6 +1036,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_guc *guc = exec_queue_to_guc(q); const char *process_name = "no process"; struct xe_device *xe = guc_to_xe(guc); + 
unsigned int fw_ref; int err = -ETIME; pid_t pid = -1; int i = 0; @@ -1068,12 +1070,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (!exec_queue_killed(q) && !xe->devcoredump.captured && !xe_guc_capture_get_matching_and_lock(job)) { /* take force wake before engine register manual capture */ - if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL)) + fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); xe_engine_snapshot_capture_for_job(job); - xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(q->gt), fw_ref); } /* diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 77c5830309cf..6a846e4cb221 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -296,19 +296,19 @@ void xe_huc_sanitize(struct xe_huc *huc) void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) { struct xe_gt *gt = huc_to_gt(huc); - int err; + unsigned int fw_ref; xe_uc_fw_print(&huc->fw, p); if (!xe_uc_fw_is_enabled(&huc->fw)) return; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) return; drm_printf(p, "\nHuC status: 0x%08x\n", xe_mmio_read32(&gt->mmio, HUC_KERNEL_LOAD_INFO)); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 231d0e86ed83..54d199b5cfb2 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -774,25 +774,21 @@ void xe_mocs_init(struct xe_gt *gt) void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) { - struct xe_mocs_info table; - unsigned int flags; - u32 ret; struct xe_device *xe = gt_to_xe(gt); + struct xe_mocs_info table; + unsigned int fw_ref, flags; flags = get_mocs_settings(xe, &table);
xe_pm_runtime_get_noresume(xe); - ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - - if (ret) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) goto err_fw; table.ops->dump(&table, flags, gt, p); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - + xe_force_wake_put(gt_to_fw(gt), fw_ref); err_fw: - xe_assert(xe, !ret); xe_pm_runtime_put(xe); } diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index bbe03db0c401..fd2ffe8df156 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -36,11 +36,22 @@ #include "xe_pm.h" #include "xe_sched_job.h" #include "xe_sriov.h" +#include "xe_sync.h" #define DEFAULT_POLL_FREQUENCY_HZ 200 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX +enum xe_oa_submit_deps { + XE_OA_SUBMIT_NO_DEPS, + XE_OA_SUBMIT_ADD_DEPS, +}; + +enum xe_oa_user_extn_from { + XE_OA_USER_EXTN_FROM_OPEN, + XE_OA_USER_EXTN_FROM_CONFIG, +}; + struct xe_oa_reg { struct xe_reg addr; u32 value; @@ -70,6 +81,7 @@ struct flex { }; struct xe_oa_open_param { + struct xe_file *xef; u32 oa_unit_id; bool sample; u32 metric_set; @@ -81,6 +93,9 @@ struct xe_oa_open_param { struct xe_exec_queue *exec_q; struct xe_hw_engine *hwe; bool no_preempt; + struct drm_xe_sync __user *syncs_user; + int num_syncs; + struct xe_sync_entry *syncs; }; struct xe_oa_config_bo { @@ -90,6 +105,17 @@ struct xe_oa_config_bo { struct xe_bb *bb; }; +struct xe_oa_fence { + /* @base: dma fence base */ + struct dma_fence base; + /* @lock: lock for the fence */ + spinlock_t lock; + /* @work: work to signal @base */ + struct delayed_work work; + /* @cb: callback to schedule @work */ + struct dma_fence_cb cb; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -162,10 +188,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se return oa_config; } -static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo) +static 
void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence) { xe_oa_config_put(oa_bo->oa_config); - xe_bb_free(oa_bo->bb, NULL); + xe_bb_free(oa_bo->bb, last_fence); kfree(oa_bo); } @@ -570,11 +596,11 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) return ret; } -static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) +static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, + struct xe_bb *bb) { struct xe_sched_job *job; struct dma_fence *fence; - long timeout; int err = 0; /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ @@ -584,18 +610,24 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) goto exit; } + if (deps == XE_OA_SUBMIT_ADD_DEPS) { + for (int i = 0; i < stream->num_syncs && !err; i++) + err = xe_sync_entry_add_deps(&stream->syncs[i], job); + if (err) { + drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err); + goto err_put_job; + } + } + xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); - if (timeout < 0) - err = timeout; - else if (!timeout) - err = -ETIME; + return fence; +err_put_job: + xe_sched_job_put(job); exit: - return err; + return ERR_PTR(err); } static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) @@ -639,7 +671,8 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream) xe_oa_config_put(stream->oa_config); llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) - free_oa_config_bo(oa_bo); + free_oa_config_bo(oa_bo, stream->last_fence); + dma_fence_put(stream->last_fence); } static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, @@ -659,6 +692,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, static int xe_oa_modify_ctx_image(struct 
xe_oa_stream *stream, struct xe_lrc *lrc, const struct flex *flex, u32 count) { + struct dma_fence *fence; struct xe_bb *bb; int err; @@ -670,7 +704,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr xe_oa_store_flex(stream, lrc, bb, flex, count); - err = xe_oa_submit_bb(stream, bb); + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto free_bb; + } + xe_bb_free(bb, fence); + dma_fence_put(fence); + + return 0; +free_bb: xe_bb_free(bb, NULL); exit: return err; @@ -678,6 +721,7 @@ exit: static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) { + struct dma_fence *fence; struct xe_bb *bb; int err; @@ -689,7 +733,16 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re write_cs_mi_lri(bb, reg_lri, 1); - err = xe_oa_submit_bb(stream, bb); + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto free_bb; + } + xe_bb_free(bb, fence); + dma_fence_put(fence); + + return 0; +free_bb: xe_bb_free(bb, NULL); exit: return err; @@ -837,7 +890,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_oa_free_oa_buffer(stream); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); xe_pm_runtime_put(stream->oa->xe); /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ @@ -845,6 +898,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); xe_oa_free_configs(stream); + xe_file_put(stream->xef); } static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) @@ -915,11 +969,62 @@ out: return oa_bo; } +static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence) +{ + dma_fence_put(stream->last_fence); + stream->last_fence = dma_fence_get(fence); +} + +static void 
xe_oa_fence_work_fn(struct work_struct *w) +{ + struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work); + + /* Signal fence to indicate new OA configuration is active */ + dma_fence_signal(&ofence->base); + dma_fence_put(&ofence->base); +} + +static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + /* Additional empirical delay needed for NOA programming after registers are written */ +#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 + + struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb); + + INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn); + queue_delayed_work(system_unbound_wq, &ofence->work, + usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US)); + dma_fence_put(fence); +} + +static const char *xe_oa_get_driver_name(struct dma_fence *fence) +{ + return "xe_oa"; +} + +static const char *xe_oa_get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static const struct dma_fence_ops xe_oa_fence_ops = { + .get_driver_name = xe_oa_get_driver_name, + .get_timeline_name = xe_oa_get_timeline_name, +}; + static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config) { #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 struct xe_oa_config_bo *oa_bo; - int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; + struct xe_oa_fence *ofence; + int i, err, num_signal = 0; + struct dma_fence *fence; + + ofence = kzalloc(sizeof(*ofence), GFP_KERNEL); + if (!ofence) { + err = -ENOMEM; + goto exit; + } oa_bo = xe_oa_alloc_config_buffer(stream, config); if (IS_ERR(oa_bo)) { @@ -927,11 +1032,50 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config goto exit; } - err = xe_oa_submit_bb(stream, oa_bo->bb); + /* Emit OA configuration batch */ + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto exit; + } - /* Additional empirical delay needed for NOA programming after registers are written */ - 
usleep_range(us, 2 * us); + /* Point of no return: initialize and set fence to signal */ + spin_lock_init(&ofence->lock); + dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0); + + for (i = 0; i < stream->num_syncs; i++) { + if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL) + num_signal++; + xe_sync_entry_signal(&stream->syncs[i], &ofence->base); + } + + /* Additional dma_fence_get in case we dma_fence_wait */ + if (!num_signal) + dma_fence_get(&ofence->base); + + /* Update last fence too before adding callback */ + xe_oa_update_last_fence(stream, fence); + + /* Add job fence callback to schedule work to signal ofence->base */ + err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb); + xe_gt_assert(stream->gt, !err || err == -ENOENT); + if (err == -ENOENT) + xe_oa_config_cb(fence, &ofence->cb); + + /* If nothing needs to be signaled we wait synchronously */ + if (!num_signal) { + dma_fence_wait(&ofence->base, false); + dma_fence_put(&ofence->base); + } + + /* Done with syncs */ + for (i = 0; i < stream->num_syncs; i++) + xe_sync_entry_cleanup(&stream->syncs[i]); + kfree(stream->syncs); + + return 0; exit: + kfree(ofence); return err; } @@ -1003,6 +1147,262 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) return xe_oa_emit_oa_config(stream, stream->oa_config); } +static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) +{ + u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); + u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); + u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); + u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); + int idx; + + for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { + const struct xe_oa_format *f = &oa->oa_formats[idx]; + + if (counter_size == f->counter_size && bc_report == f->bc_report && + type == f->type && counter_sel == f->counter_select) { + *name = idx; + return 0; + } + } + + return 
-EINVAL; +} + +static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (value >= oa->oa_unit_ids) { + drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); + return -EINVAL; + } + param->oa_unit_id = value; + return 0; +} + +static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->sample = value; + return 0; +} + +static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->metric_set = value; + return 0; +} + +static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + int ret = decode_oa_format(oa, value, ¶m->oa_format); + + if (ret) { + drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); + return ret; + } + return 0; +} + +static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ +#define OA_EXPONENT_MAX 31 + + if (value > OA_EXPONENT_MAX) { + drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); + return -EINVAL; + } + param->period_exponent = value; + return 0; +} + +static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->disabled = value; + return 0; +} + +static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->exec_queue_id = value; + return 0; +} + +static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->engine_instance = value; + return 0; +} + +static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->no_preempt = value; + return 0; +} + +static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->num_syncs = value; + return 0; +} + +static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 
value, + struct xe_oa_open_param *param) +{ + param->syncs_user = u64_to_user_ptr(value); + return 0; +} + +static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + return -EINVAL; +} + +typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param); +static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = { + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, + [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, + [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, +}; + +static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, + [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, +}; + +static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, + u64 
extension, struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + BUILD_BUG_ON(ARRAY_SIZE(xe_oa_set_property_funcs_open) != + ARRAY_SIZE(xe_oa_set_property_funcs_config)); + + if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) || + XE_IOCTL_DBG(oa->xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open)); + + if (from == XE_OA_USER_EXTN_FROM_CONFIG) + return xe_oa_set_property_funcs_config[idx](oa, ext.value, param); + else + return xe_oa_set_property_funcs_open[idx](oa, ext.value, param); +} + +typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, enum xe_oa_user_extn_from from, + u64 extension, struct xe_oa_open_param *param); +static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { + [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16 +static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from from, u64 extension, + int ext_number, struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.pad) || + XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); + err = xe_oa_user_extension_funcs[idx](oa, from, extension, param); + if (XE_IOCTL_DBG(oa->xe, err)) + return err; + + if (ext.next_extension) + return xe_oa_user_extensions(oa, from, ext.next_extension, ++ext_number, 
param); + + return 0; +} + +static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) +{ + int ret, num_syncs, num_ufence = 0; + + if (param->num_syncs && !param->syncs_user) { + drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n"); + ret = -EINVAL; + goto exit; + } + + if (param->num_syncs) { + param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL); + if (!param->syncs) { + ret = -ENOMEM; + goto exit; + } + } + + for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { + ret = xe_sync_entry_parse(oa->xe, param->xef, ¶m->syncs[num_syncs], + ¶m->syncs_user[num_syncs], 0); + if (ret) + goto err_syncs; + + if (xe_sync_is_ufence(¶m->syncs[num_syncs])) + num_ufence++; + } + + if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) { + ret = -EINVAL; + goto err_syncs; + } + + return 0; + +err_syncs: + while (num_syncs--) + xe_sync_entry_cleanup(¶m->syncs[num_syncs]); + kfree(param->syncs); +exit: + return ret; +} + static void xe_oa_stream_enable(struct xe_oa_stream *stream) { stream->pollin = false; @@ -1096,36 +1496,38 @@ static int xe_oa_disable_locked(struct xe_oa_stream *stream) static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) { - struct drm_xe_ext_set_property ext; + struct xe_oa_open_param param = {}; long ret = stream->oa_config->id; struct xe_oa_config *config; int err; - err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); - if (XE_IOCTL_DBG(stream->oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || - XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || - XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || - XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) - return -EINVAL; + err = xe_oa_user_extensions(stream->oa, XE_OA_USER_EXTN_FROM_CONFIG, arg, 0, ¶m); + if (err) + return err; - config = xe_oa_get_oa_config(stream->oa, ext.value); + config = xe_oa_get_oa_config(stream->oa, 
param.metric_set); if (!config) return -ENODEV; - if (config != stream->oa_config) { - err = xe_oa_emit_oa_config(stream, config); - if (!err) - config = xchg(&stream->oa_config, config); - else - ret = err; + param.xef = stream->xef; + err = xe_oa_parse_syncs(stream->oa, ¶m); + if (err) + goto err_config_put; + + stream->num_syncs = param.num_syncs; + stream->syncs = param.syncs; + + err = xe_oa_emit_oa_config(stream, config); + if (!err) { + config = xchg(&stream->oa_config, config); + drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n", + stream->oa_config->uuid); } +err_config_put: xe_oa_config_put(config); - return ret; + return err ?: ret; } static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) @@ -1353,6 +1755,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, { struct xe_oa_unit *u = param->hwe->oa_unit; struct xe_gt *gt = param->hwe->gt; + unsigned int fw_ref; int ret; stream->exec_q = param->exec_q; @@ -1366,6 +1769,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; + stream->xef = xe_file_get(param->xef); + stream->num_syncs = param->num_syncs; + stream->syncs = param->syncs; + /* * For Xe2+, when overrun mode is enabled, there are no partial reports at the end * of buffer, making the OA buffer effectively a non-power-of-2 size circular @@ -1413,7 +1820,11 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, /* Take runtime pm ref and forcewake to disable RC6 */ xe_pm_runtime_get(stream->oa->xe); - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + ret = -ETIMEDOUT; + goto err_fw_put; + } ret = xe_oa_alloc_oa_buffer(stream); if (ret) @@ -1455,13 +1866,14 @@ err_put_k_exec_q: err_free_oa_buf: xe_oa_free_oa_buffer(stream); err_fw_put: - 
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(stream->oa->xe); if (stream->override_gucrc) xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); err_free_configs: xe_oa_free_configs(stream); exit: + xe_file_put(stream->xef); return ret; } @@ -1571,27 +1983,6 @@ static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) } } -static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) -{ - u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); - u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); - u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); - u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); - int idx; - - for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { - const struct xe_oa_format *f = &oa->oa_formats[idx]; - - if (counter_size == f->counter_size && bc_report == f->bc_report && - type == f->type && counter_sel == f->counter_select) { - *name = idx; - return 0; - } - } - - return -EINVAL; -} - /** * xe_oa_unit_id - Return OA unit ID for a hardware engine * @hwe: @xe_hw_engine @@ -1638,155 +2029,6 @@ out: return ret; } -static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - if (value >= oa->oa_unit_ids) { - drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); - return -EINVAL; - } - param->oa_unit_id = value; - return 0; -} - -static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->sample = value; - return 0; -} - -static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->metric_set = value; - return 0; -} - -static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - int ret = decode_oa_format(oa, value, ¶m->oa_format); - - if (ret) { - 
drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); - return ret; - } - return 0; -} - -static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ -#define OA_EXPONENT_MAX 31 - - if (value > OA_EXPONENT_MAX) { - drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); - return -EINVAL; - } - param->period_exponent = value; - return 0; -} - -static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->disabled = value; - return 0; -} - -static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->exec_queue_id = value; - return 0; -} - -static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->engine_instance = value; - return 0; -} - -static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->no_preempt = value; - return 0; -} - -typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param); -static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { - [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, - [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, - [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, - [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, - [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, - [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, - [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, - [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, - [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, -}; - -static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, - struct xe_oa_open_param *param) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_set_property ext; - 
int err; - u32 idx; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) || - XE_IOCTL_DBG(oa->xe, ext.pad)) - return -EINVAL; - - idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); - return xe_oa_set_property_funcs[idx](oa, ext.value, param); -} - -typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, - struct xe_oa_open_param *param); -static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { - [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, -}; - -#define MAX_USER_EXTENSIONS 16 -static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, - struct xe_oa_open_param *param) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_user_extension ext; - int err; - u32 idx; - - if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) - return -E2BIG; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, ext.pad) || - XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) - return -EINVAL; - - idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); - err = xe_oa_user_extension_funcs[idx](oa, extension, param); - if (XE_IOCTL_DBG(oa->xe, err)) - return err; - - if (ext.next_extension) - return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param); - - return 0; -} - /** * xe_oa_stream_open_ioctl - Opens an OA stream * @dev: @drm_device @@ -1812,7 +2054,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f return -ENODEV; } - ret = xe_oa_user_extensions(oa, data, 0, ¶m); + param.xef = xef; + ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, ¶m); if (ret) return ret; @@ -1880,11 +2123,24 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, 
u64 data, struct drm_file *f drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); } + ret = xe_oa_parse_syncs(oa, ¶m); + if (ret) + goto err_exec_q; + mutex_lock(¶m.hwe->gt->oa.gt_lock); ret = xe_oa_stream_open_ioctl_locked(oa, ¶m); mutex_unlock(¶m.hwe->gt->oa.gt_lock); + if (ret < 0) + goto err_sync_cleanup; + + return ret; + +err_sync_cleanup: + while (param.num_syncs--) + xe_sync_entry_cleanup(¶m.syncs[param.num_syncs]); + kfree(param.syncs); err_exec_q: - if (ret < 0 && param.exec_q) + if (param.exec_q) xe_exec_queue_put(param.exec_q); return ret; } diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 8862eca73fbe..fea9d981e414 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -238,5 +238,17 @@ struct xe_oa_stream { /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */ u32 no_preempt; + + /** @xef: xe_file with which the stream was opened */ + struct xe_file *xef; + + /** @last_fence: fence to use in stream destroy when needed */ + struct dma_fence *last_fence; + + /** @num_syncs: size of @syncs array */ + u32 num_syncs; + + /** @syncs: syncs to wait on and to signal */ + struct xe_sync_entry *syncs; }; #endif diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index b16473818173..30fdbdb9341e 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -182,11 +182,12 @@ static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry ta static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - int i, err; + unsigned int fw_ref; + int i; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto err_fw; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; drm_printf(p, "PAT table:\n"); @@ -198,9 +199,7 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) XELP_MEM_TYPE_STR_MAP[mem_type], pat); } 
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -err_fw: - xe_assert(xe, !err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } static const struct xe_pat_ops xelp_pat_ops = { @@ -211,11 +210,12 @@ static const struct xe_pat_ops xelp_pat_ops = { static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - int i, err; + unsigned int fw_ref; + int i; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto err_fw; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; drm_printf(p, "PAT table:\n"); @@ -229,9 +229,7 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) XELP_MEM_TYPE_STR_MAP[mem_type], pat); } - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -err_fw: - xe_assert(xe, !err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } static const struct xe_pat_ops xehp_pat_ops = { @@ -242,11 +240,12 @@ static const struct xe_pat_ops xehp_pat_ops = { static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - int i, err; + unsigned int fw_ref; + int i; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto err_fw; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; drm_printf(p, "PAT table:\n"); @@ -258,9 +257,7 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat); } - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -err_fw: - xe_assert(xe, !err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } static const struct xe_pat_ops xehpc_pat_ops = { @@ -271,11 +268,12 @@ static const struct xe_pat_ops xehpc_pat_ops = { static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - int i, err; + unsigned int fw_ref; + int i; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto err_fw; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; drm_printf(p, "PAT 
table:\n"); @@ -292,9 +290,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat); } - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -err_fw: - xe_assert(xe, !err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } /* @@ -330,12 +326,13 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - int i, err; + unsigned int fw_ref; u32 pat; + int i; - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto err_fw; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; drm_printf(p, "PAT table:\n"); @@ -374,9 +371,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) REG_FIELD_GET(XE2_COH_MODE, pat), pat); - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -err_fw: - xe_assert(xe, !err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } static const struct xe_pat_ops xe2_pat_ops = { diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 5093a243e9fe..170ae72d1a7b 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -117,6 +117,7 @@ query_engine_cycles(struct xe_device *xe, __ktime_func_t cpu_clock; struct xe_hw_engine *hwe; struct xe_gt *gt; + unsigned int fw_ref; if (query->size == 0) { query->size = size; @@ -149,13 +150,16 @@ query_engine_cycles(struct xe_device *xe, if (!hwe) return -EINVAL; - if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); return -EIO; + } hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp, &resp.cpu_delta, cpu_clock); - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(gt), fw_ref); if (GRAPHICS_VER(xe) >= 20) resp.width = 64; @@ -666,7 +670,7 
@@ static int query_oa_units(struct xe_device *xe, du->oa_unit_id = u->oa_unit_id; du->oa_unit_type = u->type; du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); - du->capabilities = DRM_XE_OA_CAPS_BASE; + du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 191cb4121acd..e1a0e27cda14 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -188,27 +188,27 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) { struct xe_reg_sr_entry *entry; unsigned long reg; - int err; + unsigned int fw_ref; if (xa_empty(&sr->xa)) return; xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) goto err_force_wake; xa_for_each(&sr->xa, reg, entry) apply_one_mmio(gt, entry); - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return; err_force_wake: - xe_gt_err(gt, "Failed to apply, err=%d\n", err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n"); } void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) @@ -221,15 +221,15 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) u32 mmio_base = hwe->mmio_base; unsigned long reg; unsigned int slot = 0; - int err; + unsigned int fw_ref; if (xa_empty(&sr->xa)) return; drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) goto err_force_wake; p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL); @@ -254,13 +254,13 @@ void 
xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr); } - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); return; err_force_wake: - drm_err(&xe->drm, "Failed to apply, err=%d\n", err); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n"); } /** diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index eeccc1c318ae..1905ca590965 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -280,7 +280,7 @@ void xe_sched_job_arm(struct xe_sched_job *job) fence = &chain->base; } - job->fence = fence; + job->fence = dma_fence_get(fence); /* Pairs with put in scheduler */ drm_sched_job_arm(&job->drm); } diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index 0d3f76fb05ce..f13f333f00be 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -40,7 +40,6 @@ struct xe_sched_job { * @fence: dma fence to indicate completion. 1 way relationship - job * can safely reference fence, fence cannot safely reference job. 
*/ -#define JOB_FLAG_SUBMIT DMA_FENCE_FLAG_USER_BITS struct dma_fence *fence; /** @user_fence: write back value when BB is complete */ struct { @@ -63,7 +62,7 @@ struct xe_sched_job { struct xe_sched_job_snapshot { u16 batch_addr_len; - u64 batch_addr[]; + u64 batch_addr[] __counted_by(batch_addr_len); }; #endif diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 2e72c06fd40d..a90480c6aecf 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -83,6 +83,8 @@ static void user_fence_worker(struct work_struct *w) XE_WARN_ON("Copy to user failed"); kthread_unuse_mm(ufence->mm); mmput(ufence->mm); + } else { + drm_dbg(&ufence->xe->drm, "mmget_not_zero() failed, ufence wasn't signaled\n"); } wake_up_all(&ufence->xe->ufence_wq); diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 2a623bfcda7e..b1f81dca610d 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -220,8 +220,8 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, { struct xe_device *xe = tile_to_xe(tile); struct xe_gt *gt = tile->primary_gt; + unsigned int fw_ref; u64 offset; - int err; u32 reg; if (IS_SRIOV_VF(xe)) { @@ -240,9 +240,9 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, return 0; } - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - return err; + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; /* actual size */ if (unlikely(xe->info.platform == XE_DG1)) { @@ -264,7 +264,9 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size, /* remove the tile offset so we have just the available size */ *vram_size = offset - *tile_offset; - return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + + return 0; } static void vram_fini(void *arg) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 264d6e116499..bcd04464b85e 100644 --- 
a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -39,3 +39,5 @@ 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) no_media_l3 MEDIA_VERSION(3000) +14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0) + MEDIA_VERSION(3000), MEDIA_STEP(A0, B0) |