summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2024-11-05 11:47:52 +1000
committerDave Airlie <airlied@redhat.com>2024-11-05 11:48:14 +1000
commitfb6c5b1fdc03a61bcd0ac716dc8597fc97d00da5 (patch)
tree0b569d6a5fadc72be65f389859bf7f2acb288778 /drivers/gpu/drm/xe
parentffd99396c630781b4142b2b8c27eb2d69e344f7c (diff)
parent35d25a4a0012e690ef0cc4c5440231176db595cc (diff)
downloadlwn-fb6c5b1fdc03a61bcd0ac716dc8597fc97d00da5.tar.gz
lwn-fb6c5b1fdc03a61bcd0ac716dc8597fc97d00da5.zip
Merge tag 'drm-xe-next-2024-10-31' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
UAPI Changes: - Define and parse OA sync properties (Ashutosh) Driver Changes: - Add caller info to xe_gt_reset_async (Nirmoy) - A large forcewake rework / cleanup (Himal) - A g2h response timeout fix (Badal) - A PTL workaround (Vinay) - Handle unreliable MMIO reads during forcewake (Shuicheng) - Ufence user-space access fixes (Nirmoy) - Annotate flexible arrays (Matthew Brost) - Enable GuC lite restore (Fei) - Prevent GuC register capture on VF (Zhanjun) - Show VFs VRAM / LMEM provisioning summary over debugfs (Michal) - Parallel queues fix on GT reset (Nirmoy) - Move reference grabbing to a job's dma-fence (Matt Brost) - Mark a number of local workqueues WQ_MEM_RECLAIM (Matt Brost) - OA synchronization support (Ashutosh) - Capture all available bits of GuC timestamp to GuC log (John) - Increase readability of guc_info debugfs (John) - Add a mmio barrier before GGTT invalidate (Matt Brost) - Don't short-circuit TDR on jobs not started (Matt Brost) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/ZyNvA_vZZYR-1eWE@fedora
Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r--drivers/gpu/drm/xe/abi/guc_klvs_abi.h1
-rw-r--r--drivers/gpu/drm/xe/display/xe_hdcp_gsc.c6
-rw-r--r--drivers/gpu/drm/xe/regs/xe_guc_regs.h3
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs.c18
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.c27
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c16
-rw-r--r--drivers/gpu/drm/xe/xe_device.c25
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c8
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.c2
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.c122
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.h23
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake_types.h6
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c23
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.c9
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c110
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c13
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.c26
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c35
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c5
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c5
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c20
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c5
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.c8
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c56
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.h5
-rw-r--r--drivers/gpu/drm/xe/xe_guc_debugfs.c14
-rw-r--r--drivers/gpu/drm/xe/xe_guc_fwif.h1
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.c15
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c50
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c15
-rw-r--r--drivers/gpu/drm/xe/xe_huc.c8
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.c14
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c678
-rw-r--r--drivers/gpu/drm/xe/xe_oa_types.h12
-rw-r--r--drivers/gpu/drm/xe/xe_pat.c65
-rw-r--r--drivers/gpu/drm/xe/xe_query.c10
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr.c24
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c2
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c2
-rw-r--r--drivers/gpu/drm/xe/xe_vram.c12
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules2
46 files changed, 995 insertions, 518 deletions
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 6b30743a2f6c..37606cf8cc5e 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -352,6 +352,7 @@ enum xe_guc_klv_ids {
GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007,
GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008,
GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009,
+ GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a,
};
#endif
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index 231677129a35..6d34b55d72bb 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -42,6 +42,7 @@ bool intel_hdcp_gsc_check_status(struct intel_display *display)
struct xe_gt *gt = tile->media_gt;
struct xe_gsc *gsc = &gt->uc.gsc;
bool ret = true;
+ unsigned int fw_ref;
if (!gsc && !xe_uc_fw_is_enabled(&gsc->fw)) {
drm_dbg_kms(&xe->drm,
@@ -50,7 +51,8 @@ bool intel_hdcp_gsc_check_status(struct intel_display *display)
}
xe_pm_runtime_get(xe);
- if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref) {
drm_dbg_kms(&xe->drm,
"failed to get forcewake to check proxy status\n");
ret = false;
@@ -60,7 +62,7 @@ bool intel_hdcp_gsc_check_status(struct intel_display *display)
if (!xe_gsc_proxy_init_done(gsc))
ret = false;
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
out:
xe_pm_runtime_put(xe);
return ret;
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index b27b73680c12..2118f7dec287 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -84,7 +84,8 @@
#define HUC_LOADING_AGENT_GUC REG_BIT(1)
#define GUC_WOPCM_OFFSET_VALID REG_BIT(0)
#define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4)
-#define GUC_PMTIMESTAMP XE_REG(0xc3e8)
+#define GUC_PMTIMESTAMP_LO XE_REG(0xc3e8)
+#define GUC_PMTIMESTAMP_HI XE_REG(0xc3ec)
#define GUC_SEND_INTERRUPT XE_REG(0xc4c8)
#define GUC_SEND_TRIGGER REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index ea932c051cc7..6f9b7a266b41 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -43,12 +43,11 @@ static void read_l3cc_table(struct xe_gt *gt,
{
struct kunit *test = kunit_get_current_test();
u32 l3cc, l3cc_expected;
- unsigned int i;
+ unsigned int fw_ref, i;
u32 reg_val;
- u32 ret;
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n");
for (i = 0; i < info->num_mocs_regs; i++) {
if (!(i & 1)) {
@@ -72,7 +71,7 @@ static void read_l3cc_table(struct xe_gt *gt,
KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc,
"l3cc idx=%u has incorrect val.\n", i);
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static void read_mocs_table(struct xe_gt *gt,
@@ -80,15 +79,14 @@ static void read_mocs_table(struct xe_gt *gt,
{
struct kunit *test = kunit_get_current_test();
u32 mocs, mocs_expected;
- unsigned int i;
+ unsigned int fw_ref, i;
u32 reg_val;
- u32 ret;
KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index,
"Unused entries index should have been defined\n");
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n");
for (i = 0; i < info->num_mocs_regs; i++) {
if (regs_are_mcr(gt))
@@ -106,7 +104,7 @@ static void read_mocs_table(struct xe_gt *gt,
"mocs reg 0x%x has incorrect val.\n", i);
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static int mocs_kernel_test_run_device(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index fe4319eb13fd..492b4877433f 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -90,13 +90,32 @@ static int forcewake_open(struct inode *inode, struct file *file)
{
struct xe_device *xe = inode->i_private;
struct xe_gt *gt;
- u8 id;
+ u8 id, last_gt;
+ unsigned int fw_ref;
xe_pm_runtime_get(xe);
- for_each_gt(gt, xe, id)
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ for_each_gt(gt, xe, id) {
+ last_gt = id;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ goto err_fw_get;
+ }
return 0;
+
+err_fw_get:
+ for_each_gt(gt, xe, id) {
+ if (id < last_gt)
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ else if (id == last_gt)
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ else
+ break;
+ }
+
+ xe_pm_runtime_put(xe);
+ return -ETIMEDOUT;
}
static int forcewake_release(struct inode *inode, struct file *file)
@@ -106,7 +125,7 @@ static int forcewake_release(struct inode *inode, struct file *file)
u8 id;
for_each_gt(gt, xe, id)
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_pm_runtime_put(xe);
return 0;
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 99842a35dbf0..d2679c5d976b 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -158,13 +158,15 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
{
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
+ unsigned int fw_ref;
/* keep going if fw fails as we still want to save the memory and SW data */
- if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
+ fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
xe_vm_snapshot_capture_delayed(ss->vm);
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
- xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
/* Calculate devcoredump size */
ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
@@ -236,8 +238,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
u32 width_mask = (0x1 << q->width) - 1;
const char *process_name = "no process";
- int i;
+ unsigned int fw_ref;
bool cookie;
+ int i;
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
@@ -261,11 +264,10 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
}
/* keep going if fw fails as we still want to save the memory and SW data */
- if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL))
- xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
+ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
- ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
+ ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct);
ss->ge = xe_guc_exec_queue_snapshot_capture(q);
ss->job = xe_sched_job_snapshot_capture(job);
ss->vm = xe_vm_snapshot_capture(q->vm);
@@ -274,7 +276,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
queue_work(system_unbound_wq, &ss->work);
- xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
dma_fence_end_signalling(cookie);
}
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index f6c87d14f78c..51bb9d875268 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -604,8 +604,8 @@ int xe_device_probe_early(struct xe_device *xe)
static int probe_has_flat_ccs(struct xe_device *xe)
{
struct xe_gt *gt;
+ unsigned int fw_ref;
u32 reg;
- int err;
/* Always enabled/disabled, no runtime check to do */
if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
@@ -613,9 +613,9 @@ static int probe_has_flat_ccs(struct xe_device *xe)
gt = xe_root_mmio_gt(xe);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- return err;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
@@ -624,7 +624,8 @@ static int probe_has_flat_ccs(struct xe_device *xe)
drm_dbg(&xe->drm,
"Flat CCS has been disabled in bios, May lead to performance impact");
- return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return 0;
}
int xe_device_probe(struct xe_device *xe)
@@ -875,6 +876,7 @@ void xe_device_wmb(struct xe_device *xe)
void xe_device_td_flush(struct xe_device *xe)
{
struct xe_gt *gt;
+ unsigned int fw_ref;
u8 id;
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
@@ -889,7 +891,8 @@ void xe_device_td_flush(struct xe_device *xe)
if (xe_gt_is_media_type(gt))
continue;
- if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
@@ -904,22 +907,22 @@ void xe_device_td_flush(struct xe_device *xe)
150, NULL, false))
xe_gt_err_once(gt, "TD flush timeout\n");
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
}
void xe_device_l2_flush(struct xe_device *xe)
{
struct xe_gt *gt;
- int err;
+ unsigned int fw_ref;
gt = xe_root_mmio_gt(xe);
if (!XE_WA(gt, 16023588340))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
spin_lock(&gt->global_invl_lock);
@@ -929,7 +932,7 @@ void xe_device_l2_flush(struct xe_device *xe)
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index fb52a23e28f8..22f0f1a6dfd5 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -278,6 +278,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
struct xe_hw_engine *hwe;
struct xe_exec_queue *q;
u64 gpu_timestamp;
+ unsigned int fw_ref;
xe_pm_runtime_get(xe);
@@ -303,13 +304,16 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
continue;
fw = xe_hw_engine_to_fw_domain(hwe);
- if (xe_force_wake_get(gt_to_fw(gt), fw)) {
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), fw);
+ if (!xe_force_wake_ref_has_domain(fw_ref, fw)) {
hwe = NULL;
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
break;
}
gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), fw));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
break;
}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 7deb480e26af..1158b6062a6c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -143,7 +143,7 @@ struct xe_exec_queue {
/** @hw_engine_group_link: link into exec queues in the same hw engine group */
struct list_head hw_engine_group_link;
/** @lrc: logical ring context for this exec queue */
- struct xe_lrc *lrc[];
+ struct xe_lrc *lrc[] __counted_by(width);
};
/**
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index f3b71fe7a96d..a8c416a48812 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -313,7 +313,7 @@ execlist_run_job(struct drm_sched_job *drm_job)
q->ring_ops->emit_job(job);
xe_execlist_make_active(exl);
- return dma_fence_get(job->fence);
+ return job->fence;
}
static void execlist_job_free(struct drm_sched_job *drm_job)
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 08621717b14b..4f6784e5abf8 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -21,15 +21,25 @@ static const char *str_wake_sleep(bool wake)
return wake ? "wake" : "sleep";
}
-static void domain_init(struct xe_force_wake_domain *domain,
+static void mark_domain_initialized(struct xe_force_wake *fw,
+ enum xe_force_wake_domain_id id)
+{
+ fw->initialized_domains |= BIT(id);
+}
+
+static void init_domain(struct xe_force_wake *fw,
enum xe_force_wake_domain_id id,
struct xe_reg reg, struct xe_reg ack)
{
+ struct xe_force_wake_domain *domain = &fw->domains[id];
+
domain->id = id;
domain->reg_ctl = reg;
domain->reg_ack = ack;
domain->val = FORCEWAKE_MT(FORCEWAKE_KERNEL);
domain->mask = FORCEWAKE_MT_MASK(FORCEWAKE_KERNEL);
+
+ mark_domain_initialized(fw, id);
}
void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
@@ -43,13 +53,11 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
if (xe->info.graphics_verx100 >= 1270) {
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
- XE_FW_DOMAIN_ID_GT,
+ init_domain(fw, XE_FW_DOMAIN_ID_GT,
FORCEWAKE_GT,
FORCEWAKE_ACK_GT_MTL);
} else {
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
- XE_FW_DOMAIN_ID_GT,
+ init_domain(fw, XE_FW_DOMAIN_ID_GT,
FORCEWAKE_GT,
FORCEWAKE_ACK_GT);
}
@@ -63,8 +71,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
if (!xe_gt_is_media_type(gt))
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
- XE_FW_DOMAIN_ID_RENDER,
+ init_domain(fw, XE_FW_DOMAIN_ID_RENDER,
FORCEWAKE_RENDER,
FORCEWAKE_ACK_RENDER);
@@ -72,8 +79,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
if (!(gt->info.engine_mask & BIT(i)))
continue;
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j],
- XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
+ init_domain(fw, XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
FORCEWAKE_MEDIA_VDBOX(j),
FORCEWAKE_ACK_MEDIA_VDBOX(j));
}
@@ -82,15 +88,13 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
if (!(gt->info.engine_mask & BIT(i)))
continue;
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j],
- XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
+ init_domain(fw, XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
FORCEWAKE_MEDIA_VEBOX(j),
FORCEWAKE_ACK_MEDIA_VEBOX(j));
}
if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))
- domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC],
- XE_FW_DOMAIN_ID_GSC,
+ init_domain(fw, XE_FW_DOMAIN_ID_GSC,
FORCEWAKE_GSC,
FORCEWAKE_ACK_GSC);
}
@@ -156,52 +160,108 @@ static int domain_sleep_wait(struct xe_gt *gt,
(ffs(tmp__) - 1))) && \
domain__->reg_ctl.addr)
-int xe_force_wake_get(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains)
+/**
+ * xe_force_wake_get() : Increase the domain refcount
+ * @fw: struct xe_force_wake
+ * @domains: forcewake domains to get refcount on
+ *
+ * This function wakes up @domains if they are asleep and takes references.
+ * If requested domain is XE_FORCEWAKE_ALL then only applicable/initialized
+ * domains will be considered for refcount and it is a caller responsibility
+ * to check returned ref if it includes any specific domain by using
+ * xe_force_wake_ref_has_domain() function. Caller must call
+ * xe_force_wake_put() function to decrease incremented refcounts.
+ *
+ * Return: opaque reference to woken domains or zero if none of requested
+ * domains were awake.
+ */
+unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domains)
{
struct xe_gt *gt = fw->gt;
struct xe_force_wake_domain *domain;
- enum xe_force_wake_domains tmp, woken = 0;
+ unsigned int ref_incr = 0, awake_rqst = 0, awake_failed = 0;
+ unsigned int tmp, ref_rqst;
unsigned long flags;
- int ret = 0;
+ xe_gt_assert(gt, is_power_of_2(domains));
+ xe_gt_assert(gt, domains <= XE_FORCEWAKE_ALL);
+ xe_gt_assert(gt, domains == XE_FORCEWAKE_ALL || fw->initialized_domains & domains);
+
+ ref_rqst = (domains == XE_FORCEWAKE_ALL) ? fw->initialized_domains : domains;
spin_lock_irqsave(&fw->lock, flags);
- for_each_fw_domain_masked(domain, domains, fw, tmp) {
+ for_each_fw_domain_masked(domain, ref_rqst, fw, tmp) {
if (!domain->ref++) {
- woken |= BIT(domain->id);
+ awake_rqst |= BIT(domain->id);
domain_wake(gt, domain);
}
+ ref_incr |= BIT(domain->id);
}
- for_each_fw_domain_masked(domain, woken, fw, tmp) {
- ret |= domain_wake_wait(gt, domain);
+ for_each_fw_domain_masked(domain, awake_rqst, fw, tmp) {
+ if (domain_wake_wait(gt, domain) == 0) {
+ fw->awake_domains |= BIT(domain->id);
+ } else {
+ awake_failed |= BIT(domain->id);
+ --domain->ref;
+ }
}
- fw->awake_domains |= woken;
+ ref_incr &= ~awake_failed;
spin_unlock_irqrestore(&fw->lock, flags);
- return ret;
+ xe_gt_WARN(gt, awake_failed, "Forcewake domain%s %#x failed to acknowledge awake request\n",
+ str_plural(hweight_long(awake_failed)), awake_failed);
+
+ if (domains == XE_FORCEWAKE_ALL && ref_incr == fw->initialized_domains)
+ ref_incr |= XE_FORCEWAKE_ALL;
+
+ return ref_incr;
}
-int xe_force_wake_put(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains)
+/**
+ * xe_force_wake_put - Decrement the refcount and put domain to sleep if refcount becomes 0
+ * @fw: Pointer to the force wake structure
+ * @fw_ref: return of xe_force_wake_get()
+ *
+ * This function reduces the reference counts for domains in fw_ref. If
+ * refcount for any of the specified domain reaches 0, it puts the domain to sleep
+ * and waits for acknowledgment for domain to sleep within 50 milisec timeout.
+ * Warns in case of timeout of ack from domain.
+ */
+void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref)
{
struct xe_gt *gt = fw->gt;
struct xe_force_wake_domain *domain;
- enum xe_force_wake_domains tmp, sleep = 0;
+ unsigned int tmp, sleep = 0;
unsigned long flags;
- int ret = 0;
+ int ack_fail = 0;
+
+ /*
+ * Avoid unnecessary lock and unlock when the function is called
+ * in error path of individual domains.
+ */
+ if (!fw_ref)
+ return;
+
+ if (xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ fw_ref = fw->initialized_domains;
spin_lock_irqsave(&fw->lock, flags);
- for_each_fw_domain_masked(domain, domains, fw, tmp) {
+ for_each_fw_domain_masked(domain, fw_ref, fw, tmp) {
+ xe_gt_assert(gt, domain->ref);
+
if (!--domain->ref) {
sleep |= BIT(domain->id);
domain_sleep(gt, domain);
}
}
for_each_fw_domain_masked(domain, sleep, fw, tmp) {
- ret |= domain_sleep_wait(gt, domain);
+ if (domain_sleep_wait(gt, domain) == 0)
+ fw->awake_domains &= ~BIT(domain->id);
+ else
+ ack_fail |= BIT(domain->id);
}
- fw->awake_domains &= ~sleep;
spin_unlock_irqrestore(&fw->lock, flags);
- return ret;
+ xe_gt_WARN(gt, ack_fail, "Forcewake domain%s %#x failed to acknowledge sleep request\n",
+ str_plural(hweight_long(ack_fail)), ack_fail);
}
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
index a2577672f4e3..0e3e84bfa51c 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.h
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -15,10 +15,9 @@ void xe_force_wake_init_gt(struct xe_gt *gt,
struct xe_force_wake *fw);
void xe_force_wake_init_engines(struct xe_gt *gt,
struct xe_force_wake *fw);
-int xe_force_wake_get(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains);
-int xe_force_wake_put(struct xe_force_wake *fw,
- enum xe_force_wake_domains domains);
+unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domains);
+void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref);
static inline int
xe_force_wake_ref(struct xe_force_wake *fw,
@@ -46,4 +45,20 @@ xe_force_wake_assert_held(struct xe_force_wake *fw,
xe_gt_assert(fw->gt, fw->awake_domains & domain);
}
+/**
+ * xe_force_wake_ref_has_domain - verifies if the domains are in fw_ref
+ * @fw_ref : the force_wake reference
+ * @domain : forcewake domain to verify
+ *
+ * This function confirms whether the @fw_ref includes a reference to the
+ * specified @domain.
+ *
+ * Return: true if domain is refcounted.
+ */
+static inline bool
+xe_force_wake_ref_has_domain(unsigned int fw_ref, enum xe_force_wake_domains domain)
+{
+ return fw_ref & domain;
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
index ed0edc2cdf9f..899fbbcb3ea9 100644
--- a/drivers/gpu/drm/xe/xe_force_wake_types.h
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -48,7 +48,7 @@ enum xe_force_wake_domains {
XE_FW_MEDIA_VEBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2),
XE_FW_MEDIA_VEBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3),
XE_FW_GSC = BIT(XE_FW_DOMAIN_ID_GSC),
- XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) - 1
+ XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT)
};
/**
@@ -78,7 +78,9 @@ struct xe_force_wake {
/** @lock: protects everything force wake struct */
spinlock_t lock;
/** @awake_domains: mask of all domains awake */
- enum xe_force_wake_domains awake_domains;
+ unsigned int awake_domains;
+ /** @initialized_domains: mask of all initialized domains */
+ unsigned int initialized_domains;
/** @domains: force wake domains */
struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT];
};
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 65bc41d2c867..558fac8bb6fb 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -246,7 +246,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
else
ggtt->pt_ops = &xelp_pt_ops;
- ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0);
+ ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM);
drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
ggtt->size - xe_wopcm_size(xe));
@@ -409,7 +409,7 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
* vs. correct GGTT page. Not particularly a hot code path so blindly
* do a mmio read here which results in GuC reading correct GGTT page.
*/
- xe_mmio_read32(&xe_root_mmio_gt(xe)->mmio, VF_CAP_REG);
+ xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG);
/* Each GT in a tile has its own TLB to cache GGTT lookups */
ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 783b09bf3681..1eb791ddc375 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -261,19 +261,17 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_tile *tile = gt_to_tile(gt);
+ unsigned int fw_ref;
int ret;
if (XE_WA(tile->primary_gt, 14018094691)) {
- ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
+ fw_ref = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
/*
* If the forcewake fails we want to keep going, because the worst
* case outcome in failing to apply the WA is that PXP won't work,
- * which is not fatal. We still throw a warning so the issue is
- * seen if it happens.
+ * which is not fatal. Forcewake get warns implicitly in case of failure
*/
- xe_gt_WARN_ON(tile->primary_gt, ret);
-
xe_gt_mcr_multicast_write(tile->primary_gt,
EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK,
EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT);
@@ -282,7 +280,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
ret = gsc_upload(gsc);
if (XE_WA(tile->primary_gt, 14018094691))
- xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(tile->primary_gt), fw_ref);
if (ret)
return ret;
@@ -352,6 +350,7 @@ static void gsc_work(struct work_struct *work)
struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_device *xe = gt_to_xe(gt);
+ unsigned int fw_ref;
u32 actions;
int ret;
@@ -361,7 +360,7 @@ static void gsc_work(struct work_struct *work)
spin_unlock_irq(&gsc->lock);
xe_pm_runtime_get(xe);
- xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
if (actions & GSC_ACTION_ER_COMPLETE) {
ret = gsc_er_complete(gt);
@@ -381,7 +380,7 @@ static void gsc_work(struct work_struct *work)
xe_gsc_proxy_request_handler(gsc);
out:
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
}
@@ -601,7 +600,7 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
{
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_mmio *mmio = &gt->mmio;
- int err;
+ unsigned int fw_ref;
xe_uc_fw_print(&gsc->fw, p);
@@ -610,8 +609,8 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
if (!xe_uc_fw_is_enabled(&gsc->fw))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref)
return;
drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
@@ -622,5 +621,5 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
xe_mmio_read32(mmio, HECI_FWSTS5(MTL_GSC_HECI1_BASE)),
xe_mmio_read32(mmio, HECI_FWSTS6(MTL_GSC_HECI1_BASE)));
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index 6d89c22ae811..fc64b45d324b 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -450,22 +450,21 @@ void xe_gsc_proxy_remove(struct xe_gsc *gsc)
{
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_device *xe = gt_to_xe(gt);
- int err = 0;
+ unsigned int fw_ref = 0;
if (!gsc->proxy.component_added)
return;
/* disable HECI2 IRQs */
xe_pm_runtime_get(xe);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref)
xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n");
/* try do disable irq even if forcewake failed */
gsc_proxy_irq_toggle(gsc, false);
- if (!err)
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
xe_gsc_wait_for_worker_completion(gsc);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 1c79660fb086..d6744be01a68 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -77,7 +77,8 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
return ERR_PTR(-ENOMEM);
gt->tile = tile;
- gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
+ gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq",
+ WQ_MEM_RECLAIM);
err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
if (err)
@@ -97,14 +98,14 @@ void xe_gt_sanitize(struct xe_gt *gt)
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
{
+ unsigned int fw_ref;
u32 reg;
- int err;
if (!XE_WA(gt, 16023588340))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (WARN_ON(err))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
if (!xe_gt_is_media_type(gt)) {
@@ -114,13 +115,13 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
}
xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
{
+ unsigned int fw_ref;
u32 reg;
- int err;
if (!XE_WA(gt, 16023588340))
return;
@@ -128,15 +129,15 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
if (xe_gt_is_media_type(gt))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (WARN_ON(err))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg &= ~CG_DIS_CNTLBUS;
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
@@ -402,11 +403,14 @@ static void dump_pat_on_error(struct xe_gt *gt)
static int gt_fw_domain_init(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err, i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref) {
+ err = -ETIMEDOUT;
goto err_hw_fence_irq;
+ }
if (!xe_gt_is_media_type(gt)) {
err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
@@ -441,14 +445,12 @@ static int gt_fw_domain_init(struct xe_gt *gt)
*/
gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
- XE_WARN_ON(err);
-
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
err_force_wake:
dump_pat_on_error(gt);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
err_hw_fence_irq:
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
@@ -458,11 +460,14 @@ err_hw_fence_irq:
static int all_fw_domain_init(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err, i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
- goto err_hw_fence_irq;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ err = -ETIMEDOUT;
+ goto err_force_wake;
+ }
xe_gt_mcr_set_implicit_defaults(gt);
xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
@@ -526,14 +531,12 @@ static int all_fw_domain_init(struct xe_gt *gt)
if (IS_SRIOV_PF(gt_to_xe(gt)))
xe_gt_sriov_pf_init_hw(gt);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-err_hw_fence_irq:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
@@ -546,11 +549,12 @@ err_hw_fence_irq:
*/
int xe_gt_init_hwconfig(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto out;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
@@ -568,8 +572,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_enable_host_l2_vram(gt);
out_fw:
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-out:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return err;
}
@@ -764,6 +767,7 @@ static int do_gt_restart(struct xe_gt *gt)
static int gt_reset(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
if (xe_device_wedged(gt_to_xe(gt)))
@@ -784,9 +788,11 @@ static int gt_reset(struct xe_gt *gt)
xe_gt_sanitize(gt);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
- goto err_msg;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ err = -ETIMEDOUT;
+ goto err_out;
+ }
xe_uc_gucrc_disable(&gt->uc);
xe_uc_stop_prepare(&gt->uc);
@@ -804,8 +810,7 @@ static int gt_reset(struct xe_gt *gt)
if (err)
goto err_out;
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(gt_to_xe(gt));
xe_gt_info(gt, "reset done\n");
@@ -813,8 +818,7 @@ static int gt_reset(struct xe_gt *gt)
return 0;
err_out:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-err_msg:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
XE_WARN_ON(xe_uc_start(&gt->uc));
err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
@@ -834,7 +838,7 @@ static void gt_reset_worker(struct work_struct *w)
void xe_gt_reset_async(struct xe_gt *gt)
{
- xe_gt_info(gt, "trying reset\n");
+ xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0));
/* Don't do a reset while one is already in flight */
if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
@@ -846,22 +850,25 @@ void xe_gt_reset_async(struct xe_gt *gt)
void xe_gt_suspend_prepare(struct xe_gt *gt)
{
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ unsigned int fw_ref;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_uc_stop_prepare(&gt->uc);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
int xe_gt_suspend(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
xe_gt_dbg(gt, "suspending\n");
xe_gt_sanitize(gt);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_msg;
err = xe_uc_suspend(&gt->uc);
@@ -872,14 +879,15 @@ int xe_gt_suspend(struct xe_gt *gt)
xe_gt_disable_host_l2_vram(gt);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_dbg(gt, "suspended\n");
return 0;
-err_force_wake:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
+ err = -ETIMEDOUT;
+err_force_wake:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
return err;
@@ -887,9 +895,11 @@ err_msg:
void xe_gt_shutdown(struct xe_gt *gt)
{
- xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ unsigned int fw_ref;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
do_gt_reset(gt);
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
@@ -914,11 +924,12 @@ int xe_gt_sanitize_freq(struct xe_gt *gt)
int xe_gt_resume(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
xe_gt_dbg(gt, "resuming\n");
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_msg;
err = do_gt_restart(gt);
@@ -927,14 +938,15 @@ int xe_gt_resume(struct xe_gt *gt)
xe_gt_idle_enable_pg(gt);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_dbg(gt, "resumed\n");
return 0;
-err_force_wake:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
+ err = -ETIMEDOUT;
+err_force_wake:
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index cbc43973ff7e..3e8c351a0eab 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -90,22 +90,21 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
struct xe_device *xe = gt_to_xe(gt);
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
- int err;
+ unsigned int fw_ref;
xe_pm_runtime_get(xe);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err) {
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
xe_pm_runtime_put(xe);
- return err;
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
}
for_each_hw_engine(hwe, gt, id)
xe_hw_engine_print(hwe, p);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
- if (err)
- return err;
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 746812aee8ff..fd80afeef56a 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -101,6 +101,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
struct xe_gt_idle *gtidle = &gt->gtidle;
struct xe_mmio *mmio = &gt->mmio;
u32 vcs_mask, vecs_mask;
+ unsigned int fw_ref;
int i, j;
if (IS_SRIOV_VF(xe))
@@ -127,7 +128,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
VDN_MFXVDENC_POWERGATE_ENABLE(j));
}
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (xe->info.skip_guc_pc) {
/*
* GuC sets the hysteresis value when GuC PC is enabled
@@ -138,12 +139,13 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
}
xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
void xe_gt_idle_disable_pg(struct xe_gt *gt)
{
struct xe_gt_idle *gtidle = &gt->gtidle;
+ unsigned int fw_ref;
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
@@ -151,9 +153,9 @@ void xe_gt_idle_disable_pg(struct xe_gt *gt)
xe_device_assert_mem_access(gt_to_xe(gt));
gtidle->powergate_enable = 0;
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
xe_mmio_write32(&gt->mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
@@ -172,7 +174,8 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
enum xe_gt_idle_state state;
u32 pg_enabled, pg_status = 0;
u32 vcs_mask, vecs_mask;
- int err, n;
+ unsigned int fw_ref;
+ int n;
/*
* Media Slices
*
@@ -208,14 +211,14 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
/* Do not wake the GT to read powergating status */
if (state != GT_IDLE_C6) {
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- return err;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
pg_enabled = xe_mmio_read32(&gt->mmio, POWERGATE_ENABLE);
pg_status = xe_mmio_read32(&gt->mmio, POWERGATE_DOMAIN_STATUS);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
if (gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK) {
@@ -298,13 +301,14 @@ static void gt_idle_fini(void *arg)
{
struct kobject *kobj = arg;
struct xe_gt *gt = kobj_to_gt(kobj->parent);
+ unsigned int fw_ref;
xe_gt_idle_disable_pg(gt);
if (gt_to_xe(gt)->info.skip_guc_pc) {
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
xe_gt_idle_disable_c6(gt);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
sysfs_remove_files(kobj, gt_idle_attrs);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index a863e50b756e..062a0c2fd2cd 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -2377,6 +2377,41 @@ int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p)
}
/**
+ * xe_gt_sriov_pf_config_print_lmem - Print LMEM configurations.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print LMEM allocations across all VFs.
+ * VFs without LMEM allocation are skipped.
+ *
+ * This function can only be called on PF.
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p)
+{
+ unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+ const struct xe_gt_sriov_config *config;
+ char buf[10];
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+
+ for (n = 1; n <= total_vfs; n++) {
+ config = &gt->sriov.pf.vfs[n].config;
+ if (!config->lmem_obj)
+ continue;
+
+ string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2,
+ buf, sizeof(buf));
+ drm_printf(p, "VF%u:\t%zu\t(%s)\n",
+ n, config->lmem_obj->size, buf);
+ }
+
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+ return 0;
+}
+
+/**
* xe_gt_sriov_pf_config_print_available_ggtt - Print available GGTT ranges.
* @gt: the &xe_gt
* @p: the &drm_printer
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
index b74ec38baa18..0c55aa40a1a7 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
@@ -65,6 +65,7 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt);
int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index 91fc42e386d8..05df4ab3514b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -82,6 +82,11 @@ static const struct drm_info_list pf_info[] = {
.data = xe_gt_sriov_pf_config_print_dbs,
},
{
+ "lmem_provisioned",
+ .show = xe_gt_debugfs_simple_show,
+ .data = xe_gt_sriov_pf_config_print_lmem,
+ },
+ {
"runtime_registers",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_service_print_runtime,
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index a530a933eedc..773de1f08db9 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -268,6 +268,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ unsigned int fw_ref;
if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
gt->uc.guc.submission_state.enabled) {
@@ -286,7 +287,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
if (IS_SRIOV_VF(xe))
return 0;
- xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
PVC_GUC_TLB_INV_DESC1_INVALIDATE);
@@ -296,7 +297,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
xe_mmio_write32(mmio, GUC_TLB_INV_CR,
GUC_TLB_INV_CR_INVALIDATE);
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
return 0;
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 8570b1218287..7f704346a8f4 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -70,7 +70,7 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc)
static u32 guc_ctl_feature_flags(struct xe_guc *guc)
{
- u32 flags = 0;
+ u32 flags = GUC_CTL_ENABLE_LITE_RESTORE;
if (!guc_to_xe(guc)->info.skip_guc_pc)
flags |= GUC_CTL_ENABLE_SLPC;
@@ -248,10 +248,11 @@ static void guc_fini_hw(void *arg)
{
struct xe_guc *guc = arg;
struct xe_gt *gt = guc_to_gt(guc);
+ unsigned int fw_ref;
- xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_uc_fini_hw(&guc_to_gt(guc)->uc);
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
@@ -1155,14 +1156,14 @@ int xe_guc_start(struct xe_guc *guc)
void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
{
struct xe_gt *gt = guc_to_gt(guc);
+ unsigned int fw_ref;
u32 status;
- int err;
int i;
xe_uc_fw_print(&guc->fw, p);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
status = xe_mmio_read32(&gt->mmio, GUC_STATUS);
@@ -1183,9 +1184,12 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i)));
}
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+ drm_puts(p, "\n");
+ xe_guc_ct_print(&guc->ct, p, false);
- xe_guc_ct_print(&guc->ct, p);
+ drm_puts(p, "\n");
xe_guc_submit_print(guc, p);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 25292997c7f3..4e746ae98888 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -359,6 +359,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
&offset, &remain);
+ if (XE_WA(gt, 14022866841))
+ guc_waklv_enable_simple(ads,
+ GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO,
+ &offset, &remain);
+
/*
* On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
* the default value for this register is determined to be 0xC40. This could change in the
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
index 41262bda20ed..8b6cb786a2aa 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -1590,6 +1590,9 @@ xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot
u16 guc_id = 0;
u32 lrca = 0;
+ if (IS_SRIOV_VF(xe))
+ return;
+
new = guc_capture_get_prealloc_node(guc);
if (!new)
return;
@@ -1820,7 +1823,7 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
return NULL;
xe = gt_to_xe(q->gt);
- if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe))
+ if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe) || IS_SRIOV_VF(xe))
return NULL;
ss = &xe->devcoredump.snapshot;
@@ -1876,6 +1879,9 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
enum xe_hw_engine_id id;
u32 adj_logical_mask = q->logical_mask;
+ if (IS_SRIOV_VF(xe))
+ return;
+
for_each_hw_engine(hwe, q->gt, id) {
if (hwe->class != q->hwe->class ||
!(BIT(hwe->logical_instance) & adj_logical_mask)) {
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index c260d8840990..4870af1c5a90 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -213,7 +213,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
- ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
+ ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", WQ_MEM_RECLAIM);
if (!ct->g2h_wq)
return -ENOMEM;
@@ -1607,7 +1607,8 @@ static void g2h_worker_func(struct work_struct *w)
receive_g2h(ct);
}
-struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic)
+static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic,
+ bool want_ctb)
{
struct xe_guc_ct_snapshot *snapshot;
@@ -1615,7 +1616,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool a
if (!snapshot)
return NULL;
- if (ct->bo) {
+ if (ct->bo && want_ctb) {
snapshot->ctb_size = ct->bo->size;
snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
}
@@ -1645,25 +1646,13 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
}
-/**
- * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
- * @ct: GuC CT object.
- * @atomic: Boolean to indicate if this is called from atomic context like
- * reset or CTB handler or from some regular path like debugfs.
- *
- * This can be printed out in a later stage like during dev_coredump
- * analysis.
- *
- * Returns: a GuC CT snapshot object that must be freed by the caller
- * by using `xe_guc_ct_snapshot_free`.
- */
-struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
- bool atomic)
+static struct xe_guc_ct_snapshot *guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic,
+ bool want_ctb)
{
struct xe_device *xe = ct_to_xe(ct);
struct xe_guc_ct_snapshot *snapshot;
- snapshot = xe_guc_ct_snapshot_alloc(ct, atomic);
+ snapshot = guc_ct_snapshot_alloc(ct, atomic, want_ctb);
if (!snapshot) {
xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
return NULL;
@@ -1683,6 +1672,21 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
}
/**
+ * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
+ * @ct: GuC CT object.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis. This is safe to be called during atomic context.
+ *
+ * Returns: a GuC CT snapshot object that must be freed by the caller
+ * by using `xe_guc_ct_snapshot_free`.
+ */
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct)
+{
+ return guc_ct_snapshot_capture(ct, true, true);
+}
+
+/**
* xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
* @snapshot: GuC CT snapshot object.
* @p: drm_printer where it will be printed out.
@@ -1704,12 +1708,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
drm_printf(p, "\tg2h outstanding: %d\n",
snapshot->g2h_outstanding);
- if (snapshot->ctb) {
+ if (snapshot->ctb)
xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
- } else {
- drm_printf(p, "CTB snapshot missing!\n");
- return;
- }
} else {
drm_puts(p, "CT disabled\n");
}
@@ -1735,14 +1735,16 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
* xe_guc_ct_print - GuC CT Print.
* @ct: GuC CT.
* @p: drm_printer where it will be printed out.
+ * @want_ctb: Should the full CTB content be dumped (vs just the headers)
*
- * This function quickly capture a snapshot and immediately print it out.
+ * This function will quickly capture a snapshot of the CT state
+ * and immediately print it out.
*/
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb)
{
struct xe_guc_ct_snapshot *snapshot;
- snapshot = xe_guc_ct_snapshot_capture(ct, false);
+ snapshot = guc_ct_snapshot_capture(ct, false, want_ctb);
xe_guc_ct_snapshot_print(snapshot, p);
xe_guc_ct_snapshot_free(snapshot);
}
@@ -1776,7 +1778,7 @@ static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reaso
return;
snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true);
- snapshot_ct = xe_guc_ct_snapshot_capture((ct), true);
+ snapshot_ct = xe_guc_ct_snapshot_capture((ct));
spin_lock_irqsave(&ct->dead.lock, flags);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 338f0b75d29f..82c4ae458dda 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -17,11 +17,10 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct);
void xe_guc_ct_stop(struct xe_guc_ct *ct);
void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
-struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic);
-struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct);
void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p);
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb);
static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
{
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index d3822cbea273..995b306aced7 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -47,9 +47,23 @@ static int guc_log(struct seq_file *m, void *data)
return 0;
}
+static int guc_ctb(struct seq_file *m, void *data)
+{
+ struct xe_guc *guc = node_to_guc(m->private);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_pm_runtime_get(xe);
+ xe_guc_ct_print(&guc->ct, &p, true);
+ xe_pm_runtime_put(xe);
+
+ return 0;
+}
+
static const struct drm_info_list debugfs_list[] = {
{"guc_info", guc_info, 0},
{"guc_log", guc_log, 0},
+ {"guc_ctb", guc_ctb, 0},
};
void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 01e3ab590c3a..08ffe59f22fa 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -105,6 +105,7 @@ struct guc_update_exec_queue_policy {
#define GUC_CTL_FEATURE 2
#define GUC_CTL_ENABLE_SLPC BIT(2)
+#define GUC_CTL_ENABLE_LITE_RESTORE BIT(4)
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
#define GUC_CTL_DEBUG 3
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index cc70f448d879..df4cfb698cdb 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -145,8 +145,9 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log,
struct xe_device *xe = log_to_xe(log);
struct xe_guc *guc = log_to_guc(log);
struct xe_gt *gt = log_to_gt(log);
+ unsigned int fw_ref;
size_t remain;
- int i, err;
+ int i;
if (!log->bo) {
xe_gt_err(gt, "GuC log buffer not allocated\n");
@@ -168,12 +169,12 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log,
remain -= size;
}
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err) {
- snapshot->stamp = ~0;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref) {
+ snapshot->stamp = ~0ULL;
} else {
- snapshot->stamp = xe_mmio_read32(&gt->mmio, GUC_PMTIMESTAMP);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ snapshot->stamp = xe_mmio_read64_2x32(&gt->mmio, GUC_PMTIMESTAMP_LO);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
snapshot->ktime = ktime_get_boottime_ns();
snapshot->level = log->level;
@@ -204,7 +205,7 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_
snapshot->ver_found.major, snapshot->ver_found.minor, snapshot->ver_found.patch,
snapshot->ver_want.major, snapshot->ver_want.minor, snapshot->ver_want.patch);
drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", snapshot->ktime, snapshot->ktime);
- drm_printf(p, "GuC timestamp: 0x%08X [%u]\n", snapshot->stamp, snapshot->stamp);
+ drm_printf(p, "GuC timestamp: 0x%08llX [%llu]\n", snapshot->stamp, snapshot->stamp);
drm_printf(p, "Log level: %u\n", snapshot->level);
remain = snapshot->size;
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
index 4d57f8322efc..b3d5c72ac752 100644
--- a/drivers/gpu/drm/xe/xe_guc_log_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -27,7 +27,7 @@ struct xe_guc_log_snapshot {
/** @ktime: Kernel time the snapshot was taken */
u64 ktime;
/** @stamp: GuC timestamp at which the snapshot was taken */
- u32 stamp;
+ u64 stamp;
/** @level: GuC log verbosity level */
u32 level;
/** @ver_found: GuC firmware version */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 2b654f820ae2..e8b9faeaef64 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -415,22 +415,24 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
{
struct xe_gt *gt = pc_to_gt(pc);
- int ret;
+ unsigned int fw_ref;
/*
* GuC SLPC plays with cur freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- return ret;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
+ }
*freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
*freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
*freq = decode_freq(*freq);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -480,6 +482,7 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
{
struct xe_gt *gt = pc_to_gt(pc);
+ unsigned int fw_ref;
int ret;
mutex_lock(&pc->freq_lock);
@@ -493,9 +496,11 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
* GuC SLPC plays with min freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- goto out;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ ret = -ETIMEDOUT;
+ goto fw;
+ }
ret = pc_action_query_task_state(pc);
if (ret)
@@ -504,7 +509,7 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
*freq = pc_get_min_freq(pc);
fw:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
out:
mutex_unlock(&pc->freq_lock);
return ret;
@@ -855,6 +860,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
{
struct xe_device *xe = pc_to_xe(pc);
struct xe_gt *gt = pc_to_gt(pc);
+ unsigned int fw_ref;
int ret = 0;
if (xe->info.skip_guc_pc)
@@ -864,13 +870,15 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
if (ret)
return ret;
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- return ret;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
+ }
xe_gt_idle_disable_c6(gt);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -956,13 +964,16 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
struct xe_device *xe = pc_to_xe(pc);
struct xe_gt *gt = pc_to_gt(pc);
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+ unsigned int fw_ref;
int ret;
xe_gt_assert(gt, xe_device_uc_enabled(xe));
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (ret)
- return ret;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return -ETIMEDOUT;
+ }
if (xe->info.skip_guc_pc) {
if (xe->info.platform != XE_PVC)
@@ -1005,7 +1016,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL);
out:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return ret;
}
@@ -1037,18 +1048,19 @@ static void xe_guc_pc_fini_hw(void *arg)
{
struct xe_guc_pc *pc = arg;
struct xe_device *xe = pc_to_xe(pc);
+ unsigned int fw_ref;
if (xe_device_wedged(xe))
return;
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
+ fw_ref = xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
xe_guc_pc_gucrc_disable(pc);
XE_WARN_ON(xe_guc_pc_stop(pc));
/* Bind requested freq to mert_freq_cap before unload */
pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
- xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 5f8c7a45b028..7afcc243037c 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -717,6 +717,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
+ struct dma_fence *fence = NULL;
bool lr = xe_exec_queue_is_lr(q);
xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
@@ -734,12 +735,12 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
if (lr) {
xe_sched_job_set_error(job, -EOPNOTSUPP);
- return NULL;
- } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
- return job->fence;
+ dma_fence_put(job->fence); /* Drop ref from xe_sched_job_arm */
} else {
- return dma_fence_get(job->fence);
+ fence = job->fence;
}
+
+ return fence;
}
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
@@ -1035,6 +1036,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
struct xe_guc *guc = exec_queue_to_guc(q);
const char *process_name = "no process";
struct xe_device *xe = guc_to_xe(guc);
+ unsigned int fw_ref;
int err = -ETIME;
pid_t pid = -1;
int i = 0;
@@ -1068,12 +1070,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
!xe_guc_capture_get_matching_and_lock(job)) {
/* take force wake before engine register manual capture */
- if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL))
+ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");
xe_engine_snapshot_capture_for_job(job);
- xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
}
/*
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 77c5830309cf..6a846e4cb221 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -296,19 +296,19 @@ void xe_huc_sanitize(struct xe_huc *huc)
void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
{
struct xe_gt *gt = huc_to_gt(huc);
- int err;
+ unsigned int fw_ref;
xe_uc_fw_print(&huc->fw, p);
if (!xe_uc_fw_is_enabled(&huc->fw))
return;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
return;
drm_printf(p, "\nHuC status: 0x%08x\n",
xe_mmio_read32(&gt->mmio, HUC_KERNEL_LOAD_INFO));
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 231d0e86ed83..54d199b5cfb2 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -774,25 +774,21 @@ void xe_mocs_init(struct xe_gt *gt)
void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_mocs_info table;
- unsigned int flags;
- u32 ret;
struct xe_device *xe = gt_to_xe(gt);
+ struct xe_mocs_info table;
+ unsigned int fw_ref, flags;
flags = get_mocs_settings(xe, &table);
xe_pm_runtime_get_noresume(xe);
- ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
-
- if (ret)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
goto err_fw;
table.ops->dump(&table, flags, gt, p);
- xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
err_fw:
- xe_assert(xe, !ret);
xe_pm_runtime_put(xe);
}
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index bbe03db0c401..fd2ffe8df156 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -36,11 +36,22 @@
#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
+#include "xe_sync.h"
#define DEFAULT_POLL_FREQUENCY_HZ 200
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
#define XE_OA_UNIT_INVALID U32_MAX
+enum xe_oa_submit_deps {
+ XE_OA_SUBMIT_NO_DEPS,
+ XE_OA_SUBMIT_ADD_DEPS,
+};
+
+enum xe_oa_user_extn_from {
+ XE_OA_USER_EXTN_FROM_OPEN,
+ XE_OA_USER_EXTN_FROM_CONFIG,
+};
+
struct xe_oa_reg {
struct xe_reg addr;
u32 value;
@@ -70,6 +81,7 @@ struct flex {
};
struct xe_oa_open_param {
+ struct xe_file *xef;
u32 oa_unit_id;
bool sample;
u32 metric_set;
@@ -81,6 +93,9 @@ struct xe_oa_open_param {
struct xe_exec_queue *exec_q;
struct xe_hw_engine *hwe;
bool no_preempt;
+ struct drm_xe_sync __user *syncs_user;
+ int num_syncs;
+ struct xe_sync_entry *syncs;
};
struct xe_oa_config_bo {
@@ -90,6 +105,17 @@ struct xe_oa_config_bo {
struct xe_bb *bb;
};
+struct xe_oa_fence {
+ /* @base: dma fence base */
+ struct dma_fence base;
+ /* @lock: lock for the fence */
+ spinlock_t lock;
+ /* @work: work to signal @base */
+ struct delayed_work work;
+ /* @cb: callback to schedule @work */
+ struct dma_fence_cb cb;
+};
+
#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x
static const struct xe_oa_format oa_formats[] = {
@@ -162,10 +188,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se
return oa_config;
}
-static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo)
+static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence)
{
xe_oa_config_put(oa_bo->oa_config);
- xe_bb_free(oa_bo->bb, NULL);
+ xe_bb_free(oa_bo->bb, last_fence);
kfree(oa_bo);
}
@@ -570,11 +596,11 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
return ret;
}
-static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
+static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps,
+ struct xe_bb *bb)
{
struct xe_sched_job *job;
struct dma_fence *fence;
- long timeout;
int err = 0;
/* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
@@ -584,18 +610,24 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
goto exit;
}
+ if (deps == XE_OA_SUBMIT_ADD_DEPS) {
+ for (int i = 0; i < stream->num_syncs && !err; i++)
+ err = xe_sync_entry_add_deps(&stream->syncs[i], job);
+ if (err) {
+ drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err);
+ goto err_put_job;
+ }
+ }
+
xe_sched_job_arm(job);
fence = dma_fence_get(&job->drm.s_fence->finished);
xe_sched_job_push(job);
- timeout = dma_fence_wait_timeout(fence, false, HZ);
- dma_fence_put(fence);
- if (timeout < 0)
- err = timeout;
- else if (!timeout)
- err = -ETIME;
+ return fence;
+err_put_job:
+ xe_sched_job_put(job);
exit:
- return err;
+ return ERR_PTR(err);
}
static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
@@ -639,7 +671,8 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
xe_oa_config_put(stream->oa_config);
llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
- free_oa_config_bo(oa_bo);
+ free_oa_config_bo(oa_bo, stream->last_fence);
+ dma_fence_put(stream->last_fence);
}
static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
@@ -659,6 +692,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
const struct flex *flex, u32 count)
{
+ struct dma_fence *fence;
struct xe_bb *bb;
int err;
@@ -670,7 +704,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
xe_oa_store_flex(stream, lrc, bb, flex, count);
- err = xe_oa_submit_bb(stream, bb);
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto free_bb;
+ }
+ xe_bb_free(bb, fence);
+ dma_fence_put(fence);
+
+ return 0;
+free_bb:
xe_bb_free(bb, NULL);
exit:
return err;
@@ -678,6 +721,7 @@ exit:
static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
{
+ struct dma_fence *fence;
struct xe_bb *bb;
int err;
@@ -689,7 +733,16 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
write_cs_mi_lri(bb, reg_lri, 1);
- err = xe_oa_submit_bb(stream, bb);
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto free_bb;
+ }
+ xe_bb_free(bb, fence);
+ dma_fence_put(fence);
+
+ return 0;
+free_bb:
xe_bb_free(bb, NULL);
exit:
return err;
@@ -837,7 +890,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_oa_free_oa_buffer(stream);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_pm_runtime_put(stream->oa->xe);
/* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */
@@ -845,6 +898,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
xe_oa_free_configs(stream);
+ xe_file_put(stream->xef);
}
static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream)
@@ -915,11 +969,62 @@ out:
return oa_bo;
}
+static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence)
+{
+ dma_fence_put(stream->last_fence);
+ stream->last_fence = dma_fence_get(fence);
+}
+
+static void xe_oa_fence_work_fn(struct work_struct *w)
+{
+ struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work);
+
+ /* Signal fence to indicate new OA configuration is active */
+ dma_fence_signal(&ofence->base);
+ dma_fence_put(&ofence->base);
+}
+
+static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ /* Additional empirical delay needed for NOA programming after registers are written */
+#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
+
+ struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb);
+
+ INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn);
+ queue_delayed_work(system_unbound_wq, &ofence->work,
+ usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US));
+ dma_fence_put(fence);
+}
+
+static const char *xe_oa_get_driver_name(struct dma_fence *fence)
+{
+ return "xe_oa";
+}
+
+static const char *xe_oa_get_timeline_name(struct dma_fence *fence)
+{
+ return "unbound";
+}
+
+static const struct dma_fence_ops xe_oa_fence_ops = {
+ .get_driver_name = xe_oa_get_driver_name,
+ .get_timeline_name = xe_oa_get_timeline_name,
+};
+
static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config)
{
#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
struct xe_oa_config_bo *oa_bo;
- int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
+ struct xe_oa_fence *ofence;
+ int i, err, num_signal = 0;
+ struct dma_fence *fence;
+
+ ofence = kzalloc(sizeof(*ofence), GFP_KERNEL);
+ if (!ofence) {
+ err = -ENOMEM;
+ goto exit;
+ }
oa_bo = xe_oa_alloc_config_buffer(stream, config);
if (IS_ERR(oa_bo)) {
@@ -927,11 +1032,50 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config
goto exit;
}
- err = xe_oa_submit_bb(stream, oa_bo->bb);
+ /* Emit OA configuration batch */
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto exit;
+ }
- /* Additional empirical delay needed for NOA programming after registers are written */
- usleep_range(us, 2 * us);
+ /* Point of no return: initialize and set fence to signal */
+ spin_lock_init(&ofence->lock);
+ dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0);
+
+ for (i = 0; i < stream->num_syncs; i++) {
+ if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
+ num_signal++;
+ xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
+ }
+
+ /* Additional dma_fence_get in case we dma_fence_wait */
+ if (!num_signal)
+ dma_fence_get(&ofence->base);
+
+ /* Update last fence too before adding callback */
+ xe_oa_update_last_fence(stream, fence);
+
+ /* Add job fence callback to schedule work to signal ofence->base */
+ err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
+ xe_gt_assert(stream->gt, !err || err == -ENOENT);
+ if (err == -ENOENT)
+ xe_oa_config_cb(fence, &ofence->cb);
+
+ /* If nothing needs to be signaled we wait synchronously */
+ if (!num_signal) {
+ dma_fence_wait(&ofence->base, false);
+ dma_fence_put(&ofence->base);
+ }
+
+ /* Done with syncs */
+ for (i = 0; i < stream->num_syncs; i++)
+ xe_sync_entry_cleanup(&stream->syncs[i]);
+ kfree(stream->syncs);
+
+ return 0;
exit:
+ kfree(ofence);
return err;
}
@@ -1003,6 +1147,262 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
return xe_oa_emit_oa_config(stream, stream->oa_config);
}
+static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
+{
+ u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
+ u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
+ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
+ u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
+ int idx;
+
+ for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
+ const struct xe_oa_format *f = &oa->oa_formats[idx];
+
+ if (counter_size == f->counter_size && bc_report == f->bc_report &&
+ type == f->type && counter_sel == f->counter_select) {
+ *name = idx;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ if (value >= oa->oa_unit_ids) {
+ drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
+ return -EINVAL;
+ }
+ param->oa_unit_id = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->sample = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->metric_set = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ int ret = decode_oa_format(oa, value, &param->oa_format);
+
+ if (ret) {
+ drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
+ return ret;
+ }
+ return 0;
+}
+
+static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+#define OA_EXPONENT_MAX 31
+
+ if (value > OA_EXPONENT_MAX) {
+ drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
+ return -EINVAL;
+ }
+ param->period_exponent = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->disabled = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->exec_queue_id = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->engine_instance = value;
+ return 0;
+}
+
+static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->no_preempt = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->num_syncs = value;
+ return 0;
+}
+
+static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->syncs_user = u64_to_user_ptr(value);
+ return 0;
+}
+
+static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ return -EINVAL;
+}
+
+typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param);
+static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
+ [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
+ [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
+ [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
+ [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
+ [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
+ [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
+ [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
+ [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
+ [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
+ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
+ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+};
+
+static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
+ [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
+ [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
+ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+};
+
+static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
+ u64 extension, struct xe_oa_open_param *param)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_ext_set_property ext;
+ int err;
+ u32 idx;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return -EFAULT;
+
+ BUILD_BUG_ON(ARRAY_SIZE(xe_oa_set_property_funcs_open) !=
+ ARRAY_SIZE(xe_oa_set_property_funcs_config));
+
+ if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) ||
+ XE_IOCTL_DBG(oa->xe, ext.pad))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open));
+
+ if (from == XE_OA_USER_EXTN_FROM_CONFIG)
+ return xe_oa_set_property_funcs_config[idx](oa, ext.value, param);
+ else
+ return xe_oa_set_property_funcs_open[idx](oa, ext.value, param);
+}
+
+typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, enum xe_oa_user_extn_from from,
+ u64 extension, struct xe_oa_open_param *param);
+static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
+ [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS 16
+static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from from, u64 extension,
+ int ext_number, struct xe_oa_open_param *param)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_user_extension ext;
+ int err;
+ u32 idx;
+
+ if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
+ return -E2BIG;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
+ XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
+ err = xe_oa_user_extension_funcs[idx](oa, from, extension, param);
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return err;
+
+ if (ext.next_extension)
+ return xe_oa_user_extensions(oa, from, ext.next_extension, ++ext_number, param);
+
+ return 0;
+}
+
+static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
+{
+ int ret, num_syncs, num_ufence = 0;
+
+ if (param->num_syncs && !param->syncs_user) {
+ drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (param->num_syncs) {
+ param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL);
+ if (!param->syncs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+ }
+
+ for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) {
+ ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs],
+ &param->syncs_user[num_syncs], 0);
+ if (ret)
+ goto err_syncs;
+
+ if (xe_sync_is_ufence(&param->syncs[num_syncs]))
+ num_ufence++;
+ }
+
+ if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) {
+ ret = -EINVAL;
+ goto err_syncs;
+ }
+
+ return 0;
+
+err_syncs:
+ while (num_syncs--)
+ xe_sync_entry_cleanup(&param->syncs[num_syncs]);
+ kfree(param->syncs);
+exit:
+ return ret;
+}
+
static void xe_oa_stream_enable(struct xe_oa_stream *stream)
{
stream->pollin = false;
@@ -1096,36 +1496,38 @@ static int xe_oa_disable_locked(struct xe_oa_stream *stream)
static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
{
- struct drm_xe_ext_set_property ext;
+ struct xe_oa_open_param param = {};
long ret = stream->oa_config->id;
struct xe_oa_config *config;
int err;
- err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext));
- if (XE_IOCTL_DBG(stream->oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET))
- return -EINVAL;
+ err = xe_oa_user_extensions(stream->oa, XE_OA_USER_EXTN_FROM_CONFIG, arg, 0, &param);
+ if (err)
+ return err;
- config = xe_oa_get_oa_config(stream->oa, ext.value);
+ config = xe_oa_get_oa_config(stream->oa, param.metric_set);
if (!config)
return -ENODEV;
- if (config != stream->oa_config) {
- err = xe_oa_emit_oa_config(stream, config);
- if (!err)
- config = xchg(&stream->oa_config, config);
- else
- ret = err;
+ param.xef = stream->xef;
+ err = xe_oa_parse_syncs(stream->oa, &param);
+ if (err)
+ goto err_config_put;
+
+ stream->num_syncs = param.num_syncs;
+ stream->syncs = param.syncs;
+
+ err = xe_oa_emit_oa_config(stream, config);
+ if (!err) {
+ config = xchg(&stream->oa_config, config);
+ drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n",
+ stream->oa_config->uuid);
}
+err_config_put:
xe_oa_config_put(config);
- return ret;
+ return err ?: ret;
}
static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
@@ -1353,6 +1755,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
{
struct xe_oa_unit *u = param->hwe->oa_unit;
struct xe_gt *gt = param->hwe->gt;
+ unsigned int fw_ref;
int ret;
stream->exec_q = param->exec_q;
@@ -1366,6 +1769,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->period_exponent = param->period_exponent;
stream->no_preempt = param->no_preempt;
+ stream->xef = xe_file_get(param->xef);
+ stream->num_syncs = param->num_syncs;
+ stream->syncs = param->syncs;
+
/*
* For Xe2+, when overrun mode is enabled, there are no partial reports at the end
* of buffer, making the OA buffer effectively a non-power-of-2 size circular
@@ -1413,7 +1820,11 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
/* Take runtime pm ref and forcewake to disable RC6 */
xe_pm_runtime_get(stream->oa->xe);
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ ret = -ETIMEDOUT;
+ goto err_fw_put;
+ }
ret = xe_oa_alloc_oa_buffer(stream);
if (ret)
@@ -1455,13 +1866,14 @@ err_put_k_exec_q:
err_free_oa_buf:
xe_oa_free_oa_buffer(stream);
err_fw_put:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(stream->oa->xe);
if (stream->override_gucrc)
xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
err_free_configs:
xe_oa_free_configs(stream);
exit:
+ xe_file_put(stream->xef);
return ret;
}
@@ -1571,27 +1983,6 @@ static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type)
}
}
-static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
-{
- u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
- u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
- u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
- u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
- int idx;
-
- for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
- const struct xe_oa_format *f = &oa->oa_formats[idx];
-
- if (counter_size == f->counter_size && bc_report == f->bc_report &&
- type == f->type && counter_sel == f->counter_select) {
- *name = idx;
- return 0;
- }
- }
-
- return -EINVAL;
-}
-
/**
* xe_oa_unit_id - Return OA unit ID for a hardware engine
* @hwe: @xe_hw_engine
@@ -1638,155 +2029,6 @@ out:
return ret;
}
-static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- if (value >= oa->oa_unit_ids) {
- drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
- return -EINVAL;
- }
- param->oa_unit_id = value;
- return 0;
-}
-
-static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->sample = value;
- return 0;
-}
-
-static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->metric_set = value;
- return 0;
-}
-
-static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- int ret = decode_oa_format(oa, value, &param->oa_format);
-
- if (ret) {
- drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
- return ret;
- }
- return 0;
-}
-
-static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
-#define OA_EXPONENT_MAX 31
-
- if (value > OA_EXPONENT_MAX) {
- drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
- return -EINVAL;
- }
- param->period_exponent = value;
- return 0;
-}
-
-static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->disabled = value;
- return 0;
-}
-
-static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->exec_queue_id = value;
- return 0;
-}
-
-static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->engine_instance = value;
- return 0;
-}
-
-static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->no_preempt = value;
- return 0;
-}
-
-typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param);
-static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = {
- [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
- [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
- [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
- [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
- [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
- [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
- [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
- [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
- [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
-};
-
-static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension,
- struct xe_oa_open_param *param)
-{
- u64 __user *address = u64_to_user_ptr(extension);
- struct drm_xe_ext_set_property ext;
- int err;
- u32 idx;
-
- err = __copy_from_user(&ext, address, sizeof(ext));
- if (XE_IOCTL_DBG(oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) ||
- XE_IOCTL_DBG(oa->xe, ext.pad))
- return -EINVAL;
-
- idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs));
- return xe_oa_set_property_funcs[idx](oa, ext.value, param);
-}
-
-typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension,
- struct xe_oa_open_param *param);
-static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
- [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
-};
-
-#define MAX_USER_EXTENSIONS 16
-static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number,
- struct xe_oa_open_param *param)
-{
- u64 __user *address = u64_to_user_ptr(extension);
- struct drm_xe_user_extension ext;
- int err;
- u32 idx;
-
- if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
- return -E2BIG;
-
- err = __copy_from_user(&ext, address, sizeof(ext));
- if (XE_IOCTL_DBG(oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
- XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
- return -EINVAL;
-
- idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
- err = xe_oa_user_extension_funcs[idx](oa, extension, param);
- if (XE_IOCTL_DBG(oa->xe, err))
- return err;
-
- if (ext.next_extension)
- return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param);
-
- return 0;
-}
-
/**
* xe_oa_stream_open_ioctl - Opens an OA stream
* @dev: @drm_device
@@ -1812,7 +2054,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
return -ENODEV;
}
- ret = xe_oa_user_extensions(oa, data, 0, &param);
+ param.xef = xef;
+ ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, &param);
if (ret)
return ret;
@@ -1880,11 +2123,24 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
}
+ ret = xe_oa_parse_syncs(oa, &param);
+ if (ret)
+ goto err_exec_q;
+
mutex_lock(&param.hwe->gt->oa.gt_lock);
ret = xe_oa_stream_open_ioctl_locked(oa, &param);
mutex_unlock(&param.hwe->gt->oa.gt_lock);
+ if (ret < 0)
+ goto err_sync_cleanup;
+
+ return ret;
+
+err_sync_cleanup:
+ while (param.num_syncs--)
+ xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
+ kfree(param.syncs);
err_exec_q:
- if (ret < 0 && param.exec_q)
+ if (param.exec_q)
xe_exec_queue_put(param.exec_q);
return ret;
}
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 8862eca73fbe..fea9d981e414 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -238,5 +238,17 @@ struct xe_oa_stream {
/** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */
u32 no_preempt;
+
+ /** @xef: xe_file with which the stream was opened */
+ struct xe_file *xef;
+
+ /** @last_fence: fence to use in stream destroy when needed */
+ struct dma_fence *last_fence;
+
+ /** @num_syncs: size of @syncs array */
+ u32 num_syncs;
+
+ /** @syncs: syncs to wait on and to signal */
+ struct xe_sync_entry *syncs;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index b16473818173..30fdbdb9341e 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -182,11 +182,12 @@ static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry ta
static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -198,9 +199,7 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
XELP_MEM_TYPE_STR_MAP[mem_type], pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xelp_pat_ops = {
@@ -211,11 +210,12 @@ static const struct xe_pat_ops xelp_pat_ops = {
static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -229,9 +229,7 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
XELP_MEM_TYPE_STR_MAP[mem_type], pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xehp_pat_ops = {
@@ -242,11 +240,12 @@ static const struct xe_pat_ops xehp_pat_ops = {
static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -258,9 +257,7 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xehpc_pat_ops = {
@@ -271,11 +268,12 @@ static const struct xe_pat_ops xehpc_pat_ops = {
static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -292,9 +290,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/*
@@ -330,12 +326,13 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry
static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- int i, err;
+ unsigned int fw_ref;
u32 pat;
+ int i;
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- goto err_fw;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return;
drm_printf(p, "PAT table:\n");
@@ -374,9 +371,7 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
REG_FIELD_GET(XE2_COH_MODE, pat),
pat);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-err_fw:
- xe_assert(xe, !err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static const struct xe_pat_ops xe2_pat_ops = {
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 5093a243e9fe..170ae72d1a7b 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -117,6 +117,7 @@ query_engine_cycles(struct xe_device *xe,
__ktime_func_t cpu_clock;
struct xe_hw_engine *hwe;
struct xe_gt *gt;
+ unsigned int fw_ref;
if (query->size == 0) {
query->size = size;
@@ -149,13 +150,16 @@ query_engine_cycles(struct xe_device *xe,
if (!hwe)
return -EINVAL;
- if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return -EIO;
+ }
hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp,
&resp.cpu_delta, cpu_clock);
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
if (GRAPHICS_VER(xe) >= 20)
resp.width = 64;
@@ -666,7 +670,7 @@ static int query_oa_units(struct xe_device *xe,
du->oa_unit_id = u->oa_unit_id;
du->oa_unit_type = u->type;
du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
- du->capabilities = DRM_XE_OA_CAPS_BASE;
+ du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS;
j = 0;
for_each_hw_engine(hwe, gt, hwe_id) {
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 191cb4121acd..e1a0e27cda14 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -188,27 +188,27 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
{
struct xe_reg_sr_entry *entry;
unsigned long reg;
- int err;
+ unsigned int fw_ref;
if (xa_empty(&sr->xa))
return;
xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_force_wake;
xa_for_each(&sr->xa, reg, entry)
apply_one_mmio(gt, entry);
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return;
err_force_wake:
- xe_gt_err(gt, "Failed to apply, err=%d\n", err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
}
void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
@@ -221,15 +221,15 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
u32 mmio_base = hwe->mmio_base;
unsigned long reg;
unsigned int slot = 0;
- int err;
+ unsigned int fw_ref;
if (xa_empty(&sr->xa))
return;
drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
- err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (err)
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
goto err_force_wake;
p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
@@ -254,13 +254,13 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
}
- err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- XE_WARN_ON(err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return;
err_force_wake:
- drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n");
}
/**
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index eeccc1c318ae..1905ca590965 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -280,7 +280,7 @@ void xe_sched_job_arm(struct xe_sched_job *job)
fence = &chain->base;
}
- job->fence = fence;
+ job->fence = dma_fence_get(fence); /* Pairs with put in scheduler */
drm_sched_job_arm(&job->drm);
}
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index 0d3f76fb05ce..f13f333f00be 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -40,7 +40,6 @@ struct xe_sched_job {
* @fence: dma fence to indicate completion. 1 way relationship - job
* can safely reference fence, fence cannot safely reference job.
*/
-#define JOB_FLAG_SUBMIT DMA_FENCE_FLAG_USER_BITS
struct dma_fence *fence;
/** @user_fence: write back value when BB is complete */
struct {
@@ -63,7 +62,7 @@ struct xe_sched_job {
struct xe_sched_job_snapshot {
u16 batch_addr_len;
- u64 batch_addr[];
+ u64 batch_addr[] __counted_by(batch_addr_len);
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 2e72c06fd40d..a90480c6aecf 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -83,6 +83,8 @@ static void user_fence_worker(struct work_struct *w)
XE_WARN_ON("Copy to user failed");
kthread_unuse_mm(ufence->mm);
mmput(ufence->mm);
+ } else {
+ drm_dbg(&ufence->xe->drm, "mmget_not_zero() failed, ufence wasn't signaled\n");
}
wake_up_all(&ufence->xe->ufence_wq);
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index 2a623bfcda7e..b1f81dca610d 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -220,8 +220,8 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_gt *gt = tile->primary_gt;
+ unsigned int fw_ref;
u64 offset;
- int err;
u32 reg;
if (IS_SRIOV_VF(xe)) {
@@ -240,9 +240,9 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
return 0;
}
- err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (err)
- return err;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
/* actual size */
if (unlikely(xe->info.platform == XE_DG1)) {
@@ -264,7 +264,9 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
/* remove the tile offset so we have just the available size */
*vram_size = offset - *tile_offset;
- return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+ return 0;
}
static void vram_fini(void *arg)
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 264d6e116499..bcd04464b85e 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -39,3 +39,5 @@
14019789679 GRAPHICS_VERSION(1255)
GRAPHICS_VERSION_RANGE(1270, 2004)
no_media_l3 MEDIA_VERSION(3000)
+14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)
+ MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)