author    Chris Wilson <chris@chris-wilson.co.uk>    2017-02-23 07:44:14 +0000
committer Chris Wilson <chris@chris-wilson.co.uk>    2017-02-23 14:49:32 +0000
commit    754c9fd5764909bc7c3ba9779355d55cb357be8a (patch)
tree      6368dabb127d1ad6eb5d7b6d8403dc3f9d7750cd /drivers/gpu/drm/i915/i915_drv.h
parent    fe49789fab974dd04c0fef26a4ce7f7f411b40dd (diff)
drm/i915: Protect the request->global_seqno with the engine->timeline lock
A request is assigned a global seqno only when it is on the hardware execution queue. The global seqno can be used to maintain a list of requests on the same engine in retirement order, for example for constructing a priority queue for waiting. Prior to its execution, or if it is subsequently removed in the event of preemption, its global seqno is zero.

As both insertion into and removal from the execution queue may operate in IRQ context, the global seqno is not guarded by the usual struct_mutex BKL. Instead, those relying on the global seqno must be prepared for its value to change between reads. Only when the request is complete is the global seqno stable: thanks to the memory barriers on submitting the commands to the hardware to write the breadcrumb, if the HWS shows that it has passed the global seqno and the global seqno is unchanged after the read, the request is indeed complete.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-9-chris@chris-wilson.co.uk
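As a standalone illustration of the check described above, the sketch below is a minimal userspace C11 model, not the driver code; every identifier in it (mock_request, hws_seqno, mock_request_completed) is a made-up stand-in for the i915 structures. A waiter snapshots the seqno, compares it against the hardware status page, and only trusts that comparison if the seqno is still unchanged afterwards:

/*
 * Minimal model of the "seqno is stable only once complete" rule:
 * the request's global seqno may be revoked concurrently (preemption),
 * so read it once, compare against the HWS, then confirm it did not
 * change underneath us before believing the result.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct mock_request {
        _Atomic uint32_t global_seqno;  /* 0 while not on the HW queue */
};

static _Atomic uint32_t hws_seqno;      /* models the hardware status page */

static bool mock_request_completed(struct mock_request *rq)
{
        uint32_t seqno = atomic_load(&rq->global_seqno);

        if (!seqno)             /* not (or no longer) on the execution queue */
                return false;

        /* Breadcrumb not yet written past our seqno: not complete. */
        if ((int32_t)(atomic_load(&hws_seqno) - seqno) < 0)
                return false;

        /*
         * The seqno may have been revoked between the two reads; only if
         * it is unchanged can we trust the HWS comparison above.
         */
        return atomic_load(&rq->global_seqno) == seqno;
}

int main(void)
{
        struct mock_request rq = { .global_seqno = 2 };

        atomic_store(&hws_seqno, 1);
        printf("before breadcrumb: %d\n", mock_request_completed(&rq));
        atomic_store(&hws_seqno, 2);
        printf("after breadcrumb:  %d\n", mock_request_completed(&rq));
        return 0;
}

The comparison uses wrap-safe signed subtraction, since (as the comment in the first hunk below notes) the engine may have wrapped around the seqno and left request->global_seqno ahead of the hardware.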
Diffstat (limited to 'drivers/gpu/drm/i915/i915_drv.h')
-rw-r--r--    drivers/gpu/drm/i915/i915_drv.h    17
1 file changed, 14 insertions, 3 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9220a28fbaa8..e5b34eaf41ca 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4009,9 +4009,10 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
}
static inline bool
-__i915_request_irq_complete(struct drm_i915_gem_request *req)
+__i915_request_irq_complete(const struct drm_i915_gem_request *req)
{
struct intel_engine_cs *engine = req->engine;
+ u32 seqno;
/* Note that the engine may have wrapped around the seqno, and
* so our request->global_seqno will be ahead of the hardware,
@@ -4022,10 +4023,20 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags))
return true;
+ /* The request was dequeued before we were awoken. We check after
+ * inspecting the hw to confirm that this was the same request
+ * that generated the HWS update. The memory barriers within
+ * the request execution are sufficient to ensure that a check
+ * after reading the value from hw matches this request.
+ */
+ seqno = i915_gem_request_global_seqno(req);
+ if (!seqno)
+ return false;
+
/* Before we do the heavier coherent read of the seqno,
* check the value (hopefully) in the CPU cacheline.
*/
- if (__i915_gem_request_completed(req))
+ if (__i915_gem_request_completed(req, seqno))
return true;
/* Ensure our read of the seqno is coherent so that we
@@ -4076,7 +4087,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
wake_up_process(tsk);
rcu_read_unlock();
- if (__i915_gem_request_completed(req))
+ if (__i915_gem_request_completed(req, seqno))
return true;
}
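The hunks above only read the seqno; the commit title's locking rule covers the writers. As a companion to the earlier sketch (again a hypothetical userspace model with made-up names, not the driver's submit/unsubmit paths), assignment and revocation of the global seqno are serialised by a single lock standing in for the engine->timeline lock, while readers take no lock and simply observe the atomic value:

/*
 * Write-side model: a request gains a global seqno when placed on the
 * execution queue and loses it again if preempted. Both transitions are
 * made under one lock (the stand-in for engine->timeline), so lockless
 * readers see either the old or the new value, never a torn one.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct mock_timeline {
        pthread_mutex_t lock;           /* stand-in for the timeline lock */
        uint32_t next_seqno;
};

struct mock_request {
        _Atomic uint32_t global_seqno;  /* 0 while off the execution queue */
};

static void mock_submit(struct mock_timeline *tl, struct mock_request *rq)
{
        pthread_mutex_lock(&tl->lock);
        atomic_store(&rq->global_seqno, ++tl->next_seqno);
        pthread_mutex_unlock(&tl->lock);
}

static void mock_unsubmit(struct mock_timeline *tl, struct mock_request *rq)
{
        pthread_mutex_lock(&tl->lock);
        atomic_store(&rq->global_seqno, 0);     /* revoke on preemption */
        pthread_mutex_unlock(&tl->lock);
}

int main(void)
{
        struct mock_timeline tl = { .lock = PTHREAD_MUTEX_INITIALIZER };
        struct mock_request rq = { 0 };

        mock_submit(&tl, &rq);
        printf("submitted:   seqno %u\n", (unsigned)atomic_load(&rq.global_seqno));
        mock_unsubmit(&tl, &rq);
        printf("unsubmitted: seqno %u\n", (unsigned)atomic_load(&rq.global_seqno));
        return 0;
}

This is the race the new early return in __i915_request_irq_complete() guards against: a zero seqno simply means the request is not (or no longer) on the execution queue.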