summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_drv.h
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2017-02-27 20:58:48 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2017-02-27 21:57:20 +0000
commit56299fb7d9047cc1d25362827073b2ac0984ed21 (patch)
tree311ae5bcaec77364beb1a0514886e73534a5516d /drivers/gpu/drm/i915/i915_drv.h
parent8d769ea7bc16c34c9dc5143be021e943014c4cd1 (diff)
downloadlwn-56299fb7d9047cc1d25362827073b2ac0984ed21.tar.gz
lwn-56299fb7d9047cc1d25362827073b2ac0984ed21.zip
drm/i915: Signal first fence from irq handler if complete
As execlists and other non-semaphore multi-engine devices coordinate between engines using interrupts, we can shave off a few 10s of microsecond of scheduling latency by doing the fence signaling from the interrupt as opposed to a RT kthread. (Realistically the delay adds about 1% to an individual cross-engine workload.) We only signal the first fence in order to limit the amount of work we move into the interrupt handler. We also have to remember that our breadcrumbs may be unordered with respect to the interrupt and so we still require the waiter process to perform some heavyweight coherency fixups, as well as traversing the tree of waiters. v2: No need for early exit in irq handler - it breaks the flow between patches and prevents the tracepoint v3: Restore rcu hold across irq signaling of request Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170227205850.2828-2-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_drv.h')
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h13
1 files changed, 6 insertions, 7 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 933874ddea75..71152656ebbf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4060,9 +4060,9 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
* is woken.
*/
if (engine->irq_seqno_barrier &&
- rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
- struct task_struct *tsk;
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ unsigned long flags;
/* The ordering of irq_posted versus applying the barrier
* is crucial. The clearing of the current irq_posted must
@@ -4084,17 +4084,16 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
* the seqno before we believe it coherent since they see
* irq_posted == false but we are still running).
*/
- rcu_read_lock();
- tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
- if (tsk && tsk != current)
+ spin_lock_irqsave(&b->lock, flags);
+ if (b->first_wait && b->first_wait->tsk != current)
/* Note that if the bottom-half is changed as we
* are sending the wake-up, the new bottom-half will
* be woken by whomever made the change. We only have
* to worry about when we steal the irq-posted for
* ourself.
*/
- wake_up_process(tsk);
- rcu_read_unlock();
+ wake_up_process(b->first_wait->tsk);
+ spin_unlock_irqrestore(&b->lock, flags);
if (__i915_gem_request_completed(req, seqno))
return true;