diff options
author | Thomas Daniel <thomas.daniel@intel.com> | 2014-07-24 17:04:39 +0100 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2014-08-14 22:43:47 +0200 |
commit | e981e7b17f2b41970e7e2367d4225e0bb3310667 (patch) | |
tree | d4109ce705c97f5e635aa7f4d97fa559b5d41cda /drivers/gpu/drm/i915/intel_lrc.c | |
parent | acdd884a2e1b873995c120d5eabd8cab77f48f20 (diff) | |
download | lwn-e981e7b17f2b41970e7e2367d4225e0bb3310667.tar.gz lwn-e981e7b17f2b41970e7e2367d4225e0bb3310667.zip |
drm/i915/bdw: Handle context switch events
Handle all context status events in the context status buffer on every
context switch interrupt. We only remove work from the execlist queue
after a context status buffer reports that it has completed and we only
attempt to schedule new contexts on interrupt when a previously submitted
context completes (unless no contexts are queued, which means the GPU is
free).
We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock
grabbed, FWIW), because it might sleep, which is not a nice thing to do.
Instead, do the runtime_pm get/put together with the create/destroy request,
and handle the forcewake get/put directly.
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
v2: Unreferencing the context when we are freeing the request might free
the backing bo, which requires the struct_mutex to be grabbed, so defer
unreferencing and freeing to a bottom half.
v3:
- Ack the interrupt inmediately, before trying to handle it (fix for
missing interrupts by Bob Beckett <robert.beckett@intel.com>).
- Update the Context Status Buffer Read Pointer, just in case (spotted
by Damien Lespiau).
v4: New namespace and multiple rebase changes.
v5: Squash with "drm/i915/bdw: Do not call intel_runtime_pm_get() in an
interrupt", as suggested by Daniel.
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
[danvet: Checkpatch ...]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 133 |
1 files changed, 127 insertions, 6 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e81f5f6c49b9..22f6a7c0cb18 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -49,6 +49,22 @@ #define RING_ELSP(ring) ((ring)->mmio_base+0x230) #define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) +#define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) +#define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) + +#define RING_EXECLIST_QFULL (1 << 0x2) +#define RING_EXECLIST1_VALID (1 << 0x3) +#define RING_EXECLIST0_VALID (1 << 0x4) +#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) +#define RING_EXECLIST1_ACTIVE (1 << 0x11) +#define RING_EXECLIST0_ACTIVE (1 << 0x12) + +#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) +#define GEN8_CTX_STATUS_PREEMPTED (1 << 1) +#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) +#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) +#define GEN8_CTX_STATUS_COMPLETE (1 << 4) +#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) #define CTX_LRI_HEADER_0 0x01 #define CTX_CONTEXT_CONTROL 0x02 @@ -150,6 +166,7 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, struct drm_i915_private *dev_priv = ring->dev->dev_private; uint64_t temp = 0; uint32_t desc[4]; + unsigned long flags; /* XXX: You must always write both descriptors in the order below. */ if (ctx_obj1) @@ -163,9 +180,17 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, desc[3] = (u32)(temp >> 32); desc[2] = (u32)temp; - /* Set Force Wakeup bit to prevent GT from entering C6 while - * ELSP writes are in progress */ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + /* Set Force Wakeup bit to prevent GT from entering C6 while ELSP writes + * are in progress. + * + * The other problem is that we can't just call gen6_gt_force_wake_get() + * because that function calls intel_runtime_pm_get(), which might sleep. + * Instead, we do the runtime_pm_get/put when creating/destroying requests. + */ + spin_lock_irqsave(&dev_priv->uncore.lock, flags); + if (dev_priv->uncore.forcewake_count++ == 0) + dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); I915_WRITE(RING_ELSP(ring), desc[1]); I915_WRITE(RING_ELSP(ring), desc[0]); @@ -176,7 +201,11 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, /* ELSP is a wo register, so use another nearby reg for posting instead */ POSTING_READ(RING_EXECLIST_STATUS(ring)); - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + /* Release Force Wakeup (see the big comment above). */ + spin_lock_irqsave(&dev_priv->uncore.lock, flags); + if (--dev_priv->uncore.forcewake_count == 0) + dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); } static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail) @@ -224,6 +253,9 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) { struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL; struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + + assert_spin_locked(&ring->execlist_lock); if (list_empty(&ring->execlist_queue)) return; @@ -237,8 +269,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) /* Same ctx: ignore first request, as second request * will update tail past first request's workload */ list_del(&req0->execlist_link); - i915_gem_context_unreference(req0->ctx); - kfree(req0); + queue_work(dev_priv->wq, &req0->work); req0 = cursor; } else { req1 = cursor; @@ -251,11 +282,97 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) req1 ? req1->tail : 0)); } +static bool execlists_check_remove_request(struct intel_engine_cs *ring, + u32 request_id) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct intel_ctx_submit_request *head_req; + + assert_spin_locked(&ring->execlist_lock); + + head_req = list_first_entry_or_null(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + + if (head_req != NULL) { + struct drm_i915_gem_object *ctx_obj = + head_req->ctx->engine[ring->id].state; + if (intel_execlists_ctx_id(ctx_obj) == request_id) { + list_del(&head_req->execlist_link); + queue_work(dev_priv->wq, &head_req->work); + return true; + } + } + + return false; +} + +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + u32 status_pointer; + u8 read_pointer; + u8 write_pointer; + u32 status; + u32 status_id; + u32 submit_contexts = 0; + + status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring)); + + read_pointer = ring->next_context_status_buffer; + write_pointer = status_pointer & 0x07; + if (read_pointer > write_pointer) + write_pointer += 6; + + spin_lock(&ring->execlist_lock); + + while (read_pointer < write_pointer) { + read_pointer++; + status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + + (read_pointer % 6) * 8); + status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + + (read_pointer % 6) * 8 + 4); + + if (status & GEN8_CTX_STATUS_COMPLETE) { + if (execlists_check_remove_request(ring, status_id)) + submit_contexts++; + } + } + + if (submit_contexts != 0) + execlists_context_unqueue(ring); + + spin_unlock(&ring->execlist_lock); + + WARN(submit_contexts > 2, "More than two context complete events?\n"); + ring->next_context_status_buffer = write_pointer % 6; + + I915_WRITE(RING_CONTEXT_STATUS_PTR(ring), + ((u32)ring->next_context_status_buffer & 0x07) << 8); +} + +static void execlists_free_request_task(struct work_struct *work) +{ + struct intel_ctx_submit_request *req = + container_of(work, struct intel_ctx_submit_request, work); + struct drm_device *dev = req->ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + intel_runtime_pm_put(dev_priv); + + mutex_lock(&dev->struct_mutex); + i915_gem_context_unreference(req->ctx); + mutex_unlock(&dev->struct_mutex); + + kfree(req); +} + static int execlists_context_queue(struct intel_engine_cs *ring, struct intel_context *to, u32 tail) { struct intel_ctx_submit_request *req = NULL; + struct drm_i915_private *dev_priv = ring->dev->dev_private; unsigned long flags; bool was_empty; @@ -266,6 +383,9 @@ static int execlists_context_queue(struct intel_engine_cs *ring, i915_gem_context_reference(req->ctx); req->ring = ring; req->tail = tail; + INIT_WORK(&req->work, execlists_free_request_task); + + intel_runtime_pm_get(dev_priv); spin_lock_irqsave(&ring->execlist_lock, flags); @@ -907,6 +1027,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin INIT_LIST_HEAD(&ring->execlist_queue); spin_lock_init(&ring->execlist_lock); + ring->next_context_status_buffer = 0; ret = intel_lr_context_deferred_create(dctx, ring); if (ret) |