summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_irq.c
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2013-06-10 11:20:20 +0100
committer Daniel Vetter <daniel.vetter@ffwll.ch> 2013-06-11 11:44:00 +0200
commit 9107e9d227e3b0893829baee4ac59feb874d4c23 (patch)
tree e95c4b8646977e744fedc6df20567cabeb28c8b8 /drivers/gpu/drm/i915/i915_irq.c
parent 50f018dff180942dc40e601de6e97145a4aaeaa9 (diff)
downloadlwn-9107e9d227e3b0893829baee4ac59feb874d4c23.tar.gz
lwn-9107e9d227e3b0893829baee4ac59feb874d4c23.zip
drm/i915: Only slightly increment hangcheck score if we successfully kick a ring
After kicking a ring, it should be free to make progress again and so should not be accused of being stuck until hangcheck fires once more. In order to catch a denial-of-service within a batch or across multiple batches, we still do increment the hangcheck score - just not as severely so that it takes multiple kicks to fail.

This should address part of Ben's justified criticism of

commit 05407ff889ceebe383aa5907219f86582ef96b72
Author: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Thu May 30 09:04:29 2013 +0300

    drm/i915: detect hang using per ring hangcheck_score

"There's also another corner case on the kick. If the seqno = 2 (though not stuck), and on the 3rd hangcheck, the ring is stuck, and we try to kick it... we don't actually try to find out if the kick helped."

v2: Make sure we catch DoS attempts with batches full of invalid WAITs.
v3: Preserve the ability to detect loops by always charging the ring if it is busy on the same request.
v4: Make sure we queue another check if on a new batch

References: https://bugs.freedesktop.org/show_bug.cgi?id=65394
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_irq.c')
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c110
1 files changed, 58 insertions, 52 deletions
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c482e8ae58dc..6a757691488e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2314,21 +2314,11 @@ ring_last_seqno(struct intel_ring_buffer *ring)
struct drm_i915_gem_request, list)->seqno;
}
-static bool i915_hangcheck_ring_idle(struct intel_ring_buffer *ring,
- u32 ring_seqno, bool *err)
-{
- if (list_empty(&ring->request_list) ||
- i915_seqno_passed(ring_seqno, ring_last_seqno(ring))) {
- /* Issue a wake-up to catch stuck h/w. */
- if (waitqueue_active(&ring->irq_queue)) {
- DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
- ring->name);
- wake_up_all(&ring->irq_queue);
- *err = true;
- }
- return true;
- }
- return false;
+static bool
+ring_idle(struct intel_ring_buffer *ring, u32 seqno)
+{
+ return (list_empty(&ring->request_list) ||
+ i915_seqno_passed(seqno, ring_last_seqno(ring)));
}
static bool semaphore_passed(struct intel_ring_buffer *ring)
@@ -2362,16 +2352,26 @@ static bool semaphore_passed(struct intel_ring_buffer *ring)
ioread32(ring->virtual_start+acthd+4)+1);
}
-static bool kick_ring(struct intel_ring_buffer *ring)
+static bool ring_hung(struct intel_ring_buffer *ring)
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- u32 tmp = I915_READ_CTL(ring);
+ u32 tmp;
+
+ if (IS_GEN2(dev))
+ return true;
+
+ /* Is the chip hanging on a WAIT_FOR_EVENT?
+ * If so we can simply poke the RB_WAIT bit
+ * and break the hang. This should work on
+ * all but the second generation chipsets.
+ */
+ tmp = I915_READ_CTL(ring);
if (tmp & RING_WAIT) {
DRM_ERROR("Kicking stuck wait on %s\n",
ring->name);
I915_WRITE_CTL(ring, tmp);
- return true;
+ return false;
}
if (INTEL_INFO(dev)->gen >= 6 &&
@@ -2380,22 +2380,10 @@ static bool kick_ring(struct intel_ring_buffer *ring)
DRM_ERROR("Kicking stuck semaphore on %s\n",
ring->name);
I915_WRITE_CTL(ring, tmp);
- return true;
- }
- return false;
-}
-
-static bool i915_hangcheck_ring_hung(struct intel_ring_buffer *ring)
-{
- if (IS_GEN2(ring->dev))
return false;
+ }
- /* Is the chip hanging on a WAIT_FOR_EVENT?
- * If so we can simply poke the RB_WAIT bit
- * and break the hang. This should work on
- * all but the second generation chipsets.
- */
- return !kick_ring(ring);
+ return true;
}
/**
@@ -2413,45 +2401,63 @@ void i915_hangcheck_elapsed(unsigned long data)
struct intel_ring_buffer *ring;
int i;
int busy_count = 0, rings_hung = 0;
- bool stuck[I915_NUM_RINGS];
+ bool stuck[I915_NUM_RINGS] = { 0 };
+#define BUSY 1
+#define KICK 5
+#define HUNG 20
+#define FIRE 30
if (!i915_enable_hangcheck)
return;
for_each_ring(ring, dev_priv, i) {
u32 seqno, acthd;
- bool idle, err = false;
+ bool busy = true;
seqno = ring->get_seqno(ring, false);
acthd = intel_ring_get_active_head(ring);
- idle = i915_hangcheck_ring_idle(ring, seqno, &err);
- stuck[i] = ring->hangcheck.acthd == acthd;
-
- if (idle) {
- if (err)
- ring->hangcheck.score += 2;
- else
- ring->hangcheck.score = 0;
- } else {
- busy_count++;
- if (ring->hangcheck.seqno == seqno) {
- ring->hangcheck.score++;
-
- /* Kick ring if stuck*/
- if (stuck[i])
- i915_hangcheck_ring_hung(ring);
+ if (ring->hangcheck.seqno == seqno) {
+ if (ring_idle(ring, seqno)) {
+ if (waitqueue_active(&ring->irq_queue)) {
+ /* Issue a wake-up to catch stuck h/w. */
+ DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
+ ring->name);
+ wake_up_all(&ring->irq_queue);
+ ring->hangcheck.score += HUNG;
+ } else
+ busy = false;
} else {
- ring->hangcheck.score = 0;
+ int score;
+
+ stuck[i] = ring->hangcheck.acthd == acthd;
+ if (stuck[i]) {
+ /* Every time we kick the ring, add a
+ * small increment to the hangcheck
+ * score so that we can catch a
+ * batch that is repeatedly kicked.
+ */
+ score = ring_hung(ring) ? HUNG : KICK;
+ } else
+ score = BUSY;
+
+ ring->hangcheck.score += score;
}
+ } else {
+ /* Gradually reduce the count so that we catch DoS
+ * attempts across multiple batches.
+ */
+ if (ring->hangcheck.score > 0)
+ ring->hangcheck.score--;
}
ring->hangcheck.seqno = seqno;
ring->hangcheck.acthd = acthd;
+ busy_count += busy;
}
for_each_ring(ring, dev_priv, i) {
- if (ring->hangcheck.score > 2) {
+ if (ring->hangcheck.score > FIRE) {
rings_hung++;
DRM_ERROR("%s: %s on %s 0x%x\n", ring->name,
stuck[i] ? "stuck" : "no progress",