diff options
author | Mika Kuoppala <mika.kuoppala@linux.intel.com> | 2016-03-02 16:48:29 +0200 |
---|---|---|
committer | Mika Kuoppala <mika.kuoppala@intel.com> | 2016-03-04 15:17:14 +0200 |
commit | 24a65e624bcdc726c7711ae90efeffaf0a8e9f32 (patch) | |
tree | 37c200c671b61ce58a5edce28e5ac4d994ba1a92 /drivers/gpu/drm/i915/i915_irq.c | |
parent | d431440cce2427dcdd665d936865fe802637b4c2 (diff) | |
download | lwn-24a65e624bcdc726c7711ae90efeffaf0a8e9f32.tar.gz lwn-24a65e624bcdc726c7711ae90efeffaf0a8e9f32.zip |
drm/i915/hangcheck: Prevent long walks across full-ppgtt
With full-ppgtt, it takes the GPU an eon to traverse the entire 256PiB
address space, causing a loop to be detected. Under the current scheme,
if ACTHD walks off the end of a batch buffer and into an empty
address space, we "never" detect the hang. If we always increment the
score as the ACTHD is progressing then we will eventually timeout (after
~46.5s (31 * 1.5s) without advancing onto a new batch). To counter act
this, increase the amount we reduce the score for good batches, so that
only a series of almost-bad batches trigger a full reset. DoS detection
suffers slightly but series of long running shader tests will benefit.
Based on a patch from Chris Wilson.
Testcase: igt/drv_hangman/hangcheck-unterminated
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1456930109-21532-1-git-send-email-mika.kuoppala@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/i915_irq.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_irq.c | 17 |
1 files changed, 7 insertions, 10 deletions
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d1a46ef5ab3f..53e5104964b3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3001,12 +3001,7 @@ head_stuck(struct intel_engine_cs *ring, u64 acthd) memset(ring->hangcheck.instdone, 0, sizeof(ring->hangcheck.instdone)); - if (acthd > ring->hangcheck.max_acthd) { - ring->hangcheck.max_acthd = acthd; - return HANGCHECK_ACTIVE; - } - - return HANGCHECK_ACTIVE_LOOP; + return HANGCHECK_ACTIVE; } if (!subunits_stuck(ring)) @@ -3083,6 +3078,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) #define BUSY 1 #define KICK 5 #define HUNG 20 +#define ACTIVE_DECAY 15 if (!i915.enable_hangcheck) return; @@ -3151,9 +3147,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) switch (ring->hangcheck.action) { case HANGCHECK_IDLE: case HANGCHECK_WAIT: - case HANGCHECK_ACTIVE: break; - case HANGCHECK_ACTIVE_LOOP: + case HANGCHECK_ACTIVE: ring->hangcheck.score += BUSY; break; case HANGCHECK_KICK: @@ -3172,10 +3167,12 @@ static void i915_hangcheck_elapsed(struct work_struct *work) * attempts across multiple batches. */ if (ring->hangcheck.score > 0) - ring->hangcheck.score--; + ring->hangcheck.score -= ACTIVE_DECAY; + if (ring->hangcheck.score < 0) + ring->hangcheck.score = 0; /* Clear head and subunit states on seqno movement */ - ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0; + ring->hangcheck.acthd = 0; memset(ring->hangcheck.instdone, 0, sizeof(ring->hangcheck.instdone)); |