diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2017-06-28 13:35:48 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2017-06-28 15:23:12 +0100 |
commit | 7b92c1bd0540b64f54d98331d67e57266f9343c4 (patch) | |
tree | c2ec13c790024d982bbffe17fe9c44c01c3dc872 /drivers/gpu/drm/i915/intel_pm.c | |
parent | 13f8458f9a4522bcb7d1856dd8b329ff5d90f887 (diff) | |
download | lwn-7b92c1bd0540b64f54d98331d67e57266f9343c4.tar.gz lwn-7b92c1bd0540b64f54d98331d67e57266f9343c4.zip |
drm/i915: Avoid keeping waitboost active for signaling threads
Once a client has requested a waitboost, we keep that waitboost active
until all clients are no longer waiting. This is because we don't
distinguish which waiter deserves the boost. However, with the advent of
fence signaling, the signaler threads appear as waiters to the RPS
interrupt handler. So instead of using a single boolean to track when to
keep the waitboost active, use a counter of all outstanding waitboosted
requests.
At this point, I have removed all vestiges of the rate limiting on
clients. Whilst this means that compositors should remain more fluid,
it also means that boosts are more prevalent. See commit b29c19b64528
("drm/i915: Boost RPS frequency for CPU stalls") for a longer discussion
on the pros and cons of both approaches.
A drawback of this implementation is that it requires constant request
submission to keep the waitboost trimmed (as it is now cancelled when the
request is completed). This will be fine for a busy system, but near
idle the boosts may be kept for longer than desired (effectively tens of
vblanks worstcase) and there is a reliance on rc6 instead.
v2: Remove defunct rps.client_lock
Reported-by: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170628123548.9236-1-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 57 |
1 files changed, 22 insertions, 35 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 48ea0fca1f72..c3fcadfa0ae7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6126,47 +6126,35 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } mutex_unlock(&dev_priv->rps.hw_lock); - - spin_lock(&dev_priv->rps.client_lock); - while (!list_empty(&dev_priv->rps.clients)) - list_del_init(dev_priv->rps.clients.next); - spin_unlock(&dev_priv->rps.client_lock); } -void gen6_rps_boost(struct drm_i915_private *dev_priv, - struct intel_rps_client *rps, - unsigned long submitted) +void gen6_rps_boost(struct drm_i915_gem_request *rq, + struct intel_rps_client *rps) { + struct drm_i915_private *i915 = rq->i915; + bool boost; + /* This is intentionally racy! We peek at the state here, then * validate inside the RPS worker. */ - if (!(dev_priv->gt.awake && - dev_priv->rps.enabled && - dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) + if (!i915->rps.enabled) return; - /* Force a RPS boost (and don't count it against the client) if - * the GPU is severely congested. - */ - if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) - rps = NULL; - - spin_lock(&dev_priv->rps.client_lock); - if (rps == NULL || list_empty(&rps->link)) { - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) { - dev_priv->rps.client_boost = true; - schedule_work(&dev_priv->rps.work); - } - spin_unlock_irq(&dev_priv->irq_lock); - - if (rps != NULL) { - list_add(&rps->link, &dev_priv->rps.clients); - rps->boosts++; - } else - dev_priv->rps.boosts++; + boost = false; + spin_lock_irq(&rq->lock); + if (!rq->waitboost && !i915_gem_request_completed(rq)) { + atomic_inc(&i915->rps.num_waiters); + rq->waitboost = true; + boost = true; } - spin_unlock(&dev_priv->rps.client_lock); + spin_unlock_irq(&rq->lock); + if (!boost) + return; + + if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq) + schedule_work(&i915->rps.work); + + atomic_inc(rps ? &rps->boosts : &i915->rps.boosts); } int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) @@ -9113,7 +9101,7 @@ static void __intel_rps_boost_work(struct work_struct *work) struct drm_i915_gem_request *req = boost->req; if (!i915_gem_request_completed(req)) - gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); + gen6_rps_boost(req, NULL); i915_gem_request_put(req); kfree(boost); @@ -9142,11 +9130,10 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) void intel_pm_setup(struct drm_i915_private *dev_priv) { mutex_init(&dev_priv->rps.hw_lock); - spin_lock_init(&dev_priv->rps.client_lock); INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, __intel_autoenable_gt_powersave); - INIT_LIST_HEAD(&dev_priv->rps.clients); + atomic_set(&dev_priv->rps.num_waiters, 0); dev_priv->pm.suspended = false; atomic_set(&dev_priv->pm.wakeref_count, 0); |