diff options
author | Rob Clark <robdclark@gmail.com> | 2015-06-07 13:46:04 -0400 |
---|---|---|
committer | Rob Clark <robdclark@gmail.com> | 2015-06-11 13:11:06 -0400 |
commit | 1a370be9ac51129e40b0ed7fa71d2b2b92bc47e5 (patch) | |
tree | 678bad05754937920c7d09fb66dd5a356ed9f172 /drivers/gpu/drm/msm/msm_gpu.c | |
parent | 56c2da8338d5cdfc0695eeed96ebe03cf2ac0321 (diff) | |
download | lwn-1a370be9ac51129e40b0ed7fa71d2b2b92bc47e5.tar.gz lwn-1a370be9ac51129e40b0ed7fa71d2b2b92bc47e5.zip |
drm/msm: restart queued submits after hang
Track the list of in-flight submits. If the gpu hangs, retire up to and
including the offending submit, and then re-submit the remainder. This
way, for concurrently running piglit tests (for example), one failing
test doesn't cause unrelated tests to fail simply because its submit
was queued up after one that triggered a hang.
Signed-off-by: Rob Clark <robdclark@gmail.com>
Diffstat (limited to 'drivers/gpu/drm/msm/msm_gpu.c')
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 49 |
1 files changed, 46 insertions, 3 deletions
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 4016aef56c50..8f70d9248ac5 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -265,6 +265,8 @@ static void inactive_start(struct msm_gpu *gpu) * Hangcheck detection for locked gpu: */ +static void retire_submits(struct msm_gpu *gpu, uint32_t fence); + static void recover_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); @@ -274,8 +276,19 @@ static void recover_worker(struct work_struct *work) mutex_lock(&dev->struct_mutex); if (msm_gpu_active(gpu)) { + struct msm_gem_submit *submit; + uint32_t fence = gpu->funcs->last_fence(gpu); + + /* retire completed submits, plus the one that hung: */ + retire_submits(gpu, fence + 1); + inactive_cancel(gpu); gpu->funcs->recover(gpu); + + /* replay the remaining submits after the one that hung: */ + list_for_each_entry(submit, &gpu->submit_list, node) { + gpu->funcs->submit(gpu, submit, NULL); + } } mutex_unlock(&dev->struct_mutex); @@ -418,6 +431,27 @@ out: * Cmdstream submission/retirement: */ +static void retire_submits(struct msm_gpu *gpu, uint32_t fence) +{ + struct drm_device *dev = gpu->dev; + + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + + while (!list_empty(&gpu->submit_list)) { + struct msm_gem_submit *submit; + + submit = list_first_entry(&gpu->submit_list, + struct msm_gem_submit, node); + + if (submit->fence <= fence) { + list_del(&submit->node); + kfree(submit); + } else { + break; + } + } +} + static void retire_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); @@ -428,6 +462,8 @@ static void retire_worker(struct work_struct *work) mutex_lock(&dev->struct_mutex); + retire_submits(gpu, fence); + while (!list_empty(&gpu->active_list)) { struct msm_gem_object *obj; @@ -467,21 +503,22 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_drm_private *priv = 
dev->dev_private; int i, ret; + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + submit->fence = ++priv->next_fence; gpu->submitted_fence = submit->fence; inactive_cancel(gpu); + list_add_tail(&submit->node, &gpu->submit_list); + msm_rd_dump_submit(submit); gpu->submitted_fence = submit->fence; update_sw_cntrs(gpu); - ret = gpu->funcs->submit(gpu, submit, ctx); - priv->lastctx = ctx; - for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object *msm_obj = submit->bos[i].obj; @@ -505,6 +542,10 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence); } + + ret = gpu->funcs->submit(gpu, submit, ctx); + priv->lastctx = ctx; + hangcheck_timer_reset(gpu); return ret; @@ -545,6 +586,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, INIT_WORK(&gpu->inactive_work, inactive_worker); INIT_WORK(&gpu->recover_work, recover_worker); + INIT_LIST_HEAD(&gpu->submit_list); + setup_timer(&gpu->inactive_timer, inactive_handler, (unsigned long)gpu); setup_timer(&gpu->hangcheck_timer, hangcheck_handler, |