From 158863fb50968c0ae85e87a401221425c941b9f0 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 20 Feb 2018 10:47:42 +0000 Subject: drm/i915: Make global seqno known in i915_gem_request_execute tracepoint Commit fe49789fab97 ("drm/i915: Deconstruct execute fence") re-arranged the code and moved the i915_gem_request_execute tracepoint to before the global seqno is assigned to the request. We need to move the tracepoint a bit later so this information is once again available. Signed-off-by: Tvrtko Ursulin Fixes: fe49789fab97 ("drm/i915: Deconstruct execute fence") Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180220104742.565-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_request.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 8bc7c50b8418..0deca06fdf0e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -490,8 +490,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); - trace_i915_gem_request_execute(request); - /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); @@ -515,6 +513,8 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); + trace_i915_gem_request_execute(request); + wake_up_all(&request->execute); } -- cgit v1.2.3 From 7b3b61b62a58b7be8af838aa7d726720e38087ee Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 20 Feb 2018 14:20:17 +0100 Subject: drm/todo: i915 could use device_link_add Noticed while reading some unrelated patches. Unfortunately Imre's patch to add our early/late hooks predated the device_link infrastructure by 2 years. Cc: Imre Deak Cc: Takashi Iwai Acked-by: Imre Deak Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180220132017.30719-1-daniel.vetter@ffwll.ch --- Documentation/gpu/todo.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 1e593370f64f..459936c04aa5 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -440,5 +440,12 @@ See drivers/gpu/drm/amd/display/TODO for tasks. Contact: Harry Wentland, Alex Deucher +i915 +---- + +- Our early/late pm callbacks could be removed in favour of using + device_link_add to model the dependency between i915 and snd_had. See + https://dri.freedesktop.org/docs/drm/driver-api/device_link.html + Outside DRM =========== -- cgit v1.2.3 From ea3f0ef37c56641779519c18bab7f60cf9eb9a15 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 20 Feb 2018 23:39:08 -0800 Subject: drm/doc: Fix documentation for _vblank_restore(). No code changes; fixes doc build warnings and polishes some doc text.
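For illustration, the usage pattern the corrected kerneldoc below describes is roughly the following. This is a minimal sketch only: drm_crtc_vblank_restore() is the helper being documented, while the foo_* driver names are hypothetical.

#include <drm/drm_vblank.h>

/* A driver whose power-saving features reset the hardware frame counter
 * restores the DRM vblank count when it re-enables the interrupt.
 */
static int foo_enable_vblank(struct drm_crtc *crtc)
{
	/* Estimate the vblanks missed while the counter was disabled,
	 * using the saved timestamps, and update the vblank count.
	 */
	drm_crtc_vblank_restore(crtc);

	/* Hypothetical hook: unmask the vblank interrupt in hardware. */
	foo_hw_unmask_vblank_irq(crtc);

	return 0;
}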
Reported-by: Daniel Vetter Cc: Rodrigo Vivi Cc: Daniel Vetter Signed-off-by: Dhinakaran Pandiyan Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180221073908.4500-1-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/drm_vblank.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index c781cb426bf1..51041eec0047 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -1238,12 +1238,15 @@ void drm_crtc_vblank_on(struct drm_crtc *crtc) EXPORT_SYMBOL(drm_crtc_vblank_on); /** - * drm_vblank_restore - estimated vblanks using timestamps and update it. + * drm_vblank_restore - estimate missed vblanks and update vblank count. + * @dev: DRM device + * @pipe: CRTC index * * Power manamement features can cause frame counter resets between vblank - * disable and enable. Drivers can then use this function in their - * &drm_crtc_funcs.enable_vblank implementation to estimate the vblanks since - * the last &drm_crtc_funcs.disable_vblank. + * disable and enable. Drivers can use this function in their + * &drm_crtc_funcs.enable_vblank implementation to estimate missed vblanks since + * the last &drm_crtc_funcs.disable_vblank using timestamps and update the + * vblank counter. * * This function is the legacy version of drm_crtc_vblank_restore(). */ @@ -1284,11 +1287,14 @@ void drm_vblank_restore(struct drm_device *dev, unsigned int pipe) EXPORT_SYMBOL(drm_vblank_restore); /** - * drm_crtc_vblank_restore - estimate vblanks using timestamps and update it. + * drm_crtc_vblank_restore - estimate missed vblanks and update vblank count. + * @crtc: CRTC in question + * * Power manamement features can cause frame counter resets between vblank - * disable and enable. Drivers can then use this function in their - * &drm_crtc_funcs.enable_vblank implementation to estimate the vblanks since - * the last &drm_crtc_funcs.disable_vblank. + * disable and enable. Drivers can use this function in their + * &drm_crtc_funcs.enable_vblank implementation to estimate missed vblanks since + * the last &drm_crtc_funcs.disable_vblank using timestamps and update the + * vblank counter. */ void drm_crtc_vblank_restore(struct drm_crtc *crtc) { -- cgit v1.2.3 From e61e0f51ba7974bb575cdc23220b573e5cd4ff2a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 21 Feb 2018 09:56:36 +0000 Subject: drm/i915: Rename drm_i915_gem_request to i915_request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to de-emphasize the link between the request (dependency, execution and fence tracking) from GEM and so rename the struct from drm_i915_gem_request to i915_request. That is we may implement the GEM user interface on top of requests, but they are an abstraction for tracking execution rather than an implementation detail of GEM. (Since they are not tied to HW, we keep the i915 prefix as opposed to intel.) In short, the spatch: @@ @@ - struct drm_i915_gem_request + struct i915_request A corollary to contracting the type name, we also harmonise on using 'rq' shorthand for local variables where space is of the essence and repetition makes 'request' unwieldy. For globals and struct members, 'request' is still much preferred for its clarity.
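As a usage note, a semantic patch like the one quoted above is applied with Coccinelle's spatch tool. A rough sketch of the invocation, where the rename.cocci file name is arbitrary and exact flags may vary between Coccinelle versions:

$ cat rename.cocci
@@
@@
- struct drm_i915_gem_request
+ struct i915_request

$ spatch --sp-file rename.cocci --in-place --dir drivers/gpu/drm/i915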
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Michał Winiarski Cc: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20180221095636.6649-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Reviewed-by: Michał Winiarski Acked-by: Joonas Lahtinen --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 16 +- drivers/gpu/drm/i915/gvt/scheduler.h | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_drv.c | 6 +- drivers/gpu/drm/i915/i915_drv.h | 26 +- drivers/gpu/drm/i915/i915_gem.c | 88 +- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 18 +- drivers/gpu/drm/i915/i915_gem_context.h | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 60 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +- drivers/gpu/drm/i915/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/i915_gem_render_state.h | 4 +- drivers/gpu/drm/i915/i915_gem_request.c | 1397 ------------------- drivers/gpu/drm/i915/i915_gem_request.h | 730 ---------- drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 +- drivers/gpu/drm/i915/i915_gem_timeline.h | 4 +- drivers/gpu/drm/i915/i915_gpu_error.c | 18 +- drivers/gpu/drm/i915/i915_irq.c | 8 +- drivers/gpu/drm/i915/i915_perf.c | 28 +- drivers/gpu/drm/i915/i915_request.c | 1411 ++++++++++++++++++++ drivers/gpu/drm/i915/i915_request.h | 738 ++++++++++ drivers/gpu/drm/i915/i915_trace.h | 128 +- drivers/gpu/drm/i915/i915_vma.c | 3 +- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 31 +- drivers/gpu/drm/i915/intel_display.c | 8 +- drivers/gpu/drm/i915/intel_drv.h | 3 +- drivers/gpu/drm/i915/intel_engine_cs.c | 26 +- drivers/gpu/drm/i915/intel_guc_submission.c | 27 +- drivers/gpu/drm/i915/intel_lrc.c | 115 +- drivers/gpu/drm/i915/intel_mocs.c | 28 +- drivers/gpu/drm/i915/intel_mocs.h | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 82 +- drivers/gpu/drm/i915/intel_pm.c | 4 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 196 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 78 +- drivers/gpu/drm/i915/selftests/huge_pages.c | 6 +- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 8 +- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 8 +- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 6 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 6 +- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 868 ------------ .../gpu/drm/i915/selftests/i915_live_selftests.h | 2 +- .../gpu/drm/i915/selftests/i915_mock_selftests.h | 2 +- drivers/gpu/drm/i915/selftests/i915_request.c | 865 ++++++++++++ drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 117 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 10 +- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 +- drivers/gpu/drm/i915/selftests/mock_request.c | 10 +- drivers/gpu/drm/i915/selftests/mock_request.h | 8 +- 55 files changed, 3633 insertions(+), 3639 deletions(-) delete mode 100644 drivers/gpu/drm/i915/i915_gem_request.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_request.h create mode 100644 drivers/gpu/drm/i915/i915_request.c create mode 100644 drivers/gpu/drm/i915/i915_request.h delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_request.c create mode 100644 drivers/gpu/drm/i915/selftests/i915_request.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 838f9b48246b..5b908c797294 100644 --- 
a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -63,13 +63,13 @@ i915-y += i915_cmd_parser.o \ i915_gem.o \ i915_gem_object.o \ i915_gem_render_state.o \ - i915_gem_request.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ + i915_request.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 0056638b0c16..a22a686f14c2 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -126,7 +126,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return 0; } -static inline bool is_gvt_request(struct drm_i915_gem_request *req) +static inline bool is_gvt_request(struct i915_request *req) { return i915_gem_context_force_single_submission(req->ctx); } @@ -148,7 +148,7 @@ static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { - struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data; + struct i915_request *req = data; struct intel_gvt *gvt = container_of(nb, struct intel_gvt, shadow_ctx_notifier_block[req->engine->id]); struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; @@ -333,13 +333,13 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; int ret; - rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); + rq = i915_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { gvt_vgpu_err("fail to allocate gem request\n"); ret = PTR_ERR(rq); @@ -348,7 +348,7 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq); - workload->req = i915_gem_request_get(rq); + workload->req = i915_request_get(rq); ret = copy_workload_to_ring_buffer(workload); if (ret) goto err_unpin; @@ -582,7 +582,7 @@ out: if (!IS_ERR_OR_NULL(workload->req)) { gvt_dbg_sched("ring id %d submit workload to i915 %p\n", ring_id, workload->req); - i915_add_request(workload->req); + i915_request_add(workload->req); workload->dispatched = true; } @@ -769,7 +769,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->status = 0; } - i915_gem_request_put(fetch_and_zero(&workload->req)); + i915_request_put(fetch_and_zero(&workload->req)); if (!workload->status && !(vgpu->resetting_eng & ENGINE_MASK(ring_id))) { @@ -886,7 +886,7 @@ static int workload_thread(void *priv) gvt_dbg_sched("ring id %d wait workload %p\n", workload->ring_id, workload); - i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_wait(workload->req, 0, MAX_SCHEDULE_TIMEOUT); complete: gvt_dbg_sched("will complete workload %p, status: %d\n", diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 3de77dfa7c59..899831b089d4 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -80,7 +80,7 @@ struct intel_shadow_wa_ctx { struct 
intel_vgpu_workload { struct intel_vgpu *vgpu; int ring_id; - struct drm_i915_gem_request *req; + struct i915_request *req; /* if this workload has been dispatched to i915? */ bool dispatched; bool shadowed; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 05b41045b8f9..bad2ed7050ba 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -519,7 +519,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) list_for_each_entry_reverse(file, &dev->filelist, lhead) { struct file_stats stats; struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_request *request; + struct i915_request *request; struct task_struct *task; mutex_lock(&dev->struct_mutex); @@ -536,7 +536,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) * Therefore, we need to protect this ->comm access using RCU. */ request = list_first_entry_or_null(&file_priv->mm.request_list, - struct drm_i915_gem_request, + struct i915_request, client_link); rcu_read_lock(); task = pid_task(request && request->ctx->pid ? @@ -4060,7 +4060,7 @@ i915_drop_caches_set(void *data, u64 val) I915_WAIT_LOCKED); if (val & DROP_RETIRE) - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); mutex_unlock(&dev->struct_mutex); } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d09f8e661fbd..aaa861b51024 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -808,7 +808,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) /* * The i915 workqueue is primarily used for batched retirement of * requests (and thus managing bo) once the task has been completed - * by the GPU. i915_gem_retire_requests() is called directly when we + * by the GPU. i915_retire_requests() is called directly when we * need high-priority retirement, such as waiting for an explicit * bo. * @@ -1992,7 +1992,7 @@ taint: add_taint(TAINT_WARN, LOCKDEP_STILL_OK); error: i915_gem_set_wedged(i915); - i915_gem_retire_requests(i915); + i915_retire_requests(i915); intel_gpu_reset(i915, ALL_ENGINES); goto finish; } @@ -2019,7 +2019,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) { struct i915_gpu_error *error = &engine->i915->gpu_error; - struct drm_i915_gem_request *active_request; + struct i915_request *active_request; int ret; GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4a279be84f66..9143d0d6be5a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -71,9 +71,9 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_request.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -1231,7 +1231,7 @@ struct i915_gpu_error { * * #I915_WEDGED - If reset fails and we can no longer use the GPU, * we set the #I915_WEDGED bit. Prior to command submission, e.g. - * i915_gem_request_alloc(), this bit is checked and the sequence + * i915_request_alloc(), this bit is checked and the sequence * aborted (with -EIO reported to userspace) if set. 
*/ unsigned long flags; @@ -3329,7 +3329,7 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj) int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req, + struct i915_request *rq, unsigned int flags); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, @@ -3344,11 +3344,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); -struct drm_i915_gem_request * +struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine); -void i915_gem_retire_requests(struct drm_i915_private *dev_priv); - static inline bool i915_reset_backoff(struct i915_gpu_error *error) { return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags)); @@ -3380,7 +3378,7 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, return READ_ONCE(error->reset_engine_count[engine->id]); } -struct drm_i915_gem_request * +struct i915_request * i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset(struct drm_i915_private *dev_priv); @@ -3389,7 +3387,7 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request); + struct i915_request *request); void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); @@ -4007,9 +4005,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } static inline bool -__i915_request_irq_complete(const struct drm_i915_gem_request *req) +__i915_request_irq_complete(const struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 seqno; /* Note that the engine may have wrapped around the seqno, and @@ -4018,7 +4016,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * this by kicking all the waiters before resetting the seqno * in hardware, and also signal the fence. */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) return true; /* The request was dequeued before we were awoken. We check after @@ -4027,14 +4025,14 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * the request execution are sufficient to ensure that a check * after reading the value from hw matches this request. */ - seqno = i915_gem_request_global_seqno(req); + seqno = i915_request_global_seqno(rq); if (!seqno) return false; /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. 
*/ - if (__i915_gem_request_completed(req, seqno)) + if (__i915_request_completed(rq, seqno)) return true; /* Ensure our read of the seqno is coherent so that we @@ -4083,7 +4081,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) wake_up_process(b->irq_wait->tsk); spin_unlock_irq(&b->irq_lock); - if (__i915_gem_request_completed(req, seqno)) + if (__i915_request_completed(rq, seqno)) return true; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 43afa1c1b14f..14c855b1a3a4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -353,7 +353,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence, long timeout, struct intel_rps_client *rps_client) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); @@ -366,7 +366,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence, timeout); rq = to_request(fence); - if (i915_gem_request_completed(rq)) + if (i915_request_completed(rq)) goto out; /* @@ -385,16 +385,16 @@ i915_gem_object_wait_fence(struct dma_fence *fence, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. */ - if (rps_client && !i915_gem_request_started(rq)) { + if (rps_client && !i915_request_started(rq)) { if (INTEL_GEN(rq->i915) >= 6) gen6_rps_boost(rq, rps_client); } - timeout = i915_wait_request(rq, flags, timeout); + timeout = i915_request_wait(rq, flags, timeout); out: - if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) - i915_gem_request_retire_upto(rq); + if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) + i915_request_retire_upto(rq); return timeout; } @@ -463,7 +463,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, static void __fence_set_priority(struct dma_fence *fence, int prio) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct intel_engine_cs *engine; if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) @@ -2856,10 +2856,10 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) atomic_inc(&ctx->active_count); } -struct drm_i915_gem_request * +struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request, *active = NULL; + struct i915_request *request, *active = NULL; unsigned long flags; /* We are called by the error capture and reset at a random @@ -2872,8 +2872,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) */ spin_lock_irqsave(&engine->timeline->lock, flags); list_for_each_entry(request, &engine->timeline->requests, link) { - if (__i915_gem_request_completed(request, - request->global_seqno)) + if (__i915_request_completed(request, request->global_seqno)) continue; GEM_BUG_ON(request->engine != engine); @@ -2906,10 +2905,10 @@ static bool engine_stalled(struct intel_engine_cs *engine) * Ensure irq handler finishes, and not run again. * Also return the active request so that we only search for it once. 
*/ -struct drm_i915_gem_request * +struct i915_request * i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request = NULL; + struct i915_request *request = NULL; /* * During the reset sequence, we must prevent the engine from @@ -2967,7 +2966,7 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - struct drm_i915_gem_request *request; + struct i915_request *request; enum intel_engine_id id; int err = 0; @@ -2986,7 +2985,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) return err; } -static void skip_request(struct drm_i915_gem_request *request) +static void skip_request(struct i915_request *request) { void *vaddr = request->ring->vaddr; u32 head; @@ -3005,7 +3004,7 @@ static void skip_request(struct drm_i915_gem_request *request) dma_fence_set_error(&request->fence, -EIO); } -static void engine_skip_context(struct drm_i915_gem_request *request) +static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_gem_context *hung_ctx = request->ctx; @@ -3029,9 +3028,9 @@ static void engine_skip_context(struct drm_i915_gem_request *request) } /* Returns the request if it was guilty of the hang */ -static struct drm_i915_gem_request * +static struct i915_request * i915_gem_reset_request(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { /* The guilty request will get skipped on a hung engine. * @@ -3085,7 +3084,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine, } void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { /* * Make sure this write is visible before we re-enable the interrupt @@ -3113,7 +3112,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); for_each_engine(engine, dev_priv, id) { struct i915_gem_context *ctx; @@ -3134,12 +3133,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) * empty request appears sufficient to paper over the glitch. 
*/ if (intel_engine_is_idle(engine)) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; - rq = i915_gem_request_alloc(engine, - dev_priv->kernel_context); + rq = i915_request_alloc(engine, + dev_priv->kernel_context); if (!IS_ERR(rq)) - __i915_add_request(rq, false); + __i915_request_add(rq, false); } } @@ -3174,21 +3173,21 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) } } -static void nop_submit_request(struct drm_i915_gem_request *request) +static void nop_submit_request(struct i915_request *request) { dma_fence_set_error(&request->fence, -EIO); - i915_gem_request_submit(request); + i915_request_submit(request); } -static void nop_complete_submit_request(struct drm_i915_gem_request *request) +static void nop_complete_submit_request(struct i915_request *request) { unsigned long flags; dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&request->engine->timeline->lock, flags); - __i915_gem_request_submit(request); + __i915_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } @@ -3281,7 +3280,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) */ list_for_each_entry(tl, &i915->gt.timelines, link) { for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; rq = i915_gem_active_peek(&tl->engine[i].last_request, &i915->drm.struct_mutex); @@ -3330,7 +3329,7 @@ i915_gem_retire_work_handler(struct work_struct *work) /* Come back later if the device is busy... */ if (mutex_trylock(&dev->struct_mutex)) { - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); mutex_unlock(&dev->struct_mutex); } @@ -3684,7 +3683,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (ret) return ret; } - i915_gem_retire_requests(i915); + i915_retire_requests(i915); ret = wait_for_engines(i915); } else { @@ -4224,7 +4223,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_file_private *file_priv = file->driver_priv; unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; - struct drm_i915_gem_request *request, *target = NULL; + struct i915_request *request, *target = NULL; long ret; /* ABI: return -EIO if already wedged */ @@ -4244,16 +4243,16 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) target = request; } if (target) - i915_gem_request_get(target); + i915_request_get(target); spin_unlock(&file_priv->mm.lock); if (target == NULL) return 0; - ret = i915_wait_request(target, + ret = i915_request_wait(target, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(target); + i915_request_put(target); return ret < 0 ? ret : 0; } @@ -4367,7 +4366,7 @@ static __always_inline unsigned int __busy_set_if_active(const struct dma_fence *fence, unsigned int (*flag)(unsigned int id)) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; /* We have to check the current hw status of the fence as the uABI * guarantees forward progress. 
We could rely on the idle worker @@ -4380,8 +4379,8 @@ __busy_set_if_active(const struct dma_fence *fence, return 0; /* opencode to_request() in order to avoid const warnings */ - rq = container_of(fence, struct drm_i915_gem_request, fence); - if (i915_gem_request_completed(rq)) + rq = container_of(fence, struct i915_request, fence); + if (i915_request_completed(rq)) return 0; return flag(rq->engine->uabi_id); @@ -4526,8 +4525,7 @@ out: } static void -frontbuffer_retire(struct i915_gem_active *active, - struct drm_i915_gem_request *request) +frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) { struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); @@ -5161,9 +5159,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) return PTR_ERR(ctx); for_each_engine(engine, i915, id) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_ctx; @@ -5173,7 +5171,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (engine->init_context) err = engine->init_context(rq); - __i915_add_request(rq, true); + __i915_request_add(rq, true); if (err) goto err_active; } @@ -5479,7 +5477,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (!dev_priv->luts) goto err_vmas; - dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, + dev_priv->requests = KMEM_CACHE(i915_request, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU); @@ -5612,7 +5610,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) void i915_gem_release(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_request *request; + struct i915_request *request; /* Clean up our request list when the client is going away, so that * later retire_requests won't dereference our soon-to-be-gone diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index c93005c2e0fb..d3cbe8432f48 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -119,7 +119,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, if (!reservation_object_test_signaled_rcu(resv, true)) break; - i915_gem_retire_requests(pool->engine->i915); + i915_retire_requests(pool->engine->i915); GEM_BUG_ON(i915_gem_object_is_active(obj)); /* diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3d75f484f6e5..a73340ae9419 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -219,7 +219,7 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) * Flush any pending retires to hopefully release some * stale contexts and try again. 
*/ - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); ret = ida_simple_get(&dev_priv->contexts.hw_ida, 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); if (ret < 0) @@ -590,28 +590,28 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); for_each_engine(engine, dev_priv, id) { - struct drm_i915_gem_request *req; + struct i915_request *rq; if (engine_has_idle_kernel_context(engine)) continue; - req = i915_gem_request_alloc(engine, dev_priv->kernel_context); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = i915_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { - struct drm_i915_gem_request *prev; + struct i915_request *prev; struct intel_timeline *tl; tl = &timeline->engine[engine->id]; prev = i915_gem_active_raw(&tl->last_request, &dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&req->submit, + i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, I915_FENCE_GFP); } @@ -623,7 +623,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) * but an extra layer of paranoia before we declare the system * idle (on suspend etc) is advisable! */ - __i915_add_request(req, true); + __i915_request_add(rq, true); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index a681c5b891ff..1829dafe54b4 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -276,7 +276,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); void i915_gem_context_close(struct drm_file *file); -int i915_switch_context(struct drm_i915_gem_request *req); +int i915_switch_context(struct i915_request *rq); int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); void i915_gem_context_release(struct kref *ctx_ref); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 60ca4f05ae94..54814a196ee4 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -168,7 +168,7 @@ i915_gem_evict_something(struct i915_address_space *vm, * retiring. */ if (!(flags & PIN_NONBLOCK)) - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); else phases[1] = NULL; @@ -293,7 +293,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * retiring. 
*/ if (!(flags & PIN_NONBLOCK)) - i915_gem_retire_requests(vm->i915); + i915_retire_requests(vm->i915); check_color = vm->mm.color_adjust; if (check_color) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4eb28e84fda4..8c170db8495d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -200,7 +200,7 @@ struct i915_execbuffer { struct i915_gem_context *ctx; /** context for building the request */ struct i915_address_space *vm; /** GTT and vma for the request */ - struct drm_i915_gem_request *request; /** our request to build */ + struct i915_request *request; /** our request to build */ struct i915_vma *batch; /** identity of the batch obj/vma */ /** actual size of execobj[] as we may extend it for the cmdparser */ @@ -227,7 +227,7 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; - struct drm_i915_gem_request *rq; + struct i915_request *rq; u32 *rq_cmd; unsigned int rq_size; } reloc_cache; @@ -886,7 +886,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache) i915_gem_object_unpin_map(cache->rq->batch->obj); i915_gem_chipset_flush(cache->rq->i915); - __i915_add_request(cache->rq, true); + __i915_request_add(cache->rq, true); cache->rq = NULL; } @@ -1070,7 +1070,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, { struct reloc_cache *cache = &eb->reloc_cache; struct drm_i915_gem_object *obj; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; @@ -1103,13 +1103,13 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_gem_request_alloc(eb->engine, eb->ctx); + rq = i915_request_alloc(eb->engine, eb->ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; } - err = i915_gem_request_await_object(rq, vma->obj, true); + err = i915_request_await_object(rq, vma->obj, true); if (err) goto err_request; @@ -1141,7 +1141,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, return 0; err_request: - i915_add_request(rq); + i915_request_add(rq); err_unpin: i915_vma_unpin(batch); err_unmap: @@ -1727,7 +1727,7 @@ slow: } static void eb_export_fence(struct i915_vma *vma, - struct drm_i915_gem_request *req, + struct i915_request *rq, unsigned int flags) { struct reservation_object *resv = vma->resv; @@ -1739,9 +1739,9 @@ static void eb_export_fence(struct i915_vma *vma, */ reservation_object_lock(resv, NULL); if (flags & EXEC_OBJECT_WRITE) - reservation_object_add_excl_fence(resv, &req->fence); + reservation_object_add_excl_fence(resv, &rq->fence); else if (reservation_object_reserve_shared(resv) == 0) - reservation_object_add_shared_fence(resv, &req->fence); + reservation_object_add_shared_fence(resv, &rq->fence); reservation_object_unlock(resv); } @@ -1757,7 +1757,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) struct drm_i915_gem_object *obj = vma->obj; if (flags & EXEC_OBJECT_CAPTURE) { - struct i915_gem_capture_list *capture; + struct i915_capture_list *capture; capture = kmalloc(sizeof(*capture), GFP_KERNEL); if (unlikely(!capture)) @@ -1788,7 +1788,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) if (flags & EXEC_OBJECT_ASYNC) continue; - err = i915_gem_request_await_object + err = i915_request_await_object (eb->request, obj, flags & EXEC_OBJECT_WRITE); if (err) return err; @@ -1840,13 +1840,13 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) } void i915_vma_move_to_active(struct i915_vma *vma, - 
struct drm_i915_gem_request *req, + struct i915_request *rq, unsigned int flags) { struct drm_i915_gem_object *obj = vma->obj; - const unsigned int idx = req->engine->id; + const unsigned int idx = rq->engine->id; - lockdep_assert_held(&req->i915->drm.struct_mutex); + lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); /* @@ -1860,7 +1860,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, if (!i915_vma_is_active(vma)) obj->active_count++; i915_vma_set_active(vma, idx); - i915_gem_active_set(&vma->last_read[idx], req); + i915_gem_active_set(&vma->last_read[idx], rq); list_move_tail(&vma->vm_link, &vma->vm->active_list); obj->write_domain = 0; @@ -1868,27 +1868,27 @@ void i915_vma_move_to_active(struct i915_vma *vma, obj->write_domain = I915_GEM_DOMAIN_RENDER; if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) - i915_gem_active_set(&obj->frontbuffer_write, req); + i915_gem_active_set(&obj->frontbuffer_write, rq); obj->read_domains = 0; } obj->read_domains |= I915_GEM_GPU_DOMAINS; if (flags & EXEC_OBJECT_NEEDS_FENCE) - i915_gem_active_set(&vma->last_fence, req); + i915_gem_active_set(&vma->last_fence, rq); } -static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) +static int i915_reset_gen7_sol_offsets(struct i915_request *rq) { u32 *cs; int i; - if (!IS_GEN7(req->i915) || req->engine->id != RCS) { + if (!IS_GEN7(rq->i915) || rq->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } - cs = intel_ring_begin(req, 4 * 2 + 2); + cs = intel_ring_begin(rq, 4 * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1898,7 +1898,7 @@ static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) *cs++ = 0; } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1944,10 +1944,10 @@ out: } static void -add_to_client(struct drm_i915_gem_request *req, struct drm_file *file) +add_to_client(struct i915_request *rq, struct drm_file *file) { - req->file_priv = file->driver_priv; - list_add_tail(&req->client_link, &req->file_priv->mm.request_list); + rq->file_priv = file->driver_priv; + list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); } static int eb_submit(struct i915_execbuffer *eb) @@ -2151,7 +2151,7 @@ await_fence_array(struct i915_execbuffer *eb, if (!fence) return -EINVAL; - err = i915_gem_request_await_dma_fence(eb->request, fence); + err = i915_request_await_dma_fence(eb->request, fence); dma_fence_put(fence); if (err < 0) return err; @@ -2365,14 +2365,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, GEM_BUG_ON(eb.reloc_cache.rq); /* Allocate a request for this batch buffer nice and early. 
*/ - eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); + eb.request = i915_request_alloc(eb.engine, eb.ctx); if (IS_ERR(eb.request)) { err = PTR_ERR(eb.request); goto err_batch_unpin; } if (in_fence) { - err = i915_gem_request_await_dma_fence(eb.request, in_fence); + err = i915_request_await_dma_fence(eb.request, in_fence); if (err < 0) goto err_request; } @@ -2400,10 +2400,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, */ eb.request->batch = eb.batch; - trace_i915_gem_request_queue(eb.request, eb.batch_flags); + trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); err_request: - __i915_add_request(eb.request, err == 0); + __i915_request_add(eb.request, err == 0); add_to_client(eb.request, file); if (fences) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cd5984246bc3..21d72f695adb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -765,16 +765,16 @@ static void gen8_initialize_pml4(struct i915_address_space *vm, } /* Broadwell Page Directory Pointer Descriptors */ -static int gen8_write_pdp(struct drm_i915_gem_request *req, +static int gen8_write_pdp(struct i915_request *rq, unsigned entry, dma_addr_t addr) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 *cs; BUG_ON(entry >= 4); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -784,20 +784,20 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry)); *cs++ = lower_32_bits(addr); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { int i, ret; for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - ret = gen8_write_pdp(req, i, pd_daddr); + ret = gen8_write_pdp(rq, i, pd_daddr); if (ret) return ret; } @@ -806,9 +806,9 @@ static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, } static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); + return gen8_write_pdp(rq, 0, px_dma(&ppgtt->pml4)); } /* PDE TLBs are a pain to invalidate on GEN8+. 
When we modify @@ -1732,13 +1732,13 @@ static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) } static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 *cs; /* NB: TLBs must be flushed and invalidated before a switch */ - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1748,19 +1748,19 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); *cs++ = get_pd_offset(ppgtt); *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 *cs; /* NB: TLBs must be flushed and invalidated before a switch */ - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1770,16 +1770,16 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); *cs++ = get_pd_offset(ppgtt); *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; - struct drm_i915_private *dev_priv = req->i915; + struct intel_engine_cs *engine = rq->engine; + struct drm_i915_private *dev_priv = rq->i915; I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index a42890d9af38..6efc017e8bb3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,7 +39,8 @@ #include <linux/pagevec.h> #include "i915_gem_timeline.h" -#include "i915_gem_request.h" + +#include "i915_request.h" #include "i915_selftest.h" #define I915_GTT_PAGE_SIZE_4K BIT(12) @@ -398,7 +399,7 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req); + struct i915_request *rq); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index ca2b3b62569d..54f00b350779 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -33,7 +33,7 @@ #include <drm/i915_drm.h> -#include "i915_gem_request.h" +#include "i915_request.h" #include "i915_selftest.h" struct drm_i915_gem_object; diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index f7fc0df251ac..1036e8686916 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -177,7 +177,7 @@ err: #undef OUT_BATCH -int i915_gem_render_state_emit(struct drm_i915_gem_request *rq) +int i915_gem_render_state_emit(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct intel_render_state so = {}; /* keep the compiler happy */ diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 86369520482e..112cda8fa1a8 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++
b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -24,8 +24,8 @@ #ifndef _I915_GEM_RENDER_STATE_H_ #define _I915_GEM_RENDER_STATE_H_ -struct drm_i915_gem_request; +struct i915_request; -int i915_gem_render_state_emit(struct drm_i915_gem_request *rq); +int i915_gem_render_state_emit(struct i915_request *rq); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c deleted file mode 100644 index 0deca06fdf0e..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ /dev/null @@ -1,1397 +0,0 @@ -/* - * Copyright © 2008-2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include <linux/prefetch.h> -#include <linux/dma-fence-array.h> -#include <linux/sched.h> -#include <linux/sched/clock.h> -#include <linux/sched/signal.h> - -#include "i915_drv.h" - -static const char *i915_fence_get_driver_name(struct dma_fence *fence) -{ - return "i915"; -} - -static const char *i915_fence_get_timeline_name(struct dma_fence *fence) -{ - /* The timeline struct (as part of the ppgtt underneath a context) - * may be freed when the request is no longer in use by the GPU. - * We could extend the life of a context to beyond that of all - * fences, possibly keeping the hw resource around indefinitely, - * or we just give them a false name. Since - * dma_fence_ops.get_timeline_name is a debug feature, the occasional - * lie seems justifiable. - */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return "signaled"; - - return to_request(fence)->timeline->common->name; -} - -static bool i915_fence_signaled(struct dma_fence *fence) -{ - return i915_gem_request_completed(to_request(fence)); -} - -static bool i915_fence_enable_signaling(struct dma_fence *fence) -{ - if (i915_fence_signaled(fence)) - return false; - - intel_engine_enable_signaling(to_request(fence), true); - return !i915_fence_signaled(fence); -} - -static signed long i915_fence_wait(struct dma_fence *fence, - bool interruptible, - signed long timeout) -{ - return i915_wait_request(to_request(fence), interruptible, timeout); -} - -static void i915_fence_release(struct dma_fence *fence) -{ - struct drm_i915_gem_request *req = to_request(fence); - - /* The request is put onto a RCU freelist (i.e. the address - * is immediately reused), mark the fences as being freed now. - * Otherwise the debugobjects for the fences are only marked as - * freed when the slab cache itself is freed, and so we would get - * caught trying to reuse dead objects.
- */ - i915_sw_fence_fini(&req->submit); - - kmem_cache_free(req->i915->requests, req); -} - -const struct dma_fence_ops i915_fence_ops = { - .get_driver_name = i915_fence_get_driver_name, - .get_timeline_name = i915_fence_get_timeline_name, - .enable_signaling = i915_fence_enable_signaling, - .signaled = i915_fence_signaled, - .wait = i915_fence_wait, - .release = i915_fence_release, -}; - -static inline void -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) -{ - struct drm_i915_file_private *file_priv; - - file_priv = request->file_priv; - if (!file_priv) - return; - - spin_lock(&file_priv->mm.lock); - if (request->file_priv) { - list_del(&request->client_link); - request->file_priv = NULL; - } - spin_unlock(&file_priv->mm.lock); -} - -static struct i915_dependency * -i915_dependency_alloc(struct drm_i915_private *i915) -{ - return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); -} - -static void -i915_dependency_free(struct drm_i915_private *i915, - struct i915_dependency *dep) -{ - kmem_cache_free(i915->dependencies, dep); -} - -static void -__i915_priotree_add_dependency(struct i915_priotree *pt, - struct i915_priotree *signal, - struct i915_dependency *dep, - unsigned long flags) -{ - INIT_LIST_HEAD(&dep->dfs_link); - list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &pt->signalers_list); - dep->signaler = signal; - dep->flags = flags; -} - -static int -i915_priotree_add_dependency(struct drm_i915_private *i915, - struct i915_priotree *pt, - struct i915_priotree *signal) -{ - struct i915_dependency *dep; - - dep = i915_dependency_alloc(i915); - if (!dep) - return -ENOMEM; - - __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC); - return 0; -} - -static void -i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) -{ - struct i915_dependency *dep, *next; - - GEM_BUG_ON(!list_empty(&pt->link)); - - /* - * Everyone we depended upon (the fences we wait to be signaled) - * should retire before us and remove themselves from our list. - * However, retirement is run independently on each timeline and - * so we may be called out-of-order. 
- */ - list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { - GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del(&dep->wait_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(i915, dep); - } - - /* Remove ourselves from everyone who depends upon us */ - list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { - GEM_BUG_ON(dep->signaler != pt); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del(&dep->signal_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(i915, dep); - } -} - -static void -i915_priotree_init(struct i915_priotree *pt) -{ - INIT_LIST_HEAD(&pt->signalers_list); - INIT_LIST_HEAD(&pt->waiters_list); - INIT_LIST_HEAD(&pt->link); - pt->priority = I915_PRIORITY_INVALID; -} - -static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret; - - /* Carefully retire all requests without writing to the rings */ - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret) - return ret; - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - for_each_engine(engine, i915, id) { - struct i915_gem_timeline *timeline; - struct intel_timeline *tl = engine->timeline; - - if (!i915_seqno_passed(seqno, tl->seqno)) { - /* spin until threads are complete */ - while (intel_breadcrumbs_busy(engine)) - cond_resched(); - } - - /* Check we are idle before we fiddle with hw state! */ - GEM_BUG_ON(!intel_engine_is_idle(engine)); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - - /* Finally reset hw state */ - intel_engine_init_global_seqno(engine, seqno); - tl->seqno = seqno; - - list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->engine[id].global_sync, 0, - sizeof(timeline->engine[id].global_sync)); - } - - return 0; -} - -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - return reset_all_global_seqno(dev_priv, seqno - 1); -} - -static void mark_busy(struct drm_i915_private *i915) -{ - if (i915->gt.awake) - return; - - GEM_BUG_ON(!i915->gt.active_requests); - - intel_runtime_pm_get_noresume(i915); - - /* - * It seems that the DMC likes to transition between the DC states a lot - * when there are no connected displays (no active power domains) during - * command submission. - * - * This activity has negative impact on the performance of the chip with - * huge latencies observed in the interrupt handler and elsewhere. - * - * Work around it by grabbing a GT IRQ power domain whilst there is any - * GT activity, preventing any DC state transitions. 
- */ - intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - - i915->gt.awake = true; - if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ - i915->gt.epoch = 1; - - intel_enable_gt_powersave(i915); - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - i915_pmu_gt_unparked(i915); - - intel_engines_unpark(i915); - - i915_queue_hangcheck(i915); - - queue_delayed_work(i915->wq, - &i915->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -static int reserve_engine(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - u32 active = ++engine->timeline->inflight_seqnos; - u32 seqno = engine->timeline->seqno; - int ret; - - /* Reservation is fine until we need to wrap around */ - if (unlikely(add_overflows(seqno, active))) { - ret = reset_all_global_seqno(i915, 0); - if (ret) { - engine->timeline->inflight_seqnos--; - return ret; - } - } - - if (!i915->gt.active_requests++) - mark_busy(i915); - - return 0; -} - -static void unreserve_engine(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - - if (!--i915->gt.active_requests) { - /* Cancel the mark_busy() from our reserve_engine() */ - GEM_BUG_ON(!i915->gt.awake); - mod_delayed_work(i915->wq, - &i915->gt.idle_work, - msecs_to_jiffies(100)); - } - - GEM_BUG_ON(!engine->timeline->inflight_seqnos); - engine->timeline->inflight_seqnos--; -} - -void i915_gem_retire_noop(struct i915_gem_active *active, - struct drm_i915_gem_request *request) -{ - /* Space left intentionally blank */ -} - -static void advance_ring(struct drm_i915_gem_request *request) -{ - unsigned int tail; - - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - if (list_is_last(&request->ring_link, &request->ring->request_list)) { - /* We may race here with execlists resubmitting this request - * as we retire it. The resubmission will move the ring->tail - * forwards (to request->wa_tail). We either read the - * current value that was written to hw, or the value that - * is just about to be. Either works, if we miss the last two - * noops - they are safe to be replayed on a reset. - */ - tail = READ_ONCE(request->ring->tail); - } else { - tail = request->postfix; - } - list_del(&request->ring_link); - - request->ring->head = tail; -} - -static void free_capture_list(struct drm_i915_gem_request *request) -{ - struct i915_gem_capture_list *capture; - - capture = request->capture_list; - while (capture) { - struct i915_gem_capture_list *next = capture->next; - - kfree(capture); - capture = next; - } -} - -static void i915_gem_request_retire(struct drm_i915_gem_request *request) -{ - struct intel_engine_cs *engine = request->engine; - struct i915_gem_active *active, *next; - - lockdep_assert_held(&request->i915->drm.struct_mutex); - GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); - GEM_BUG_ON(!i915_gem_request_completed(request)); - GEM_BUG_ON(!request->i915->gt.active_requests); - - trace_i915_gem_request_retire(request); - - spin_lock_irq(&engine->timeline->lock); - list_del_init(&request->link); - spin_unlock_irq(&engine->timeline->lock); - - unreserve_engine(request->engine); - advance_ring(request); - - free_capture_list(request); - - /* Walk through the active list, calling retire on each. 
This allows - * objects to track their GPU activity and mark themselves as idle - * when their *last* active request is completed (updating state - * tracking lists for eviction, active references for GEM, etc). - * - * As the ->retire() may free the node, we decouple it first and - * pass along the auxiliary information (to avoid dereferencing - * the node after the callback). - */ - list_for_each_entry_safe(active, next, &request->active_list, link) { - /* In microbenchmarks or focusing upon time inside the kernel, - * we may spend an inordinate amount of time simply handling - * the retirement of requests and processing their callbacks. - * Of which, this loop itself is particularly hot due to the - * cache misses when jumping around the list of i915_gem_active. - * So we try to keep this loop as streamlined as possible and - * also prefetch the next i915_gem_active to try and hide - * the likely cache miss. - */ - prefetchw(next); - - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, request); - } - - i915_gem_request_remove_from_client(request); - - /* Retirement decays the ban score as it is a sign of ctx progress */ - atomic_dec_if_positive(&request->ctx->ban_score); - - /* The backing object for the context is done after switching to the - * *next* context. Therefore we cannot retire the previous context until - * the next context has already started running. However, since we - * cannot take the required locks at i915_gem_request_submit() we - * defer the unpinning of the active context to now, retirement of - * the subsequent request. - */ - if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); - engine->last_retired_context = request->ctx; - - spin_lock_irq(&request->lock); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) - dma_fence_signal_locked(&request->fence); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); - if (request->waitboost) { - GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); - atomic_dec(&request->i915->gt_pm.rps.num_waiters); - } - spin_unlock_irq(&request->lock); - - i915_priotree_fini(request->i915, &request->priotree); - i915_gem_request_put(request); -} - -void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - struct drm_i915_gem_request *tmp; - - lockdep_assert_held(&req->i915->drm.struct_mutex); - GEM_BUG_ON(!i915_gem_request_completed(req)); - - if (list_empty(&req->link)) - return; - - do { - tmp = list_first_entry(&engine->timeline->requests, - typeof(*tmp), link); - - i915_gem_request_retire(tmp); - } while (tmp != req); -} - -static u32 timeline_get_seqno(struct intel_timeline *tl) -{ - return ++tl->seqno; -} - -void __i915_gem_request_submit(struct drm_i915_gem_request *request) -{ - struct intel_engine_cs *engine = request->engine; - struct intel_timeline *timeline; - u32 seqno; - - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); - - /* Transfer from per-context onto the global per-engine timeline */ - timeline = engine->timeline; - GEM_BUG_ON(timeline == request->timeline); - GEM_BUG_ON(request->global_seqno); - - seqno = timeline_get_seqno(timeline); - GEM_BUG_ON(!seqno); - GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); - - /* We may be recursing from the signal callback of another i915 fence */ - spin_lock_nested(&request->lock, 
SINGLE_DEPTH_NESTING);
-	request->global_seqno = seqno;
-	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-		intel_engine_enable_signaling(request, false);
-	spin_unlock(&request->lock);
-
-	engine->emit_breadcrumb(request,
-				request->ring->vaddr + request->postfix);
-
-	spin_lock(&request->timeline->lock);
-	list_move_tail(&request->link, &timeline->requests);
-	spin_unlock(&request->timeline->lock);
-
-	trace_i915_gem_request_execute(request);
-
-	wake_up_all(&request->execute);
-}
-
-void i915_gem_request_submit(struct drm_i915_gem_request *request)
-{
-	struct intel_engine_cs *engine = request->engine;
-	unsigned long flags;
-
-	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline->lock, flags);
-
-	__i915_gem_request_submit(request);
-
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
-}
-
-void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
-{
-	struct intel_engine_cs *engine = request->engine;
-	struct intel_timeline *timeline;
-
-	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline->lock);
-
-	/* Only unwind in reverse order, required so that the per-context list
-	 * is kept in seqno/ring order.
-	 */
-	GEM_BUG_ON(!request->global_seqno);
-	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
-	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine),
-				     request->global_seqno));
-	engine->timeline->seqno--;
-
-	/* We may be recursing from the signal callback of another i915 fence */
-	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
-	request->global_seqno = 0;
-	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-		intel_engine_cancel_signaling(request);
-	spin_unlock(&request->lock);
-
-	/* Transfer back from the global per-engine timeline to per-context */
-	timeline = request->timeline;
-	GEM_BUG_ON(timeline == engine->timeline);
-
-	spin_lock(&timeline->lock);
-	list_move(&request->link, &timeline->requests);
-	spin_unlock(&timeline->lock);
-
-	/* We don't need to wake_up any waiters on request->execute, they
-	 * will get woken by any other event or us re-adding this request
-	 * to the engine timeline (__i915_gem_request_submit()). The waiters
-	 * should be quite adept at finding that the request now has a new
-	 * global_seqno compared to the one they went to sleep on.
-	 */
-}
-
-void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
-{
-	struct intel_engine_cs *engine = request->engine;
-	unsigned long flags;
-
-	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline->lock, flags);
-
-	__i915_gem_request_unsubmit(request);
-
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
-}
-
-static int __i915_sw_fence_call
-submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
-{
-	struct drm_i915_gem_request *request =
-		container_of(fence, typeof(*request), submit);
-
-	switch (state) {
-	case FENCE_COMPLETE:
-		trace_i915_gem_request_submit(request);
-		/*
-		 * We need to serialize use of the submit_request() callback with its
-		 * hotplugging performed during an emergency i915_gem_set_wedged().
-		 * We use the RCU mechanism to mark the critical section in order to
-		 * force i915_gem_set_wedged() to wait until the submit_request() is
-		 * completed before proceeding.
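-		 *
-		 * (A sketch of the assumed pairing: i915_gem_set_wedged() swaps
-		 * in a nop submit_request callback and then waits inside
-		 * synchronize_rcu(), so it cannot proceed until this read-side
-		 * critical section has completed.)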
- */
-		rcu_read_lock();
-		request->engine->submit_request(request);
-		rcu_read_unlock();
-		break;
-
-	case FENCE_FREE:
-		i915_gem_request_put(request);
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-/**
- * i915_gem_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
-struct drm_i915_gem_request *
-i915_gem_request_alloc(struct intel_engine_cs *engine,
-		       struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct drm_i915_gem_request *req;
-	struct intel_ring *ring;
-	int ret;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	/*
-	 * Preempt contexts are reserved for exclusive use to inject a
-	 * preemption context switch. They are never to be used for any trivial
-	 * request!
-	 */
-	GEM_BUG_ON(ctx == dev_priv->preempt_context);
-
-	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged.
-	 */
-	if (i915_terminally_wedged(&dev_priv->gpu_error))
-		return ERR_PTR(-EIO);
-
-	/* Pinning the contexts may generate requests in order to acquire
-	 * GGTT space, so do this first before we reserve a seqno for
-	 * ourselves.
-	 */
-	ring = engine->context_pin(engine, ctx);
-	if (IS_ERR(ring))
-		return ERR_CAST(ring);
-	GEM_BUG_ON(!ring);
-
-	ret = reserve_engine(engine);
-	if (ret)
-		goto err_unpin;
-
-	ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST);
-	if (ret)
-		goto err_unreserve;
-
-	/* Move the oldest request to the slab-cache (if not in use!) */
-	req = list_first_entry_or_null(&engine->timeline->requests,
-				       typeof(*req), link);
-	if (req && i915_gem_request_completed(req))
-		i915_gem_request_retire(req);
-
-	/* Beware: Dragons be flying overhead.
-	 *
-	 * We use RCU to look up requests in flight. The lookups may
-	 * race with the request being allocated from the slab freelist.
-	 * That is, the request we are writing to here may be in the process
-	 * of being read by __i915_gem_active_get_rcu(). As such,
-	 * we have to be very careful when overwriting the contents. During
-	 * the RCU lookup, we chase the request->engine pointer,
-	 * read the request->global_seqno and increment the reference count.
-	 *
-	 * The reference count is incremented atomically. If it is zero,
-	 * the lookup knows the request is unallocated and complete. Otherwise,
-	 * it is either still in use, or has been reallocated and reset
-	 * with dma_fence_init(). This increment is safe for release as we
-	 * check that the request we have a reference to matches the active
-	 * request.
-	 *
-	 * Before we increment the refcount, we chase the request->engine
-	 * pointer. We must not call kmem_cache_zalloc() or else we set
-	 * that pointer to NULL and cause a crash during the lookup. If
-	 * we see the request is completed (based on the value of the
-	 * old engine and seqno), the lookup is complete and reports NULL.
-	 * If we decide the request is not completed (new engine or seqno),
-	 * then we grab a reference and double check that it is still the
-	 * active request - which it won't be, and restart the lookup.
-	 *
-	 * Do not use kmem_cache_zalloc() here!
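-	 *
-	 * (The requests slab is assumed to be created with
-	 * SLAB_TYPESAFE_BY_RCU: the memory stays valid for RCU readers
-	 * across free/reallocation, but is never zeroed for us - hence
-	 * the explicit field initialisation further below.)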
- */ - req = kmem_cache_alloc(dev_priv->requests, - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(!req)) { - /* Ratelimit ourselves to prevent oom from malicious clients */ - ret = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_LOCKED | - I915_WAIT_INTERRUPTIBLE); - if (ret) - goto err_unreserve; - - /* - * We've forced the client to stall and catch up with whatever - * backlog there might have been. As we are assuming that we - * caused the mempressure, now is an opportune time to - * recover as much memory from the request pool as is possible. - * Having already penalized the client to stall, we spend - * a little extra time to re-optimise page allocation. - */ - kmem_cache_shrink(dev_priv->requests); - rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */ - - req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); - if (!req) { - ret = -ENOMEM; - goto err_unreserve; - } - } - - req->timeline = i915_gem_context_lookup_timeline(ctx, engine); - GEM_BUG_ON(req->timeline == engine->timeline); - - spin_lock_init(&req->lock); - dma_fence_init(&req->fence, - &i915_fence_ops, - &req->lock, - req->timeline->fence_context, - timeline_get_seqno(req->timeline)); - - /* We bump the ref for the fence chain */ - i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); - init_waitqueue_head(&req->execute); - - i915_priotree_init(&req->priotree); - - INIT_LIST_HEAD(&req->active_list); - req->i915 = dev_priv; - req->engine = engine; - req->ctx = ctx; - req->ring = ring; - - /* No zalloc, must clear what we need by hand */ - req->global_seqno = 0; - req->signaling.wait.seqno = 0; - req->file_priv = NULL; - req->batch = NULL; - req->capture_list = NULL; - req->waitboost = false; - - /* - * Reserve space in the ring buffer for all the commands required to - * eventually emit this request. This is to guarantee that the - * i915_add_request() call can't fail. Note that the reserve may need - * to be redone if the request is not actually submitted straight - * away, e.g. because a GPU scheduler has deferred it. - */ - req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; - GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz); - - /* - * Record the position of the start of the request so that - * should we detect the updated seqno part-way through the - * GPU processing the request, we never over-estimate the - * position of the head. - */ - req->head = req->ring->emit; - - /* Unconditionally invalidate GPU caches and TLBs. 
*/ - ret = engine->emit_flush(req, EMIT_INVALIDATE); - if (ret) - goto err_unwind; - - ret = engine->request_alloc(req); - if (ret) - goto err_unwind; - - /* Check that we didn't interrupt ourselves with a new request */ - GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); - return req; - -err_unwind: - req->ring->emit = req->head; - - /* Make sure we didn't add ourselves to external state before freeing */ - GEM_BUG_ON(!list_empty(&req->active_list)); - GEM_BUG_ON(!list_empty(&req->priotree.signalers_list)); - GEM_BUG_ON(!list_empty(&req->priotree.waiters_list)); - - kmem_cache_free(dev_priv->requests, req); -err_unreserve: - unreserve_engine(engine); -err_unpin: - engine->context_unpin(engine, ctx); - return ERR_PTR(ret); -} - -static int -i915_gem_request_await_request(struct drm_i915_gem_request *to, - struct drm_i915_gem_request *from) -{ - int ret; - - GEM_BUG_ON(to == from); - GEM_BUG_ON(to->timeline == from->timeline); - - if (i915_gem_request_completed(from)) - return 0; - - if (to->engine->schedule) { - ret = i915_priotree_add_dependency(to->i915, - &to->priotree, - &from->priotree); - if (ret < 0) - return ret; - } - - if (to->engine == from->engine) { - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, - &from->submit, - I915_FENCE_GFP); - return ret < 0 ? ret : 0; - } - - if (to->engine->semaphore.sync_to) { - u32 seqno; - - GEM_BUG_ON(!from->engine->semaphore.signal); - - seqno = i915_gem_request_global_seqno(from); - if (!seqno) - goto await_dma_fence; - - if (seqno <= to->timeline->global_sync[from->engine->id]) - return 0; - - trace_i915_gem_ring_sync_to(to, from); - ret = to->engine->semaphore.sync_to(to, from); - if (ret) - return ret; - - to->timeline->global_sync[from->engine->id] = seqno; - return 0; - } - -await_dma_fence: - ret = i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - I915_FENCE_GFP); - return ret < 0 ? ret : 0; -} - -int -i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, - struct dma_fence *fence) -{ - struct dma_fence **child = &fence; - unsigned int nchild = 1; - int ret; - - /* Note that if the fence-array was created in signal-on-any mode, - * we should *not* decompose it into its individual fences. However, - * we don't currently store which mode the fence-array is operating - * in. Fortunately, the only user of signal-on-any is private to - * amdgpu and we should not see any incoming fence-array from - * sync-file being in signal-on-any mode. - */ - if (dma_fence_is_array(fence)) { - struct dma_fence_array *array = to_dma_fence_array(fence); - - child = array->fences; - nchild = array->num_fences; - GEM_BUG_ON(!nchild); - } - - do { - fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - continue; - - /* - * Requests on the same timeline are explicitly ordered, along - * with their dependencies, by i915_add_request() which ensures - * that requests are submitted in-order through each ring. 
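-		 *
-		 * (Fences on a single timeline share one dma_fence context
-		 * id, which is what the fence->context comparison below
-		 * detects.)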
- */
-		if (fence->context == req->fence.context)
-			continue;
-
-		/* Squash repeated waits to the same timelines */
-		if (fence->context != req->i915->mm.unordered_timeline &&
-		    intel_timeline_sync_is_later(req->timeline, fence))
-			continue;
-
-		if (dma_fence_is_i915(fence))
-			ret = i915_gem_request_await_request(req,
-							     to_request(fence));
-		else
-			ret = i915_sw_fence_await_dma_fence(&req->submit, fence,
-							    I915_FENCE_TIMEOUT,
-							    I915_FENCE_GFP);
-		if (ret < 0)
-			return ret;
-
-		/* Record the latest fence used against each timeline */
-		if (fence->context != req->i915->mm.unordered_timeline)
-			intel_timeline_sync_set(req->timeline, fence);
-	} while (--nchild);
-
-	return 0;
-}
-
-/**
- * i915_gem_request_await_object - set this request to (async) wait upon a bo
- * @to: request we are wishing to use
- * @obj: object which may be in use on another ring.
- * @write: whether the wait is on behalf of a writer
- *
- * This code is meant to abstract object synchronization with the GPU.
- * Conceptually we serialise writes between engines inside the GPU.
- * We only allow one engine to write into a buffer at any time, but
- * multiple readers. To ensure each has a coherent view of memory, we must:
- *
- * - If there is an outstanding write request to the object, the new
- *   request must wait for it to complete (either CPU or in hw, requests
- *   on the same ring will be naturally ordered).
- *
- * - If we are a write request (pending_write_domain is set), the new
- *   request must wait for outstanding read requests to complete.
- *
- * Returns 0 if successful, else propagates up the lower layer error.
- */
-int
-i915_gem_request_await_object(struct drm_i915_gem_request *to,
-			      struct drm_i915_gem_object *obj,
-			      bool write)
-{
-	struct dma_fence *excl;
-	int ret = 0;
-
-	if (write) {
-		struct dma_fence **shared;
-		unsigned int count, i;
-
-		ret = reservation_object_get_fences_rcu(obj->resv,
-							&excl, &count, &shared);
-		if (ret)
-			return ret;
-
-		for (i = 0; i < count; i++) {
-			ret = i915_gem_request_await_dma_fence(to, shared[i]);
-			if (ret)
-				break;
-
-			dma_fence_put(shared[i]);
-		}
-
-		for (; i < count; i++)
-			dma_fence_put(shared[i]);
-		kfree(shared);
-	} else {
-		excl = reservation_object_get_excl_rcu(obj->resv);
-	}
-
-	if (excl) {
-		if (ret == 0)
-			ret = i915_gem_request_await_dma_fence(to, excl);
-
-		dma_fence_put(excl);
-	}
-
-	return ret;
-}
-
-/*
- * NB: This function is not allowed to fail. Doing so would mean that the
- * request is not being tracked for completion but the work itself is
- * going to happen on the hardware. This would be a Bad Thing(tm).
- */
-void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
-{
-	struct intel_engine_cs *engine = request->engine;
-	struct intel_ring *ring = request->ring;
-	struct intel_timeline *timeline = request->timeline;
-	struct drm_i915_gem_request *prev;
-	u32 *cs;
-	int err;
-
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
-	trace_i915_gem_request_add(request);
-
-	/*
-	 * Make sure that no request gazumped us - if it was allocated after
-	 * our i915_gem_request_alloc() and called __i915_add_request() before
-	 * us, the timeline will hold its seqno which is later than ours.
-	 */
-	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
-
-	/*
-	 * To ensure that this call will not fail, space for its emissions
-	 * should already have been reserved in the ring buffer. Let the ring
-	 * know that it is time to use that space up.
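-	 *
-	 * (Clearing reserved_space below releases the bytes held back at
-	 * request allocation so that intel_ring_begin() may now hand them
-	 * out for the breadcrumb emission.)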
- */
-	request->reserved_space = 0;
-
-	/*
-	 * Emit any outstanding flushes - execbuf can fail to emit the flush
-	 * after having emitted the batchbuffer command. Hence we need to fix
-	 * things up similar to emitting the lazy request. The difference here
-	 * is that the flush _must_ happen before the next request, no matter
-	 * what.
-	 */
-	if (flush_caches) {
-		err = engine->emit_flush(request, EMIT_FLUSH);
-
-		/* Not allowed to fail! */
-		WARN(err, "engine->emit_flush() failed: %d!\n", err);
-	}
-
-	/*
-	 * Record the position of the start of the breadcrumb so that
-	 * should we detect the updated seqno part-way through the
-	 * GPU processing the request, we never over-estimate the
-	 * position of the ring's HEAD.
-	 */
-	cs = intel_ring_begin(request, engine->emit_breadcrumb_sz);
-	GEM_BUG_ON(IS_ERR(cs));
-	request->postfix = intel_ring_offset(request, cs);
-
-	/*
-	 * Seal the request and mark it as pending execution. Note that
-	 * we may inspect this state, without holding any locks, during
-	 * hangcheck. Hence we apply the barrier to ensure that we do not
-	 * see a more recent value in the hws than we are tracking.
-	 */
-
-	prev = i915_gem_active_raw(&timeline->last_request,
-				   &request->i915->drm.struct_mutex);
-	if (prev && !i915_gem_request_completed(prev)) {
-		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
-					     &request->submitq);
-		if (engine->schedule)
-			__i915_priotree_add_dependency(&request->priotree,
-						       &prev->priotree,
-						       &request->dep,
-						       0);
-	}
-
-	spin_lock_irq(&timeline->lock);
-	list_add_tail(&request->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
-
-	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
-	i915_gem_active_set(&timeline->last_request, request);
-
-	list_add_tail(&request->ring_link, &ring->request_list);
-	request->emitted_jiffies = jiffies;
-
-	/*
-	 * Let the backend know a new request has arrived that may need
-	 * to adjust the existing execution schedule due to a high priority
-	 * request - i.e. we may want to preempt the current request in order
-	 * to run a high priority dependency chain *before* we can execute this
-	 * request.
-	 *
-	 * This is called before the request is ready to run so that we can
-	 * decide whether to preempt the entire chain so that it is ready to
-	 * run at the earliest possible convenience.
-	 */
-	if (engine->schedule)
-		engine->schedule(request, request->ctx->priority);
-
-	local_bh_disable();
-	i915_sw_fence_commit(&request->submit);
-	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
-
-	/*
-	 * In typical scenarios, we do not expect the previous request on
-	 * the timeline to be still tracked by timeline->last_request if it
-	 * has been completed. If the completed request is still here, that
-	 * implies that request retirement is a long way behind submission,
-	 * suggesting that we haven't been retiring frequently enough from
-	 * the combination of retire-before-alloc, waiters and the background
-	 * retirement worker. So if the last request on this timeline was
-	 * already completed, do a catch up pass, flushing the retirement queue
-	 * up to this client. Since we have now moved the heaviest operations
-	 * during retirement onto secondary workers, such as freeing objects
-	 * or contexts, retiring a bunch of requests is mostly list management
-	 * (and cache misses), and so we should not be overly penalizing this
-	 * client by performing excess work, though we may still be performing
-	 * work on behalf of others -- but instead we should benefit from
-	 * improved resource management. (Well, that's the theory at least.)
-	 */
-	if (prev && i915_gem_request_completed(prev))
-		i915_gem_request_retire_upto(prev);
-}
-
-static unsigned long local_clock_us(unsigned int *cpu)
-{
-	unsigned long t;
-
-	/* Cheaply and approximately convert from nanoseconds to microseconds.
-	 * The result and subsequent calculations are also defined in the same
-	 * approximate microseconds units. The principal source of timing
-	 * error here is from the simple truncation.
-	 *
-	 * Note that local_clock() is only defined wrt the current CPU;
-	 * the comparisons are no longer valid if we switch CPUs. Instead of
-	 * blocking preemption for the entire busywait, we can detect the CPU
-	 * switch and use that as indicator of system load and a reason to
-	 * stop busywaiting, see busywait_stop().
-	 */
-	*cpu = get_cpu();
-	t = local_clock() >> 10;
-	put_cpu();
-
-	return t;
-}
-
-static bool busywait_stop(unsigned long timeout, unsigned int cpu)
-{
-	unsigned int this_cpu;
-
-	if (time_after(local_clock_us(&this_cpu), timeout))
-		return true;
-
-	return this_cpu != cpu;
-}
-
-static bool __i915_spin_request(const struct drm_i915_gem_request *req,
-				u32 seqno, int state, unsigned long timeout_us)
-{
-	struct intel_engine_cs *engine = req->engine;
-	unsigned int irq, cpu;
-
-	GEM_BUG_ON(!seqno);
-
-	/*
-	 * Only wait for the request if we know it is likely to complete.
-	 *
-	 * We don't track the timestamps around requests, nor the average
-	 * request length, so we do not have a good indicator that this
-	 * request will complete within the timeout. What we do know is the
-	 * order in which requests are executed by the engine and so we can
-	 * tell if the request has started. If the request hasn't started yet,
-	 * it is a fair assumption that it will not complete within our
-	 * relatively short timeout.
-	 */
-	if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1))
-		return false;
-
-	/* When waiting for high frequency requests, e.g. during synchronous
-	 * rendering split between the CPU and GPU, the finite amount of time
-	 * required to set up the irq and wait upon it limits the response
-	 * rate. By busywaiting on the request completion for a short while we
-	 * can service the high frequency waits as quickly as possible. However,
-	 * if it is a slow request, we want to sleep as quickly as possible.
-	 * The tradeoff between waiting and sleeping is roughly the time it
-	 * takes to sleep on a request, on the order of a microsecond.
-	 */
-
-	irq = atomic_read(&engine->irq_count);
-	timeout_us += local_clock_us(&cpu);
-	do {
-		if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
-			return seqno == i915_gem_request_global_seqno(req);
-
-		/* Seqnos are meant to be ordered *before* the interrupt. If
-		 * we see an interrupt without a corresponding seqno advance,
-		 * assume we won't see one in the near future but require
-		 * the engine->seqno_barrier() to fixup coherency.
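-		 *
-		 * (engine->irq_count is incremented from the user interrupt
-		 * handler, so the comparison below detecting a change means
-		 * at least one breadcrumb interrupt has fired while we were
-		 * spinning.)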
- */
-		if (atomic_read(&engine->irq_count) != irq)
-			break;
-
-		if (signal_pending_state(state, current))
-			break;
-
-		if (busywait_stop(timeout_us, cpu))
-			break;
-
-		cpu_relax();
-	} while (!need_resched());
-
-	return false;
-}
-
-static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request)
-{
-	if (likely(!i915_reset_handoff(&request->i915->gpu_error)))
-		return false;
-
-	__set_current_state(TASK_RUNNING);
-	i915_reset(request->i915, 0);
-	return true;
-}
-
-/**
- * i915_wait_request - wait until execution of request has finished
- * @req: the request to wait upon
- * @flags: how to wait
- * @timeout: how long to wait in jiffies
- *
- * i915_wait_request() waits for the request to be completed, for a
- * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
- * unbounded wait).
- *
- * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
- * in via the flags; conversely, if the struct_mutex is not held, the caller
- * must not specify that the wait is locked.
- *
- * Returns the remaining time (in jiffies) if the request completed, which may
- * be zero or -ETIME if the request is unfinished after the timeout expires.
- * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
- * pending before the request completes.
- */
-long i915_wait_request(struct drm_i915_gem_request *req,
-		       unsigned int flags,
-		       long timeout)
-{
-	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
-		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
-	DEFINE_WAIT_FUNC(reset, default_wake_function);
-	DEFINE_WAIT_FUNC(exec, default_wake_function);
-	struct intel_wait wait;
-
-	might_sleep();
-#if IS_ENABLED(CONFIG_LOCKDEP)
-	GEM_BUG_ON(debug_locks &&
-		   !!lockdep_is_held(&req->i915->drm.struct_mutex) !=
-		   !!(flags & I915_WAIT_LOCKED));
-#endif
-	GEM_BUG_ON(timeout < 0);
-
-	if (i915_gem_request_completed(req))
-		return timeout;
-
-	if (!timeout)
-		return -ETIME;
-
-	trace_i915_gem_request_wait_begin(req, flags);
-
-	add_wait_queue(&req->execute, &exec);
-	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(errq, &reset);
-
-	intel_wait_init(&wait, req);
-
-restart:
-	do {
-		set_current_state(state);
-		if (intel_wait_update_request(&wait, req))
-			break;
-
-		if (flags & I915_WAIT_LOCKED &&
-		    __i915_wait_request_check_and_reset(req))
-			continue;
-
-		if (signal_pending_state(state, current)) {
-			timeout = -ERESTARTSYS;
-			goto complete;
-		}
-
-		if (!timeout) {
-			timeout = -ETIME;
-			goto complete;
-		}
-
-		timeout = io_schedule_timeout(timeout);
-	} while (1);
-
-	GEM_BUG_ON(!intel_wait_has_seqno(&wait));
-	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
-
-	/* Optimistic short spin before touching IRQs */
-	if (__i915_spin_request(req, wait.seqno, state, 5))
-		goto complete;
-
-	set_current_state(state);
-	if (intel_engine_add_wait(req->engine, &wait))
-		/* In order to check that we haven't missed the interrupt
-		 * as we enabled it, we need to kick ourselves to do a
-		 * coherent check on the seqno before we sleep.
-		 */
-		goto wakeup;
-
-	if (flags & I915_WAIT_LOCKED)
-		__i915_wait_request_check_and_reset(req);
-
-	for (;;) {
-		if (signal_pending_state(state, current)) {
-			timeout = -ERESTARTSYS;
-			break;
-		}
-
-		if (!timeout) {
-			timeout = -ETIME;
-			break;
-		}
-
-		timeout = io_schedule_timeout(timeout);
-
-		if (intel_wait_complete(&wait) &&
-		    intel_wait_check_request(&wait, req))
-			break;
-
-		set_current_state(state);
-
-wakeup:
-		/* Carefully check if the request is complete, giving time
-		 * for the seqno to be visible following the interrupt.
-		 * We also have to check in case we are kicked by the GPU
-		 * reset in order to drop the struct_mutex.
-		 */
-		if (__i915_request_irq_complete(req))
-			break;
-
-		/* If the GPU is hung, and we hold the lock, reset the GPU
-		 * and then check for completion. On a full reset, the engine's
-		 * HW seqno will be advanced past us and we are complete.
-		 * If we do a partial reset, we have to wait for the GPU to
-		 * resume and update the breadcrumb.
-		 *
-		 * If we don't hold the mutex, we can just wait for the worker
-		 * to come along and update the breadcrumb (either directly
-		 * itself, or indirectly by recovering the GPU).
-		 */
-		if (flags & I915_WAIT_LOCKED &&
-		    __i915_wait_request_check_and_reset(req))
-			continue;
-
-		/* Only spin if we know the GPU is processing this request */
-		if (__i915_spin_request(req, wait.seqno, state, 2))
-			break;
-
-		if (!intel_wait_check_request(&wait, req)) {
-			intel_engine_remove_wait(req->engine, &wait);
-			goto restart;
-		}
-	}
-
-	intel_engine_remove_wait(req->engine, &wait);
-complete:
-	__set_current_state(TASK_RUNNING);
-	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(errq, &reset);
-	remove_wait_queue(&req->execute, &exec);
-	trace_i915_gem_request_wait_end(req);
-
-	return timeout;
-}
-
-static void engine_retire_requests(struct intel_engine_cs *engine)
-{
-	struct drm_i915_gem_request *request, *next;
-	u32 seqno = intel_engine_get_seqno(engine);
-	LIST_HEAD(retire);
-
-	spin_lock_irq(&engine->timeline->lock);
-	list_for_each_entry_safe(request, next,
-				 &engine->timeline->requests, link) {
-		if (!i915_seqno_passed(seqno, request->global_seqno))
-			break;
-
-		list_move_tail(&request->link, &retire);
-	}
-	spin_unlock_irq(&engine->timeline->lock);
-
-	list_for_each_entry_safe(request, next, &retire, link)
-		i915_gem_request_retire(request);
-}
-
-void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	if (!dev_priv->gt.active_requests)
-		return;
-
-	for_each_engine(engine, dev_priv, id)
-		engine_retire_requests(engine);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_request.c"
-#include "selftests/i915_gem_request.c"
-#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
deleted file mode 100644
index 2236e9188c5c..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ /dev/null
@@ -1,730 +0,0 @@
-/*
- * Copyright © 2008-2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above
copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef I915_GEM_REQUEST_H
-#define I915_GEM_REQUEST_H
-
-#include <linux/dma-fence.h>
-
-#include "i915_gem.h"
-#include "i915_sw_fence.h"
-
-#include <uapi/drm/i915_drm.h>
-
-struct drm_file;
-struct drm_i915_gem_object;
-struct drm_i915_gem_request;
-
-struct intel_wait {
-	struct rb_node node;
-	struct task_struct *tsk;
-	struct drm_i915_gem_request *request;
-	u32 seqno;
-};
-
-struct intel_signal_node {
-	struct rb_node node;
-	struct intel_wait wait;
-};
-
-struct i915_dependency {
-	struct i915_priotree *signaler;
-	struct list_head signal_link;
-	struct list_head wait_link;
-	struct list_head dfs_link;
-	unsigned long flags;
-#define I915_DEPENDENCY_ALLOC BIT(0)
-};
-
-/* Requests exist in a complex web of interdependencies. Each request
- * has to wait for some other request to complete before it is ready to be run
- * (e.g. we have to wait until the pixels have been rendered into a texture
- * before we can copy from it). We track the readiness of a request in terms
- * of fences, but we also need to keep the dependency tree for the lifetime
- * of the request (beyond the life of an individual fence). We use the tree
- * at various points to reorder the requests whilst keeping the requests
- * in order with respect to their various dependencies.
- */
-struct i915_priotree {
-	struct list_head signalers_list; /* those before us, we depend upon */
-	struct list_head waiters_list; /* those after us, they depend upon us */
-	struct list_head link;
-	int priority;
-};
-
-enum {
-	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
-	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
-	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
-
-	I915_PRIORITY_INVALID = INT_MIN
-};
-
-struct i915_gem_capture_list {
-	struct i915_gem_capture_list *next;
-	struct i915_vma *vma;
-};
-
-/**
- * Request queue structure.
- *
- * The request queue allows us to note sequence numbers that have been emitted
- * and may be associated with active buffers to be retired.
- *
- * By keeping this list, we can avoid having to do questionable sequence
- * number comparisons on buffer last_read|write_seqno. It also allows an
- * emission time to be associated with the request for tracking how far ahead
- * of the GPU the submission is.
- *
- * When modifying this structure be very aware that we perform a lockless
- * RCU lookup of it that may race against reallocation of the struct
- * from the slab freelist. We intentionally do not zero the structure on
- * allocation so that the lookup can use the dangling pointers (and is
- * cognisant that those pointers may be wrong). Instead, everything that
- * needs to be initialised must be done so explicitly.
- *
- * The requests are reference counted.
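- *
- * (The reference count itself lives in the embedded struct dma_fence;
- * i915_gem_request_get/put below are thin wrappers around
- * dma_fence_get/put.)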
- */
-struct drm_i915_gem_request {
-	struct dma_fence fence;
-	spinlock_t lock;
-
-	/** On which ring this request was generated */
-	struct drm_i915_private *i915;
-
-	/**
-	 * Context and ring buffer related to this request
-	 * Contexts are refcounted, so when this request is associated with a
-	 * context, we must increment the context's refcount, to guarantee that
-	 * it persists while any request is linked to it. Requests themselves
-	 * are also refcounted, so the request will only be freed when the last
-	 * reference to it is dismissed, and the code in
-	 * i915_gem_request_free() will then decrement the refcount on the
-	 * context.
-	 */
-	struct i915_gem_context *ctx;
-	struct intel_engine_cs *engine;
-	struct intel_ring *ring;
-	struct intel_timeline *timeline;
-	struct intel_signal_node signaling;
-
-	/* Fences for the various phases in the request's lifetime.
-	 *
-	 * The submit fence is used to await upon all of the request's
-	 * dependencies. When it is signaled, the request is ready to run.
-	 * It is used by the driver to then queue the request for execution.
-	 */
-	struct i915_sw_fence submit;
-	wait_queue_entry_t submitq;
-	wait_queue_head_t execute;
-
-	/* A list of everyone we wait upon, and everyone who waits upon us.
-	 * Even though we will not be submitted to the hardware before the
-	 * submit fence is signaled (it waits for all external events as well
-	 * as our own requests), the scheduler still needs to know the
-	 * dependency tree for the lifetime of the request (from execbuf
-	 * to retirement), i.e. bidirectional dependency information for the
-	 * request not tied to individual fences.
-	 */
-	struct i915_priotree priotree;
-	struct i915_dependency dep;
-
-	/** GEM sequence number associated with this request on the
-	 * global execution timeline. It is zero when the request is not
-	 * on the HW queue (i.e. not on the engine timeline list).
-	 * Its value is guarded by the timeline spinlock.
-	 */
-	u32 global_seqno;
-
-	/** Position in the ring of the start of the request */
-	u32 head;
-
-	/**
-	 * Position in the ring of the start of the postfix.
-	 * This is required to calculate the maximum available ring space
-	 * without overwriting the postfix.
-	 */
-	u32 postfix;
-
-	/** Position in the ring of the end of the whole request */
-	u32 tail;
-
-	/** Position in the ring of the end of any workarounds after the tail */
-	u32 wa_tail;
-
-	/** Preallocate space in the ring for emitting the request */
-	u32 reserved_space;
-
-	/** Batch buffer related to this request if any (used for
-	 * error state dump only).
-	 */
-	struct i915_vma *batch;
-	/** Additional buffers requested by userspace to be captured upon
-	 * a GPU hang. The vma/obj on this list are protected by their
-	 * active reference - all objects on this list must also be
-	 * on the active_list (of their final request).
-	 */
-	struct i915_gem_capture_list *capture_list;
-	struct list_head active_list;
-
-	/** Time at which this request was emitted, in jiffies.
*/ - unsigned long emitted_jiffies; - - bool waitboost; - - /** engine->request_list entry for this request */ - struct list_head link; - - /** ring->request_list entry for this request */ - struct list_head ring_link; - - struct drm_i915_file_private *file_priv; - /** file_priv list entry for this request */ - struct list_head client_link; -}; - -#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -extern const struct dma_fence_ops i915_fence_ops; - -static inline bool dma_fence_is_i915(const struct dma_fence *fence) -{ - return fence->ops == &i915_fence_ops; -} - -struct drm_i915_gem_request * __must_check -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); -void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); - -static inline struct drm_i915_gem_request * -to_request(struct dma_fence *fence) -{ - /* We assume that NULL fence/request are interoperable */ - BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); - GEM_BUG_ON(fence && !dma_fence_is_i915(fence)); - return container_of(fence, struct drm_i915_gem_request, fence); -} - -static inline struct drm_i915_gem_request * -i915_gem_request_get(struct drm_i915_gem_request *req) -{ - return to_request(dma_fence_get(&req->fence)); -} - -static inline struct drm_i915_gem_request * -i915_gem_request_get_rcu(struct drm_i915_gem_request *req) -{ - return to_request(dma_fence_get_rcu(&req->fence)); -} - -static inline void -i915_gem_request_put(struct drm_i915_gem_request *req) -{ - dma_fence_put(&req->fence); -} - -/** - * i915_gem_request_global_seqno - report the current global seqno - * @request - the request - * - * A request is assigned a global seqno only when it is on the hardware - * execution queue. The global seqno can be used to maintain a list of - * requests on the same engine in retirement order, for example for - * constructing a priority queue for waiting. Prior to its execution, or - * if it is subsequently removed in the event of preemption, its global - * seqno is zero. As both insertion and removal from the execution queue - * may operate in IRQ context, it is not guarded by the usual struct_mutex - * BKL. Instead those relying on the global seqno must be prepared for its - * value to change between reads. Only when the request is complete can - * the global seqno be stable (due to the memory barriers on submitting - * the commands to the hardware to write the breadcrumb, if the HWS shows - * that it has passed the global seqno and the global seqno is unchanged - * after the read, it is indeed complete). 
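- *
- * A lockless completion check therefore samples the seqno twice, e.g.
- * (mirroring i915_gem_request_completed() below):
- *
- *	seqno = i915_gem_request_global_seqno(rq);
- *	done = seqno &&
- *	       i915_seqno_passed(intel_engine_get_seqno(rq->engine), seqno) &&
- *	       seqno == i915_gem_request_global_seqno(rq);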
- */
-static u32
-i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
-{
-	return READ_ONCE(request->global_seqno);
-}
-
-int
-i915_gem_request_await_object(struct drm_i915_gem_request *to,
-			      struct drm_i915_gem_object *obj,
-			      bool write);
-int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
-				     struct dma_fence *fence);
-
-void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
-#define i915_add_request(req) \
-	__i915_add_request(req, false)
-
-void __i915_gem_request_submit(struct drm_i915_gem_request *request);
-void i915_gem_request_submit(struct drm_i915_gem_request *request);
-
-void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
-void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
-
-struct intel_rps_client;
-#define NO_WAITBOOST ERR_PTR(-1)
-#define IS_RPS_CLIENT(p) (!IS_ERR(p))
-#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
-
-long i915_wait_request(struct drm_i915_gem_request *req,
-		       unsigned int flags,
-		       long timeout)
-	__attribute__((nonnull(1)));
-#define I915_WAIT_INTERRUPTIBLE	BIT(0)
-#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-
-static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
-
-/**
- * Returns true if seq1 is later than seq2.
- */
-static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
-{
-	return (s32)(seq1 - seq2) >= 0;
-}
-
-static inline bool
-__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
-{
-	GEM_BUG_ON(!seqno);
-	return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
-		seqno == i915_gem_request_global_seqno(req);
-}
-
-static inline bool
-i915_gem_request_completed(const struct drm_i915_gem_request *req)
-{
-	u32 seqno;
-
-	seqno = i915_gem_request_global_seqno(req);
-	if (!seqno)
-		return false;
-
-	return __i915_gem_request_completed(req, seqno);
-}
-
-static inline bool
-i915_gem_request_started(const struct drm_i915_gem_request *req)
-{
-	u32 seqno;
-
-	seqno = i915_gem_request_global_seqno(req);
-	if (!seqno)
-		return false;
-
-	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 seqno - 1);
-}
-
-static inline bool i915_priotree_signaled(const struct i915_priotree *pt)
-{
-	const struct drm_i915_gem_request *rq =
-		container_of(pt, const struct drm_i915_gem_request, priotree);
-
-	return i915_gem_request_completed(rq);
-}
-
-/* We treat requests as fences. This is not to be confused with our
- * "fence registers" but pipeline synchronisation objects a la GL_ARB_sync.
- * We use the fences to synchronize access from the CPU with activity on the
- * GPU, for example, we should not rewrite an object's PTE whilst the GPU
- * is reading them. We also track fences at a higher level to provide
- * implicit synchronisation around GEM objects, e.g. set-domain will wait
- * for outstanding GPU rendering before marking the object ready for CPU
- * access, or a pageflip will wait until the GPU is complete before showing
- * the frame on the scanout.
- *
- * In order to use a fence, the object must track the fence it needs to
- * serialise with. For example, GEM objects want to track both read and
- * write access so that we can perform concurrent read operations between
- * the CPU and GPU engines, as well as waiting for all rendering to
- * complete, or waiting for the last GPU user of a "fence register". The
- * object then embeds a #i915_gem_active to track the most recent (in
- * retirement order) request relevant for the desired mode of access.
- * The #i915_gem_active is updated with i915_gem_active_set() to track the
- * most recent fence request, typically this is done as part of
- * i915_vma_move_to_active().
- *
- * When the #i915_gem_active completes (is retired), it will
- * signal its completion to the owner through a callback as well as mark
- * itself as idle (i915_gem_active.request == NULL). The owner
- * can then perform any action, such as delayed freeing of an active
- * resource including itself.
- */
-struct i915_gem_active;
-
-typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
-				   struct drm_i915_gem_request *);
-
-struct i915_gem_active {
-	struct drm_i915_gem_request __rcu *request;
-	struct list_head link;
-	i915_gem_retire_fn retire;
-};
-
-void i915_gem_retire_noop(struct i915_gem_active *,
-			  struct drm_i915_gem_request *request);
-
-/**
- * init_request_active - prepares the activity tracker for use
- * @active - the active tracker
- * @func - a callback invoked when the tracker is retired (becomes idle),
- *         can be NULL
- *
- * init_request_active() prepares the embedded @active struct for use as
- * an activity tracker, that is for tracking the last known active request
- * associated with it. When the last request becomes idle (it is retired
- * after completion), the optional callback @func is invoked.
- */
-static inline void
-init_request_active(struct i915_gem_active *active,
-		    i915_gem_retire_fn retire)
-{
-	INIT_LIST_HEAD(&active->link);
-	active->retire = retire ?: i915_gem_retire_noop;
-}
-
-/**
- * i915_gem_active_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * i915_gem_active_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
-static inline void
-i915_gem_active_set(struct i915_gem_active *active,
-		    struct drm_i915_gem_request *request)
-{
-	list_move(&active->link, &request->active_list);
-	rcu_assign_pointer(active->request, request);
-}
-
-/**
- * i915_gem_active_set_retire_fn - updates the retirement callback
- * @active - the active tracker
- * @fn - the routine called when the request is retired
- * @mutex - struct_mutex used to guard retirements
- *
- * i915_gem_active_set_retire_fn() updates the function pointer that
- * is called when the final request associated with the @active tracker
- * is retired.
- */
-static inline void
-i915_gem_active_set_retire_fn(struct i915_gem_active *active,
-			      i915_gem_retire_fn fn,
-			      struct mutex *mutex)
-{
-	lockdep_assert_held(mutex);
-	active->retire = fn ?: i915_gem_retire_noop;
}
-
-static inline struct drm_i915_gem_request *
-__i915_gem_active_peek(const struct i915_gem_active *active)
-{
-	/* Inside the error capture (running with the driver in an unknown
-	 * state), we want to bend the rules slightly (a lot).
-	 *
-	 * Work is in progress to make it safer, in the meantime this keeps
-	 * the known issue from spamming the logs.
-	 */
-	return rcu_dereference_protected(active->request, 1);
-}
-
-/**
- * i915_gem_active_raw - return the active request
- * @active - the active tracker
- *
- * i915_gem_active_raw() returns the current request being tracked, or NULL.
- * It does not obtain a reference on the request for the caller, so the caller
- * must hold struct_mutex.
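- *
- * Typical use (as seen in __i915_add_request()):
- *
- *	prev = i915_gem_active_raw(&timeline->last_request,
- *				   &request->i915->drm.struct_mutex);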
- */ -static inline struct drm_i915_gem_request * -i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) -{ - return rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); -} - -/** - * i915_gem_active_peek - report the active request being monitored - * @active - the active tracker - * - * i915_gem_active_peek() returns the current request being tracked if - * still active, or NULL. It does not obtain a reference on the request - * for the caller, so the caller must hold struct_mutex. - */ -static inline struct drm_i915_gem_request * -i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) -{ - struct drm_i915_gem_request *request; - - request = i915_gem_active_raw(active, mutex); - if (!request || i915_gem_request_completed(request)) - return NULL; - - return request; -} - -/** - * i915_gem_active_get - return a reference to the active request - * @active - the active tracker - * - * i915_gem_active_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold struct_mutex. - */ -static inline struct drm_i915_gem_request * -i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) -{ - return i915_gem_request_get(i915_gem_active_peek(active, mutex)); -} - -/** - * __i915_gem_active_get_rcu - return a reference to the active request - * @active - the active tracker - * - * __i915_gem_active_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold the RCU read lock, but - * the returned pointer is safe to use outside of RCU. - */ -static inline struct drm_i915_gem_request * -__i915_gem_active_get_rcu(const struct i915_gem_active *active) -{ - /* Performing a lockless retrieval of the active request is super - * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing - * slab of request objects will not be freed whilst we hold the - * RCU read lock. It does not guarantee that the request itself - * will not be freed and then *reused*. Viz, - * - * Thread A Thread B - * - * req = active.request - * retire(req) -> free(req); - * (req is now first on the slab freelist) - * active.request = NULL - * - * req = new submission on a new object - * ref(req) - * - * To prevent the request from being reused whilst the caller - * uses it, we take a reference like normal. Whilst acquiring - * the reference we check that it is not in a destroyed state - * (refcnt == 0). That prevents the request being reallocated - * whilst the caller holds on to it. To check that the request - * was not reallocated as we acquired the reference we have to - * check that our request remains the active request across - * the lookup, in the same manner as a seqlock. The visibility - * of the pointer versus the reference counting is controlled - * by using RCU barriers (rcu_dereference and rcu_assign_pointer). - * - * In the middle of all that, we inspect whether the request is - * complete. Retiring is lazy so the request may be completed long - * before the active tracker is updated. Querying whether the - * request is complete is far cheaper (as it involves no locked - * instructions setting cachelines to exclusive) than acquiring - * the reference, so we do it first. The RCU read lock ensures the - * pointer dereference is valid, but does not ensure that the - * seqno nor HWS is the right one! However, if the request was - * reallocated, that means the active tracker's request was complete. 
- * If the new request is also complete, then both are and we can - * just report the active tracker is idle. If the new request is - * incomplete, then we acquire a reference on it and check that - * it remained the active request. - * - * It is then imperative that we do not zero the request on - * reallocation, so that we can chase the dangling pointers! - * See i915_gem_request_alloc(). - */ - do { - struct drm_i915_gem_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_gem_request_completed(request)) - return NULL; - - /* An especially silly compiler could decide to recompute the - * result of i915_gem_request_completed, more specifically - * re-emit the load for request->fence.seqno. A race would catch - * a later seqno value, which could flip the result from true to - * false. Which means part of the instructions below might not - * be executed, while later on instructions are executed. Due to - * barriers within the refcounting the inconsistency can't reach - * past the call to i915_gem_request_get_rcu, but not executing - * that while still executing i915_gem_request_put() creates - * havoc enough. Prevent this with a compiler barrier. - */ - barrier(); - - request = i915_gem_request_get_rcu(request); - - /* What stops the following rcu_access_pointer() from occurring - * before the above i915_gem_request_get_rcu()? If we were - * to read the value before pausing to get the reference to - * the request, we may not notice a change in the active - * tracker. - * - * The rcu_access_pointer() is a mere compiler barrier, which - * means both the CPU and compiler are free to perform the - * memory read without constraint. The compiler only has to - * ensure that any operations after the rcu_access_pointer() - * occur afterwards in program order. This means the read may - * be performed earlier by an out-of-order CPU, or adventurous - * compiler. - * - * The atomic operation at the heart of - * i915_gem_request_get_rcu(), see dma_fence_get_rcu(), is - * atomic_inc_not_zero() which is only a full memory barrier - * when successful. That is, if i915_gem_request_get_rcu() - * returns the request (and so with the reference counted - * incremented) then the following read for rcu_access_pointer() - * must occur after the atomic operation and so confirm - * that this request is the one currently being tracked. - * - * The corresponding write barrier is part of - * rcu_assign_pointer(). - */ - if (!request || request == rcu_access_pointer(active->request)) - return rcu_pointer_handoff(request); - - i915_gem_request_put(request); - } while (1); -} - -/** - * i915_gem_active_get_unlocked - return a reference to the active request - * @active - the active tracker - * - * i915_gem_active_get_unlocked() returns a reference to the active request, - * or NULL if the active tracker is idle. The reference is obtained under RCU, - * so no locking is required by the caller. - * - * The reference should be freed with i915_gem_request_put(). - */ -static inline struct drm_i915_gem_request * -i915_gem_active_get_unlocked(const struct i915_gem_active *active) -{ - struct drm_i915_gem_request *request; - - rcu_read_lock(); - request = __i915_gem_active_get_rcu(active); - rcu_read_unlock(); - - return request; -} - -/** - * i915_gem_active_isset - report whether the active tracker is assigned - * @active - the active tracker - * - * i915_gem_active_isset() returns true if the active tracker is currently - * assigned to a request. 
Due to the lazy retiring, that request may be idle - * and this may report stale information. - */ -static inline bool -i915_gem_active_isset(const struct i915_gem_active *active) -{ - return rcu_access_pointer(active->request); -} - -/** - * i915_gem_active_wait - waits until the request is completed - * @active - the active request on which to wait - * @flags - how to wait - * @timeout - how long to wait at most - * @rps - userspace client to charge for a waitboost - * - * i915_gem_active_wait() waits until the request is completed before - * returning, without requiring any locks to be held. Note that it does not - * retire any requests before returning. - * - * This function relies on RCU in order to acquire the reference to the active - * request without holding any locks. See __i915_gem_active_get_rcu() for the - * glory details on how that is managed. Once the reference is acquired, we - * can then wait upon the request, and afterwards release our reference, - * free of any locking. - * - * This function wraps i915_wait_request(), see it for the full details on - * the arguments. - * - * Returns 0 if successful, or a negative error code. - */ -static inline int -i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags) -{ - struct drm_i915_gem_request *request; - long ret = 0; - - request = i915_gem_active_get_unlocked(active); - if (request) { - ret = i915_wait_request(request, flags, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(request); - } - - return ret < 0 ? ret : 0; -} - -/** - * i915_gem_active_retire - waits until the request is retired - * @active - the active request on which to wait - * - * i915_gem_active_retire() waits until the request is completed, - * and then ensures that at least the retirement handler for this - * @active tracker is called before returning. If the @active - * tracker is idle, the function returns immediately. - */ -static inline int __must_check -i915_gem_active_retire(struct i915_gem_active *active, - struct mutex *mutex) -{ - struct drm_i915_gem_request *request; - long ret; - - request = i915_gem_active_raw(active, mutex); - if (!request) - return 0; - - ret = i915_wait_request(request, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (ret < 0) - return ret; - - list_del_init(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, request); - - return 0; -} - -#define for_each_active(mask, idx) \ - for (; mask ? 
idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
-
-#endif /* I915_GEM_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 0e158f9287c4..5757fb7c4b5a 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -175,7 +175,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
 trace_i915_gem_shrink(i915, target, flags);
- i915_gem_retire_requests(i915);
+ i915_retire_requests(i915);
 /*
 * Unbinding of objects will require HW access; Let us not wake the
@@ -267,7 +267,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 if (flags & I915_SHRINK_BOUND)
 intel_runtime_pm_put(i915);
- i915_gem_retire_requests(i915);
+ i915_retire_requests(i915);
 shrinker_unlock(i915, unlock);
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index b5a22400a01f..33e01bf6aa36 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -27,9 +27,9 @@
 #include <linux/list.h>
-#include "i915_utils.h"
-#include "i915_gem_request.h"
+#include "i915_request.h"
 #include "i915_syncmap.h"
+#include "i915_utils.h"
 struct i915_gem_timeline;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 65c0bef73ee5..a7933c9b5562 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -991,7 +991,7 @@ out:
 static inline uint32_t
__active_get_seqno(struct i915_gem_active *active)
 {
- struct drm_i915_gem_request *request;
+ struct i915_request *request;
 request = __i915_gem_active_peek(active);
 return request ? request->global_seqno : 0;
@@ -1000,7 +1000,7 @@ __active_get_seqno(struct i915_gem_active *active)
 static inline int
__active_get_engine_id(struct i915_gem_active *active)
 {
- struct drm_i915_gem_request *request;
+ struct i915_request *request;
 request = __i915_gem_active_peek(active);
 return request ?
request->engine->id : -1; @@ -1293,7 +1293,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error, } } -static void record_request(struct drm_i915_gem_request *request, +static void record_request(struct i915_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; @@ -1310,10 +1310,10 @@ static void record_request(struct drm_i915_gem_request *request, } static void engine_record_requests(struct intel_engine_cs *engine, - struct drm_i915_gem_request *first, + struct i915_request *first, struct drm_i915_error_engine *ee) { - struct drm_i915_gem_request *request; + struct i915_request *request; int count; count = 0; @@ -1363,7 +1363,7 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine, unsigned int n; for (n = 0; n < execlists_num_ports(execlists); n++) { - struct drm_i915_gem_request *rq = port_request(&execlists->port[n]); + struct i915_request *rq = port_request(&execlists->port[n]); if (!rq) break; @@ -1398,10 +1398,10 @@ static void record_context(struct drm_i915_error_context *e, e->active = atomic_read(&ctx->active_count); } -static void request_record_user_bo(struct drm_i915_gem_request *request, +static void request_record_user_bo(struct i915_request *request, struct drm_i915_error_engine *ee) { - struct i915_gem_capture_list *c; + struct i915_capture_list *c; struct drm_i915_error_object **bo; long count; @@ -1454,7 +1454,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, for (i = 0; i < I915_NUM_ENGINES; i++) { struct intel_engine_cs *engine = dev_priv->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; - struct drm_i915_gem_request *request; + struct i915_request *request; ee->engine_id = -1; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 17de6cef2a30..0a7ed990a8d1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1071,7 +1071,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *rq = NULL; + struct i915_request *rq = NULL; struct intel_wait *wait; if (!engine->breadcrumbs.irq_armed) @@ -1098,13 +1098,13 @@ static void notify_ring(struct intel_engine_cs *engine) */ if (i915_seqno_passed(intel_engine_get_seqno(engine), wait->seqno)) { - struct drm_i915_gem_request *waiter = wait->request; + struct i915_request *waiter = wait->request; wakeup = true; if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &waiter->fence.flags) && intel_wait_check_request(wait, waiter)) - rq = i915_gem_request_get(waiter); + rq = i915_request_get(waiter); } if (wakeup) @@ -1117,7 +1117,7 @@ static void notify_ring(struct intel_engine_cs *engine) if (rq) { dma_fence_signal(&rq->fence); - i915_gem_request_put(rq); + i915_request_put(rq); } trace_intel_engine_notify(engine, wait); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index f0cfdece14ae..f464c3737228 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1630,10 +1630,10 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This * is only used by the kernel context. 
*/ -static int gen8_emit_oa_config(struct drm_i915_gem_request *req, +static int gen8_emit_oa_config(struct i915_request *rq, const struct i915_oa_config *oa_config) { - struct drm_i915_private *dev_priv = req->i915; + struct drm_i915_private *dev_priv = rq->i915; /* The MMIO offsets for Flex EU registers aren't contiguous */ u32 flex_mmio[] = { i915_mmio_reg_offset(EU_PERF_CNTL0), @@ -1647,7 +1647,7 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req, u32 *cs; int i; - cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4); + cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1685,7 +1685,7 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req, } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1695,38 +1695,38 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr { struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct i915_gem_timeline *timeline; - struct drm_i915_gem_request *req; + struct i915_request *rq; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); - req = i915_gem_request_alloc(engine, dev_priv->kernel_context); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = i915_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); - ret = gen8_emit_oa_config(req, oa_config); + ret = gen8_emit_oa_config(rq, oa_config); if (ret) { - i915_add_request(req); + i915_request_add(rq); return ret; } /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { - struct drm_i915_gem_request *prev; + struct i915_request *prev; struct intel_timeline *tl; tl = &timeline->engine[engine->id]; prev = i915_gem_active_raw(&tl->last_request, &dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&req->submit, + i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, GFP_KERNEL); } - i915_add_request(req); + i915_request_add(rq); return 0; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c new file mode 100644 index 000000000000..9b25270f2491 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_request.c @@ -0,0 +1,1411 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ */
+
+#include <linux/prefetch.h>
+#include <linux/dma-fence-array.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/signal.h>
+
+#include "i915_drv.h"
+
+static const char *i915_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "i915";
+}
+
+static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
+{
+ /*
+ * The timeline struct (as part of the ppgtt underneath a context)
+ * may be freed when the request is no longer in use by the GPU.
+ * We could extend the life of a context to beyond that of all
+ * fences, possibly keeping the hw resource around indefinitely,
+ * or we just give them a false name. Since
+ * dma_fence_ops.get_timeline_name is a debug feature, the occasional
+ * lie seems justifiable.
+ */
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+ return "signaled";
+
+ return to_request(fence)->timeline->common->name;
+}
+
+static bool i915_fence_signaled(struct dma_fence *fence)
+{
+ return i915_request_completed(to_request(fence));
+}
+
+static bool i915_fence_enable_signaling(struct dma_fence *fence)
+{
+ if (i915_fence_signaled(fence))
+ return false;
+
+ intel_engine_enable_signaling(to_request(fence), true);
+ return !i915_fence_signaled(fence);
+}
+
+static signed long i915_fence_wait(struct dma_fence *fence,
+ bool interruptible,
+ signed long timeout)
+{
+ return i915_request_wait(to_request(fence), interruptible, timeout);
+}
+
+static void i915_fence_release(struct dma_fence *fence)
+{
+ struct i915_request *rq = to_request(fence);
+
+ /*
+ * The request is put onto an RCU freelist (i.e. the address
+ * is immediately reused), mark the fences as being freed now.
+ * Otherwise the debugobjects for the fences are only marked as
+ * freed when the slab cache itself is freed, and so we would get
+ * caught trying to reuse dead objects.
+ */
+ i915_sw_fence_fini(&rq->submit);
+
+ kmem_cache_free(rq->i915->requests, rq);
+}
+
+const struct dma_fence_ops i915_fence_ops = {
+ .get_driver_name = i915_fence_get_driver_name,
+ .get_timeline_name = i915_fence_get_timeline_name,
+ .enable_signaling = i915_fence_enable_signaling,
+ .signaled = i915_fence_signaled,
+ .wait = i915_fence_wait,
+ .release = i915_fence_release,
+};
+
+static inline void
+i915_request_remove_from_client(struct i915_request *request)
+{
+ struct drm_i915_file_private *file_priv;
+
+ file_priv = request->file_priv;
+ if (!file_priv)
+ return;
+
+ spin_lock(&file_priv->mm.lock);
+ if (request->file_priv) {
+ list_del(&request->client_link);
+ request->file_priv = NULL;
+ }
+ spin_unlock(&file_priv->mm.lock);
+}
+
+static struct i915_dependency *
+i915_dependency_alloc(struct drm_i915_private *i915)
+{
+ return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
+}
+
+static void
+i915_dependency_free(struct drm_i915_private *i915,
+ struct i915_dependency *dep)
+{
+ kmem_cache_free(i915->dependencies, dep);
+}
+
+static void
+__i915_priotree_add_dependency(struct i915_priotree *pt,
+ struct i915_priotree *signal,
+ struct i915_dependency *dep,
+ unsigned long flags)
+{
+ INIT_LIST_HEAD(&dep->dfs_link);
+ list_add(&dep->wait_link, &signal->waiters_list);
+ list_add(&dep->signal_link, &pt->signalers_list);
+ dep->signaler = signal;
+ dep->flags = flags;
+}
+
+static int
+i915_priotree_add_dependency(struct drm_i915_private *i915,
+ struct i915_priotree *pt,
+ struct i915_priotree *signal)
+{
+ struct i915_dependency *dep;
+
+ dep = i915_dependency_alloc(i915);
+ if (!dep)
+ return -ENOMEM;
+
+ __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC);
+ return 0;
+}
+
+static void
+i915_priotree_fini(struct drm_i915_private
*i915, struct i915_priotree *pt) +{ + struct i915_dependency *dep, *next; + + GEM_BUG_ON(!list_empty(&pt->link)); + + /* + * Everyone we depended upon (the fences we wait to be signaled) + * should retire before us and remove themselves from our list. + * However, retirement is run independently on each timeline and + * so we may be called out-of-order. + */ + list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { + GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + + list_del(&dep->wait_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(i915, dep); + } + + /* Remove ourselves from everyone who depends upon us */ + list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != pt); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + + list_del(&dep->signal_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(i915, dep); + } +} + +static void +i915_priotree_init(struct i915_priotree *pt) +{ + INIT_LIST_HEAD(&pt->signalers_list); + INIT_LIST_HEAD(&pt->waiters_list); + INIT_LIST_HEAD(&pt->link); + pt->priority = I915_PRIORITY_INVALID; +} + +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + /* Carefully retire all requests without writing to the rings */ + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret) + return ret; + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + for_each_engine(engine, i915, id) { + struct i915_gem_timeline *timeline; + struct intel_timeline *tl = engine->timeline; + + if (!i915_seqno_passed(seqno, tl->seqno)) { + /* spin until threads are complete */ + while (intel_breadcrumbs_busy(engine)) + cond_resched(); + } + + /* Check we are idle before we fiddle with hw state! */ + GEM_BUG_ON(!intel_engine_is_idle(engine)); + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); + + /* Finally reset hw state */ + intel_engine_init_global_seqno(engine, seqno); + tl->seqno = seqno; + + list_for_each_entry(timeline, &i915->gt.timelines, link) + memset(timeline->engine[id].global_sync, 0, + sizeof(timeline->engine[id].global_sync)); + } + + return 0; +} + +int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *i915 = to_i915(dev); + + lockdep_assert_held(&i915->drm.struct_mutex); + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we will inject to ring */ + return reset_all_global_seqno(i915, seqno - 1); +} + +static void mark_busy(struct drm_i915_private *i915) +{ + if (i915->gt.awake) + return; + + GEM_BUG_ON(!i915->gt.active_requests); + + intel_runtime_pm_get_noresume(i915); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. 
+ */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + + i915->gt.awake = true; + if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ + i915->gt.epoch = 1; + + intel_enable_gt_powersave(i915); + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + i915_pmu_gt_unparked(i915); + + intel_engines_unpark(i915); + + i915_queue_hangcheck(i915); + + queue_delayed_work(i915->wq, + &i915->gt.retire_work, + round_jiffies_up_relative(HZ)); +} + +static int reserve_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + u32 active = ++engine->timeline->inflight_seqnos; + u32 seqno = engine->timeline->seqno; + int ret; + + /* Reservation is fine until we need to wrap around */ + if (unlikely(add_overflows(seqno, active))) { + ret = reset_all_global_seqno(i915, 0); + if (ret) { + engine->timeline->inflight_seqnos--; + return ret; + } + } + + if (!i915->gt.active_requests++) + mark_busy(i915); + + return 0; +} + +static void unreserve_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + if (!--i915->gt.active_requests) { + /* Cancel the mark_busy() from our reserve_engine() */ + GEM_BUG_ON(!i915->gt.awake); + mod_delayed_work(i915->wq, + &i915->gt.idle_work, + msecs_to_jiffies(100)); + } + + GEM_BUG_ON(!engine->timeline->inflight_seqnos); + engine->timeline->inflight_seqnos--; +} + +void i915_gem_retire_noop(struct i915_gem_active *active, + struct i915_request *request) +{ + /* Space left intentionally blank */ +} + +static void advance_ring(struct i915_request *request) +{ + unsigned int tail; + + /* + * We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + if (list_is_last(&request->ring_link, &request->ring->request_list)) { + /* + * We may race here with execlists resubmitting this request + * as we retire it. The resubmission will move the ring->tail + * forwards (to request->wa_tail). We either read the + * current value that was written to hw, or the value that + * is just about to be. Either works, if we miss the last two + * noops - they are safe to be replayed on a reset. + */ + tail = READ_ONCE(request->ring->tail); + } else { + tail = request->postfix; + } + list_del(&request->ring_link); + + request->ring->head = tail; +} + +static void free_capture_list(struct i915_request *request) +{ + struct i915_capture_list *capture; + + capture = request->capture_list; + while (capture) { + struct i915_capture_list *next = capture->next; + + kfree(capture); + capture = next; + } +} + +static void i915_request_retire(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct i915_gem_active *active, *next; + + lockdep_assert_held(&request->i915->drm.struct_mutex); + GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); + GEM_BUG_ON(!i915_request_completed(request)); + GEM_BUG_ON(!request->i915->gt.active_requests); + + trace_i915_request_retire(request); + + spin_lock_irq(&engine->timeline->lock); + list_del_init(&request->link); + spin_unlock_irq(&engine->timeline->lock); + + unreserve_engine(request->engine); + advance_ring(request); + + free_capture_list(request); + + /* + * Walk through the active list, calling retire on each. 
This allows + * objects to track their GPU activity and mark themselves as idle + * when their *last* active request is completed (updating state + * tracking lists for eviction, active references for GEM, etc). + * + * As the ->retire() may free the node, we decouple it first and + * pass along the auxiliary information (to avoid dereferencing + * the node after the callback). + */ + list_for_each_entry_safe(active, next, &request->active_list, link) { + /* + * In microbenchmarks or focusing upon time inside the kernel, + * we may spend an inordinate amount of time simply handling + * the retirement of requests and processing their callbacks. + * Of which, this loop itself is particularly hot due to the + * cache misses when jumping around the list of i915_gem_active. + * So we try to keep this loop as streamlined as possible and + * also prefetch the next i915_gem_active to try and hide + * the likely cache miss. + */ + prefetchw(next); + + INIT_LIST_HEAD(&active->link); + RCU_INIT_POINTER(active->request, NULL); + + active->retire(active, request); + } + + i915_request_remove_from_client(request); + + /* Retirement decays the ban score as it is a sign of ctx progress */ + atomic_dec_if_positive(&request->ctx->ban_score); + + /* + * The backing object for the context is done after switching to the + * *next* context. Therefore we cannot retire the previous context until + * the next context has already started running. However, since we + * cannot take the required locks at i915_request_submit() we + * defer the unpinning of the active context to now, retirement of + * the subsequent request. + */ + if (engine->last_retired_context) + engine->context_unpin(engine, engine->last_retired_context); + engine->last_retired_context = request->ctx; + + spin_lock_irq(&request->lock); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) + dma_fence_signal_locked(&request->fence); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + if (request->waitboost) { + GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); + atomic_dec(&request->i915->gt_pm.rps.num_waiters); + } + spin_unlock_irq(&request->lock); + + i915_priotree_fini(request->i915, &request->priotree); + i915_request_put(request); +} + +void i915_request_retire_upto(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_request *tmp; + + lockdep_assert_held(&rq->i915->drm.struct_mutex); + GEM_BUG_ON(!i915_request_completed(rq)); + + if (list_empty(&rq->link)) + return; + + do { + tmp = list_first_entry(&engine->timeline->requests, + typeof(*tmp), link); + + i915_request_retire(tmp); + } while (tmp != rq); +} + +static u32 timeline_get_seqno(struct intel_timeline *tl) +{ + return ++tl->seqno; +} + +void __i915_request_submit(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; + u32 seqno; + + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->timeline->lock); + + /* Transfer from per-context onto the global per-engine timeline */ + timeline = engine->timeline; + GEM_BUG_ON(timeline == request->timeline); + GEM_BUG_ON(request->global_seqno); + + seqno = timeline_get_seqno(timeline); + GEM_BUG_ON(!seqno); + GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); + + /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = seqno; + 
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+ intel_engine_enable_signaling(request, false);
+ spin_unlock(&request->lock);
+
+ engine->emit_breadcrumb(request,
+ request->ring->vaddr + request->postfix);
+
+ spin_lock(&request->timeline->lock);
+ list_move_tail(&request->link, &timeline->requests);
+ spin_unlock(&request->timeline->lock);
+
+ trace_i915_request_execute(request);
+
+ wake_up_all(&request->execute);
+}
+
+void i915_request_submit(struct i915_request *request)
+{
+ struct intel_engine_cs *engine = request->engine;
+ unsigned long flags;
+
+ /* Will be called from irq-context when using foreign fences. */
+ spin_lock_irqsave(&engine->timeline->lock, flags);
+
+ __i915_request_submit(request);
+
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
+}
+
+void __i915_request_unsubmit(struct i915_request *request)
+{
+ struct intel_engine_cs *engine = request->engine;
+ struct intel_timeline *timeline;
+
+ GEM_BUG_ON(!irqs_disabled());
+ lockdep_assert_held(&engine->timeline->lock);
+
+ /*
+ * Only unwind in reverse order, required so that the per-context list
+ * is kept in seqno/ring order.
+ */
+ GEM_BUG_ON(!request->global_seqno);
+ GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
+ GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine),
+ request->global_seqno));
+ engine->timeline->seqno--;
+
+ /* We may be recursing from the signal callback of another i915 fence */
+ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+ request->global_seqno = 0;
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+ intel_engine_cancel_signaling(request);
+ spin_unlock(&request->lock);
+
+ /* Transfer back from the global per-engine timeline to per-context */
+ timeline = request->timeline;
+ GEM_BUG_ON(timeline == engine->timeline);
+
+ spin_lock(&timeline->lock);
+ list_move(&request->link, &timeline->requests);
+ spin_unlock(&timeline->lock);
+
+ /*
+ * We don't need to wake_up any waiters on request->execute, they
+ * will get woken by any other event or us re-adding this request
+ * to the engine timeline (__i915_request_submit()). The waiters
+ * should be quite adept at finding that the request now has a new
+ * global_seqno compared to the one they went to sleep on.
+ */
+}
+
+void i915_request_unsubmit(struct i915_request *request)
+{
+ struct intel_engine_cs *engine = request->engine;
+ unsigned long flags;
+
+ /* Will be called from irq-context when using foreign fences. */
+ spin_lock_irqsave(&engine->timeline->lock, flags);
+
+ __i915_request_unsubmit(request);
+
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
+}
+
+static int __i915_sw_fence_call
+submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+ struct i915_request *request =
+ container_of(fence, typeof(*request), submit);
+
+ switch (state) {
+ case FENCE_COMPLETE:
+ trace_i915_request_submit(request);
+ /*
+ * We need to serialize use of the submit_request() callback
+ * with its hotplugging performed during an emergency
+ * i915_gem_set_wedged(). We use the RCU mechanism to mark the
+ * critical section in order to force i915_gem_set_wedged() to
+ * wait until the submit_request() is completed before
+ * proceeding.
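+ *
+ * For reference (schematic only; the wedging sequence itself is
+ * not part of this patch): the writer side of that serialization
+ * is i915_gem_set_wedged() swapping in a nop submit_request and
+ * then calling synchronize_rcu() to drain any reader still inside
+ * this critical section before it proceeds.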
+ */
+ rcu_read_lock();
+ request->engine->submit_request(request);
+ rcu_read_unlock();
+ break;
+
+ case FENCE_FREE:
+ i915_request_put(request);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * i915_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an error code if not.
+ */
+struct i915_request *
+i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
+{
+ struct drm_i915_private *i915 = engine->i915;
+ struct i915_request *rq;
+ struct intel_ring *ring;
+ int ret;
+
+ lockdep_assert_held(&i915->drm.struct_mutex);
+
+ /*
+ * Preempt contexts are reserved for exclusive use to inject a
+ * preemption context switch. They are never to be used for any trivial
+ * request!
+ */
+ GEM_BUG_ON(ctx == i915->preempt_context);
+
+ /*
+ * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+ * EIO if the GPU is already wedged.
+ */
+ if (i915_terminally_wedged(&i915->gpu_error))
+ return ERR_PTR(-EIO);
+
+ /*
+ * Pinning the contexts may generate requests in order to acquire
+ * GGTT space, so do this first before we reserve a seqno for
+ * ourselves.
+ */
+ ring = engine->context_pin(engine, ctx);
+ if (IS_ERR(ring))
+ return ERR_CAST(ring);
+ GEM_BUG_ON(!ring);
+
+ ret = reserve_engine(engine);
+ if (ret)
+ goto err_unpin;
+
+ ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST);
+ if (ret)
+ goto err_unreserve;
+
+ /* Move the oldest request to the slab-cache (if not in use!) */
+ rq = list_first_entry_or_null(&engine->timeline->requests,
+ typeof(*rq), link);
+ if (rq && i915_request_completed(rq))
+ i915_request_retire(rq);
+
+ /*
+ * Beware: Dragons be flying overhead.
+ *
+ * We use RCU to look up requests in flight. The lookups may
+ * race with the request being allocated from the slab freelist.
+ * That is, the request we are writing to here may be in the process
+ * of being read by __i915_gem_active_get_rcu(). As such,
+ * we have to be very careful when overwriting the contents. During
+ * the RCU lookup, we chase the request->engine pointer,
+ * read the request->global_seqno and increment the reference count.
+ *
+ * The reference count is incremented atomically. If it is zero,
+ * the lookup knows the request is unallocated and complete. Otherwise,
+ * it is either still in use, or has been reallocated and reset
+ * with dma_fence_init(). This increment is safe for release as we
+ * check that the request we have a reference to matches the active
+ * request.
+ *
+ * Before we increment the refcount, we chase the request->engine
+ * pointer. We must not call kmem_cache_zalloc() or else we set
+ * that pointer to NULL and cause a crash during the lookup. If
+ * we see the request is completed (based on the value of the
+ * old engine and seqno), the lookup is complete and reports NULL.
+ * If we decide the request is not completed (new engine or seqno),
+ * then we grab a reference and double check that it is still the
+ * active request - and if it is not, restart the lookup.
+ *
+ * Do not use kmem_cache_zalloc() here!
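+ *
+ * For illustration, the lookup being defended against follows the
+ * shape of the old __i915_gem_active_get_rcu() (a sketch in
+ * i915_request terms, not code added by this patch):
+ *
+ *	rcu_read_lock();
+ *	do {
+ *		rq = rcu_dereference(active->request);
+ *		if (!rq || i915_request_completed(rq))
+ *			break;			(idle: report NULL)
+ *
+ *		rq = i915_request_get_rcu(rq);	(atomic_inc_not_zero)
+ *		if (!rq || rq == rcu_access_pointer(active->request))
+ *			break;			(reference is stable)
+ *
+ *		i915_request_put(rq);		(lost a race: retry)
+ *	} while (1);
+ *	rcu_read_unlock();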
+ */ + rq = kmem_cache_alloc(i915->requests, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (unlikely(!rq)) { + /* Ratelimit ourselves to prevent oom from malicious clients */ + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_INTERRUPTIBLE); + if (ret) + goto err_unreserve; + + /* + * We've forced the client to stall and catch up with whatever + * backlog there might have been. As we are assuming that we + * caused the mempressure, now is an opportune time to + * recover as much memory from the request pool as is possible. + * Having already penalized the client to stall, we spend + * a little extra time to re-optimise page allocation. + */ + kmem_cache_shrink(i915->requests); + rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */ + + rq = kmem_cache_alloc(i915->requests, GFP_KERNEL); + if (!rq) { + ret = -ENOMEM; + goto err_unreserve; + } + } + + rq->timeline = i915_gem_context_lookup_timeline(ctx, engine); + GEM_BUG_ON(rq->timeline == engine->timeline); + + spin_lock_init(&rq->lock); + dma_fence_init(&rq->fence, + &i915_fence_ops, + &rq->lock, + rq->timeline->fence_context, + timeline_get_seqno(rq->timeline)); + + /* We bump the ref for the fence chain */ + i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); + init_waitqueue_head(&rq->execute); + + i915_priotree_init(&rq->priotree); + + INIT_LIST_HEAD(&rq->active_list); + rq->i915 = i915; + rq->engine = engine; + rq->ctx = ctx; + rq->ring = ring; + + /* No zalloc, must clear what we need by hand */ + rq->global_seqno = 0; + rq->signaling.wait.seqno = 0; + rq->file_priv = NULL; + rq->batch = NULL; + rq->capture_list = NULL; + rq->waitboost = false; + + /* + * Reserve space in the ring buffer for all the commands required to + * eventually emit this request. This is to guarantee that the + * i915_request_add() call can't fail. Note that the reserve may need + * to be redone if the request is not actually submitted straight + * away, e.g. because a GPU scheduler has deferred it. + */ + rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; + GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz); + + /* + * Record the position of the start of the request so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the head. + */ + rq->head = rq->ring->emit; + + /* Unconditionally invalidate GPU caches and TLBs. 
*/ + ret = engine->emit_flush(rq, EMIT_INVALIDATE); + if (ret) + goto err_unwind; + + ret = engine->request_alloc(rq); + if (ret) + goto err_unwind; + + /* Check that we didn't interrupt ourselves with a new request */ + GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); + return rq; + +err_unwind: + rq->ring->emit = rq->head; + + /* Make sure we didn't add ourselves to external state before freeing */ + GEM_BUG_ON(!list_empty(&rq->active_list)); + GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list)); + GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list)); + + kmem_cache_free(i915->requests, rq); +err_unreserve: + unreserve_engine(engine); +err_unpin: + engine->context_unpin(engine, ctx); + return ERR_PTR(ret); +} + +static int +i915_request_await_request(struct i915_request *to, struct i915_request *from) +{ + int ret; + + GEM_BUG_ON(to == from); + GEM_BUG_ON(to->timeline == from->timeline); + + if (i915_request_completed(from)) + return 0; + + if (to->engine->schedule) { + ret = i915_priotree_add_dependency(to->i915, + &to->priotree, + &from->priotree); + if (ret < 0) + return ret; + } + + if (to->engine == from->engine) { + ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, + &from->submit, + I915_FENCE_GFP); + return ret < 0 ? ret : 0; + } + + if (to->engine->semaphore.sync_to) { + u32 seqno; + + GEM_BUG_ON(!from->engine->semaphore.signal); + + seqno = i915_request_global_seqno(from); + if (!seqno) + goto await_dma_fence; + + if (seqno <= to->timeline->global_sync[from->engine->id]) + return 0; + + trace_i915_gem_ring_sync_to(to, from); + ret = to->engine->semaphore.sync_to(to, from); + if (ret) + return ret; + + to->timeline->global_sync[from->engine->id] = seqno; + return 0; + } + +await_dma_fence: + ret = i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + I915_FENCE_GFP); + return ret < 0 ? ret : 0; +} + +int +i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) +{ + struct dma_fence **child = &fence; + unsigned int nchild = 1; + int ret; + + /* + * Note that if the fence-array was created in signal-on-any mode, + * we should *not* decompose it into its individual fences. However, + * we don't currently store which mode the fence-array is operating + * in. Fortunately, the only user of signal-on-any is private to + * amdgpu and we should not see any incoming fence-array from + * sync-file being in signal-on-any mode. + */ + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + + child = array->fences; + nchild = array->num_fences; + GEM_BUG_ON(!nchild); + } + + do { + fence = *child++; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + continue; + + /* + * Requests on the same timeline are explicitly ordered, along + * with their dependencies, by i915_request_add() which ensures + * that requests are submitted in-order through each ring. 
+ */
+ if (fence->context == rq->fence.context)
+ continue;
+
+ /* Squash repeated waits to the same timelines */
+ if (fence->context != rq->i915->mm.unordered_timeline &&
+ intel_timeline_sync_is_later(rq->timeline, fence))
+ continue;
+
+ if (dma_fence_is_i915(fence))
+ ret = i915_request_await_request(rq, to_request(fence));
+ else
+ ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
+ I915_FENCE_TIMEOUT,
+ I915_FENCE_GFP);
+ if (ret < 0)
+ return ret;
+
+ /* Record the latest fence used against each timeline */
+ if (fence->context != rq->i915->mm.unordered_timeline)
+ intel_timeline_sync_set(rq->timeline, fence);
+ } while (--nchild);
+
+ return 0;
+}
+
+/**
+ * i915_request_await_object - set this request to (async) wait upon a bo
+ * @to: request we are wishing to use
+ * @obj: object which may be in use on another ring.
+ * @write: whether the wait is on behalf of a writer
+ *
+ * This code is meant to abstract object synchronization with the GPU.
+ * Conceptually we serialise writes between engines inside the GPU.
+ * We only allow one engine to write into a buffer at any time, but
+ * multiple readers. To ensure each has a coherent view of memory, we must:
+ *
+ * - If there is an outstanding write request to the object, the new
+ * request must wait for it to complete (either CPU or in hw, requests
+ * on the same ring will be naturally ordered).
+ *
+ * - If we are a write request (pending_write_domain is set), the new
+ * request must wait for outstanding read requests to complete.
+ *
+ * Returns 0 if successful, else propagates up the lower layer error.
+ */
+int
+i915_request_await_object(struct i915_request *to,
+ struct drm_i915_gem_object *obj,
+ bool write)
+{
+ struct dma_fence *excl;
+ int ret = 0;
+
+ if (write) {
+ struct dma_fence **shared;
+ unsigned int count, i;
+
+ ret = reservation_object_get_fences_rcu(obj->resv,
+ &excl, &count, &shared);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < count; i++) {
+ ret = i915_request_await_dma_fence(to, shared[i]);
+ if (ret)
+ break;
+
+ dma_fence_put(shared[i]);
+ }
+
+ for (; i < count; i++)
+ dma_fence_put(shared[i]);
+ kfree(shared);
+ } else {
+ excl = reservation_object_get_excl_rcu(obj->resv);
+ }
+
+ if (excl) {
+ if (ret == 0)
+ ret = i915_request_await_dma_fence(to, excl);
+
+ dma_fence_put(excl);
+ }
+
+ return ret;
+}
+
+/*
+ * NB: This function is not allowed to fail. Doing so would mean the
+ * request is not being tracked for completion but the work itself is
+ * going to happen on the hardware. This would be a Bad Thing(tm).
+ */
+void __i915_request_add(struct i915_request *request, bool flush_caches)
+{
+ struct intel_engine_cs *engine = request->engine;
+ struct intel_ring *ring = request->ring;
+ struct intel_timeline *timeline = request->timeline;
+ struct i915_request *prev;
+ u32 *cs;
+ int err;
+
+ lockdep_assert_held(&request->i915->drm.struct_mutex);
+ trace_i915_request_add(request);
+
+ /*
+ * Make sure that no request gazumped us - if it was allocated after
+ * our i915_request_alloc() and called __i915_request_add() before
+ * us, the timeline will hold its seqno which is later than ours.
+ */
+ GEM_BUG_ON(timeline->seqno != request->fence.seqno);
+
+ /*
+ * To ensure that this call will not fail, space for its emissions
+ * should already have been reserved in the ring buffer. Let the ring
+ * know that it is time to use that space up.
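+ *
+ * (Annotation, not new code in this patch: i915_request_alloc() set
+ * rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST, which is checked to
+ * be at least engine->emit_breadcrumb_sz, and that reserve is kept
+ * back from ordinary emissions; clearing it below is what lets the
+ * final intel_ring_begin() for the breadcrumb succeed unconditionally.)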
+ */
+ request->reserved_space = 0;
+
+ /*
+ * Emit any outstanding flushes - execbuf can fail to emit the flush
+ * after having emitted the batchbuffer command. Hence we need to fix
+ * things up similar to emitting the lazy request. The difference here
+ * is that the flush _must_ happen before the next request, no matter
+ * what.
+ */
+ if (flush_caches) {
+ err = engine->emit_flush(request, EMIT_FLUSH);
+
+ /* Not allowed to fail! */
+ WARN(err, "engine->emit_flush() failed: %d!\n", err);
+ }
+
+ /*
+ * Record the position of the start of the breadcrumb so that
+ * should we detect the updated seqno part-way through the
+ * GPU processing the request, we never over-estimate the
+ * position of the ring's HEAD.
+ */
+ cs = intel_ring_begin(request, engine->emit_breadcrumb_sz);
+ GEM_BUG_ON(IS_ERR(cs));
+ request->postfix = intel_ring_offset(request, cs);
+
+ /*
+ * Seal the request and mark it as pending execution. Note that
+ * we may inspect this state, without holding any locks, during
+ * hangcheck. Hence we apply the barrier to ensure that we do not
+ * see a more recent value in the hws than we are tracking.
+ */
+
+ prev = i915_gem_active_raw(&timeline->last_request,
+ &request->i915->drm.struct_mutex);
+ if (prev && !i915_request_completed(prev)) {
+ i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
+ &request->submitq);
+ if (engine->schedule)
+ __i915_priotree_add_dependency(&request->priotree,
+ &prev->priotree,
+ &request->dep,
+ 0);
+ }
+
+ spin_lock_irq(&timeline->lock);
+ list_add_tail(&request->link, &timeline->requests);
+ spin_unlock_irq(&timeline->lock);
+
+ GEM_BUG_ON(timeline->seqno != request->fence.seqno);
+ i915_gem_active_set(&timeline->last_request, request);
+
+ list_add_tail(&request->ring_link, &ring->request_list);
+ request->emitted_jiffies = jiffies;
+
+ /*
+ * Let the backend know a new request has arrived that may need
+ * to adjust the existing execution schedule due to a high priority
+ * request - i.e. we may want to preempt the current request in order
+ * to run a high priority dependency chain *before* we can execute this
+ * request.
+ *
+ * This is called before the request is ready to run so that we can
+ * decide whether to preempt the entire chain so that it is ready to
+ * run at the earliest possible convenience.
+ */
+ if (engine->schedule)
+ engine->schedule(request, request->ctx->priority);
+
+ local_bh_disable();
+ i915_sw_fence_commit(&request->submit);
+ local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
+
+ /*
+ * In typical scenarios, we do not expect the previous request on
+ * the timeline to be still tracked by timeline->last_request if it
+ * has been completed. If the completed request is still here, that
+ * implies that request retirement is a long way behind submission,
+ * suggesting that we haven't been retiring frequently enough from
+ * the combination of retire-before-alloc, waiters and the background
+ * retirement worker. So if the last request on this timeline was
+ * already completed, do a catch up pass, flushing the retirement queue
+ * up to this client. Since we have now moved the heaviest operations
+ * during retirement onto secondary workers, such as freeing objects
+ * or contexts, retiring a bunch of requests is mostly list management
+ * (and cache misses), and so we should not be overly penalizing this
+ * client by performing excess work, though we may still be performing
+ * work on behalf of others -- but instead we should benefit from
+ * improved resource management.
(Well, that's the theory at least.)
+ */
+ if (prev && i915_request_completed(prev))
+ i915_request_retire_upto(prev);
+}
+
+static unsigned long local_clock_us(unsigned int *cpu)
+{
+ unsigned long t;
+
+ /*
+ * Cheaply and approximately convert from nanoseconds to microseconds.
+ * The result and subsequent calculations are also defined in the same
+ * approximate microseconds units. The principal source of timing
+ * error here is from the simple truncation.
+ *
+ * Note that local_clock() is only defined wrt the current CPU;
+ * the comparisons are no longer valid if we switch CPUs. Instead of
+ * blocking preemption for the entire busywait, we can detect the CPU
+ * switch and use that as an indicator of system load and a reason to
+ * stop busywaiting, see busywait_stop().
+ */
+ *cpu = get_cpu();
+ t = local_clock() >> 10;
+ put_cpu();
+
+ return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned int cpu)
+{
+ unsigned int this_cpu;
+
+ if (time_after(local_clock_us(&this_cpu), timeout))
+ return true;
+
+ return this_cpu != cpu;
+}
+
+static bool __i915_spin_request(const struct i915_request *rq,
+ u32 seqno, int state, unsigned long timeout_us)
+{
+ struct intel_engine_cs *engine = rq->engine;
+ unsigned int irq, cpu;
+
+ GEM_BUG_ON(!seqno);
+
+ /*
+ * Only wait for the request if we know it is likely to complete.
+ *
+ * We don't track the timestamps around requests, nor the average
+ * request length, so we do not have a good indicator that this
+ * request will complete within the timeout. What we do know is the
+ * order in which requests are executed by the engine and so we can
+ * tell if the request has started. If the request hasn't started yet,
+ * it is a fair assumption that it will not complete within our
+ * relatively short timeout.
+ */
+ if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1))
+ return false;
+
+ /*
+ * When waiting for high frequency requests, e.g. during synchronous
+ * rendering split between the CPU and GPU, the finite amount of time
+ * required to set up the irq and wait upon it limits the response
+ * rate. By busywaiting on the request completion for a short while we
+ * can service the high frequency waits as quickly as possible. However,
+ * if it is a slow request, we want to sleep as quickly as possible.
+ * The tradeoff between waiting and sleeping is roughly the time it
+ * takes to sleep on a request, on the order of a microsecond.
+ */
+
+ irq = atomic_read(&engine->irq_count);
+ timeout_us += local_clock_us(&cpu);
+ do {
+ if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
+ return seqno == i915_request_global_seqno(rq);
+
+ /*
+ * Seqnos are meant to be ordered *before* the interrupt. If
+ * we see an interrupt without a corresponding seqno advance,
+ * assume we won't see one in the near future but require
+ * the engine->seqno_barrier() to fixup coherency.
+ */
+ if (atomic_read(&engine->irq_count) != irq)
+ break;
+
+ if (signal_pending_state(state, current))
+ break;
+
+ if (busywait_stop(timeout_us, cpu))
+ break;
+
+ cpu_relax();
+ } while (!need_resched());
+
+ return false;
+}
+
+static bool __i915_wait_request_check_and_reset(struct i915_request *request)
+{
+ if (likely(!i915_reset_handoff(&request->i915->gpu_error)))
+ return false;
+
+ __set_current_state(TASK_RUNNING);
+ i915_reset(request->i915, 0);
+ return true;
+}
+
+/**
+ * i915_request_wait - wait until execution of request has finished
+ * @rq: the request to wait upon
+ * @flags: how to wait
+ * @timeout: how long to wait in jiffies
+ *
+ * i915_request_wait() waits for the request to be completed, for a
+ * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
+ * unbounded wait).
+ *
+ * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
+ * in via the flags; conversely, if the struct_mutex is not held, the caller
+ * must not specify that the wait is locked.
+ *
+ * Returns the remaining time (in jiffies) if the request completed, which may
+ * be zero or -ETIME if the request is unfinished after the timeout expires.
+ * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
+ * pending before the request completes.
+ */
+long i915_request_wait(struct i915_request *rq,
+ unsigned int flags,
+ long timeout)
+{
+ const int state = flags & I915_WAIT_INTERRUPTIBLE ?
+ TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+ wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
+ DEFINE_WAIT_FUNC(reset, default_wake_function);
+ DEFINE_WAIT_FUNC(exec, default_wake_function);
+ struct intel_wait wait;
+
+ might_sleep();
+#if IS_ENABLED(CONFIG_LOCKDEP)
+ GEM_BUG_ON(debug_locks &&
+ !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
+ !!(flags & I915_WAIT_LOCKED));
+#endif
+ GEM_BUG_ON(timeout < 0);
+
+ if (i915_request_completed(rq))
+ return timeout;
+
+ if (!timeout)
+ return -ETIME;
+
+ trace_i915_request_wait_begin(rq, flags);
+
+ add_wait_queue(&rq->execute, &exec);
+ if (flags & I915_WAIT_LOCKED)
+ add_wait_queue(errq, &reset);
+
+ intel_wait_init(&wait, rq);
+
+restart:
+ do {
+ set_current_state(state);
+ if (intel_wait_update_request(&wait, rq))
+ break;
+
+ if (flags & I915_WAIT_LOCKED &&
+ __i915_wait_request_check_and_reset(rq))
+ continue;
+
+ if (signal_pending_state(state, current)) {
+ timeout = -ERESTARTSYS;
+ goto complete;
+ }
+
+ if (!timeout) {
+ timeout = -ETIME;
+ goto complete;
+ }
+
+ timeout = io_schedule_timeout(timeout);
+ } while (1);
+
+ GEM_BUG_ON(!intel_wait_has_seqno(&wait));
+ GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+
+ /* Optimistic short spin before touching IRQs */
+ if (__i915_spin_request(rq, wait.seqno, state, 5))
+ goto complete;
+
+ set_current_state(state);
+ if (intel_engine_add_wait(rq->engine, &wait))
+ /*
+ * In order to check that we haven't missed the interrupt
+ * as we enabled it, we need to kick ourselves to do a
+ * coherent check on the seqno before we sleep.
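+ *
+ * (The underlying idiom, for reference - publish the task state
+ * before the final condition check, so a wakeup racing with the
+ * check merely turns the next schedule() into a no-op instead of
+ * being lost:
+ *
+ *	set_current_state(state);
+ *	if (condition)
+ *		break;
+ *	io_schedule_timeout(timeout);
+ *
+ * Annotation only; the loop below is the patch's own code.)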
+ */
+ goto wakeup;
+
+ if (flags & I915_WAIT_LOCKED)
+ __i915_wait_request_check_and_reset(rq);
+
+ for (;;) {
+ if (signal_pending_state(state, current)) {
+ timeout = -ERESTARTSYS;
+ break;
+ }
+
+ if (!timeout) {
+ timeout = -ETIME;
+ break;
+ }
+
+ timeout = io_schedule_timeout(timeout);
+
+ if (intel_wait_complete(&wait) &&
+ intel_wait_check_request(&wait, rq))
+ break;
+
+ set_current_state(state);
+
+wakeup:
+ /*
+ * Carefully check if the request is complete, giving time
+ * for the seqno to be visible following the interrupt.
+ * We also have to check in case we are kicked by the GPU
+ * reset in order to drop the struct_mutex.
+ */
+ if (__i915_request_irq_complete(rq))
+ break;
+
+ /*
+ * If the GPU is hung, and we hold the lock, reset the GPU
+ * and then check for completion. On a full reset, the engine's
+ * HW seqno will be advanced past us and we are complete.
+ * If we do a partial reset, we have to wait for the GPU to
+ * resume and update the breadcrumb.
+ *
+ * If we don't hold the mutex, we can just wait for the worker
+ * to come along and update the breadcrumb (either directly
+ * itself, or indirectly by recovering the GPU).
+ */
+ if (flags & I915_WAIT_LOCKED &&
+ __i915_wait_request_check_and_reset(rq))
+ continue;
+
+ /* Only spin if we know the GPU is processing this request */
+ if (__i915_spin_request(rq, wait.seqno, state, 2))
+ break;
+
+ if (!intel_wait_check_request(&wait, rq)) {
+ intel_engine_remove_wait(rq->engine, &wait);
+ goto restart;
+ }
+ }
+
+ intel_engine_remove_wait(rq->engine, &wait);
+complete:
+ __set_current_state(TASK_RUNNING);
+ if (flags & I915_WAIT_LOCKED)
+ remove_wait_queue(errq, &reset);
+ remove_wait_queue(&rq->execute, &exec);
+ trace_i915_request_wait_end(rq);
+
+ return timeout;
+}
+
+static void engine_retire_requests(struct intel_engine_cs *engine)
+{
+ struct i915_request *request, *next;
+ u32 seqno = intel_engine_get_seqno(engine);
+ LIST_HEAD(retire);
+
+ spin_lock_irq(&engine->timeline->lock);
+ list_for_each_entry_safe(request, next,
+ &engine->timeline->requests, link) {
+ if (!i915_seqno_passed(seqno, request->global_seqno))
+ break;
+
+ list_move_tail(&request->link, &retire);
+ }
+ spin_unlock_irq(&engine->timeline->lock);
+
+ list_for_each_entry_safe(request, next, &retire, link)
+ i915_request_retire(request);
+}
+
+void i915_retire_requests(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ lockdep_assert_held(&i915->drm.struct_mutex);
+
+ if (!i915->gt.active_requests)
+ return;
+
+ for_each_engine(engine, i915, id)
+ engine_retire_requests(engine);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_request.c"
+#include "selftests/i915_request.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
new file mode 100644
index 000000000000..74311fc53e2f
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -0,0 +1,738 @@
+/*
+ * Copyright © 2008-2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_REQUEST_H
+#define I915_REQUEST_H
+
+#include <linux/dma-fence.h>
+
+#include "i915_gem.h"
+#include "i915_sw_fence.h"
+
+#include <uapi/drm/i915_drm.h>
+
+struct drm_file;
+struct drm_i915_gem_object;
+struct i915_request;
+
+struct intel_wait {
+ struct rb_node node;
+ struct task_struct *tsk;
+ struct i915_request *request;
+ u32 seqno;
+};
+
+struct intel_signal_node {
+ struct rb_node node;
+ struct intel_wait wait;
+};
+
+struct i915_dependency {
+ struct i915_priotree *signaler;
+ struct list_head signal_link;
+ struct list_head wait_link;
+ struct list_head dfs_link;
+ unsigned long flags;
+#define I915_DEPENDENCY_ALLOC BIT(0)
+};
+
+/*
+ * "People assume that time is a strict progression of cause to effect, but
+ * actually, from a nonlinear, non-subjective viewpoint, it's more like a big
+ * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015
+ *
+ * Requests exist in a complex web of interdependencies. Each request
+ * has to wait for some other request to complete before it is ready to be run
+ * (e.g. we have to wait until the pixels have been rendered into a texture
+ * before we can copy from it). We track the readiness of a request in terms
+ * of fences, but we also need to keep the dependency tree for the lifetime
+ * of the request (beyond the life of an individual fence). We use the tree
+ * at various points to reorder the requests whilst keeping the requests
+ * in order with respect to their various dependencies.
+ */
+struct i915_priotree {
+ struct list_head signalers_list; /* those before us, we depend upon */
+ struct list_head waiters_list; /* those after us, they depend upon us */
+ struct list_head link;
+ int priority;
+};
+
+enum {
+ I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
+ I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
+ I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
+
+ I915_PRIORITY_INVALID = INT_MIN
+};
+
+struct i915_capture_list {
+ struct i915_capture_list *next;
+ struct i915_vma *vma;
+};
+
+/**
+ * Request queue structure.
+ *
+ * The request queue allows us to note sequence numbers that have been emitted
+ * and may be associated with active buffers to be retired.
+ *
+ * By keeping this list, we can avoid having to do questionable sequence
+ * number comparisons on buffer last_read|write_seqno. It also allows an
+ * emission time to be associated with the request for tracking how far ahead
+ * of the GPU the submission is.
+ *
+ * When modifying this structure be very aware that we perform a lockless
+ * RCU lookup of it that may race against reallocation of the struct
+ * from the slab freelist. We intentionally do not zero the structure on
+ * allocation so that the lookup can use the dangling pointers (and is
+ * cognisant that those pointers may be wrong). Instead, everything that
+ * needs to be initialised must be done so explicitly.
+ *
+ * The requests are reference counted.
+ */
+struct i915_request {
+ struct dma_fence fence;
+ spinlock_t lock;
+
+ /** On which ring this request was generated */
+ struct drm_i915_private *i915;
+
+ /**
+ * Context and ring buffer related to this request
+ * Contexts are refcounted, so when this request is associated with a
+ * context, we must increment the context's refcount, to guarantee that
+ * it persists while any request is linked to it. Requests themselves
+ * are also refcounted, so the request will only be freed when the last
+ * reference to it is dismissed, and the code in
+ * i915_request_free() will then decrement the refcount on the
+ * context.
+ */
+ struct i915_gem_context *ctx;
+ struct intel_engine_cs *engine;
+ struct intel_ring *ring;
+ struct intel_timeline *timeline;
+ struct intel_signal_node signaling;
+
+ /*
+ * Fences for the various phases in the request's lifetime.
+ *
+ * The submit fence is used to await upon all of the request's
+ * dependencies. When it is signaled, the request is ready to run.
+ * It is used by the driver to then queue the request for execution.
+ */
+ struct i915_sw_fence submit;
+ wait_queue_entry_t submitq;
+ wait_queue_head_t execute;
+
+ /*
+ * A list of everyone we wait upon, and everyone who waits upon us.
+ * Even though we will not be submitted to the hardware before the
+ * submit fence is signaled (it waits for all external events as well
+ * as our own requests), the scheduler still needs to know the
+ * dependency tree for the lifetime of the request (from execbuf
+ * to retirement), i.e. bidirectional dependency information for the
+ * request not tied to individual fences.
+ */
+ struct i915_priotree priotree;
+ struct i915_dependency dep;
+
+ /**
+ * GEM sequence number associated with this request on the
+ * global execution timeline. It is zero when the request is not
+ * on the HW queue (i.e. not on the engine timeline list).
+ * Its value is guarded by the timeline spinlock.
+ */
+ u32 global_seqno;
+
+ /** Position in the ring of the start of the request */
+ u32 head;
+
+ /**
+ * Position in the ring of the start of the postfix.
+ * This is required to calculate the maximum available ring space
+ * without overwriting the postfix.
+ */
+ u32 postfix;
+
+ /** Position in the ring of the end of the whole request */
+ u32 tail;
+
+ /** Position in the ring of the end of any workarounds after the tail */
+ u32 wa_tail;
+
+ /** Preallocated space in the ring for emitting the request */
+ u32 reserved_space;
+
+ /** Batch buffer related to this request if any (used for
+ * error state dump only).
+ */
+ struct i915_vma *batch;
+ /**
+ * Additional buffers requested by userspace to be captured upon
+ * a GPU hang. The vma/obj on this list are protected by their
+ * active reference - all objects on this list must also be
+ * on the active_list (of their final request).
+ */
+ struct i915_capture_list *capture_list;
+ struct list_head active_list;
+
+ /** Time at which this request was emitted, in jiffies.
*/ + unsigned long emitted_jiffies; + + bool waitboost; + + /** engine->request_list entry for this request */ + struct list_head link; + + /** ring->request_list entry for this request */ + struct list_head ring_link; + + struct drm_i915_file_private *file_priv; + /** file_priv list entry for this request */ + struct list_head client_link; +}; + +#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) + +extern const struct dma_fence_ops i915_fence_ops; + +static inline bool dma_fence_is_i915(const struct dma_fence *fence) +{ + return fence->ops == &i915_fence_ops; +} + +struct i915_request * __must_check +i915_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); +void i915_request_retire_upto(struct i915_request *rq); + +static inline struct i915_request * +to_request(struct dma_fence *fence) +{ + /* We assume that NULL fence/request are interoperable */ + BUILD_BUG_ON(offsetof(struct i915_request, fence) != 0); + GEM_BUG_ON(fence && !dma_fence_is_i915(fence)); + return container_of(fence, struct i915_request, fence); +} + +static inline struct i915_request * +i915_request_get(struct i915_request *rq) +{ + return to_request(dma_fence_get(&rq->fence)); +} + +static inline struct i915_request * +i915_request_get_rcu(struct i915_request *rq) +{ + return to_request(dma_fence_get_rcu(&rq->fence)); +} + +static inline void +i915_request_put(struct i915_request *rq) +{ + dma_fence_put(&rq->fence); +} + +/** + * i915_request_global_seqno - report the current global seqno + * @request - the request + * + * A request is assigned a global seqno only when it is on the hardware + * execution queue. The global seqno can be used to maintain a list of + * requests on the same engine in retirement order, for example for + * constructing a priority queue for waiting. Prior to its execution, or + * if it is subsequently removed in the event of preemption, its global + * seqno is zero. As both insertion and removal from the execution queue + * may operate in IRQ context, it is not guarded by the usual struct_mutex + * BKL. Instead those relying on the global seqno must be prepared for its + * value to change between reads. Only when the request is complete can + * the global seqno be stable (due to the memory barriers on submitting + * the commands to the hardware to write the breadcrumb, if the HWS shows + * that it has passed the global seqno and the global seqno is unchanged + * after the read, it is indeed complete). 
+ */ +static u32 +i915_request_global_seqno(const struct i915_request *request) +{ + return READ_ONCE(request->global_seqno); +} + +int i915_request_await_object(struct i915_request *to, + struct drm_i915_gem_object *obj, + bool write); +int i915_request_await_dma_fence(struct i915_request *rq, + struct dma_fence *fence); + +void __i915_request_add(struct i915_request *rq, bool flush_caches); +#define i915_request_add(rq) \ + __i915_request_add(rq, false) + +void __i915_request_submit(struct i915_request *request); +void i915_request_submit(struct i915_request *request); + +void __i915_request_unsubmit(struct i915_request *request); +void i915_request_unsubmit(struct i915_request *request); + +long i915_request_wait(struct i915_request *rq, + unsigned int flags, + long timeout) + __attribute__((nonnull(1))); +#define I915_WAIT_INTERRUPTIBLE BIT(0) +#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */ +#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ + +static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); + +/** + * Returns true if seq1 is later than seq2. + */ +static inline bool i915_seqno_passed(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + +static inline bool +__i915_request_completed(const struct i915_request *rq, u32 seqno) +{ + GEM_BUG_ON(!seqno); + return i915_seqno_passed(intel_engine_get_seqno(rq->engine), seqno) && + seqno == i915_request_global_seqno(rq); +} + +static inline bool i915_request_completed(const struct i915_request *rq) +{ + u32 seqno; + + seqno = i915_request_global_seqno(rq); + if (!seqno) + return false; + + return __i915_request_completed(rq, seqno); +} + +static inline bool i915_request_started(const struct i915_request *rq) +{ + u32 seqno; + + seqno = i915_request_global_seqno(rq); + if (!seqno) + return false; + + return i915_seqno_passed(intel_engine_get_seqno(rq->engine), + seqno - 1); +} + +static inline bool i915_priotree_signaled(const struct i915_priotree *pt) +{ + const struct i915_request *rq = + container_of(pt, const struct i915_request, priotree); + + return i915_request_completed(rq); +} + +void i915_retire_requests(struct drm_i915_private *i915); + +/* + * We treat requests as fences. This is not be to confused with our + * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. + * We use the fences to synchronize access from the CPU with activity on the + * GPU, for example, we should not rewrite an object's PTE whilst the GPU + * is reading them. We also track fences at a higher level to provide + * implicit synchronisation around GEM objects, e.g. set-domain will wait + * for outstanding GPU rendering before marking the object ready for CPU + * access, or a pageflip will wait until the GPU is complete before showing + * the frame on the scanout. + * + * In order to use a fence, the object must track the fence it needs to + * serialise with. For example, GEM objects want to track both read and + * write access so that we can perform concurrent read operations between + * the CPU and GPU engines, as well as waiting for all rendering to + * complete, or waiting for the last GPU user of a "fence register". The + * object then embeds a #i915_gem_active to track the most recent (in + * retirement order) request relevant for the desired mode of access. + * The #i915_gem_active is updated with i915_gem_active_set() to track the + * most recent fence request, typically this is done as part of + * i915_vma_move_to_active(). 
+ * + * When the #i915_gem_active completes (is retired), it will + * signal its completion to the owner through a callback as well as mark + * itself as idle (i915_gem_active.request == NULL). The owner + * can then perform any action, such as delayed freeing of an active + * resource including itself. + */ +struct i915_gem_active; + +typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, + struct i915_request *); + +struct i915_gem_active { + struct i915_request __rcu *request; + struct list_head link; + i915_gem_retire_fn retire; +}; + +void i915_gem_retire_noop(struct i915_gem_active *, + struct i915_request *request); + +/** + * init_request_active - prepares the activity tracker for use + * @active - the active tracker + * @func - a callback when then the tracker is retired (becomes idle), + * can be NULL + * + * init_request_active() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active request + * associated with it. When the last request becomes idle, when it is retired + * after completion, the optional callback @func is invoked. + */ +static inline void +init_request_active(struct i915_gem_active *active, + i915_gem_retire_fn retire) +{ + INIT_LIST_HEAD(&active->link); + active->retire = retire ?: i915_gem_retire_noop; +} + +/** + * i915_gem_active_set - updates the tracker to watch the current request + * @active - the active tracker + * @request - the request to watch + * + * i915_gem_active_set() watches the given @request for completion. Whilst + * that @request is busy, the @active reports busy. When that @request is + * retired, the @active tracker is updated to report idle. + */ +static inline void +i915_gem_active_set(struct i915_gem_active *active, + struct i915_request *request) +{ + list_move(&active->link, &request->active_list); + rcu_assign_pointer(active->request, request); +} + +/** + * i915_gem_active_set_retire_fn - updates the retirement callback + * @active - the active tracker + * @fn - the routine called when the request is retired + * @mutex - struct_mutex used to guard retirements + * + * i915_gem_active_set_retire_fn() updates the function pointer that + * is called when the final request associated with the @active tracker + * is retired. + */ +static inline void +i915_gem_active_set_retire_fn(struct i915_gem_active *active, + i915_gem_retire_fn fn, + struct mutex *mutex) +{ + lockdep_assert_held(mutex); + active->retire = fn ?: i915_gem_retire_noop; +} + +static inline struct i915_request * +__i915_gem_active_peek(const struct i915_gem_active *active) +{ + /* + * Inside the error capture (running with the driver in an unknown + * state), we want to bend the rules slightly (a lot). + * + * Work is in progress to make it safer, in the meantime this keeps + * the known issue from spamming the logs. + */ + return rcu_dereference_protected(active->request, 1); +} + +/** + * i915_gem_active_raw - return the active request + * @active - the active tracker + * + * i915_gem_active_raw() returns the current request being tracked, or NULL. + * It does not obtain a reference on the request for the caller, so the caller + * must hold struct_mutex. 
+ */ +static inline struct i915_request * +i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) +{ + return rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); +} + +/** + * i915_gem_active_peek - report the active request being monitored + * @active - the active tracker + * + * i915_gem_active_peek() returns the current request being tracked if + * still active, or NULL. It does not obtain a reference on the request + * for the caller, so the caller must hold struct_mutex. + */ +static inline struct i915_request * +i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) +{ + struct i915_request *request; + + request = i915_gem_active_raw(active, mutex); + if (!request || i915_request_completed(request)) + return NULL; + + return request; +} + +/** + * i915_gem_active_get - return a reference to the active request + * @active - the active tracker + * + * i915_gem_active_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold struct_mutex. + */ +static inline struct i915_request * +i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) +{ + return i915_request_get(i915_gem_active_peek(active, mutex)); +} + +/** + * __i915_gem_active_get_rcu - return a reference to the active request + * @active - the active tracker + * + * __i915_gem_active_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold the RCU read lock, but + * the returned pointer is safe to use outside of RCU. + */ +static inline struct i915_request * +__i915_gem_active_get_rcu(const struct i915_gem_active *active) +{ + /* + * Performing a lockless retrieval of the active request is super + * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing + * slab of request objects will not be freed whilst we hold the + * RCU read lock. It does not guarantee that the request itself + * will not be freed and then *reused*. Viz, + * + * Thread A Thread B + * + * rq = active.request + * retire(rq) -> free(rq); + * (rq is now first on the slab freelist) + * active.request = NULL + * + * rq = new submission on a new object + * ref(rq) + * + * To prevent the request from being reused whilst the caller + * uses it, we take a reference like normal. Whilst acquiring + * the reference we check that it is not in a destroyed state + * (refcnt == 0). That prevents the request being reallocated + * whilst the caller holds on to it. To check that the request + * was not reallocated as we acquired the reference we have to + * check that our request remains the active request across + * the lookup, in the same manner as a seqlock. The visibility + * of the pointer versus the reference counting is controlled + * by using RCU barriers (rcu_dereference and rcu_assign_pointer). + * + * In the middle of all that, we inspect whether the request is + * complete. Retiring is lazy so the request may be completed long + * before the active tracker is updated. Querying whether the + * request is complete is far cheaper (as it involves no locked + * instructions setting cachelines to exclusive) than acquiring + * the reference, so we do it first. The RCU read lock ensures the + * pointer dereference is valid, but does not ensure that the + * seqno nor HWS is the right one! However, if the request was + * reallocated, that means the active tracker's request was complete. 
+ * If the new request is also complete, then both are and we can + * just report the active tracker is idle. If the new request is + * incomplete, then we acquire a reference on it and check that + * it remained the active request. + * + * It is then imperative that we do not zero the request on + * reallocation, so that we can chase the dangling pointers! + * See i915_request_alloc(). + */ + do { + struct i915_request *request; + + request = rcu_dereference(active->request); + if (!request || i915_request_completed(request)) + return NULL; + + /* + * An especially silly compiler could decide to recompute the + * result of i915_request_completed, more specifically + * re-emit the load for request->fence.seqno. A race would catch + * a later seqno value, which could flip the result from true to + * false. Which means part of the instructions below might not + * be executed, while later on instructions are executed. Due to + * barriers within the refcounting the inconsistency can't reach + * past the call to i915_request_get_rcu, but not executing + * that while still executing i915_request_put() creates + * havoc enough. Prevent this with a compiler barrier. + */ + barrier(); + + request = i915_request_get_rcu(request); + + /* + * What stops the following rcu_access_pointer() from occurring + * before the above i915_request_get_rcu()? If we were + * to read the value before pausing to get the reference to + * the request, we may not notice a change in the active + * tracker. + * + * The rcu_access_pointer() is a mere compiler barrier, which + * means both the CPU and compiler are free to perform the + * memory read without constraint. The compiler only has to + * ensure that any operations after the rcu_access_pointer() + * occur afterwards in program order. This means the read may + * be performed earlier by an out-of-order CPU, or adventurous + * compiler. + * + * The atomic operation at the heart of + * i915_request_get_rcu(), see dma_fence_get_rcu(), is + * atomic_inc_not_zero() which is only a full memory barrier + * when successful. That is, if i915_request_get_rcu() + * returns the request (and so with the reference counted + * incremented) then the following read for rcu_access_pointer() + * must occur after the atomic operation and so confirm + * that this request is the one currently being tracked. + * + * The corresponding write barrier is part of + * rcu_assign_pointer(). + */ + if (!request || request == rcu_access_pointer(active->request)) + return rcu_pointer_handoff(request); + + i915_request_put(request); + } while (1); +} + +/** + * i915_gem_active_get_unlocked - return a reference to the active request + * @active - the active tracker + * + * i915_gem_active_get_unlocked() returns a reference to the active request, + * or NULL if the active tracker is idle. The reference is obtained under RCU, + * so no locking is required by the caller. + * + * The reference should be freed with i915_request_put(). + */ +static inline struct i915_request * +i915_gem_active_get_unlocked(const struct i915_gem_active *active) +{ + struct i915_request *request; + + rcu_read_lock(); + request = __i915_gem_active_get_rcu(active); + rcu_read_unlock(); + + return request; +} + +/** + * i915_gem_active_isset - report whether the active tracker is assigned + * @active - the active tracker + * + * i915_gem_active_isset() returns true if the active tracker is currently + * assigned to a request. Due to the lazy retiring, that request may be idle + * and this may report stale information. 
+ */ +static inline bool +i915_gem_active_isset(const struct i915_gem_active *active) +{ + return rcu_access_pointer(active->request); +} + +/** + * i915_gem_active_wait - waits until the request is completed + * @active - the active request on which to wait + * @flags - how to wait + * @timeout - how long to wait at most + * @rps - userspace client to charge for a waitboost + * + * i915_gem_active_wait() waits until the request is completed before + * returning, without requiring any locks to be held. Note that it does not + * retire any requests before returning. + * + * This function relies on RCU in order to acquire the reference to the active + * request without holding any locks. See __i915_gem_active_get_rcu() for the + * glory details on how that is managed. Once the reference is acquired, we + * can then wait upon the request, and afterwards release our reference, + * free of any locking. + * + * This function wraps i915_request_wait(), see it for the full details on + * the arguments. + * + * Returns 0 if successful, or a negative error code. + */ +static inline int +i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags) +{ + struct i915_request *request; + long ret = 0; + + request = i915_gem_active_get_unlocked(active); + if (request) { + ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT); + i915_request_put(request); + } + + return ret < 0 ? ret : 0; +} + +/** + * i915_gem_active_retire - waits until the request is retired + * @active - the active request on which to wait + * + * i915_gem_active_retire() waits until the request is completed, + * and then ensures that at least the retirement handler for this + * @active tracker is called before returning. If the @active + * tracker is idle, the function returns immediately. + */ +static inline int __must_check +i915_gem_active_retire(struct i915_gem_active *active, + struct mutex *mutex) +{ + struct i915_request *request; + long ret; + + request = i915_gem_active_raw(active, mutex); + if (!request) + return 0; + + ret = i915_request_wait(request, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (ret < 0) + return ret; + + list_del_init(&active->link); + RCU_INIT_POINTER(active->request, NULL); + + active->retire(active, request); + + return 0; +} + +#define for_each_active(mask, idx) \ + for (; mask ? 
idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) + +#endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index e1169c02eb2b..408827bf5d96 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -586,8 +586,7 @@ TRACE_EVENT(i915_gem_evict_vm, ); TRACE_EVENT(i915_gem_ring_sync_to, - TP_PROTO(struct drm_i915_gem_request *to, - struct drm_i915_gem_request *from), + TP_PROTO(struct i915_request *to, struct i915_request *from), TP_ARGS(to, from), TP_STRUCT__entry( @@ -610,9 +609,9 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->seqno) ); -TRACE_EVENT(i915_gem_request_queue, - TP_PROTO(struct drm_i915_gem_request *req, u32 flags), - TP_ARGS(req, flags), +TRACE_EVENT(i915_request_queue, + TP_PROTO(struct i915_request *rq, u32 flags), + TP_ARGS(rq, flags), TP_STRUCT__entry( __field(u32, dev) @@ -624,11 +623,11 @@ TRACE_EVENT(i915_gem_request_queue, ), TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; __entry->flags = flags; ), @@ -637,9 +636,9 @@ TRACE_EVENT(i915_gem_request_queue, __entry->seqno, __entry->flags) ); -DECLARE_EVENT_CLASS(i915_gem_request, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req), +DECLARE_EVENT_CLASS(i915_request, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq), TP_STRUCT__entry( __field(u32, dev) @@ -651,12 +650,12 @@ DECLARE_EVENT_CLASS(i915_gem_request, ), TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; - __entry->global = req->global_seqno; + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global = rq->global_seqno; ), TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u", @@ -664,26 +663,25 @@ DECLARE_EVENT_CLASS(i915_gem_request, __entry->seqno, __entry->global) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_add, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_add, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); #if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) -DEFINE_EVENT(i915_gem_request, i915_gem_request_submit, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_submit, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_execute, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); -DECLARE_EVENT_CLASS(i915_gem_request_hw, - TP_PROTO(struct drm_i915_gem_request *req, - unsigned int port), - TP_ARGS(req, port), +DECLARE_EVENT_CLASS(i915_request_hw, + TP_PROTO(struct i915_request *rq, unsigned int port), + TP_ARGS(rq, port), TP_STRUCT__entry( __field(u32, dev) @@ -696,14 +694,14 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, ), TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - 
__entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; - __entry->global_seqno = req->global_seqno; - __entry->port = port; - ), + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->port = port; + ), TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", __entry->dev, __entry->hw_id, __entry->ring, @@ -711,34 +709,34 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, __entry->global_seqno, __entry->port) ); -DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, - TP_PROTO(struct drm_i915_gem_request *req, unsigned int port), - TP_ARGS(req, port) +DEFINE_EVENT(i915_request_hw, i915_request_in, + TP_PROTO(struct i915_request *rq, unsigned int port), + TP_ARGS(rq, port) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_out, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_out, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); #else #if !defined(TRACE_HEADER_MULTI_READ) static inline void -trace_i915_gem_request_submit(struct drm_i915_gem_request *req) +trace_i915_request_submit(struct i915_request *rq) { } static inline void -trace_i915_gem_request_execute(struct drm_i915_gem_request *req) +trace_i915_request_execute(struct i915_request *rq) { } static inline void -trace_i915_gem_request_in(struct drm_i915_gem_request *req, unsigned int port) +trace_i915_request_in(struct i915_request *rq, unsigned int port) { } static inline void -trace_i915_gem_request_out(struct drm_i915_gem_request *req) +trace_i915_request_out(struct i915_request *rq) { } #endif @@ -767,14 +765,14 @@ TRACE_EVENT(intel_engine_notify, __entry->waiters) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_retire, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); -TRACE_EVENT(i915_gem_request_wait_begin, - TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), - TP_ARGS(req, flags), +TRACE_EVENT(i915_request_wait_begin, + TP_PROTO(struct i915_request *rq, unsigned int flags), + TP_ARGS(rq, flags), TP_STRUCT__entry( __field(u32, dev) @@ -793,12 +791,12 @@ TRACE_EVENT(i915_gem_request_wait_begin, * less desirable. 
*/ TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; - __entry->global = req->global_seqno; + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global = rq->global_seqno; __entry->flags = flags; ), @@ -808,9 +806,9 @@ TRACE_EVENT(i915_gem_request_wait_begin, !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_wait_end, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); TRACE_EVENT(i915_flip_request, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e0e7c48f45dc..4bda3bd29bf5 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -31,8 +31,7 @@ #include static void -i915_vma_retire(struct i915_gem_active *active, - struct drm_i915_gem_request *rq) +i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq) { const unsigned int idx = rq->engine->id; struct i915_vma *vma = diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index fd5b84904f7c..8c5022095418 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -32,8 +32,8 @@ #include "i915_gem_gtt.h" #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" -#include "i915_gem_request.h" +#include "i915_request.h" enum i915_cache_level; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b955f7d7bd0f..a83690642aab 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -588,7 +588,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, spin_unlock_irq(&b->rb_lock); } -static bool signal_complete(const struct drm_i915_gem_request *request) +static bool signal_complete(const struct i915_request *request) { if (!request) return false; @@ -600,9 +600,9 @@ static bool signal_complete(const struct drm_i915_gem_request *request) return __i915_request_irq_complete(request); } -static struct drm_i915_gem_request *to_signaler(struct rb_node *rb) +static struct i915_request *to_signaler(struct rb_node *rb) { - return rb_entry(rb, struct drm_i915_gem_request, signaling.node); + return rb_entry(rb, struct i915_request, signaling.node); } static void signaler_set_rtpriority(void) @@ -613,7 +613,7 @@ static void signaler_set_rtpriority(void) } static void __intel_engine_remove_signal(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -644,7 +644,7 @@ static void __intel_engine_remove_signal(struct intel_engine_cs *engine, } } -static struct drm_i915_gem_request * +static struct i915_request * get_first_signal_rcu(struct intel_breadcrumbs *b) { /* @@ -654,18 +654,18 @@ get_first_signal_rcu(struct intel_breadcrumbs *b) * the required memory barriers. 
*/ do { - struct drm_i915_gem_request *request; + struct i915_request *request; request = rcu_dereference(b->first_signal); if (request) - request = i915_gem_request_get_rcu(request); + request = i915_request_get_rcu(request); barrier(); if (!request || request == rcu_access_pointer(b->first_signal)) return rcu_pointer_handoff(request); - i915_gem_request_put(request); + i915_request_put(request); } while (1); } @@ -673,7 +673,7 @@ static int intel_breadcrumbs_signaler(void *arg) { struct intel_engine_cs *engine = arg; struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_gem_request *request; + struct i915_request *request; /* Install ourselves with high priority to reduce signalling latency */ signaler_set_rtpriority(); @@ -699,7 +699,7 @@ static int intel_breadcrumbs_signaler(void *arg) &request->fence.flags)) { local_bh_disable(); dma_fence_signal(&request->fence); - GEM_BUG_ON(!i915_gem_request_completed(request)); + GEM_BUG_ON(!i915_request_completed(request)); local_bh_enable(); /* kick start the tasklets */ } @@ -718,7 +718,7 @@ static int intel_breadcrumbs_signaler(void *arg) */ do_schedule = need_resched(); } - i915_gem_request_put(request); + i915_request_put(request); if (unlikely(do_schedule)) { if (kthread_should_park()) @@ -735,8 +735,7 @@ static int intel_breadcrumbs_signaler(void *arg) return 0; } -void intel_engine_enable_signaling(struct drm_i915_gem_request *request, - bool wakeup) +void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -753,7 +752,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request, GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&request->lock); - seqno = i915_gem_request_global_seqno(request); + seqno = i915_request_global_seqno(request); if (!seqno) return; @@ -774,7 +773,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request, */ wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); - if (!__i915_gem_request_completed(request, seqno)) { + if (!__i915_request_completed(request, seqno)) { struct rb_node *parent, **p; bool first; @@ -811,7 +810,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request, wake_up_process(b->signaler); } -void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) +void intel_engine_cancel_signaling(struct i915_request *request) { GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&request->lock); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 75baa5dab877..c14d2a25408d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12584,23 +12584,23 @@ struct wait_rps_boost { struct wait_queue_entry wait; struct drm_crtc *crtc; - struct drm_i915_gem_request *request; + struct i915_request *request; }; static int do_rps_boost(struct wait_queue_entry *_wait, unsigned mode, int sync, void *key) { struct wait_rps_boost *wait = container_of(_wait, typeof(*wait), wait); - struct drm_i915_gem_request *rq = wait->request; + struct i915_request *rq = wait->request; /* * If we missed the vblank, but the request is already running it * is reasonable to assume that it will complete before the next * vblank without our intervention, so leave RPS alone. 
*/ - if (!i915_gem_request_started(rq)) + if (!i915_request_started(rq)) gen6_rps_boost(rq, NULL); - i915_gem_request_put(rq); + i915_request_put(rq); drm_crtc_vblank_put(wait->crtc); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c81be2c7b582..04fc4bd12329 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1894,8 +1894,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); -void gen6_rps_boost(struct drm_i915_gem_request *rq, - struct intel_rps_client *rps); +void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps); void g4x_wm_get_hw_state(struct drm_device *dev); void vlv_wm_get_hw_state(struct drm_device *dev); void ilk_wm_get_hw_state(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f3c5100d629e..c31544406974 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1426,20 +1426,20 @@ int init_workarounds_ring(struct intel_engine_cs *engine) return 0; } -int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +int intel_ring_workarounds_emit(struct i915_request *rq) { - struct i915_workarounds *w = &req->i915->workarounds; + struct i915_workarounds *w = &rq->i915->workarounds; u32 *cs; int ret, i; if (w->count == 0) return 0; - ret = req->engine->emit_flush(req, EMIT_BARRIER); + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); if (ret) return ret; - cs = intel_ring_begin(req, (w->count * 2 + 2)); + cs = intel_ring_begin(rq, w->count * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1450,9 +1450,9 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - ret = req->engine->emit_flush(req, EMIT_BARRIER); + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); if (ret) return ret; @@ -1552,7 +1552,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) { const struct i915_gem_context * const kernel_context = engine->i915->kernel_context; - struct drm_i915_gem_request *rq; + struct i915_request *rq; lockdep_assert_held(&engine->i915->drm.struct_mutex); @@ -1664,12 +1664,12 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) } static void print_request(struct drm_printer *m, - struct drm_i915_gem_request *rq, + struct i915_request *rq, const char *prefix) { drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %dms: %s\n", prefix, rq->global_seqno, - i915_gem_request_completed(rq) ? "!" : "", + i915_request_completed(rq) ? "!" 
: "", rq->ctx->hw_id, rq->fence.seqno, rq->priotree.priority, jiffies_to_msecs(jiffies - rq->emitted_jiffies), @@ -1803,7 +1803,7 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, rcu_read_lock(); for (idx = 0; idx < execlists_num_ports(execlists); idx++) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int count; rq = port_unpack(&execlists->port[idx], &count); @@ -1837,7 +1837,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct intel_breadcrumbs * const b = &engine->breadcrumbs; const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct rb_node *rb; if (header) { @@ -1866,12 +1866,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); rq = list_first_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); + struct i915_request, link); if (&rq->link != &engine->timeline->requests) print_request(m, rq, "\t\tfirst "); rq = list_last_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); + struct i915_request, link); if (&rq->link != &engine->timeline->requests) print_request(m, rq, "\t\tlast "); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 946766b62459..649113c7a3c2 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -496,8 +496,7 @@ static void guc_ring_doorbell(struct intel_guc_client *client) GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); } -static void guc_add_request(struct intel_guc *guc, - struct drm_i915_gem_request *rq) +static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; @@ -648,7 +647,7 @@ static void guc_submit(struct intel_engine_cs *engine) unsigned int n; for (n = 0; n < execlists_num_ports(execlists); n++) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int count; rq = port_unpack(&port[n], &count); @@ -662,19 +661,18 @@ static void guc_submit(struct intel_engine_cs *engine) } } -static void port_assign(struct execlist_port *port, - struct drm_i915_gem_request *rq) +static void port_assign(struct execlist_port *port, struct i915_request *rq) { GEM_BUG_ON(port_isset(port)); - port_set(port, i915_gem_request_get(rq)); + port_set(port, i915_request_get(rq)); } static void guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - struct drm_i915_gem_request *last = NULL; + struct i915_request *last = NULL; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; bool submit = false; @@ -710,7 +708,7 @@ static void guc_dequeue(struct intel_engine_cs *engine) do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { if (last && rq->ctx != last->ctx) { @@ -727,9 +725,8 @@ static void guc_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&rq->priotree.link); - __i915_gem_request_submit(rq); - trace_i915_gem_request_in(rq, - port_index(port, execlists)); + __i915_request_submit(rq); + trace_i915_request_in(rq, port_index(port, execlists)); last = 
rq; submit = true; } @@ -762,12 +759,12 @@ static void guc_submission_tasklet(unsigned long data) struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - struct drm_i915_gem_request *rq; + struct i915_request *rq; rq = port_request(&port[0]); - while (rq && i915_gem_request_completed(rq)) { - trace_i915_gem_request_out(rq); - i915_gem_request_put(rq); + while (rq && i915_request_completed(rq)) { + trace_i915_request_out(rq); + i915_request_put(rq); execlists_port_complete(execlists, port); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9b6d781b22ec..d8bca8ba2efc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -267,7 +267,7 @@ find_priolist: return ptr_pack_bits(p, first, 1); } -static void unwind_wa_tail(struct drm_i915_gem_request *rq) +static void unwind_wa_tail(struct i915_request *rq) { rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); assert_ring_tail_valid(rq->ring, rq->tail); @@ -275,7 +275,7 @@ static void unwind_wa_tail(struct drm_i915_gem_request *rq) static void __unwind_incomplete_requests(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; struct i915_priolist *uninitialized_var(p); int last_prio = I915_PRIORITY_INVALID; @@ -284,10 +284,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->timeline->requests, link) { - if (i915_gem_request_completed(rq)) + if (i915_request_completed(rq)) return; - __i915_gem_request_unsubmit(rq); + __i915_request_unsubmit(rq); unwind_wa_tail(rq); GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); @@ -316,8 +316,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) } static inline void -execlists_context_status_change(struct drm_i915_gem_request *rq, - unsigned long status) +execlists_context_status_change(struct i915_request *rq, unsigned long status) { /* * Only used when GVT-g is enabled now. 
When GVT-g is disabled, @@ -331,14 +330,14 @@ execlists_context_status_change(struct drm_i915_gem_request *rq, } static inline void -execlists_context_schedule_in(struct drm_i915_gem_request *rq) +execlists_context_schedule_in(struct i915_request *rq) { execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(rq->engine); } static inline void -execlists_context_schedule_out(struct drm_i915_gem_request *rq) +execlists_context_schedule_out(struct i915_request *rq) { intel_engine_context_out(rq->engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); @@ -353,7 +352,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) ASSIGN_CTX_PDP(ppgtt, reg_state, 0); } -static u64 execlists_update_context(struct drm_i915_gem_request *rq) +static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; struct i915_hw_ppgtt *ppgtt = @@ -385,7 +384,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) unsigned int n; for (n = execlists_num_ports(&engine->execlists); n--; ) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int count; u64 desc; @@ -430,15 +429,14 @@ static bool can_merge_ctx(const struct i915_gem_context *prev, return true; } -static void port_assign(struct execlist_port *port, - struct drm_i915_gem_request *rq) +static void port_assign(struct execlist_port *port, struct i915_request *rq) { GEM_BUG_ON(rq == port_request(port)); if (port_isset(port)) - i915_gem_request_put(port_request(port)); + i915_request_put(port_request(port)); - port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); + port_set(port, port_pack(i915_request_get(rq), port_count(port))); } static void inject_preempt_context(struct intel_engine_cs *engine) @@ -476,7 +474,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct execlist_port *port = execlists->port; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; - struct drm_i915_gem_request *last = port_request(port); + struct i915_request *last = port_request(port); struct rb_node *rb; bool submit = false; @@ -565,7 +563,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) /* WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent - * ring:HEAD == req:TAIL as we resubmit the + * ring:HEAD == rq:TAIL as we resubmit the * request. See gen8_emit_breadcrumb() for * where we prepare the padding after the * end of the request. 
@@ -576,7 +574,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { /* @@ -626,8 +624,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&rq->priotree.link); - __i915_gem_request_submit(rq); - trace_i915_gem_request_in(rq, port_index(port, execlists)); + __i915_request_submit(rq); + trace_i915_request_in(rq, port_index(port, execlists)); last = rq; submit = true; } @@ -665,12 +663,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) unsigned int num_ports = execlists_num_ports(execlists); while (num_ports-- && port_isset(port)) { - struct drm_i915_gem_request *rq = port_request(port); + struct i915_request *rq = port_request(port); GEM_BUG_ON(!execlists->active); intel_engine_context_out(rq->engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); - i915_gem_request_put(rq); + i915_request_put(rq); memset(port, 0, sizeof(*port)); port++; @@ -680,7 +678,7 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) static void execlists_cancel_requests(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; @@ -692,7 +690,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->timeline->requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (!i915_gem_request_completed(rq)) + if (!i915_request_completed(rq)) dma_fence_set_error(&rq->fence, -EIO); } @@ -705,7 +703,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) INIT_LIST_HEAD(&rq->priotree.link); dma_fence_set_error(&rq->fence, -EIO); - __i915_gem_request_submit(rq); + __i915_request_submit(rq); } rb = rb_next(rb); @@ -806,7 +804,7 @@ static void execlists_submission_tasklet(unsigned long data) tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? 
"" : "?"); while (head != tail) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int status; unsigned int count; @@ -885,10 +883,10 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); GEM_BUG_ON(port_isset(&port[1]) && !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!i915_gem_request_completed(rq)); + GEM_BUG_ON(!i915_request_completed(rq)); execlists_context_schedule_out(rq); - trace_i915_gem_request_out(rq); - i915_gem_request_put(rq); + trace_i915_request_out(rq); + i915_request_put(rq); execlists_port_complete(execlists, port); } else { @@ -928,7 +926,7 @@ static void insert_request(struct intel_engine_cs *engine, tasklet_hi_schedule(&engine->execlists.tasklet); } -static void execlists_submit_request(struct drm_i915_gem_request *request) +static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; unsigned long flags; @@ -944,9 +942,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt) +static struct i915_request *pt_to_request(struct i915_priotree *pt) { - return container_of(pt, struct drm_i915_gem_request, priotree); + return container_of(pt, struct i915_request, priotree); } static struct intel_engine_cs * @@ -964,7 +962,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) return engine; } -static void execlists_schedule(struct drm_i915_gem_request *request, int prio) +static void execlists_schedule(struct i915_request *request, int prio) { struct intel_engine_cs *engine; struct i915_dependency *dep, *p; @@ -973,7 +971,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) GEM_BUG_ON(prio == I915_PRIORITY_INVALID); - if (i915_gem_request_completed(request)) + if (i915_request_completed(request)) return; if (prio <= READ_ONCE(request->priotree.priority)) @@ -1158,7 +1156,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, i915_gem_context_put(ctx); } -static int execlists_request_alloc(struct drm_i915_gem_request *request) +static int execlists_request_alloc(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; @@ -1590,7 +1588,7 @@ static void reset_irq(struct intel_engine_cs *engine) } static void reset_common_ring(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; @@ -1658,15 +1656,15 @@ static void reset_common_ring(struct intel_engine_cs *engine, unwind_wa_tail(request); } -static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) +static int intel_logical_ring_emit_pdps(struct i915_request *rq) { - struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_engine_cs *engine = req->engine; + struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; + struct intel_engine_cs *engine = rq->engine; const int num_lri_cmds = GEN8_3LVL_PDPES * 2; u32 *cs; int i; - cs = intel_ring_begin(req, num_lri_cmds * 2 + 2); + cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1681,12 +1679,12 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + 
intel_ring_advance(rq, cs); return 0; } -static int gen8_emit_bb_start(struct drm_i915_gem_request *req, +static int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags) { @@ -1699,18 +1697,18 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * it is unsafe in case of lite-restore (because the ctx is * not idle). PML4 is allocated during ppgtt init so this is * not needed in 48-bit.*/ - if (req->ctx->ppgtt && - (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) && - !i915_vm_is_48bit(&req->ctx->ppgtt->base) && - !intel_vgpu_active(req->i915)) { - ret = intel_logical_ring_emit_pdps(req); + if (rq->ctx->ppgtt && + (intel_engine_flag(rq->engine) & rq->ctx->ppgtt->pd_dirty_rings) && + !i915_vm_is_48bit(&rq->ctx->ppgtt->base) && + !intel_vgpu_active(rq->i915)) { + ret = intel_logical_ring_emit_pdps(rq); if (ret) return ret; - req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); + rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); } - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1739,7 +1737,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1758,7 +1756,7 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); } -static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) +static int gen8_emit_flush(struct i915_request *request, u32 mode) { u32 cmd, *cs; @@ -1790,7 +1788,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) return 0; } -static int gen8_emit_flush_render(struct drm_i915_gem_request *request, +static int gen8_emit_flush_render(struct i915_request *request, u32 mode) { struct intel_engine_cs *engine = request->engine; @@ -1865,7 +1863,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) +static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) { /* Ensure there's always at least one preemption point per-request. */ *cs++ = MI_ARB_CHECK; @@ -1873,7 +1871,7 @@ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) request->wa_tail = intel_ring_offset(request, cs); } -static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) +static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); @@ -1889,8 +1887,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, - u32 *cs) +static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. 
*/ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -1906,15 +1903,15 @@ static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, } static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; -static int gen8_init_rcs_context(struct drm_i915_gem_request *req) +static int gen8_init_rcs_context(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(rq); if (ret) return ret; - ret = intel_rcs_context_init_mocs(req); + ret = intel_rcs_context_init_mocs(rq); /* * Failing to program the MOCS is non-fatal.The system will not * run at peak performance. So generate an error and carry on. @@ -1922,7 +1919,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret) DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - return i915_gem_render_state_emit(req); + return i915_gem_render_state_emit(rq); } /** diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index abb7a8c1e340..c0b34b7943b9 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -265,7 +265,7 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) /** * emit_mocs_control_table() - emit the mocs control table - * @req: Request to set up the MOCS table for. + * @rq: Request to set up the MOCS table for. * @table: The values to program into the control regs. * * This function simply emits a MI_LOAD_REGISTER_IMM command for the @@ -273,17 +273,17 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) * * Return: 0 on success, otherwise the error status. */ -static int emit_mocs_control_table(struct drm_i915_gem_request *req, +static int emit_mocs_control_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { - enum intel_engine_id engine = req->engine->id; + enum intel_engine_id engine = rq->engine->id; unsigned int index; u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - cs = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -308,7 +308,7 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -323,7 +323,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, /** * emit_mocs_l3cc_table() - emit the mocs control table - * @req: Request to set up the MOCS table for. + * @rq: Request to set up the MOCS table for. * @table: The values to program into the control regs. * * This function simply emits a MI_LOAD_REGISTER_IMM command for the @@ -332,7 +332,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, * * Return: 0 on success, otherwise the error status. 
*/ -static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, +static int emit_mocs_l3cc_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { unsigned int i; @@ -341,7 +341,7 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - cs = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); + cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -370,7 +370,7 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -417,7 +417,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) /** * intel_rcs_context_init_mocs() - program the MOCS register. - * @req: Request to set up the MOCS tables for. + * @rq: Request to set up the MOCS tables for. * * This function will emit a batch buffer with the values required for * programming the MOCS register values for all the currently supported @@ -431,19 +431,19 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) * * Return: 0 on success, otherwise the error status. */ -int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req) +int intel_rcs_context_init_mocs(struct i915_request *rq) { struct drm_i915_mocs_table t; int ret; - if (get_mocs_settings(req->i915, &t)) { + if (get_mocs_settings(rq->i915, &t)) { /* Program the RCS control registers */ - ret = emit_mocs_control_table(req, &t); + ret = emit_mocs_control_table(rq, &t); if (ret) return ret; /* Now program the l3cc registers */ - ret = emit_mocs_l3cc_table(req, &t); + ret = emit_mocs_l3cc_table(rq, &t); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/intel_mocs.h index ce4a5dfa5f94..d1751f91c1a4 100644 --- a/drivers/gpu/drm/i915/intel_mocs.h +++ b/drivers/gpu/drm/i915/intel_mocs.h @@ -52,7 +52,7 @@ #include #include "i915_drv.h" -int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req); +int intel_rcs_context_init_mocs(struct i915_request *rq); void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv); int intel_mocs_init_engine(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 89f568e739ee..36671a937fa4 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -234,50 +234,50 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay, } static void intel_overlay_submit_request(struct intel_overlay *overlay, - struct drm_i915_gem_request *req, + struct i915_request *rq, i915_gem_retire_fn retire) { GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, &overlay->i915->drm.struct_mutex)); i915_gem_active_set_retire_fn(&overlay->last_flip, retire, &overlay->i915->drm.struct_mutex); - i915_gem_active_set(&overlay->last_flip, req); - i915_add_request(req); + i915_gem_active_set(&overlay->last_flip, rq); + i915_request_add(rq); } static int intel_overlay_do_wait_request(struct intel_overlay *overlay, - struct drm_i915_gem_request *req, + struct i915_request *rq, i915_gem_retire_fn retire) { - intel_overlay_submit_request(overlay, req, retire); + intel_overlay_submit_request(overlay, rq, retire); return i915_gem_active_retire(&overlay->last_flip, &overlay->i915->drm.struct_mutex); } -static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay) +static struct i915_request *alloc_request(struct intel_overlay 
*overlay) { struct drm_i915_private *dev_priv = overlay->i915; struct intel_engine_cs *engine = dev_priv->engine[RCS]; - return i915_gem_request_alloc(engine, dev_priv->kernel_context); + return i915_request_alloc(engine, dev_priv->kernel_context); } /* overlay needs to be disable in OCMD reg */ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct drm_i915_gem_request *req; + struct i915_request *rq; u32 *cs; WARN_ON(overlay->active); - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } @@ -290,9 +290,9 @@ static int intel_overlay_on(struct intel_overlay *overlay) *cs++ = overlay->flip_addr | OFC_UPDATE; *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - return intel_overlay_do_wait_request(overlay, req, NULL); + return intel_overlay_do_wait_request(overlay, rq, NULL); } static void intel_overlay_flip_prepare(struct intel_overlay *overlay, @@ -322,7 +322,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, bool load_polyphase_filter) { struct drm_i915_private *dev_priv = overlay->i915; - struct drm_i915_gem_request *req; + struct i915_request *rq; u32 flip_addr = overlay->flip_addr; u32 tmp, *cs; @@ -336,23 +336,23 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (tmp & (1 << 17)) DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; *cs++ = flip_addr; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); intel_overlay_flip_prepare(overlay, vma); - intel_overlay_submit_request(overlay, req, NULL); + intel_overlay_submit_request(overlay, rq, NULL); return 0; } @@ -373,7 +373,7 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) } static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, - struct drm_i915_gem_request *req) + struct i915_request *rq) { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); @@ -382,7 +382,7 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, } static void intel_overlay_off_tail(struct i915_gem_active *active, - struct drm_i915_gem_request *req) + struct i915_request *rq) { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); @@ -401,7 +401,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, /* overlay needs to be disabled in OCMD reg */ static int intel_overlay_off(struct intel_overlay *overlay) { - struct drm_i915_gem_request *req; + struct i915_request *rq; u32 *cs, flip_addr = overlay->flip_addr; WARN_ON(!overlay->active); @@ -412,13 +412,13 @@ static int intel_overlay_off(struct intel_overlay *overlay) * of the hw. 
Do it in both cases */ flip_addr |= OFC_UPDATE; - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } @@ -432,11 +432,11 @@ static int intel_overlay_off(struct intel_overlay *overlay) *cs++ = flip_addr; *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); intel_overlay_flip_prepare(overlay, NULL); - return intel_overlay_do_wait_request(overlay, req, + return intel_overlay_do_wait_request(overlay, rq, intel_overlay_off_tail); } @@ -468,23 +468,23 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ - struct drm_i915_gem_request *req; + struct i915_request *rq; - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - ret = intel_overlay_do_wait_request(overlay, req, + ret = intel_overlay_do_wait_request(overlay, rq, intel_overlay_release_old_vid_tail); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index abf80e462833..21dac6ebc202 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6360,7 +6360,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->pcu_lock); } -void gen6_rps_boost(struct drm_i915_gem_request *rq, +void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps_client) { struct intel_rps *rps = &rq->i915->gt_pm.rps; @@ -6376,7 +6376,7 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq, if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) return; - /* Serializes with i915_gem_request_retire() */ + /* Serializes with i915_request_retire() */ boost = false; spin_lock_irqsave(&rq->lock, flags); if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5718f37160c5..1d599524a759 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -66,7 +66,7 @@ unsigned int intel_ring_update_space(struct intel_ring *ring) } static int -gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen2_render_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; @@ -75,19 +75,19 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = cmd; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen4_render_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; @@ -122,17 +122,17 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) { cmd |= MI_EXE_FLUSH; - if 
(IS_G4X(req->i915) || IS_GEN5(req->i915)) + if (IS_G4X(rq->i915) || IS_GEN5(rq->i915)) cmd |= MI_INVALIDATE_ISP; } - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = cmd; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -175,13 +175,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) * really our business. That leaves only stall at scoreboard. */ static int -intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) +intel_emit_post_sync_nonzero_flush(struct i915_request *rq) { u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; + i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; u32 *cs; - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -191,9 +191,9 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) *cs++ = 0; /* low dword */ *cs++ = 0; /* high dword */ *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -203,21 +203,21 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) *cs++ = 0; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen6_render_ring_flush(struct i915_request *rq, u32 mode) { u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; + i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; u32 *cs, flags = 0; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ - ret = intel_emit_post_sync_nonzero_flush(req); + ret = intel_emit_post_sync_nonzero_flush(rq); if (ret) return ret; @@ -247,7 +247,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; } - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -255,17 +255,17 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = flags; *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) +gen7_render_ring_cs_stall_wa(struct i915_request *rq) { u32 *cs; - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -273,16 +273,16 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; *cs++ = 0; *cs++ = 0; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen7_render_ring_flush(struct i915_request *rq, u32 mode) { u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; + i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; u32 *cs, flags = 0; /* @@ -324,10 +324,10 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set. 
*/ - gen7_render_ring_cs_stall_wa(req); + gen7_render_ring_cs_stall_wa(rq); } - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -335,7 +335,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = flags; *cs++ = scratch_addr; *cs++ = 0; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -531,7 +531,7 @@ out: } static void reset_ring_common(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { /* * RC6 must be prevented until the reset is complete and the engine @@ -595,15 +595,15 @@ static void reset_ring_common(struct intel_engine_cs *engine, } } -static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) +static int intel_rcs_ctx_init(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(rq); if (ret != 0) return ret; - ret = i915_gem_render_state_emit(req); + ret = i915_gem_render_state_emit(rq); if (ret) return ret; @@ -661,9 +661,9 @@ static int init_render_ring(struct intel_engine_cs *engine) return init_workarounds_ring(engine); } -static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) +static u32 *gen6_signal(struct i915_request *rq, u32 *cs) { - struct drm_i915_private *dev_priv = req->i915; + struct drm_i915_private *dev_priv = rq->i915; struct intel_engine_cs *engine; enum intel_engine_id id; int num_rings = 0; @@ -674,11 +674,11 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) continue; - mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; + mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(mbox_reg); - *cs++ = req->global_seqno; + *cs++ = rq->global_seqno; num_rings++; } } @@ -690,7 +690,7 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) static void cancel_requests(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; + struct i915_request *request; unsigned long flags; spin_lock_irqsave(&engine->timeline->lock, flags); @@ -698,7 +698,7 @@ static void cancel_requests(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. 
*/ list_for_each_entry(request, &engine->timeline->requests, link) { GEM_BUG_ON(!request->global_seqno); - if (!i915_gem_request_completed(request)) + if (!i915_request_completed(request)) dma_fence_set_error(&request->fence, -EIO); } /* Remaining _unready_ requests will be nop'ed when submitted */ @@ -706,48 +706,46 @@ static void cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static void i9xx_submit_request(struct drm_i915_gem_request *request) +static void i9xx_submit_request(struct i915_request *request) { struct drm_i915_private *dev_priv = request->i915; - i915_gem_request_submit(request); + i915_request_submit(request); I915_WRITE_TAIL(request->engine, intel_ring_set_tail(request->ring, request->tail)); } -static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) +static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; - *cs++ = req->global_seqno; + *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; - req->tail = intel_ring_offset(req, cs); - assert_ring_tail_valid(req->ring, req->tail); + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); } static const int i9xx_emit_breadcrumb_sz = 4; -static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) +static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - return i9xx_emit_breadcrumb(req, - req->engine->semaphore.signal(req, cs)); + return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs)); } static int -gen6_ring_sync_to(struct drm_i915_gem_request *req, - struct drm_i915_gem_request *signal) +gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal) { u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; + u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id]; u32 *cs; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -759,7 +757,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req, *cs++ = signal->global_seqno - 1; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -858,17 +856,17 @@ i8xx_irq_disable(struct intel_engine_cs *engine) } static int -bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) +bsd_ring_flush(struct i915_request *rq, u32 mode) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_FLUSH; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -911,20 +909,20 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine) } static int -i965_emit_bb_start(struct drm_i915_gem_request *req, +i965_emit_bb_start(struct i915_request *rq, u64 offset, u32 length, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE_I965); *cs++ = offset; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -934,13 +932,13 @@ i965_emit_bb_start(struct drm_i915_gem_request *req, #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int -i830_emit_bb_start(struct drm_i915_gem_request *req, +i830_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { - u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch); + u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -951,13 +949,13 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, *cs++ = cs_offset; *cs++ = 0xdeadbeef; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) return -ENOSPC; - cs = intel_ring_begin(req, 6 + 2); + cs = intel_ring_begin(rq, 6 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -974,39 +972,39 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, *cs++ = MI_FLUSH; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); /* ... and execute it. */ offset = cs_offset; } - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -i915_emit_bb_start(struct drm_i915_gem_request *req, +i915_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1377,7 +1375,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) intel_ring_reset(engine->buffer, 0); } -static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags) +static inline int mi_set_context(struct i915_request *rq, u32 flags) { struct drm_i915_private *i915 = rq->i915; struct intel_engine_cs *engine = rq->engine; @@ -1463,7 +1461,7 @@ static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags) return 0; } -static int remap_l3(struct drm_i915_gem_request *rq, int slice) +static int remap_l3(struct i915_request *rq, int slice) { u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; int i; @@ -1491,7 +1489,7 @@ static int remap_l3(struct drm_i915_gem_request *rq, int slice) return 0; } -static int switch_context(struct drm_i915_gem_request *rq) +static int switch_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *to_ctx = rq->ctx; @@ -1561,7 +1559,7 @@ err: return ret; } -static int ring_request_alloc(struct drm_i915_gem_request *request) +static int ring_request_alloc(struct i915_request *request) { int ret; @@ -1587,7 +1585,7 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) { - struct drm_i915_gem_request *target; + struct i915_request *target; long timeout; lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); @@ -1605,13 +1603,13 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; - timeout = i915_wait_request(target, + timeout = i915_request_wait(target, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) return timeout; - i915_gem_request_retire_upto(target); + i915_request_retire_upto(target); intel_ring_update_space(ring); GEM_BUG_ON(ring->space < bytes); @@ -1634,10 +1632,9 @@ int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes) return 0; } -u32 *intel_ring_begin(struct drm_i915_gem_request *req, - unsigned int num_dwords) +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) { - struct intel_ring *ring = req->ring; + struct intel_ring *ring = rq->ring; const unsigned int remain_usable = ring->effective_size - ring->emit; const unsigned int bytes = num_dwords * sizeof(u32); unsigned int need_wrap = 0; @@ -1647,7 +1644,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, /* Packets must be qword aligned. */ GEM_BUG_ON(num_dwords & 1); - total_bytes = bytes + req->reserved_space; + total_bytes = bytes + rq->reserved_space; GEM_BUG_ON(total_bytes > ring->effective_size); if (unlikely(total_bytes > remain_usable)) { @@ -1668,7 +1665,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, * wrap and only need to effectively wait for the * reserved size from the start of ringbuffer. */ - total_bytes = req->reserved_space + remain_actual; + total_bytes = rq->reserved_space + remain_actual; } } @@ -1682,9 +1679,9 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, * overallocation and the assumption is that then we never need * to wait (which has the risk of failing with EINTR). * - * See also i915_gem_request_alloc() and i915_add_request(). + * See also i915_request_alloc() and i915_request_add(). 
*/ - GEM_BUG_ON(!req->reserved_space); + GEM_BUG_ON(!rq->reserved_space); ret = wait_for_space(ring, total_bytes); if (unlikely(ret)) @@ -1713,29 +1710,28 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, } /* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct drm_i915_gem_request *req) +int intel_ring_cacheline_align(struct i915_request *rq) { - int num_dwords = - (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); u32 *cs; if (num_dwords == 0) return 0; - num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; - cs = intel_ring_begin(req, num_dwords); + num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords; + cs = intel_ring_begin(rq, num_dwords); if (IS_ERR(cs)) return PTR_ERR(cs); while (num_dwords--) *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } -static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) +static void gen6_bsd_submit_request(struct i915_request *request) { struct drm_i915_private *dev_priv = request->i915; @@ -1772,11 +1768,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) +static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1802,18 +1798,18 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -hsw_emit_bb_start(struct drm_i915_gem_request *req, +hsw_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1823,19 +1819,19 @@ hsw_emit_bb_start(struct drm_i915_gem_request *req, MI_BATCH_RESOURCE_STREAMER : 0); /* bit0-7 is the length on GEN6+ */ *cs++ = offset; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen6_emit_bb_start(struct drm_i915_gem_request *req, +gen6_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1843,18 +1839,18 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, 0 : MI_BATCH_NON_SECURE_I965); /* bit0-7 is the length on GEN6+ */ *cs++ = offset; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } /* Blitter support (SandyBridge+) */ -static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) +static int gen6_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1879,7 +1875,7 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 51523ad049de..a9b83bf7e837 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ 
b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,10 +3,12 @@ #define _INTEL_RINGBUFFER_H_ #include + #include "i915_gem_batch_pool.h" -#include "i915_gem_request.h" #include "i915_gem_timeline.h" + #include "i915_pmu.h" +#include "i915_request.h" #include "i915_selftest.h" struct drm_printer; @@ -115,7 +117,7 @@ struct intel_engine_hangcheck { unsigned long action_timestamp; int deadlock; struct intel_instdone instdone; - struct drm_i915_gem_request *active_request; + struct i915_request *active_request; bool stalled; }; @@ -156,7 +158,7 @@ struct i915_ctx_workarounds { struct i915_vma *vma; }; -struct drm_i915_gem_request; +struct i915_request; /* * Engine IDs definitions. @@ -218,7 +220,7 @@ struct intel_engine_execlists { /** * @request_count: combined request and submission count */ - struct drm_i915_gem_request *request_count; + struct i915_request *request_count; #define EXECLIST_COUNT_BITS 2 #define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) #define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) @@ -339,7 +341,7 @@ struct intel_engine_cs { struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ struct task_struct *signaler; /* used for fence signalling */ - struct drm_i915_gem_request __rcu *first_signal; + struct i915_request __rcu *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ @@ -391,7 +393,7 @@ struct intel_engine_cs { int (*init_hw)(struct intel_engine_cs *engine); void (*reset_hw)(struct intel_engine_cs *engine, - struct drm_i915_gem_request *req); + struct i915_request *rq); void (*park)(struct intel_engine_cs *engine); void (*unpark)(struct intel_engine_cs *engine); @@ -402,22 +404,20 @@ struct intel_engine_cs { struct i915_gem_context *ctx); void (*context_unpin)(struct intel_engine_cs *engine, struct i915_gem_context *ctx); - int (*request_alloc)(struct drm_i915_gem_request *req); - int (*init_context)(struct drm_i915_gem_request *req); + int (*request_alloc)(struct i915_request *rq); + int (*init_context)(struct i915_request *rq); - int (*emit_flush)(struct drm_i915_gem_request *request, - u32 mode); + int (*emit_flush)(struct i915_request *request, u32 mode); #define EMIT_INVALIDATE BIT(0) #define EMIT_FLUSH BIT(1) #define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) - int (*emit_bb_start)(struct drm_i915_gem_request *req, + int (*emit_bb_start)(struct i915_request *rq, u64 offset, u32 length, unsigned int dispatch_flags); #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) - void (*emit_breadcrumb)(struct drm_i915_gem_request *req, - u32 *cs); + void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into @@ -426,7 +426,7 @@ struct intel_engine_cs { * This is called from an atomic context with irqs disabled; must * be irq safe. */ - void (*submit_request)(struct drm_i915_gem_request *req); + void (*submit_request)(struct i915_request *rq); /* Call when the priority on a request has changed and it and its * dependencies may need rescheduling. Note the request itself may @@ -434,8 +434,7 @@ struct intel_engine_cs { * * Called under the struct_mutex. 
*/ - void (*schedule)(struct drm_i915_gem_request *request, - int priority); + void (*schedule)(struct i915_request *request, int priority); /* * Cancel all requests on the hardware, or queued for execution. @@ -503,9 +502,9 @@ struct intel_engine_cs { } mbox; /* AKA wait() */ - int (*sync_to)(struct drm_i915_gem_request *req, - struct drm_i915_gem_request *signal); - u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); + int (*sync_to)(struct i915_request *rq, + struct i915_request *signal); + u32 *(*signal)(struct i915_request *rq, u32 *cs); } semaphore; struct intel_engine_execlists execlists; @@ -726,14 +725,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); +int __must_check intel_ring_cacheline_align(struct i915_request *rq); int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes); -u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, - unsigned int n); +u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); -static inline void -intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) { /* Dummy function. * @@ -743,22 +740,20 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ - GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs); + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); } -static inline u32 -intel_ring_wrap(const struct intel_ring *ring, u32 pos) +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) { return pos & (ring->size - 1); } -static inline u32 -intel_ring_offset(const struct drm_i915_gem_request *req, void *addr) +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - u32 offset = addr - req->ring->vaddr; - GEM_BUG_ON(offset > req->ring->size); - return intel_ring_wrap(req->ring, offset); + u32 offset = addr - rq->ring->vaddr; + GEM_BUG_ON(offset > rq->ring->size); + return intel_ring_wrap(rq->ring, offset); } static inline void @@ -796,7 +791,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) { /* Whilst writes to the tail are strictly order, there is no * serialisation between readers and the writers. The tail may be - * read by i915_gem_request_retire() just as it is being updated + * read by i915_request_retire() just as it is being updated * by execlists, as although the breadcrumb is complete, the context * switch hasn't been seen. 
*/ @@ -838,7 +833,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) } int init_workarounds_ring(struct intel_engine_cs *engine); -int intel_ring_workarounds_emit(struct drm_i915_gem_request *req); +int intel_ring_workarounds_emit(struct i915_request *rq); void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); @@ -866,7 +861,7 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); static inline void intel_wait_init(struct intel_wait *wait, - struct drm_i915_gem_request *rq) + struct i915_request *rq) { wait->tsk = current; wait->request = rq; @@ -892,9 +887,9 @@ intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) static inline bool intel_wait_update_request(struct intel_wait *wait, - const struct drm_i915_gem_request *rq) + const struct i915_request *rq) { - return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq)); + return intel_wait_update_seqno(wait, i915_request_global_seqno(rq)); } static inline bool @@ -905,9 +900,9 @@ intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) static inline bool intel_wait_check_request(const struct intel_wait *wait, - const struct drm_i915_gem_request *rq) + const struct i915_request *rq) { - return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq)); + return intel_wait_check_seqno(wait, i915_request_global_seqno(rq)); } static inline bool intel_wait_complete(const struct intel_wait *wait) @@ -919,9 +914,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); -void intel_engine_enable_signaling(struct drm_i915_gem_request *request, - bool wakeup); -void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); +void intel_engine_enable_signaling(struct i915_request *request, bool wakeup); +void intel_engine_cancel_signaling(struct i915_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 52b1bd17bf46..05bbef363fff 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -964,7 +964,7 @@ static int gpu_write(struct i915_vma *vma, u32 dword, u32 value) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *batch; int flags = 0; int err; @@ -975,7 +975,7 @@ static int gpu_write(struct i915_vma *vma, if (err) return err; - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -1003,7 +1003,7 @@ static int gpu_write(struct i915_vma *vma, reservation_object_unlock(vma->resv); err_request: - __i915_add_request(rq, err == 0); + __i915_request_add(rq, err == 0); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 7a0d1e17c1ad..340a98c0c804 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -178,7 +178,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, u32 v) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *vma; u32 *cs; int err; @@ -191,7 +191,7 @@ static int gpu_set(struct 
drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -199,7 +199,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) { - __i915_add_request(rq, false); + __i915_request_add(rq, false); i915_vma_unpin(vma); return PTR_ERR(cs); } @@ -229,7 +229,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, reservation_object_add_excl_fence(obj->resv, &rq->fence); reservation_object_unlock(obj->resv); - __i915_add_request(rq, true); + __i915_request_add(rq, true); return 0; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 6da2a2f29c54..7ecaed50d0b9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -114,7 +114,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *vma; struct i915_vma *batch; unsigned int flags; @@ -152,7 +152,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, goto err_vma; } - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_batch; @@ -180,12 +180,12 @@ static int gpu_fill(struct drm_i915_gem_object *obj, reservation_object_add_excl_fence(obj->resv, &rq->fence); reservation_object_unlock(obj->resv); - __i915_add_request(rq, true); + __i915_request_add(rq, true); return 0; err_request: - __i915_add_request(rq, false); + __i915_request_add(rq, false); err_batch: i915_vma_unpin(batch); err_vma: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index e1ddad635d73..ab9d7bee0aae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -407,7 +407,7 @@ static int igt_evict_contexts(void *arg) mutex_lock(&i915->drm.struct_mutex); onstack_fence_init(&fence); do { - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_gem_context *ctx; ctx = live_context(i915, file); @@ -416,7 +416,7 @@ static int igt_evict_contexts(void *arg) /* We will need some GGTT space for the rq's context */ igt_evict_ctl.fail_if_busy = true; - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); igt_evict_ctl.fail_if_busy = false; if (IS_ERR(rq)) { @@ -437,7 +437,7 @@ static int igt_evict_contexts(void *arg) if (err < 0) break; - i915_add_request(rq); + i915_request_add(rq); count++; err = 0; } while(1); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 3c64815e910b..fbdb2419d418 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -436,7 +436,7 @@ out: static int make_obj_busy(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *vma; int err; @@ -448,14 +448,14 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) if (err) return err; - rq = i915_gem_request_alloc(i915->engine[RCS], 
i915->kernel_context); + rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); } i915_vma_move_to_active(vma, rq, 0); - i915_add_request(rq); + i915_request_add(rq); i915_gem_object_set_active_reference(obj); i915_vma_unpin(vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c deleted file mode 100644 index 647bf2bbd799..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ /dev/null @@ -1,868 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include - -#include "../i915_selftest.h" - -#include "mock_context.h" -#include "mock_gem_device.h" - -static int igt_add_request(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request; - int err = -ENOMEM; - - /* Basic preliminary test to create a request and let it loose! 
*/ - - mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], - i915->kernel_context, - HZ / 10); - if (!request) - goto out_unlock; - - i915_add_request(request); - - err = 0; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int igt_wait_request(void *arg) -{ - const long T = HZ / 4; - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request; - int err = -EINVAL; - - /* Submit a request, then wait upon it */ - - mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], i915->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_unlock; - } - - if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { - pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); - goto out_unlock; - } - - if (i915_wait_request(request, I915_WAIT_LOCKED, T) != -ETIME) { - pr_err("request wait succeeded (expected timeout before submit!)\n"); - goto out_unlock; - } - - if (i915_gem_request_completed(request)) { - pr_err("request completed before submit!!\n"); - goto out_unlock; - } - - i915_add_request(request); - - if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { - pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); - goto out_unlock; - } - - if (i915_gem_request_completed(request)) { - pr_err("request completed immediately!\n"); - goto out_unlock; - } - - if (i915_wait_request(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { - pr_err("request wait succeeded (expected timeout!)\n"); - goto out_unlock; - } - - if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { - pr_err("request wait timed out!\n"); - goto out_unlock; - } - - if (!i915_gem_request_completed(request)) { - pr_err("request not complete after waiting!\n"); - goto out_unlock; - } - - if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { - pr_err("request wait timed out when already complete!\n"); - goto out_unlock; - } - - err = 0; -out_unlock: - mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int igt_fence_wait(void *arg) -{ - const long T = HZ / 4; - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request; - int err = -EINVAL; - - /* Submit a request, treat it as a fence and wait upon it */ - - mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], i915->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_locked; - } - mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */ - - if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { - pr_err("fence wait success before submit (expected timeout)!\n"); - goto out_device; - } - - mutex_lock(&i915->drm.struct_mutex); - i915_add_request(request); - mutex_unlock(&i915->drm.struct_mutex); - - if (dma_fence_is_signaled(&request->fence)) { - pr_err("fence signaled immediately!\n"); - goto out_device; - } - - if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { - pr_err("fence wait success after submit (expected timeout)!\n"); - goto out_device; - } - - if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { - pr_err("fence wait timed out (expected success)!\n"); - goto out_device; - } - - if (!dma_fence_is_signaled(&request->fence)) { - pr_err("fence unsignaled after waiting!\n"); - goto out_device; - } - - if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { - pr_err("fence wait timed out when complete (expected success)!\n"); - goto 
out_device; - } - - err = 0; -out_device: - mutex_lock(&i915->drm.struct_mutex); -out_locked: - mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int igt_request_rewind(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request, *vip; - struct i915_gem_context *ctx[2]; - int err = -EINVAL; - - mutex_lock(&i915->drm.struct_mutex); - ctx[0] = mock_context(i915, "A"); - request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); - if (!request) { - err = -ENOMEM; - goto err_context_0; - } - - i915_gem_request_get(request); - i915_add_request(request); - - ctx[1] = mock_context(i915, "B"); - vip = mock_request(i915->engine[RCS], ctx[1], 0); - if (!vip) { - err = -ENOMEM; - goto err_context_1; - } - - /* Simulate preemption by manual reordering */ - if (!mock_cancel_request(request)) { - pr_err("failed to cancel request (already executed)!\n"); - i915_add_request(vip); - goto err_context_1; - } - i915_gem_request_get(vip); - i915_add_request(vip); - rcu_read_lock(); - request->engine->submit_request(request); - rcu_read_unlock(); - - mutex_unlock(&i915->drm.struct_mutex); - - if (i915_wait_request(vip, 0, HZ) == -ETIME) { - pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", - vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); - goto err; - } - - if (i915_gem_request_completed(request)) { - pr_err("low priority request already completed\n"); - goto err; - } - - err = 0; -err: - i915_gem_request_put(vip); - mutex_lock(&i915->drm.struct_mutex); -err_context_1: - mock_context_close(ctx[1]); - i915_gem_request_put(request); -err_context_0: - mock_context_close(ctx[0]); - mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -int i915_gem_request_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_add_request), - SUBTEST(igt_wait_request), - SUBTEST(igt_fence_wait), - SUBTEST(igt_request_rewind), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - drm_dev_unref(&i915->drm); - - return err; -} - -struct live_test { - struct drm_i915_private *i915; - const char *func; - const char *name; - - unsigned int reset_count; -}; - -static int begin_live_test(struct live_test *t, - struct drm_i915_private *i915, - const char *func, - const char *name) -{ - int err; - - t->i915 = i915; - t->func = func; - t->name = name; - - err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); - if (err) { - pr_err("%s(%s): failed to idle before, with err=%d!", - func, name, err); - return err; - } - - i915->gpu_error.missed_irq_rings = 0; - t->reset_count = i915_reset_count(&i915->gpu_error); - - return 0; -} - -static int end_live_test(struct live_test *t) -{ - struct drm_i915_private *i915 = t->i915; - - i915_gem_retire_requests(i915); - - if (wait_for(intel_engines_are_idle(i915), 10)) { - pr_err("%s(%s): GPU not idle\n", t->func, t->name); - return -EIO; - } - - if (t->reset_count != i915_reset_count(&i915->gpu_error)) { - pr_err("%s(%s): GPU was reset %d times!\n", - t->func, t->name, - i915_reset_count(&i915->gpu_error) - t->reset_count); - return -EIO; - } - - if (i915->gpu_error.missed_irq_rings) { - pr_err("%s(%s): Missed interrupts on engines %lx\n", - t->func, t->name, i915->gpu_error.missed_irq_rings); - return -EIO; - } - - return 0; -} - -static int live_nop_request(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct 
intel_engine_cs *engine; - struct live_test t; - unsigned int id; - int err = -ENODEV; - - /* Submit various sized batches of empty requests, to each engine - * (individually), and wait for the batch to complete. We can check - * the overhead of submitting requests to the hardware. - */ - - mutex_lock(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) { - IGT_TIMEOUT(end_time); - struct drm_i915_gem_request *request; - unsigned long n, prime; - ktime_t times[2] = {}; - - err = begin_live_test(&t, i915, __func__, engine->name); - if (err) - goto out_unlock; - - for_each_prime_number_from(prime, 1, 8192) { - times[1] = ktime_get_raw(); - - for (n = 0; n < prime; n++) { - request = i915_gem_request_alloc(engine, - i915->kernel_context); - if (IS_ERR(request)) { - err = PTR_ERR(request); - goto out_unlock; - } - - /* This space is left intentionally blank. - * - * We do not actually want to perform any - * action with this request, we just want - * to measure the latency in allocation - * and submission of our breadcrumbs - - * ensuring that the bare request is sufficient - * for the system to work (i.e. proper HEAD - * tracking of the rings, interrupt handling, - * etc). It also gives us the lowest bounds - * for latency. - */ - - i915_add_request(request); - } - i915_wait_request(request, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - times[1] = ktime_sub(ktime_get_raw(), times[1]); - if (prime == 1) - times[0] = times[1]; - - if (__igt_timeout(end_time, NULL)) - break; - } - - err = end_live_test(&t); - if (err) - goto out_unlock; - - pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", - engine->name, - ktime_to_ns(times[0]), - prime, div64_u64(ktime_to_ns(times[1]), prime)); - } - -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static struct i915_vma *empty_batch(struct drm_i915_private *i915) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - u32 *cmd; - int err; - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(obj); - - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - goto err; - - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static struct drm_i915_gem_request * -empty_request(struct intel_engine_cs *engine, - struct i915_vma *batch) -{ - struct drm_i915_gem_request *request; - int err; - - request = i915_gem_request_alloc(engine, - engine->i915->kernel_context); - if (IS_ERR(request)) - return request; - - err = engine->emit_bb_start(request, - batch->node.start, - batch->node.size, - I915_DISPATCH_SECURE); - if (err) - goto out_request; - -out_request: - __i915_add_request(request, err == 0); - return err ? ERR_PTR(err) : request; -} - -static int live_empty_request(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct live_test t; - struct i915_vma *batch; - unsigned int id; - int err = 0; - - /* Submit various sized batches of empty requests, to each engine - * (individually), and wait for the batch to complete. We can check - * the overhead of submitting requests to the hardware. 
- */ - - mutex_lock(&i915->drm.struct_mutex); - - batch = empty_batch(i915); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_unlock; - } - - for_each_engine(engine, i915, id) { - IGT_TIMEOUT(end_time); - struct drm_i915_gem_request *request; - unsigned long n, prime; - ktime_t times[2] = {}; - - err = begin_live_test(&t, i915, __func__, engine->name); - if (err) - goto out_batch; - - /* Warmup / preload */ - request = empty_request(engine, batch); - if (IS_ERR(request)) { - err = PTR_ERR(request); - goto out_batch; - } - i915_wait_request(request, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - for_each_prime_number_from(prime, 1, 8192) { - times[1] = ktime_get_raw(); - - for (n = 0; n < prime; n++) { - request = empty_request(engine, batch); - if (IS_ERR(request)) { - err = PTR_ERR(request); - goto out_batch; - } - } - i915_wait_request(request, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - times[1] = ktime_sub(ktime_get_raw(), times[1]); - if (prime == 1) - times[0] = times[1]; - - if (__igt_timeout(end_time, NULL)) - break; - } - - err = end_live_test(&t); - if (err) - goto out_batch; - - pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", - engine->name, - ktime_to_ns(times[0]), - prime, div64_u64(ktime_to_ns(times[1]), prime)); - } - -out_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static struct i915_vma *recursive_batch(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx = i915->kernel_context; - struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; - struct drm_i915_gem_object *obj; - const int gen = INTEL_GEN(i915); - struct i915_vma *vma; - u32 *cmd; - int err; - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - err = i915_gem_object_set_to_wc_domain(obj, true); - if (err) - goto err; - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - if (gen >= 8) { - *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; - *cmd++ = lower_32_bits(vma->node.start); - *cmd++ = upper_32_bits(vma->node.start); - } else if (gen >= 6) { - *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; - *cmd++ = lower_32_bits(vma->node.start); - } else if (gen >= 4) { - *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cmd++ = lower_32_bits(vma->node.start); - } else { - *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | 1; - *cmd++ = lower_32_bits(vma->node.start); - } - *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(obj); - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static int recursive_batch_resolve(struct i915_vma *batch) -{ - u32 *cmd; - - cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); - if (IS_ERR(cmd)) - return PTR_ERR(cmd); - - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(batch->vm->i915); - - i915_gem_object_unpin_map(batch->obj); - - return 0; -} - -static int live_all_engines(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct drm_i915_gem_request *request[I915_NUM_ENGINES]; - struct i915_vma *batch; - struct live_test t; - unsigned int id; - int err; - - /* Check we can submit requests to all engines simultaneously. 
We - * send a recursive batch to each engine - checking that we don't - * block doing so, and that they don't complete too soon. - */ - - mutex_lock(&i915->drm.struct_mutex); - - err = begin_live_test(&t, i915, __func__, ""); - if (err) - goto out_unlock; - - batch = recursive_batch(i915); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - pr_err("%s: Unable to create batch, err=%d\n", __func__, err); - goto out_unlock; - } - - for_each_engine(engine, i915, id) { - request[id] = i915_gem_request_alloc(engine, - i915->kernel_context); - if (IS_ERR(request[id])) { - err = PTR_ERR(request[id]); - pr_err("%s: Request allocation failed with err=%d\n", - __func__, err); - goto out_request; - } - - err = engine->emit_bb_start(request[id], - batch->node.start, - batch->node.size, - 0); - GEM_BUG_ON(err); - request[id]->batch = batch; - - if (!i915_gem_object_has_active_reference(batch->obj)) { - i915_gem_object_get(batch->obj); - i915_gem_object_set_active_reference(batch->obj); - } - - i915_vma_move_to_active(batch, request[id], 0); - i915_gem_request_get(request[id]); - i915_add_request(request[id]); - } - - for_each_engine(engine, i915, id) { - if (i915_gem_request_completed(request[id])) { - pr_err("%s(%s): request completed too early!\n", - __func__, engine->name); - err = -EINVAL; - goto out_request; - } - } - - err = recursive_batch_resolve(batch); - if (err) { - pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); - goto out_request; - } - - for_each_engine(engine, i915, id) { - long timeout; - - timeout = i915_wait_request(request[id], - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) { - err = timeout; - pr_err("%s: error waiting for request on %s, err=%d\n", - __func__, engine->name, err); - goto out_request; - } - - GEM_BUG_ON(!i915_gem_request_completed(request[id])); - i915_gem_request_put(request[id]); - request[id] = NULL; - } - - err = end_live_test(&t); - -out_request: - for_each_engine(engine, i915, id) - if (request[id]) - i915_gem_request_put(request[id]); - i915_vma_unpin(batch); - i915_vma_put(batch); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int live_sequential_engines(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request[I915_NUM_ENGINES] = {}; - struct drm_i915_gem_request *prev = NULL; - struct intel_engine_cs *engine; - struct live_test t; - unsigned int id; - int err; - - /* Check we can submit requests to all engines sequentially, such - * that each successive request waits for the earlier ones. This - * tests that we don't execute requests out of order, even though - * they are running on independent engines. 
- */ - - mutex_lock(&i915->drm.struct_mutex); - - err = begin_live_test(&t, i915, __func__, ""); - if (err) - goto out_unlock; - - for_each_engine(engine, i915, id) { - struct i915_vma *batch; - - batch = recursive_batch(i915); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - pr_err("%s: Unable to create batch for %s, err=%d\n", - __func__, engine->name, err); - goto out_unlock; - } - - request[id] = i915_gem_request_alloc(engine, - i915->kernel_context); - if (IS_ERR(request[id])) { - err = PTR_ERR(request[id]); - pr_err("%s: Request allocation failed for %s with err=%d\n", - __func__, engine->name, err); - goto out_request; - } - - if (prev) { - err = i915_gem_request_await_dma_fence(request[id], - &prev->fence); - if (err) { - i915_add_request(request[id]); - pr_err("%s: Request await failed for %s with err=%d\n", - __func__, engine->name, err); - goto out_request; - } - } - - err = engine->emit_bb_start(request[id], - batch->node.start, - batch->node.size, - 0); - GEM_BUG_ON(err); - request[id]->batch = batch; - - i915_vma_move_to_active(batch, request[id], 0); - i915_gem_object_set_active_reference(batch->obj); - i915_vma_get(batch); - - i915_gem_request_get(request[id]); - i915_add_request(request[id]); - - prev = request[id]; - } - - for_each_engine(engine, i915, id) { - long timeout; - - if (i915_gem_request_completed(request[id])) { - pr_err("%s(%s): request completed too early!\n", - __func__, engine->name); - err = -EINVAL; - goto out_request; - } - - err = recursive_batch_resolve(request[id]->batch); - if (err) { - pr_err("%s: failed to resolve batch, err=%d\n", - __func__, err); - goto out_request; - } - - timeout = i915_wait_request(request[id], - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) { - err = timeout; - pr_err("%s: error waiting for request on %s, err=%d\n", - __func__, engine->name, err); - goto out_request; - } - - GEM_BUG_ON(!i915_gem_request_completed(request[id])); - } - - err = end_live_test(&t); - -out_request: - for_each_engine(engine, i915, id) { - u32 *cmd; - - if (!request[id]) - break; - - cmd = i915_gem_object_pin_map(request[id]->batch->obj, - I915_MAP_WC); - if (!IS_ERR(cmd)) { - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(request[id]->batch->obj); - } - - i915_vma_put(request[id]->batch); - i915_gem_request_put(request[id]); - } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -int i915_gem_request_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(live_nop_request), - SUBTEST(live_all_engines), - SUBTEST(live_sequential_engines), - SUBTEST(live_empty_request), - }; - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 088f45bc6199..9c76f0305b6a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -11,7 +11,7 @@ */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) -selftest(requests, i915_gem_request_live_selftests) +selftest(requests, i915_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 
19c6fce837df..9a48aa441743 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -16,7 +16,7 @@ selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(timelines, i915_gem_timeline_mock_selftests) -selftest(requests, i915_gem_request_mock_selftests) +selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c new file mode 100644 index 000000000000..94bc2e1898a4 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -0,0 +1,865 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "../i915_selftest.h" + +#include "mock_context.h" +#include "mock_gem_device.h" + +static int igt_add_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_request *request; + int err = -ENOMEM; + + /* Basic preliminary test to create a request and let it loose! 
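+ * (The request is created on the mock engine from mock_gem_device(),
+ * so no real hardware is involved.)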
*/ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], + i915->kernel_context, + HZ / 10); + if (!request) + goto out_unlock; + + i915_request_add(request); + + err = 0; +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_wait_request(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct i915_request *request; + int err = -EINVAL; + + /* Submit a request, then wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) { + pr_err("request wait succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_request_completed(request)) { + pr_err("request completed before submit!!\n"); + goto out_unlock; + } + + i915_request_add(request); + + if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); + goto out_unlock; + } + + if (i915_request_completed(request)) { + pr_err("request completed immediately!\n"); + goto out_unlock; + } + + if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { + pr_err("request wait succeeded (expected timeout!)\n"); + goto out_unlock; + } + + if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out!\n"); + goto out_unlock; + } + + if (!i915_request_completed(request)) { + pr_err("request not complete after waiting!\n"); + goto out_unlock; + } + + if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out when already complete!\n"); + goto out_unlock; + } + + err = 0; +out_unlock: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_fence_wait(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct i915_request *request; + int err = -EINVAL; + + /* Submit a request, treat it as a fence and wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_locked; + } + mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */ + + if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { + pr_err("fence wait success before submit (expected timeout)!\n"); + goto out_device; + } + + mutex_lock(&i915->drm.struct_mutex); + i915_request_add(request); + mutex_unlock(&i915->drm.struct_mutex); + + if (dma_fence_is_signaled(&request->fence)) { + pr_err("fence signaled immediately!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { + pr_err("fence wait success after submit (expected timeout)!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out (expected success)!\n"); + goto out_device; + } + + if (!dma_fence_is_signaled(&request->fence)) { + pr_err("fence unsignaled after waiting!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out when complete (expected success)!\n"); + goto out_device; + } + + err = 0; 
+out_device: + mutex_lock(&i915->drm.struct_mutex); +out_locked: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_request_rewind(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_request *request, *vip; + struct i915_gem_context *ctx[2]; + int err = -EINVAL; + + mutex_lock(&i915->drm.struct_mutex); + ctx[0] = mock_context(i915, "A"); + request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); + if (!request) { + err = -ENOMEM; + goto err_context_0; + } + + i915_request_get(request); + i915_request_add(request); + + ctx[1] = mock_context(i915, "B"); + vip = mock_request(i915->engine[RCS], ctx[1], 0); + if (!vip) { + err = -ENOMEM; + goto err_context_1; + } + + /* Simulate preemption by manual reordering */ + if (!mock_cancel_request(request)) { + pr_err("failed to cancel request (already executed)!\n"); + i915_request_add(vip); + goto err_context_1; + } + i915_request_get(vip); + i915_request_add(vip); + rcu_read_lock(); + request->engine->submit_request(request); + rcu_read_unlock(); + + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_request_wait(vip, 0, HZ) == -ETIME) { + pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", + vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); + goto err; + } + + if (i915_request_completed(request)) { + pr_err("low priority request already completed\n"); + goto err; + } + + err = 0; +err: + i915_request_put(vip); + mutex_lock(&i915->drm.struct_mutex); +err_context_1: + mock_context_close(ctx[1]); + i915_request_put(request); +err_context_0: + mock_context_close(ctx[0]); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_request_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_add_request), + SUBTEST(igt_wait_request), + SUBTEST(igt_fence_wait), + SUBTEST(igt_request_rewind), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + drm_dev_unref(&i915->drm); + + return err; +} + +struct live_test { + struct drm_i915_private *i915; + const char *func; + const char *name; + + unsigned int reset_count; +}; + +static int begin_live_test(struct live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name) +{ + int err; + + t->i915 = i915; + t->func = func; + t->name = name; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + i915->gpu_error.missed_irq_rings = 0; + t->reset_count = i915_reset_count(&i915->gpu_error); + + return 0; +} + +static int end_live_test(struct live_test *t) +{ + struct drm_i915_private *i915 = t->i915; + + i915_retire_requests(i915); + + if (wait_for(intel_engines_are_idle(i915), 10)) { + pr_err("%s(%s): GPU not idle\n", t->func, t->name); + return -EIO; + } + + if (t->reset_count != i915_reset_count(&i915->gpu_error)) { + pr_err("%s(%s): GPU was reset %d times!\n", + t->func, t->name, + i915_reset_count(&i915->gpu_error) - t->reset_count); + return -EIO; + } + + if (i915->gpu_error.missed_irq_rings) { + pr_err("%s(%s): Missed interrupts on engines %lx\n", + t->func, t->name, i915->gpu_error.missed_irq_rings); + return -EIO; + } + + return 0; +} + +static int live_nop_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + 
int err = -ENODEV; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. + */ + + mutex_lock(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct i915_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_unlock; + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = i915_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_unlock; + } + + /* This space is left intentionally blank. + * + * We do not actually want to perform any + * action with this request, we just want + * to measure the latency in allocation + * and submission of our breadcrumbs - + * ensuring that the bare request is sufficient + * for the system to work (i.e. proper HEAD + * tracking of the rings, interrupt handling, + * etc). It also gives us the lowest bounds + * for latency. + */ + + i915_request_add(request); + } + i915_request_wait(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_unlock; + + pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_vma *empty_batch(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static struct i915_request * +empty_request(struct intel_engine_cs *engine, + struct i915_vma *batch) +{ + struct i915_request *request; + int err; + + request = i915_request_alloc(engine, engine->i915->kernel_context); + if (IS_ERR(request)) + return request; + + err = engine->emit_bb_start(request, + batch->node.start, + batch->node.size, + I915_DISPATCH_SECURE); + if (err) + goto out_request; + +out_request: + __i915_request_add(request, err == 0); + return err ? ERR_PTR(err) : request; +} + +static int live_empty_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + struct i915_vma *batch; + unsigned int id; + int err = 0; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. 
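+ * (Unlike live_nop_request above, every request here does execute a
+ * real batch, albeit one containing only MI_BATCH_BUFFER_END.)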
+ */ + + mutex_lock(&i915->drm.struct_mutex); + + batch = empty_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct i915_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_batch; + + /* Warmup / preload */ + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + i915_request_wait(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + } + i915_request_wait(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_batch; + + pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_vma *recursive_batch(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx = i915->kernel_context; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(i915); + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + err = i915_gem_object_set_to_wc_domain(obj, true); + if (err) + goto err; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + if (gen >= 8) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + } else if (gen >= 6) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; + *cmd++ = lower_32_bits(vma->node.start); + } else if (gen >= 4) { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cmd++ = lower_32_bits(vma->node.start); + } else { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | 1; + *cmd++ = lower_32_bits(vma->node.start); + } + *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + i915_gem_chipset_flush(i915); + + i915_gem_object_unpin_map(obj); + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int recursive_batch_resolve(struct i915_vma *batch) +{ + u32 *cmd; + + cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(batch->vm->i915); + + i915_gem_object_unpin_map(batch->obj); + + return 0; +} + +static int live_all_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_request *request[I915_NUM_ENGINES]; + struct i915_vma *batch; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines simultaneously. 
We + * send a recursive batch to each engine - checking that we don't + * block doing so, and that they don't complete too soon. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch, err=%d\n", __func__, err); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + request[id] = i915_request_alloc(engine, i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed with err=%d\n", + __func__, err); + goto out_request; + } + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + if (!i915_gem_object_has_active_reference(batch->obj)) { + i915_gem_object_get(batch->obj); + i915_gem_object_set_active_reference(batch->obj); + } + + i915_vma_move_to_active(batch, request[id], 0); + i915_request_get(request[id]); + i915_request_add(request[id]); + } + + for_each_engine(engine, i915, id) { + if (i915_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + } + + err = recursive_batch_resolve(batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); + goto out_request; + } + + for_each_engine(engine, i915, id) { + long timeout; + + timeout = i915_request_wait(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_request_completed(request[id])); + i915_request_put(request[id]); + request[id] = NULL; + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) + if (request[id]) + i915_request_put(request[id]); + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_sequential_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_request *request[I915_NUM_ENGINES] = {}; + struct i915_request *prev = NULL; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines sequentially, such + * that each successive request waits for the earlier ones. This + * tests that we don't execute requests out of order, even though + * they are running on independent engines. 
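+ * (Ordering is enforced by chaining each request to its predecessor's
+ * fence via i915_request_await_dma_fence(); the spinning batches are
+ * then resolved and waited upon in submission order.)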
+ */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + for_each_engine(engine, i915, id) { + struct i915_vma *batch; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch for %s, err=%d\n", + __func__, engine->name, err); + goto out_unlock; + } + + request[id] = i915_request_alloc(engine, i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + if (prev) { + err = i915_request_await_dma_fence(request[id], + &prev->fence); + if (err) { + i915_request_add(request[id]); + pr_err("%s: Request await failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + } + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_get(batch); + + i915_request_get(request[id]); + i915_request_add(request[id]); + + prev = request[id]; + } + + for_each_engine(engine, i915, id) { + long timeout; + + if (i915_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + + err = recursive_batch_resolve(request[id]->batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", + __func__, err); + goto out_request; + } + + timeout = i915_request_wait(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_request_completed(request[id])); + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) { + u32 *cmd; + + if (!request[id]) + break; + + cmd = i915_gem_object_pin_map(request[id]->batch->obj, + I915_MAP_WC); + if (!IS_ERR(cmd)) { + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + + i915_gem_object_unpin_map(request[id]->batch->obj); + } + + i915_vma_put(request[id]->batch); + i915_request_put(request[id]); + } +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_request_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_nop_request), + SUBTEST(live_all_engines), + SUBTEST(live_sequential_engines), + SUBTEST(live_empty_request), + }; + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index d1d2c2456f69..3edbb3e8c0e1 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -92,13 +92,13 @@ err_ctx: } static u64 hws_address(const struct i915_vma *hws, - const struct drm_i915_gem_request *rq) + const struct i915_request *rq) { return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); } static int emit_recurse_batch(struct hang *h, - struct drm_i915_gem_request *rq) + struct i915_request *rq) { struct drm_i915_private *i915 = h->i915; struct i915_address_space *vm = rq->ctx->ppgtt ? 
&rq->ctx->ppgtt->base : &i915->ggtt.base; @@ -204,10 +204,10 @@ unpin_vma: return err; } -static struct drm_i915_gem_request * +static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; int err; if (i915_gem_object_is_active(h->obj)) { @@ -232,21 +232,20 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) h->batch = vaddr; } - rq = i915_gem_request_alloc(engine, h->ctx); + rq = i915_request_alloc(engine, h->ctx); if (IS_ERR(rq)) return rq; err = emit_recurse_batch(h, rq); if (err) { - __i915_add_request(rq, false); + __i915_request_add(rq, false); return ERR_PTR(err); } return rq; } -static u32 hws_seqno(const struct hang *h, - const struct drm_i915_gem_request *rq) +static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) { return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); } @@ -319,7 +318,7 @@ static void hang_fini(struct hang *h) flush_test(h->i915, I915_WAIT_LOCKED); } -static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +static bool wait_for_hang(struct hang *h, struct i915_request *rq) { return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), rq->fence.seqno), @@ -332,7 +331,7 @@ static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) static int igt_hang_sanitycheck(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct intel_engine_cs *engine; enum intel_engine_id id; struct hang h; @@ -359,17 +358,17 @@ static int igt_hang_sanitycheck(void *arg) goto fini; } - i915_gem_request_get(rq); + i915_request_get(rq); *h.batch = MI_BATCH_BUFFER_END; i915_gem_chipset_flush(i915); - __i915_add_request(rq, true); + __i915_request_add(rq, true); - timeout = i915_wait_request(rq, + timeout = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(rq); + i915_request_put(rq); if (timeout < 0) { err = timeout; @@ -485,7 +484,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; mutex_lock(&i915->drm.struct_mutex); rq = hang_create_request(&h, engine); @@ -495,8 +494,8 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); if (!wait_for_hang(&h, rq)) { @@ -507,12 +506,12 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) intel_engine_dump(engine, &p, "%s\n", engine->name); - i915_gem_request_put(rq); + i915_request_put(rq); err = -EIO; break; } - i915_gem_request_put(rq); + i915_request_put(rq); } engine->hangcheck.stalled = true; @@ -577,7 +576,7 @@ static int igt_reset_active_engine(void *arg) static int active_engine(void *data) { struct intel_engine_cs *engine = data; - struct drm_i915_gem_request *rq[2] = {}; + struct i915_request *rq[2] = {}; struct i915_gem_context *ctx[2]; struct drm_file *file; unsigned long count = 0; @@ -606,29 +605,29 @@ static int active_engine(void *data) while (!kthread_should_stop()) { unsigned int idx = count++ & 1; - struct drm_i915_gem_request *old = rq[idx]; - struct drm_i915_gem_request *new; + struct i915_request *old = rq[idx]; + struct i915_request *new; 
mutex_lock(&engine->i915->drm.struct_mutex); - new = i915_gem_request_alloc(engine, ctx[idx]); + new = i915_request_alloc(engine, ctx[idx]); if (IS_ERR(new)) { mutex_unlock(&engine->i915->drm.struct_mutex); err = PTR_ERR(new); break; } - rq[idx] = i915_gem_request_get(new); - i915_add_request(new); + rq[idx] = i915_request_get(new); + i915_request_add(new); mutex_unlock(&engine->i915->drm.struct_mutex); if (old) { - i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(old); + i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(old); } } for (count = 0; count < ARRAY_SIZE(rq); count++) - i915_gem_request_put(rq[count]); + i915_request_put(rq[count]); err_file: mock_file_free(engine->i915, file); @@ -692,7 +691,7 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; mutex_lock(&i915->drm.struct_mutex); rq = hang_create_request(&h, engine); @@ -702,8 +701,8 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, break; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); if (!wait_for_hang(&h, rq)) { @@ -714,12 +713,12 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, intel_engine_dump(engine, &p, "%s\n", engine->name); - i915_gem_request_put(rq); + i915_request_put(rq); err = -EIO; break; } - i915_gem_request_put(rq); + i915_request_put(rq); } engine->hangcheck.stalled = true; @@ -814,7 +813,7 @@ static int igt_reset_active_engine_others(void *arg) return __igt_reset_engine_others(arg, true); } -static u32 fake_hangcheck(struct drm_i915_gem_request *rq) +static u32 fake_hangcheck(struct i915_request *rq) { u32 reset_count; @@ -832,7 +831,7 @@ static u32 fake_hangcheck(struct drm_i915_gem_request *rq) static int igt_wait_reset(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int reset_count; struct hang h; long timeout; @@ -856,8 +855,8 @@ static int igt_wait_reset(void *arg) goto fini; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); if (!wait_for_hang(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -875,7 +874,7 @@ static int igt_wait_reset(void *arg) reset_count = fake_hangcheck(rq); - timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10); + timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { pr_err("i915_wait_request failed on a stuck request: err=%ld\n", timeout); @@ -891,7 +890,7 @@ static int igt_wait_reset(void *arg) } out_rq: - i915_gem_request_put(rq); + i915_request_put(rq); fini: hang_fini(&h); unlock: @@ -922,7 +921,7 @@ static int igt_reset_queue(void *arg) goto unlock; for_each_engine(engine, i915, id) { - struct drm_i915_gem_request *prev; + struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; @@ -935,12 +934,12 @@ static int igt_reset_queue(void *arg) goto fini; } - i915_gem_request_get(prev); - __i915_add_request(prev, true); + i915_request_get(prev); + __i915_request_add(prev, true); count = 0; do { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int reset_count; rq = hang_create_request(&h, engine); @@ -949,8 +948,8 @@ static int igt_reset_queue(void *arg) goto fini; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); 
+ i915_request_get(rq); + __i915_request_add(rq, true); if (!wait_for_hang(&h, prev)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -960,8 +959,8 @@ static int igt_reset_queue(void *arg) intel_engine_dump(prev->engine, &p, "%s\n", prev->engine->name); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -980,8 +979,8 @@ static int igt_reset_queue(void *arg) if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", prev->fence.error); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); err = -EINVAL; goto fini; } @@ -989,21 +988,21 @@ static int igt_reset_queue(void *arg) if (rq->fence.error) { pr_err("Fence error status not zero [%d] after unrelated reset\n", rq->fence.error); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); err = -EINVAL; goto fini; } if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); err = -EINVAL; goto fini; } - i915_gem_request_put(prev); + i915_request_put(prev); prev = rq; count++; } while (time_before(jiffies, end_time)); @@ -1012,7 +1011,7 @@ static int igt_reset_queue(void *arg) *h.batch = MI_BATCH_BUFFER_END; i915_gem_chipset_flush(i915); - i915_gem_request_put(prev); + i915_request_put(prev); err = flush_test(i915, I915_WAIT_LOCKED); if (err) @@ -1036,7 +1035,7 @@ static int igt_handle_error(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine = i915->engine[RCS]; struct hang h; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_gpu_state *error; int err; @@ -1060,8 +1059,8 @@ static int igt_handle_error(void *arg) goto err_fini; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); if (!wait_for_hang(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -1098,7 +1097,7 @@ static int igt_handle_error(void *arg) } err_request: - i915_gem_request_put(rq); + i915_request_put(rq); err_fini: hang_fini(&h); err_unlock: diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 55c0e2c15782..78a89efa1119 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -81,7 +81,7 @@ static void mock_context_unpin(struct intel_engine_cs *engine, i915_gem_context_put(ctx); } -static int mock_request_alloc(struct drm_i915_gem_request *request) +static int mock_request_alloc(struct i915_request *request) { struct mock_request *mock = container_of(request, typeof(*mock), base); @@ -91,24 +91,24 @@ static int mock_request_alloc(struct drm_i915_gem_request *request) return 0; } -static int mock_emit_flush(struct drm_i915_gem_request *request, +static int mock_emit_flush(struct i915_request *request, unsigned int flags) { return 0; } -static void mock_emit_breadcrumb(struct drm_i915_gem_request *request, +static void mock_emit_breadcrumb(struct i915_request *request, u32 *flags) { } -static void mock_submit_request(struct drm_i915_gem_request *request) +static void mock_submit_request(struct i915_request *request) { struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_engine *engine = 
container_of(request->engine, typeof(*engine), base); - i915_gem_request_submit(request); + i915_request_submit(request); GEM_BUG_ON(!request->global_seqno); spin_lock_irq(&engine->hw_lock); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 3175db70cc6e..e6d4b882599a 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -43,7 +43,7 @@ void mock_device_flush(struct drm_i915_private *i915) for_each_engine(engine, i915, id) mock_engine_flush(engine); - i915_gem_retire_requests(i915); + i915_retire_requests(i915); } static void mock_device_release(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 8097e3693ec4..0dc29e242597 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -25,16 +25,16 @@ #include "mock_engine.h" #include "mock_request.h" -struct drm_i915_gem_request * +struct i915_request * mock_request(struct intel_engine_cs *engine, struct i915_gem_context *context, unsigned long delay) { - struct drm_i915_gem_request *request; + struct i915_request *request; struct mock_request *mock; /* NB the i915->requests slab cache is enlarged to fit mock_request */ - request = i915_gem_request_alloc(engine, context); + request = i915_request_alloc(engine, context); if (IS_ERR(request)) return NULL; @@ -44,7 +44,7 @@ mock_request(struct intel_engine_cs *engine, return &mock->base; } -bool mock_cancel_request(struct drm_i915_gem_request *request) +bool mock_cancel_request(struct i915_request *request) { struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_engine *engine = @@ -57,7 +57,7 @@ bool mock_cancel_request(struct drm_i915_gem_request *request) spin_unlock_irq(&engine->hw_lock); if (was_queued) - i915_gem_request_unsubmit(request); + i915_request_unsubmit(request); return was_queued; } diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h index 4dea74c8e96d..995fb728380c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.h +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -27,20 +27,20 @@ #include -#include "../i915_gem_request.h" +#include "../i915_request.h" struct mock_request { - struct drm_i915_gem_request base; + struct i915_request base; struct list_head link; unsigned long delay; }; -struct drm_i915_gem_request * +struct i915_request * mock_request(struct intel_engine_cs *engine, struct i915_gem_context *context, unsigned long delay); -bool mock_cancel_request(struct drm_i915_gem_request *request); +bool mock_cancel_request(struct i915_request *request); #endif /* !__MOCK_REQUEST__ */ -- cgit v1.2.3 From 561210706cd00f83f14af124edb3a5ff1b732912 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 21 Feb 2018 13:32:36 +0000 Subject: drm/i915/execlists: Remove the ring advancement under preemption Load an empty ringbuffer for preemption, ignoring the lite-restore workaround as we know the preempt context is always idle before preemption. Note that after some digging by Michal Winiarski, we found that RING_HEAD is no longer being updated (due to inhibiting context save restore) so this patch is already in effect! 
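For reference, a minimal illustrative sketch of the tail padding that the deleted hunk performed (standalone names, not the driver's actual code): the preempt context's ring tail was advanced by WA_TAIL_BYTES and wrapped at the power-of-two ring size, a step that is redundant once RING_HEAD is known never to move.

    /*
     * Illustrative only: the ring-tail arithmetic of the removed
     * lite-restore workaround. With context save inhibited the
     * preempt context is always idle, so this padding buys nothing.
     */
    static unsigned int pad_ring_tail(unsigned int tail,
                                      unsigned int size,
                                      unsigned int wa_tail_bytes)
    {
            tail += wa_tail_bytes;
            tail &= size - 1;       /* size is a power of two */
            return tail;
    }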
Signed-off-by: Chris Wilson Cc: Michal Winiarski Cc: Michel Thierry Cc: Michal Wajdeczko Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20180221133236.29402-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d8bca8ba2efc..e781c912f197 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -447,13 +447,6 @@ static void inject_preempt_context(struct intel_engine_cs *engine) GEM_BUG_ON(engine->execlists.preempt_complete_status != upper_32_bits(ce->lrc_desc)); - GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES)); - - memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES); - ce->ring->tail += WA_TAIL_BYTES; - ce->ring->tail &= (ce->ring->size - 1); - ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; - GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] & _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) != -- cgit v1.2.3 From 65cb8c0f040bd791b9adb761be2710adcd9df00f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 21 Feb 2018 15:15:53 +0000 Subject: drm/i915/execlists: Add a GEM_TRACE to show when the context is completed Include a GEM_TRACE to show when the context is complete and we advance the ELSP port. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180221151553.9054-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e781c912f197..34fa8778b1f4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -881,6 +881,9 @@ static void execlists_submission_tasklet(unsigned long data) trace_i915_request_out(rq); i915_request_put(rq); + GEM_TRACE("%s completed ctx=%d\n", + engine->name, port->context_id); + execlists_port_complete(execlists, port); } else { port_set(port, port_pack(rq, count)); -- cgit v1.2.3 From e084039b5840ee6822b65167766cdfc47536964e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 21 Feb 2018 15:23:01 +0000 Subject: drm/i915/execlists: Move the GEM_BUG_ON context matches CSB later Print out the current request/context before doing the GEM_BUG_ON, so that we can inspect the values in the ftrace. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180221152301.9178-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 34fa8778b1f4..964885b5d7cb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -863,14 +863,15 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); - /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); - rq = port_unpack(port, &count); GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n", engine->name, port->context_id, count, rq ? 
rq->global_seqno : 0); + + /* Check the context/desc id for this event matches */ + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); + GEM_BUG_ON(count == 0); if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); -- cgit v1.2.3
From d55cb4fa2cf0105bfb16b60a2846737b91fdc173 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 20 Feb 2018 17:37:52 +0200 Subject: drm/i915/icl: Add the ICL PCI IDs
This is the current PCI ID list in our documentation. Let's leave the _gt#_ part out for now since our current documentation is not 100% clear and we don't need this info now anyway.
v2: Use the new ICL_11 naming (Kelvin Gardiner). v3: Latest IDs as per BSpec (Oscar). v4: Make it compile (Paulo). v5: Remove comments (Lucas). v6: Multiple rebases (Paulo). v7: Rebase (Mika)
Reviewed-by: Anuj Phogat (v1) Signed-off-by: Paulo Zanoni Signed-off-by: Oscar Mateo Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi Signed-off-by: Mika Kuoppala Reviewed-by: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20180220153755.13509-1-mika.kuoppala@linux.intel.com
--- drivers/gpu/drm/i915/i915_pci.c | 1 + include/drm/i915_pciids.h | 12 ++++++++++++ 2 files changed, 13 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1eaabf28d7b7..26e8f5c13231 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -664,6 +664,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), INTEL_CNL_IDS(&intel_cannonlake_info), + INTEL_ICL_11_IDS(&intel_icelake_11_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist);
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 0b2ba46fa00b..70f0c2535b87 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -431,4 +431,16 @@ INTEL_VGA_DEVICE(0x5A44, info), \ INTEL_VGA_DEVICE(0x5A4C, info) +/* ICL */ +#define INTEL_ICL_11_IDS(info) \ + INTEL_VGA_DEVICE(0x8A50, info), \ + INTEL_VGA_DEVICE(0x8A51, info), \ + INTEL_VGA_DEVICE(0x8A5C, info), \ + INTEL_VGA_DEVICE(0x8A5D, info), \ + INTEL_VGA_DEVICE(0x8A52, info), \ + INTEL_VGA_DEVICE(0x8A5A, info), \ + INTEL_VGA_DEVICE(0x8A5B, info), \ + INTEL_VGA_DEVICE(0x8A71, info), \ + INTEL_VGA_DEVICE(0x8A70, info) + #endif /* _I915_PCIIDS_H */ -- cgit v1.2.3
From 80d893501bb6b28d838b8d45ec47ed0de8482736 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 20 Feb 2018 17:37:53 +0200 Subject: drm/i915/icl: Show interrupt registers in debugfs
Show GEN11 specific interrupt registers in debugfs
v2: Update for POR changes. (Daniele Ceraolo Spurio) v3: get runtime pm ref.
unify common parts with gen8 (Daniele) Cc: Ceraolo Spurio, Daniele Signed-off-by: Tvrtko Ursulin Signed-off-by: Rodrigo Vivi Signed-off-by: Mika Kuoppala Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20180220153755.13509-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 139 ++++++++++++++++++++++++------------ 1 file changed, 95 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bad2ed7050ba..33fbf3965309 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -646,6 +646,56 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data) return 0; } +static void gen8_display_interrupt_info(struct seq_file *m) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + int pipe; + + for_each_pipe(dev_priv, pipe) { + enum intel_display_power_domain power_domain; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, + power_domain)) { + seq_printf(m, "Pipe %c power disabled\n", + pipe_name(pipe)); + continue; + } + seq_printf(m, "Pipe %c IMR:\t%08x\n", + pipe_name(pipe), + I915_READ(GEN8_DE_PIPE_IMR(pipe))); + seq_printf(m, "Pipe %c IIR:\t%08x\n", + pipe_name(pipe), + I915_READ(GEN8_DE_PIPE_IIR(pipe))); + seq_printf(m, "Pipe %c IER:\t%08x\n", + pipe_name(pipe), + I915_READ(GEN8_DE_PIPE_IER(pipe))); + + intel_display_power_put(dev_priv, power_domain); + } + + seq_printf(m, "Display Engine port interrupt mask:\t%08x\n", + I915_READ(GEN8_DE_PORT_IMR)); + seq_printf(m, "Display Engine port interrupt identity:\t%08x\n", + I915_READ(GEN8_DE_PORT_IIR)); + seq_printf(m, "Display Engine port interrupt enable:\t%08x\n", + I915_READ(GEN8_DE_PORT_IER)); + + seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n", + I915_READ(GEN8_DE_MISC_IMR)); + seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n", + I915_READ(GEN8_DE_MISC_IIR)); + seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n", + I915_READ(GEN8_DE_MISC_IER)); + + seq_printf(m, "PCU interrupt mask:\t%08x\n", + I915_READ(GEN8_PCU_IMR)); + seq_printf(m, "PCU interrupt identity:\t%08x\n", + I915_READ(GEN8_PCU_IIR)); + seq_printf(m, "PCU interrupt enable:\t%08x\n", + I915_READ(GEN8_PCU_IER)); +} + static int i915_interrupt_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -709,6 +759,27 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(GEN8_PCU_IIR)); seq_printf(m, "PCU interrupt enable:\t%08x\n", I915_READ(GEN8_PCU_IER)); + } else if (INTEL_GEN(dev_priv) >= 11) { + seq_printf(m, "Master Interrupt Control: %08x\n", + I915_READ(GEN11_GFX_MSTR_IRQ)); + + seq_printf(m, "Render/Copy Intr Enable: %08x\n", + I915_READ(GEN11_RENDER_COPY_INTR_ENABLE)); + seq_printf(m, "VCS/VECS Intr Enable: %08x\n", + I915_READ(GEN11_VCS_VECS_INTR_ENABLE)); + seq_printf(m, "GUC/SG Intr Enable:\t %08x\n", + I915_READ(GEN11_GUC_SG_INTR_ENABLE)); + seq_printf(m, "GPM/WGBOXPERF Intr Enable: %08x\n", + I915_READ(GEN11_GPM_WGBOXPERF_INTR_ENABLE)); + seq_printf(m, "Crypto Intr Enable:\t %08x\n", + I915_READ(GEN11_CRYPTO_RSVD_INTR_ENABLE)); + seq_printf(m, "GUnit/CSME Intr Enable:\t %08x\n", + I915_READ(GEN11_GUNIT_CSME_INTR_ENABLE)); + + seq_printf(m, "Display Interrupt Control:\t%08x\n", + I915_READ(GEN11_DISPLAY_INT_CTL)); + + gen8_display_interrupt_info(m); } else if (INTEL_GEN(dev_priv) >= 8) { seq_printf(m, "Master Interrupt Control:\t%08x\n", 
I915_READ(GEN8_MASTER_IRQ)); @@ -722,49 +793,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) i, I915_READ(GEN8_GT_IER(i))); } - for_each_pipe(dev_priv, pipe) { - enum intel_display_power_domain power_domain; - - power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, - power_domain)) { - seq_printf(m, "Pipe %c power disabled\n", - pipe_name(pipe)); - continue; - } - seq_printf(m, "Pipe %c IMR:\t%08x\n", - pipe_name(pipe), - I915_READ(GEN8_DE_PIPE_IMR(pipe))); - seq_printf(m, "Pipe %c IIR:\t%08x\n", - pipe_name(pipe), - I915_READ(GEN8_DE_PIPE_IIR(pipe))); - seq_printf(m, "Pipe %c IER:\t%08x\n", - pipe_name(pipe), - I915_READ(GEN8_DE_PIPE_IER(pipe))); - - intel_display_power_put(dev_priv, power_domain); - } - - seq_printf(m, "Display Engine port interrupt mask:\t%08x\n", - I915_READ(GEN8_DE_PORT_IMR)); - seq_printf(m, "Display Engine port interrupt identity:\t%08x\n", - I915_READ(GEN8_DE_PORT_IIR)); - seq_printf(m, "Display Engine port interrupt enable:\t%08x\n", - I915_READ(GEN8_DE_PORT_IER)); - - seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n", - I915_READ(GEN8_DE_MISC_IMR)); - seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n", - I915_READ(GEN8_DE_MISC_IIR)); - seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n", - I915_READ(GEN8_DE_MISC_IER)); - - seq_printf(m, "PCU interrupt mask:\t%08x\n", - I915_READ(GEN8_PCU_IMR)); - seq_printf(m, "PCU interrupt identity:\t%08x\n", - I915_READ(GEN8_PCU_IIR)); - seq_printf(m, "PCU interrupt enable:\t%08x\n", - I915_READ(GEN8_PCU_IER)); + gen8_display_interrupt_info(m); } else if (IS_VALLEYVIEW(dev_priv)) { seq_printf(m, "Display IER:\t%08x\n", I915_READ(VLV_IER)); @@ -846,13 +875,35 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "Graphics Interrupt mask: %08x\n", I915_READ(GTIMR)); } - if (INTEL_GEN(dev_priv) >= 6) { + + if (INTEL_GEN(dev_priv) >= 11) { + seq_printf(m, "RCS Intr Mask:\t %08x\n", + I915_READ(GEN11_RCS0_RSVD_INTR_MASK)); + seq_printf(m, "BCS Intr Mask:\t %08x\n", + I915_READ(GEN11_BCS_RSVD_INTR_MASK)); + seq_printf(m, "VCS0/VCS1 Intr Mask:\t %08x\n", + I915_READ(GEN11_VCS0_VCS1_INTR_MASK)); + seq_printf(m, "VCS2/VCS3 Intr Mask:\t %08x\n", + I915_READ(GEN11_VCS2_VCS3_INTR_MASK)); + seq_printf(m, "VECS0/VECS1 Intr Mask:\t %08x\n", + I915_READ(GEN11_VECS0_VECS1_INTR_MASK)); + seq_printf(m, "GUC/SG Intr Mask:\t %08x\n", + I915_READ(GEN11_GUC_SG_INTR_MASK)); + seq_printf(m, "GPM/WGBOXPERF Intr Mask: %08x\n", + I915_READ(GEN11_GPM_WGBOXPERF_INTR_MASK)); + seq_printf(m, "Crypto Intr Mask:\t %08x\n", + I915_READ(GEN11_CRYPTO_RSVD_INTR_MASK)); + seq_printf(m, "Gunit/CSME Intr Mask:\t %08x\n", + I915_READ(GEN11_GUNIT_CSME_INTR_MASK)); + + } else if (INTEL_GEN(dev_priv) >= 6) { for_each_engine(engine, dev_priv, id) { seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", engine->name, I915_READ_IMR(engine)); } } + intel_runtime_pm_put(dev_priv); return 0; -- cgit v1.2.3 From b8ec759e6f1c6da0418238df066a0f1ef8fd2075 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 21 Feb 2018 20:49:02 +0000 Subject: drm/i915/hsw: add missing disabled EUs registers reads It turns out that HSW has a register that tells us how many EUs are disabled per half-slice (roughly a similar notion to subslice). We didn't read those registers so far as most userspace drivers didn't need those values prior to Gen8, but an internal library would like to have access to this. 
Since we already have the getparam interface, there is no harm in exposing this. v2: Rename bits value (Joonas) v3: s/GEM_BUG_ON/MISSING_CASE/ (Joonas) v4: s/GEM_BUG_ON/MISSING_CASE/ again... (Lionel) Signed-off-by: Lionel Landwerlin Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180221204902.23084-1-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 7 ++++ drivers/gpu/drm/i915/intel_device_info.c | 57 +++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0fc24ab3a8ca..2e548fb72170 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2807,6 +2807,13 @@ enum i915_power_well_id { #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) /* Fuse readout registers for GT */ +#define HSW_PAVP_FUSE1 _MMIO(0x911C) +#define HSW_F1_EU_DIS_SHIFT 16 +#define HSW_F1_EU_DIS_MASK (0x3 << HSW_F1_EU_DIS_SHIFT) +#define HSW_F1_EU_DIS_10EUS 0 +#define HSW_F1_EU_DIS_8EUS 1 +#define HSW_F1_EU_DIS_6EUS 2 + #define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) #define CHV_FGT_DISABLE_SS1 (1 << 11) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 298f8996cc54..1c780cc4cd48 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -357,6 +357,59 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = 0; } +static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) +{ + struct intel_device_info *info = mkwrite_device_info(dev_priv); + struct sseu_dev_info *sseu = &info->sseu; + u32 fuse1; + + /* + * There isn't a register to tell us how many slices/subslices. We + * work off the PCI-ids here. + */ + switch (info->gt) { + default: + MISSING_CASE(info->gt); + /* fall through */ + case 1: + sseu->slice_mask = BIT(0); + sseu->subslice_mask = BIT(0); + break; + case 2: + sseu->slice_mask = BIT(0); + sseu->subslice_mask = BIT(0) | BIT(1); + break; + case 3: + sseu->slice_mask = BIT(0) | BIT(1); + sseu->subslice_mask = BIT(0) | BIT(1); + break; + } + + fuse1 = I915_READ(HSW_PAVP_FUSE1); + switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) { + default: + MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >> + HSW_F1_EU_DIS_SHIFT); + /* fall through */ + case HSW_F1_EU_DIS_10EUS: + sseu->eu_per_subslice = 10; + break; + case HSW_F1_EU_DIS_8EUS: + sseu->eu_per_subslice = 8; + break; + case HSW_F1_EU_DIS_6EUS: + sseu->eu_per_subslice = 6; + break; + } + + sseu->eu_total = sseu_subslice_total(sseu) * sseu->eu_per_subslice; + + /* No powergating for you. 
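+ * (Haswell has no slice, subslice or EU power gating, hence the
+ * zeroed flags below.)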
*/ + sseu->has_slice_pg = 0; + sseu->has_subslice_pg = 0; + sseu->has_eu_pg = 0; +} + static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) { u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE); @@ -574,7 +627,9 @@ void intel_device_info_runtime_init(struct intel_device_info *info) } /* Initialize slice/subslice/EU info */ - if (IS_CHERRYVIEW(dev_priv)) + if (IS_HASWELL(dev_priv)) + haswell_sseu_info_init(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) cherryview_sseu_info_init(dev_priv); else if (IS_BROADWELL(dev_priv)) broadwell_sseu_info_init(dev_priv); -- cgit v1.2.3 From 4552f50a439c36fd5f8c6f1ec2d5ba7c8574ce97 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 22 Feb 2018 11:16:58 +0000 Subject: drm/i915: Move page sizes out of the 8-bit sandwich Slightly smaller code and a bit more logical layout. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180222111658.4999-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_device_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 71fdfb0451ef..8904ad87bf37 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -137,14 +137,14 @@ struct intel_device_info { enum intel_platform platform; u32 platform_mask; + unsigned int page_sizes; /* page sizes supported by the HW */ + u32 display_mmio_offset; u8 num_pipes; u8 num_sprites[I915_MAX_PIPES]; u8 num_scalers[I915_MAX_PIPES]; - unsigned int page_sizes; /* page sizes supported by the HW */ - #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); #undef DEFINE_FLAG -- cgit v1.2.3 From db9c06dfff43b2a040ba2b4f2300c30e454dd930 Mon Sep 17 00:00:00 2001 From: Johnson Lin Date: Tue, 30 Jan 2018 21:21:29 +0530 Subject: drm/i915: Fix Limited Range Color Handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some panels support limited range output (16-235) compared to full range RGB values (0-255). Also userspace can control the RGB range using "Broadcast RGB" property. Currently the code to handle full range to limited range is broken. This patch fixes the same by properly scaling down all the full range co-efficients with limited range scaling factor. v2: Fixed Ville's review comments. v3: Changed input to const and used correct data types as suggested by Ville v4: Fixed some missing data type corrections. Signed-off-by: Johnson Lin Signed-off-by: Uma Shankar Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1517327489-26128-1-git-send-email-uma.shankar@intel.com --- drivers/gpu/drm/i915/intel_color.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index aa66e952a95d..a383d993b844 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -84,26 +84,25 @@ static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) /* * When using limited range, multiply the matrix given by userspace by - * the matrix that we would use for the limited range. We do the - * multiplication in U2.30 format. + * the matrix that we would use for the limited range. 
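+ * The arithmetic below is fixed point: the user coefficient arrives
+ * as S31.32, its magnitude is clamped and shifted down to U2.30, and
+ * the limited-range factor (219/255, held as a 32-bit fraction in
+ * CTM_COEFF_LIMITED_RANGE) is applied with mul_u32_u32(), shifting
+ * right by 30 to land back on a .32 result before the sign bit is
+ * restored.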
*/ -static void ctm_mult_by_limited(uint64_t *result, int64_t *input) +static void ctm_mult_by_limited(u64 *result, const u64 *input) { int i; - for (i = 0; i < 9; i++) - result[i] = 0; + for (i = 0; i < 9; i++) { + u64 user_coeff = input[i]; + u32 limited_coeff = CTM_COEFF_LIMITED_RANGE; + u32 abs_coeff = clamp_val(CTM_COEFF_ABS(user_coeff), 0, + CTM_COEFF_4_0 - 1) >> 2; - for (i = 0; i < 3; i++) { - int64_t user_coeff = input[i * 3 + i]; - uint64_t limited_coeff = CTM_COEFF_LIMITED_RANGE >> 2; - uint64_t abs_coeff = clamp_val(CTM_COEFF_ABS(user_coeff), - 0, - CTM_COEFF_4_0 - 1) >> 2; - - result[i * 3 + i] = (limited_coeff * abs_coeff) >> 27; - if (CTM_COEFF_NEGATIVE(user_coeff)) - result[i * 3 + i] |= CTM_COEFF_SIGN; + /* + * By scaling every co-efficient with limited range (16-235) + * vs full range (0-255) the final o/p will be scaled down to + * fit in the limited range supported by the panel. + */ + result[i] = mul_u32_u32(limited_coeff, abs_coeff) >> 30; + result[i] |= user_coeff & CTM_COEFF_SIGN; } } -- cgit v1.2.3 From 85798ac9b35f8cc7608a4a798d0b0626f0d54d61 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 21 Feb 2018 18:02:30 +0200 Subject: drm/i915: Fail if we can't get a fence for gen2/3 tiled scanout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen2/3 display engine depends on the fence for tiled scanout. So if we fail to get a fence fail the entire operation. Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180221160235.11134-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c14d2a25408d..c96032c0406f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2123,6 +2123,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, goto err; if (i915_vma_is_map_and_fenceable(vma)) { + int ret; + /* Install a fence for tiled scan-out. Pre-i965 always needs a * fence, whereas 965+ only requires a fence if using * framebuffer compression. For simplicity, we always, when @@ -2139,7 +2141,13 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, * something and try to run the system in a "less than optimal" * mode that matches the user configuration. */ - if (i915_vma_pin_fence(vma) == 0 && vma->fence) + ret = i915_vma_pin_fence(vma); + if (ret != 0 && INTEL_GEN(dev_priv) < 4) { + vma = ERR_PTR(ret); + goto err; + } + + if (ret == 0 && vma->fence) *out_flags |= PLANE_HAS_FENCE; } -- cgit v1.2.3 From f7a02ad7d16b24908b9fddbd6176b1c1a2b35058 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 21 Feb 2018 20:48:07 +0200 Subject: drm/i915: Only pin the fence for primary planes (and gen2/3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we pin a fence on every plane doing tiled scanout. The number of planes we have available is fast apporaching the number of fences so we really should stop wasting them. Only FBC needs the fence on gen4+, so let's use fences only for the primary planes on those platforms. 
v2: drop the tiling check from plane_uses_fence() as the obj is NULL during initial_plane_config() and we don't rally need the check since i915_vma_pin_fence() does the check anyway Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180221184807.577-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 14 +++++++++++++- drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_fbdev.c | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c96032c0406f..a991195e5354 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2067,9 +2067,18 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, } } +static bool intel_plane_uses_fence(const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + + return INTEL_GEN(dev_priv) < 4 || plane->id == PLANE_PRIMARY; +} + struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation, + bool uses_fence, unsigned long *out_flags) { struct drm_device *dev = fb->dev; @@ -2122,7 +2131,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, if (IS_ERR(vma)) goto err; - if (i915_vma_is_map_and_fenceable(vma)) { + if (uses_fence && i915_vma_is_map_and_fenceable(vma)) { int ret; /* Install a fence for tiled scan-out. Pre-i965 always needs a @@ -2836,6 +2845,7 @@ valid_fb: intel_state->vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation, + intel_plane_uses_fence(intel_state), &intel_state->flags); mutex_unlock(&dev->struct_mutex); if (IS_ERR(intel_state->vma)) { @@ -12730,6 +12740,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation, + intel_plane_uses_fence(to_intel_plane_state(new_state)), &to_intel_plane_state(new_state)->flags); if (!IS_ERR(vma)) to_intel_plane_state(new_state)->vma = vma; @@ -13143,6 +13154,7 @@ intel_legacy_cursor_update(struct drm_plane *plane, } else { vma = intel_pin_and_fence_fb_obj(fb, new_plane_state->rotation, + false, &to_intel_plane_state(new_plane_state)->flags); if (IS_ERR(vma)) { DRM_DEBUG_KMS("failed to pin object\n"); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 04fc4bd12329..80881218bfc9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1424,6 +1424,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation, + bool uses_fence, unsigned long *out_flags); void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags); struct drm_framebuffer * diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 055f409f8b75..6f12adc06365 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -215,7 +215,7 @@ static int intelfb_create(struct drm_fb_helper *helper, */ vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_MODE_ROTATE_0, - &flags); + false, &flags); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out_unlock; -- cgit v1.2.3 From cf1805e65802cf77243e2cdcf1b265240f70eda2 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 21 Feb 2018 19:31:01 +0200 Subject: drm/i915: Clean up fbc vs. 
plane checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's record the information whether a plane can do fbc or not under struct inte_plane. v2: Rebase due to i9xx_plane_id Handle BDW/HSW correctly v3: Move inte_fbc_init() back since we depend on it happening even with i915.disable_display, and populate fbc->possible_framebuffer_bits directly from the plane init code instead v4: Add note about plane A being tied to pipe A on HSW+ Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180221173101.19385-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180221160235.11134-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 41 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_fbc.c | 26 ++--------------------- 3 files changed, 44 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a991195e5354..9ca8ba2c894c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13215,6 +13215,32 @@ static const struct drm_plane_funcs intel_cursor_plane_funcs = { .format_mod_supported = intel_cursor_plane_format_mod_supported, }; +static bool i9xx_plane_has_fbc(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane) +{ + if (!HAS_FBC(dev_priv)) + return false; + + if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + return i9xx_plane == PLANE_A; /* tied to pipe A */ + else if (IS_IVYBRIDGE(dev_priv)) + return i9xx_plane == PLANE_A || i9xx_plane == PLANE_B || + i9xx_plane == PLANE_C; + else if (INTEL_GEN(dev_priv) >= 4) + return i9xx_plane == PLANE_A || i9xx_plane == PLANE_B; + else + return i9xx_plane == PLANE_A; +} + +static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id) +{ + if (!HAS_FBC(dev_priv)) + return false; + + return pipe == PIPE_A && plane_id == PLANE_PRIMARY; +} + static struct intel_plane * intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) { @@ -13257,6 +13283,21 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->i9xx_plane = (enum i9xx_plane_id) pipe; primary->id = PLANE_PRIMARY; primary->frontbuffer_bit = INTEL_FRONTBUFFER(pipe, primary->id); + + if (INTEL_GEN(dev_priv) >= 9) + primary->has_fbc = skl_plane_has_fbc(dev_priv, + primary->pipe, + primary->id); + else + primary->has_fbc = i9xx_plane_has_fbc(dev_priv, + primary->i9xx_plane); + + if (primary->has_fbc) { + struct intel_fbc *fbc = &dev_priv->fbc; + + fbc->possible_framebuffer_bits |= primary->frontbuffer_bit; + } + primary->check_plane = intel_check_primary_plane; if (INTEL_GEN(dev_priv) >= 9) { diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 80881218bfc9..f87e81deb7c3 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -853,6 +853,7 @@ struct intel_plane { enum plane_id id; enum pipe pipe; bool can_scale; + bool has_fbc; int max_downscale; uint32_t frontbuffer_bit; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 2cc2eada5576..17f6d5deeb38 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -46,16 +46,6 @@ static inline bool fbc_supported(struct drm_i915_private *dev_priv) return HAS_FBC(dev_priv); } -static inline 
bool fbc_on_pipe_a_only(struct drm_i915_private *dev_priv) -{ - return IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8; -} - -static inline bool fbc_on_plane_a_only(struct drm_i915_private *dev_priv) -{ - return INTEL_GEN(dev_priv) < 4; -} - static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv) { return INTEL_GEN(dev_priv) <= 3; @@ -1095,13 +1085,10 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, struct intel_crtc_state *crtc_state; struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); - if (!plane_state->base.visible) + if (!plane->has_fbc) continue; - if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A) - continue; - - if (fbc_on_plane_a_only(dev_priv) && plane->i9xx_plane != PLANE_A) + if (!plane_state->base.visible) continue; crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -1358,7 +1345,6 @@ static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) void intel_fbc_init(struct drm_i915_private *dev_priv) { struct intel_fbc *fbc = &dev_priv->fbc; - enum pipe pipe; INIT_WORK(&fbc->work.work, intel_fbc_work_fn); INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn); @@ -1379,14 +1365,6 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) return; } - for_each_pipe(dev_priv, pipe) { - fbc->possible_framebuffer_bits |= - INTEL_FRONTBUFFER(pipe, PLANE_PRIMARY); - - if (fbc_on_pipe_a_only(dev_priv)) - break; - } - /* This value was pulled out of someone's hat */ if (INTEL_GEN(dev_priv) <= 4 && !IS_GM45(dev_priv)) I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT); -- cgit v1.2.3 From 32febd91951eb40d9e9437f10b967c87715d8ef5 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 21 Feb 2018 18:02:33 +0200 Subject: drm/i915: Require fence only for FBC capable planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As only a subset of primary planes are FBC capable there's no need to waste fences on all of them. So let's skip the fence if the plane isn't even fbc capable. In the future we might extend this to skip the fence even for FBC capable planes if the crtc and/or plane state isn't suitable for FBC. Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180221160235.11134-5-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9ca8ba2c894c..dd9e280365f9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2072,7 +2072,7 @@ static bool intel_plane_uses_fence(const struct intel_plane_state *plane_state) struct intel_plane *plane = to_intel_plane(plane_state->base.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - return INTEL_GEN(dev_priv) < 4 || plane->id == PLANE_PRIMARY; + return INTEL_GEN(dev_priv) < 4 || plane->has_fbc; } struct i915_vma * -- cgit v1.2.3 From ef1a19146827b11d0c8024f9401774703bcb3a48 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 21 Feb 2018 18:02:34 +0200 Subject: drm/i915: Extract intel_plane_{pin,unpin}_fb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've replicated the fb pin/unpin code in a few places. Pull it into convenint helpers. Slight change in locking behaviour as intel_cleanup_plane_fb() now grab struct_mutex unconditionally. 
v2: Change the locking to be symmetric between pin and unpin Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180221160235.11134-6-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 96 +++++++++++++++++------------------- 1 file changed, 45 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index dd9e280365f9..2ec3ad2d0583 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12656,6 +12656,42 @@ static void add_rps_boost_after_vblank(struct drm_crtc *crtc, add_wait_queue(drm_crtc_vblank_waitqueue(crtc), &wait->wait); } +static int intel_plane_pin_fb(struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct drm_framebuffer *fb = plane_state->base.fb; + struct i915_vma *vma; + + if (plane->id == PLANE_CURSOR && + INTEL_INFO(dev_priv)->cursor_needs_physical) { + struct drm_i915_gem_object *obj = intel_fb_obj(fb); + const int align = intel_cursor_alignment(dev_priv); + + return i915_gem_object_attach_phys(obj, align); + } + + vma = intel_pin_and_fence_fb_obj(fb, + plane_state->base.rotation, + intel_plane_uses_fence(plane_state), + &plane_state->flags); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + plane_state->vma = vma; + + return 0; +} + +static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) +{ + struct i915_vma *vma; + + vma = fetch_and_zero(&old_plane_state->vma); + if (vma) + intel_unpin_fb_vma(vma, old_plane_state->flags); +} + /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for @@ -12730,23 +12766,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, return ret; } - if (plane->type == DRM_PLANE_TYPE_CURSOR && - INTEL_INFO(dev_priv)->cursor_needs_physical) { - const int align = intel_cursor_alignment(dev_priv); - - ret = i915_gem_object_attach_phys(obj, align); - } else { - struct i915_vma *vma; - - vma = intel_pin_and_fence_fb_obj(fb, - new_state->rotation, - intel_plane_uses_fence(to_intel_plane_state(new_state)), - &to_intel_plane_state(new_state)->flags); - if (!IS_ERR(vma)) - to_intel_plane_state(new_state)->vma = vma; - else - ret = PTR_ERR(vma); - } + ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); @@ -12790,15 +12810,12 @@ void intel_cleanup_plane_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct i915_vma *vma; + struct drm_i915_private *dev_priv = to_i915(plane->dev); /* Should only be called after a successful intel_prepare_plane_fb()! 
*/ - vma = fetch_and_zero(&to_intel_plane_state(old_state)->vma); - if (vma) { - mutex_lock(&plane->dev->struct_mutex); - intel_unpin_fb_vma(vma, to_intel_plane_state(old_state)->flags); - mutex_unlock(&plane->dev->struct_mutex); - } + mutex_lock(&dev_priv->drm.struct_mutex); + intel_plane_unpin_fb(to_intel_plane_state(old_state)); + mutex_unlock(&dev_priv->drm.struct_mutex); } int @@ -13084,7 +13101,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_framebuffer *old_fb; struct drm_crtc_state *crtc_state = crtc->state; - struct i915_vma *old_vma, *vma; /* * When crtc is inactive or there is a modeset pending, @@ -13143,28 +13159,9 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_free; - if (INTEL_INFO(dev_priv)->cursor_needs_physical) { - int align = intel_cursor_alignment(dev_priv); - - ret = i915_gem_object_attach_phys(intel_fb_obj(fb), align); - if (ret) { - DRM_DEBUG_KMS("failed to attach phys object\n"); - goto out_unlock; - } - } else { - vma = intel_pin_and_fence_fb_obj(fb, - new_plane_state->rotation, - false, - &to_intel_plane_state(new_plane_state)->flags); - if (IS_ERR(vma)) { - DRM_DEBUG_KMS("failed to pin object\n"); - - ret = PTR_ERR(vma); - goto out_unlock; - } - - to_intel_plane_state(new_plane_state)->vma = vma; - } + ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state)); + if (ret) + goto out_unlock; old_fb = old_plane_state->fb; @@ -13184,10 +13181,7 @@ intel_legacy_cursor_update(struct drm_plane *plane, intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc)); } - old_vma = fetch_and_zero(&to_intel_plane_state(old_plane_state)->vma); - if (old_vma) - intel_unpin_fb_vma(old_vma, - to_intel_plane_state(old_plane_state)->flags); + intel_plane_unpin_fb(to_intel_plane_state(old_plane_state)); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); -- cgit v1.2.3 From 61b8b359d02e33aa8b378fdb974c2e2a8fb932dc Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 21 Feb 2018 18:02:35 +0200 Subject: drm/i915: Add a FIXME about FBC vs. fence. 90/270 degree rotation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the FBC code doesn't handle the 90/270 degree rotated case correctly. We would need the GTT tracking to monitor the fence on the normal GTT view (the rotated view doesn't even have a fence). Not quite sure how we should program the fence Y offset etc. in that case. For now we'll end up disabling FBC with 90/270 degree rotation. Add a FIXME to remind people about this fact. v2: Reword the text (Chris) Move the FIXME to the fbc code Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180221160235.11134-7-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_fbc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 17f6d5deeb38..bd8181310879 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -810,6 +810,12 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) * Note that is possible for a tiled surface to be unmappable (and * so have no fence associated with it) due to aperture constaints * at the time of pinning. + * + * FIXME with 90/270 degree rotation we should use the fence on + * the normal GTT view (the rotated view doesn't even have a + * fence). Would need changes to the FBC fence Y offset as well. 
+ * For now this will effecively disable FBC with 90/270 degree + * rotation. */ if (!(cache->flags & PLANE_HAS_FENCE)) { fbc->no_fbc_reason = "framebuffer not tiled or fenced"; -- cgit v1.2.3 From ff63861c15ecf451db34a47daa75b463ed4293f8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 20:10:29 +0200 Subject: drm/i915: Use the correct power domain for aux ch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Select the aux power domain based on the aux ch rather than based on the port. Now we can rid ourselves of the port E FIXME as well. v2: Split from the enum aux_ch patch (Rodrigo) Reviewed-by: Rodrigo Vivi #v1 Reviewed-by: Chris Wilson #v1 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222181036.15251-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_dp.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 1868f73f730c..388184bb9f3d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -6020,7 +6020,7 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) encoder->hpd_pin = intel_hpd_pin_default(dev_priv, encoder->port); - switch (encoder->port) { + switch (intel_aux_port(dev_priv, encoder->port)) { case PORT_A: intel_dp->aux_power_domain = POWER_DOMAIN_AUX_A; break; @@ -6033,10 +6033,6 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) case PORT_D: intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; break; - case PORT_E: - /* FIXME: Check VBT for actual wiring of PORT E */ - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; - break; case PORT_F: intel_dp->aux_power_domain = POWER_DOMAIN_AUX_F; break; -- cgit v1.2.3 From bdabdb635010a3bb973ace5590a61008291d5bd5 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 20:10:30 +0200 Subject: drm/i915: Add enum aux_ch and clean up the aux init to use it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we no longer have a 1:1 correspondence between ports and AUX channels, let's give AUX channels their own enum. Makes it easier to tell the apples from the oranges, and we get rid of the port E AUX power domain FIXME since we now derive the power domain from the actual AUX CH. 
v2: Rebase due to AUX F v3: Split out the power domain fix (Rodrigo) Reviewed-by: Rodrigo Vivi #v2 Reviewed-by: Chris Wilson #v2 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222181036.15251-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_reg.h | 8 +- drivers/gpu/drm/i915/intel_display.h | 11 ++ drivers/gpu/drm/i915/intel_dp.c | 236 +++++++++++++++++------------------ drivers/gpu/drm/i915/intel_drv.h | 1 + 4 files changed, 131 insertions(+), 125 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2e548fb72170..5af9e529c165 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5353,8 +5353,8 @@ enum { #define _DPF_AUX_CH_DATA4 (dev_priv->info.display_mmio_offset + 0x64520) #define _DPF_AUX_CH_DATA5 (dev_priv->info.display_mmio_offset + 0x64524) -#define DP_AUX_CH_CTL(port) _MMIO_PORT(port, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL) -#define DP_AUX_CH_DATA(port, i) _MMIO(_PORT(port, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ +#define DP_AUX_CH_CTL(aux_ch) _MMIO_PORT(aux_ch, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL) +#define DP_AUX_CH_DATA(aux_ch, i) _MMIO(_PORT(aux_ch, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ #define DP_AUX_CH_CTL_SEND_BUSY (1 << 31) #define DP_AUX_CH_CTL_DONE (1 << 30) @@ -7881,8 +7881,8 @@ enum { #define _PCH_DPD_AUX_CH_DATA4 0xe4320 #define _PCH_DPD_AUX_CH_DATA5 0xe4324 -#define PCH_DP_AUX_CH_CTL(port) _MMIO_PORT((port) - PORT_B, _PCH_DPB_AUX_CH_CTL, _PCH_DPC_AUX_CH_CTL) -#define PCH_DP_AUX_CH_DATA(port, i) _MMIO(_PORT((port) - PORT_B, _PCH_DPB_AUX_CH_DATA1, _PCH_DPC_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ +#define PCH_DP_AUX_CH_CTL(aux_ch) _MMIO_PORT((aux_ch) - AUX_CH_B, _PCH_DPB_AUX_CH_CTL, _PCH_DPC_AUX_CH_CTL) +#define PCH_DP_AUX_CH_DATA(aux_ch, i) _MMIO(_PORT((aux_ch) - AUX_CH_B, _PCH_DPB_AUX_CH_DATA1, _PCH_DPC_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ /* CPT */ #define PORT_TRANS_A_SEL_CPT 0 diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h index c4042e342f50..f5733a2576e7 100644 --- a/drivers/gpu/drm/i915/intel_display.h +++ b/drivers/gpu/drm/i915/intel_display.h @@ -139,6 +139,17 @@ enum dpio_phy { #define I915_NUM_PHYS_VLV 2 +enum aux_ch { + AUX_CH_A, + AUX_CH_B, + AUX_CH_C, + AUX_CH_D, + _AUX_CH_E, /* does not exist */ + AUX_CH_F, +}; + +#define aux_ch_name(a) ((a) + 'A') + enum intel_display_power_domain { POWER_DOMAIN_PIPE_A, POWER_DOMAIN_PIPE_B, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 388184bb9f3d..d105abed6c97 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1307,171 +1307,194 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) return ret; } -static enum port intel_aux_port(struct drm_i915_private *dev_priv, - enum port port) +static enum aux_ch intel_aux_ch(struct intel_dp *intel_dp) { + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; const struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; - enum port aux_port; + enum aux_ch aux_ch; if (!info->alternate_aux_channel) { + aux_ch = (enum aux_ch) port; + DRM_DEBUG_KMS("using AUX %c for port %c (platform default)\n", - port_name(port), port_name(port)); - return port; + aux_ch_name(aux_ch), port_name(port)); + return aux_ch; } switch (info->alternate_aux_channel) { 
case DP_AUX_A: - aux_port = PORT_A; + aux_ch = AUX_CH_A; break; case DP_AUX_B: - aux_port = PORT_B; + aux_ch = AUX_CH_B; break; case DP_AUX_C: - aux_port = PORT_C; + aux_ch = AUX_CH_C; break; case DP_AUX_D: - aux_port = PORT_D; + aux_ch = AUX_CH_D; break; case DP_AUX_F: - aux_port = PORT_F; + aux_ch = AUX_CH_F; break; default: MISSING_CASE(info->alternate_aux_channel); - aux_port = PORT_A; + aux_ch = AUX_CH_A; break; } DRM_DEBUG_KMS("using AUX %c for port %c (VBT)\n", - port_name(aux_port), port_name(port)); + aux_ch_name(aux_ch), port_name(port)); - return aux_port; + return aux_ch; +} + +static enum intel_display_power_domain +intel_aux_power_domain(struct intel_dp *intel_dp) +{ + switch (intel_dp->aux_ch) { + case AUX_CH_A: + return POWER_DOMAIN_AUX_A; + case AUX_CH_B: + return POWER_DOMAIN_AUX_B; + case AUX_CH_C: + return POWER_DOMAIN_AUX_C; + case AUX_CH_D: + return POWER_DOMAIN_AUX_D; + case AUX_CH_F: + return POWER_DOMAIN_AUX_F; + default: + MISSING_CASE(intel_dp->aux_ch); + return POWER_DOMAIN_AUX_A; + } } static i915_reg_t g4x_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum port port) + enum aux_ch aux_ch) { - switch (port) { - case PORT_B: - case PORT_C: - case PORT_D: - return DP_AUX_CH_CTL(port); + switch (aux_ch) { + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + return DP_AUX_CH_CTL(aux_ch); default: - MISSING_CASE(port); - return DP_AUX_CH_CTL(PORT_B); + MISSING_CASE(aux_ch); + return DP_AUX_CH_CTL(AUX_CH_B); } } static i915_reg_t g4x_aux_data_reg(struct drm_i915_private *dev_priv, - enum port port, int index) + enum aux_ch aux_ch, int index) { - switch (port) { - case PORT_B: - case PORT_C: - case PORT_D: - return DP_AUX_CH_DATA(port, index); + switch (aux_ch) { + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + return DP_AUX_CH_DATA(aux_ch, index); default: - MISSING_CASE(port); - return DP_AUX_CH_DATA(PORT_B, index); + MISSING_CASE(aux_ch); + return DP_AUX_CH_DATA(AUX_CH_B, index); } } static i915_reg_t ilk_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum port port) -{ - switch (port) { - case PORT_A: - return DP_AUX_CH_CTL(port); - case PORT_B: - case PORT_C: - case PORT_D: - return PCH_DP_AUX_CH_CTL(port); + enum aux_ch aux_ch) +{ + switch (aux_ch) { + case AUX_CH_A: + return DP_AUX_CH_CTL(aux_ch); + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + return PCH_DP_AUX_CH_CTL(aux_ch); default: - MISSING_CASE(port); - return DP_AUX_CH_CTL(PORT_A); + MISSING_CASE(aux_ch); + return DP_AUX_CH_CTL(AUX_CH_A); } } static i915_reg_t ilk_aux_data_reg(struct drm_i915_private *dev_priv, - enum port port, int index) -{ - switch (port) { - case PORT_A: - return DP_AUX_CH_DATA(port, index); - case PORT_B: - case PORT_C: - case PORT_D: - return PCH_DP_AUX_CH_DATA(port, index); + enum aux_ch aux_ch, int index) +{ + switch (aux_ch) { + case AUX_CH_A: + return DP_AUX_CH_DATA(aux_ch, index); + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + return PCH_DP_AUX_CH_DATA(aux_ch, index); default: - MISSING_CASE(port); - return DP_AUX_CH_DATA(PORT_A, index); + MISSING_CASE(aux_ch); + return DP_AUX_CH_DATA(AUX_CH_A, index); } } static i915_reg_t skl_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum port port) -{ - switch (port) { - case PORT_A: - case PORT_B: - case PORT_C: - case PORT_D: - case PORT_F: - return DP_AUX_CH_CTL(port); + enum aux_ch aux_ch) +{ + switch (aux_ch) { + case AUX_CH_A: + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + case AUX_CH_F: + return DP_AUX_CH_CTL(aux_ch); default: - MISSING_CASE(port); - return DP_AUX_CH_CTL(PORT_A); + 
MISSING_CASE(aux_ch); + return DP_AUX_CH_CTL(AUX_CH_A); } } static i915_reg_t skl_aux_data_reg(struct drm_i915_private *dev_priv, - enum port port, int index) -{ - switch (port) { - case PORT_A: - case PORT_B: - case PORT_C: - case PORT_D: - case PORT_F: - return DP_AUX_CH_DATA(port, index); + enum aux_ch aux_ch, int index) +{ + switch (aux_ch) { + case AUX_CH_A: + case AUX_CH_B: + case AUX_CH_C: + case AUX_CH_D: + case AUX_CH_F: + return DP_AUX_CH_DATA(aux_ch, index); default: - MISSING_CASE(port); - return DP_AUX_CH_DATA(PORT_A, index); + MISSING_CASE(aux_ch); + return DP_AUX_CH_DATA(AUX_CH_A, index); } } static i915_reg_t intel_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum port port) + enum aux_ch aux_ch) { if (INTEL_GEN(dev_priv) >= 9) - return skl_aux_ctl_reg(dev_priv, port); + return skl_aux_ctl_reg(dev_priv, aux_ch); else if (HAS_PCH_SPLIT(dev_priv)) - return ilk_aux_ctl_reg(dev_priv, port); + return ilk_aux_ctl_reg(dev_priv, aux_ch); else - return g4x_aux_ctl_reg(dev_priv, port); + return g4x_aux_ctl_reg(dev_priv, aux_ch); } static i915_reg_t intel_aux_data_reg(struct drm_i915_private *dev_priv, - enum port port, int index) + enum aux_ch aux_ch, int index) { if (INTEL_GEN(dev_priv) >= 9) - return skl_aux_data_reg(dev_priv, port, index); + return skl_aux_data_reg(dev_priv, aux_ch, index); else if (HAS_PCH_SPLIT(dev_priv)) - return ilk_aux_data_reg(dev_priv, port, index); + return ilk_aux_data_reg(dev_priv, aux_ch, index); else - return g4x_aux_data_reg(dev_priv, port, index); + return g4x_aux_data_reg(dev_priv, aux_ch, index); } static void intel_aux_reg_init(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - enum port port = intel_aux_port(dev_priv, - dp_to_dig_port(intel_dp)->base.port); + enum aux_ch aux_ch = intel_dp->aux_ch; int i; - intel_dp->aux_ch_ctl_reg = intel_aux_ctl_reg(dev_priv, port); + intel_dp->aux_ch_ctl_reg = intel_aux_ctl_reg(dev_priv, aux_ch); for (i = 0; i < ARRAY_SIZE(intel_dp->aux_ch_data_reg); i++) - intel_dp->aux_ch_data_reg[i] = intel_aux_data_reg(dev_priv, port, i); + intel_dp->aux_ch_data_reg[i] = intel_aux_data_reg(dev_priv, aux_ch, i); } static void @@ -1483,14 +1506,17 @@ intel_dp_aux_fini(struct intel_dp *intel_dp) static void intel_dp_aux_init(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - enum port port = intel_dig_port->base.port; + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + + intel_dp->aux_ch = intel_aux_ch(intel_dp); + intel_dp->aux_power_domain = intel_aux_power_domain(intel_dp); intel_aux_reg_init(intel_dp); drm_dp_aux_init(&intel_dp->aux); /* Failure to allocate our preferred name is not critical */ - intel_dp->aux.name = kasprintf(GFP_KERNEL, "DPDDC-%c", port_name(port)); + intel_dp->aux.name = kasprintf(GFP_KERNEL, "DPDDC-%c", + port_name(encoder->port)); intel_dp->aux.transfer = intel_dp_aux_transfer; } @@ -6009,38 +6035,6 @@ out_vdd_off: return false; } -/* Set up the hotplug pin and aux power domain. 
*/ -static void -intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) -{ - struct intel_encoder *encoder = &intel_dig_port->base; - struct intel_dp *intel_dp = &intel_dig_port->dp; - struct intel_encoder *intel_encoder = &intel_dig_port->base; - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - - encoder->hpd_pin = intel_hpd_pin_default(dev_priv, encoder->port); - - switch (intel_aux_port(dev_priv, encoder->port)) { - case PORT_A: - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_A; - break; - case PORT_B: - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_B; - break; - case PORT_C: - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_C; - break; - case PORT_D: - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; - break; - case PORT_F: - intel_dp->aux_power_domain = POWER_DOMAIN_AUX_F; - break; - default: - MISSING_CASE(encoder->port); - } -} - static void intel_dp_modeset_retry_work_fn(struct work_struct *work) { struct intel_connector *intel_connector; @@ -6146,7 +6140,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; - intel_dp_init_connector_port_info(intel_dig_port); + intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); intel_dp_aux_init(intel_dp); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f87e81deb7c3..ad7b6a62d6a6 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -971,6 +971,7 @@ struct intel_dp { bool detect_done; bool channel_eq_status; bool reset_link_params; + enum aux_ch aux_ch; uint8_t dpcd[DP_RECEIVER_CAP_SIZE]; uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; -- cgit v1.2.3 From 4904fa66f72d06b2a0a7a57e9196fb3102db0840 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 20:10:31 +0200 Subject: drm/i915: Nuke aux regs from intel_dp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just store function pointers that give us the correct register offsets instead of storing the register offsets themselves. Slightly less efficient perhaps but saves a few bytes and better matches how we do things elsewhere. 
v2: Keep a local array of data registers (Chris) Reviewed-by: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222181036.15251-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_dp.c | 85 ++++++++++++++++++++-------------------- drivers/gpu/drm/i915/intel_drv.h | 5 ++- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d105abed6c97..0326a3680364 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -934,7 +934,7 @@ static uint32_t intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg; + i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); uint32_t status; bool done; @@ -1068,7 +1068,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg; + i915_reg_t ch_ctl, ch_data[5]; uint32_t aux_clock_divider; int i, ret, recv_bytes; uint32_t status; @@ -1076,6 +1076,10 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, bool has_aux_irq = HAS_AUX_IRQ(dev_priv); bool vdd; + ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); + for (i = 0; i < ARRAY_SIZE(ch_data); i++) + ch_data[i] = intel_dp->aux_ch_data_reg(intel_dp, i); + pps_lock(intel_dp); /* @@ -1132,7 +1136,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, for (try = 0; try < 5; try++) { /* Load the send data into the aux channel data registers */ for (i = 0; i < send_bytes; i += 4) - I915_WRITE(intel_dp->aux_ch_data_reg[i >> 2], + I915_WRITE(ch_data[i >> 2], intel_dp_pack_aux(send + i, send_bytes - i)); @@ -1217,7 +1221,7 @@ done: recv_bytes = recv_size; for (i = 0; i < recv_bytes; i += 4) - intel_dp_unpack_aux(I915_READ(intel_dp->aux_ch_data_reg[i >> 2]), + intel_dp_unpack_aux(I915_READ(ch_data[i >> 2]), recv + i, recv_bytes - i); ret = recv_bytes; @@ -1372,9 +1376,11 @@ intel_aux_power_domain(struct intel_dp *intel_dp) } } -static i915_reg_t g4x_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum aux_ch aux_ch) +static i915_reg_t g4x_aux_ctl_reg(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + switch (aux_ch) { case AUX_CH_B: case AUX_CH_C: @@ -1386,9 +1392,11 @@ static i915_reg_t g4x_aux_ctl_reg(struct drm_i915_private *dev_priv, } } -static i915_reg_t g4x_aux_data_reg(struct drm_i915_private *dev_priv, - enum aux_ch aux_ch, int index) +static i915_reg_t g4x_aux_data_reg(struct intel_dp *intel_dp, int index) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + switch (aux_ch) { case AUX_CH_B: case AUX_CH_C: @@ -1400,9 +1408,11 @@ static i915_reg_t g4x_aux_data_reg(struct drm_i915_private *dev_priv, } } -static i915_reg_t ilk_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum aux_ch aux_ch) +static i915_reg_t ilk_aux_ctl_reg(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + switch (aux_ch) { case AUX_CH_A: return DP_AUX_CH_CTL(aux_ch); @@ -1416,9 +1426,11 @@ static i915_reg_t ilk_aux_ctl_reg(struct drm_i915_private *dev_priv, } } -static i915_reg_t ilk_aux_data_reg(struct drm_i915_private *dev_priv, - enum 
aux_ch aux_ch, int index) +static i915_reg_t ilk_aux_data_reg(struct intel_dp *intel_dp, int index) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + switch (aux_ch) { case AUX_CH_A: return DP_AUX_CH_DATA(aux_ch, index); @@ -1432,9 +1444,11 @@ static i915_reg_t ilk_aux_data_reg(struct drm_i915_private *dev_priv, } } -static i915_reg_t skl_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum aux_ch aux_ch) +static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + switch (aux_ch) { case AUX_CH_A: case AUX_CH_B: @@ -1448,9 +1462,11 @@ static i915_reg_t skl_aux_ctl_reg(struct drm_i915_private *dev_priv, } } -static i915_reg_t skl_aux_data_reg(struct drm_i915_private *dev_priv, - enum aux_ch aux_ch, int index) +static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + enum aux_ch aux_ch = intel_dp->aux_ch; + switch (aux_ch) { case AUX_CH_A: case AUX_CH_B: @@ -1464,37 +1480,20 @@ static i915_reg_t skl_aux_data_reg(struct drm_i915_private *dev_priv, } } -static i915_reg_t intel_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum aux_ch aux_ch) -{ - if (INTEL_GEN(dev_priv) >= 9) - return skl_aux_ctl_reg(dev_priv, aux_ch); - else if (HAS_PCH_SPLIT(dev_priv)) - return ilk_aux_ctl_reg(dev_priv, aux_ch); - else - return g4x_aux_ctl_reg(dev_priv, aux_ch); -} - -static i915_reg_t intel_aux_data_reg(struct drm_i915_private *dev_priv, - enum aux_ch aux_ch, int index) -{ - if (INTEL_GEN(dev_priv) >= 9) - return skl_aux_data_reg(dev_priv, aux_ch, index); - else if (HAS_PCH_SPLIT(dev_priv)) - return ilk_aux_data_reg(dev_priv, aux_ch, index); - else - return g4x_aux_data_reg(dev_priv, aux_ch, index); -} - static void intel_aux_reg_init(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - enum aux_ch aux_ch = intel_dp->aux_ch; - int i; - intel_dp->aux_ch_ctl_reg = intel_aux_ctl_reg(dev_priv, aux_ch); - for (i = 0; i < ARRAY_SIZE(intel_dp->aux_ch_data_reg); i++) - intel_dp->aux_ch_data_reg[i] = intel_aux_data_reg(dev_priv, aux_ch, i); + if (INTEL_GEN(dev_priv) >= 9) { + intel_dp->aux_ch_ctl_reg = skl_aux_ctl_reg; + intel_dp->aux_ch_data_reg = skl_aux_data_reg; + } else if (HAS_PCH_SPLIT(dev_priv)) { + intel_dp->aux_ch_ctl_reg = ilk_aux_ctl_reg; + intel_dp->aux_ch_data_reg = ilk_aux_data_reg; + } else { + intel_dp->aux_ch_ctl_reg = g4x_aux_ctl_reg; + intel_dp->aux_ch_data_reg = g4x_aux_data_reg; + } } static void diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ad7b6a62d6a6..95e2b699f253 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -960,8 +960,6 @@ struct intel_dp_compliance { struct intel_dp { i915_reg_t output_reg; - i915_reg_t aux_ch_ctl_reg; - i915_reg_t aux_ch_data_reg[5]; uint32_t DP; int link_rate; uint8_t lane_count; @@ -1046,6 +1044,9 @@ struct intel_dp { int send_bytes, uint32_t aux_clock_divider); + i915_reg_t (*aux_ch_ctl_reg)(struct intel_dp *dp); + i915_reg_t (*aux_ch_data_reg)(struct intel_dp *dp, int index); + /* This is called before a link training is starterd */ void (*prepare_link_retrain)(struct intel_dp *intel_dp); -- cgit v1.2.3 From 91e939aef02fa30f7ec14fa12749f6af0b3e35ff Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 20:10:32 +0200 Subject: drm/i915: 
Collect aux ch vfunc setup into intel_dp_aux_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collect all the aux ch vfunc assignments into intel_dp_aux_init() instead of having it spread around. Reviewed-by: Chris Wilson Reviewed-by: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222181036.15251-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_dp.c | 53 +++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0326a3680364..475a19d76a49 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1480,9 +1480,20 @@ static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index) } } -static void intel_aux_reg_init(struct intel_dp *intel_dp) +static void +intel_dp_aux_fini(struct intel_dp *intel_dp) +{ + kfree(intel_dp->aux.name); +} + +static void +intel_dp_aux_init(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + + intel_dp->aux_ch = intel_aux_ch(intel_dp); + intel_dp->aux_power_domain = intel_aux_power_domain(intel_dp); if (INTEL_GEN(dev_priv) >= 9) { intel_dp->aux_ch_ctl_reg = skl_aux_ctl_reg; @@ -1494,23 +1505,21 @@ static void intel_aux_reg_init(struct intel_dp *intel_dp) intel_dp->aux_ch_ctl_reg = g4x_aux_ctl_reg; intel_dp->aux_ch_data_reg = g4x_aux_data_reg; } -} -static void -intel_dp_aux_fini(struct intel_dp *intel_dp) -{ - kfree(intel_dp->aux.name); -} - -static void -intel_dp_aux_init(struct intel_dp *intel_dp) -{ - struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + if (INTEL_GEN(dev_priv) >= 9) + intel_dp->get_aux_clock_divider = skl_get_aux_clock_divider; + else if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + intel_dp->get_aux_clock_divider = hsw_get_aux_clock_divider; + else if (HAS_PCH_SPLIT(dev_priv)) + intel_dp->get_aux_clock_divider = ilk_get_aux_clock_divider; + else + intel_dp->get_aux_clock_divider = g4x_get_aux_clock_divider; - intel_dp->aux_ch = intel_aux_ch(intel_dp); - intel_dp->aux_power_domain = intel_aux_power_domain(intel_dp); + if (INTEL_GEN(dev_priv) >= 9) + intel_dp->get_aux_send_ctl = skl_get_aux_send_ctl; + else + intel_dp->get_aux_send_ctl = g4x_get_aux_send_ctl; - intel_aux_reg_init(intel_dp); drm_dp_aux_init(&intel_dp->aux); /* Failure to allocate our preferred name is not critical */ @@ -6085,20 +6094,6 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dp->active_pipe = INVALID_PIPE; /* intel_dp vfuncs */ - if (INTEL_GEN(dev_priv) >= 9) - intel_dp->get_aux_clock_divider = skl_get_aux_clock_divider; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - intel_dp->get_aux_clock_divider = hsw_get_aux_clock_divider; - else if (HAS_PCH_SPLIT(dev_priv)) - intel_dp->get_aux_clock_divider = ilk_get_aux_clock_divider; - else - intel_dp->get_aux_clock_divider = g4x_get_aux_clock_divider; - - if (INTEL_GEN(dev_priv) >= 9) - intel_dp->get_aux_send_ctl = skl_get_aux_send_ctl; - else - intel_dp->get_aux_send_ctl = g4x_get_aux_send_ctl; - if (HAS_DDI(dev_priv)) intel_dp->prepare_link_retrain = intel_ddi_prepare_link_retrain; -- cgit v1.2.3 From e532be8971c606869ba420e34e85762a947d53cb Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Thu, 22 Feb 2018 09:24:05 -0800 Subject: drm/i915: Update missing parts after the rename to 
i915_request Mostly doc/print messages that were not updated after commit e61e0f51ba79 ("drm/i915: Rename drm_i915_gem_request to i915_request"). Signed-off-by: Michel Thierry Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180222172405.11386-1-michel.thierry@intel.com --- drivers/gpu/drm/i915/i915_gem_context.h | 2 +- drivers/gpu/drm/i915/i915_request.c | 4 ++-- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 1829dafe54b4..7854262ddfd9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -38,8 +38,8 @@ struct drm_file; struct drm_i915_private; struct drm_i915_file_private; -struct drm_i915_gem_request; struct i915_hw_ppgtt; +struct i915_request; struct i915_vma; struct intel_ring; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9b25270f2491..2265bb8ff4fa 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1213,12 +1213,12 @@ static bool __i915_wait_request_check_and_reset(struct i915_request *request) } /** - * i915_wait_request - wait until execution of request has finished + * i915_request_wait - wait until execution of request has finished * @rq: the request to wait upon * @flags: how to wait * @timeout: how long to wait in jiffies * - * i915_wait_request() waits for the request to be completed, for a + * i915_request_wait() waits for the request to be completed, for a * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an * unbounded wait). * diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 3edbb3e8c0e1..df7898c8edcb 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -876,7 +876,7 @@ static int igt_wait_reset(void *arg) timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { - pr_err("i915_wait_request failed on a stuck request: err=%ld\n", + pr_err("i915_request_wait failed on a stuck request: err=%ld\n", timeout); err = timeout; goto out_rq; -- cgit v1.2.3 From f6322eddaff7662e81178a28730e420bf934a512 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 22 Feb 2018 14:22:29 +0000 Subject: drm/i915/preemption: Allow preemption between submission ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes we need to boost the priority of an in-flight request, which may lead to the situation where the second submission port then contains a higher priority context than the first and so we need to inject a preemption event. To do so we must always check inside execlists_dequeue() whether there is a priority inversion between the ports themselves as well as the head of the priority sorted queue, and we cannot just skip dequeuing if the queue is empty. As Michał noted, this doesn't simply extend to handling more than 2-port submission, as we may need to reorder within the array of executing requests which themselves are lower priority than the first. A task for later! 
Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: Michel Thierry Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180222142229.14517-1-chris@chris-wilson.co.uk Reviewed-by: Michał Winiarski Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 + drivers/gpu/drm/i915/intel_guc_submission.c | 17 +-- drivers/gpu/drm/i915/intel_lrc.c | 161 ++++++++++++++++------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 10 ++ 4 files changed, 112 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index c31544406974..ce7fcf55ba18 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists)); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); + execlists->queue_priority = INT_MIN; execlists->queue = RB_ROOT; execlists->first = NULL; } @@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_lock_irq(&engine->timeline->lock); list_for_each_entry(rq, &engine->timeline->requests, link) print_request(m, rq, "\t\tE "); + drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); for (rb = execlists->first; rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 649113c7a3c2..586dde579903 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -75,6 +75,11 @@ * */ +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + static inline bool is_high_priority(struct intel_guc_client *client) { return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine) rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - if (!rb) - goto unlock; - if (port_isset(port)) { if (engine->i915->preempt_context) { struct guc_preempt_work *preempt_work = &engine->i915->guc.preempt_work[engine->id]; - if (rb_entry(rb, struct i915_priolist, node)->priority > + if (execlists->queue_priority > max(port_request(port)->priotree.priority, 0)) { execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); @@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine) } GEM_BUG_ON(port_isset(port)); - do { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + while (rb) { + struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { @@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } while (rb); + } done: + execlists->queue_priority = rb ? 
to_priolist(rb)->priority : INT_MIN; execlists->first = rb; if (submit) { port_assign(port, last); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 964885b5d7cb..14288743909f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->priotree.priority; +} + +static inline bool need_preempt(const struct intel_engine_cs *engine, + const struct i915_request *last, + int prio) +{ + return engine->i915->preempt_context && prio > max(rq_prio(last), 0); +} + /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context @@ -224,7 +241,7 @@ find_priolist: parent = &execlists->queue.rb_node; while (*parent) { rb = *parent; - p = rb_entry(rb, typeof(*p), node); + p = to_priolist(rb); if (prio > p->priority) { parent = &rb->rb_left; } else if (prio < p->priority) { @@ -264,7 +281,7 @@ find_priolist: if (first) execlists->first = &p->node; - return ptr_pack_bits(p, first, 1); + return p; } static void unwind_wa_tail(struct i915_request *rq) @@ -290,14 +307,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) __i915_request_unsubmit(rq); unwind_wa_tail(rq); - GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); - if (rq->priotree.priority != last_prio) { - p = lookup_priolist(engine, - &rq->priotree, - rq->priotree.priority); - p = ptr_mask_bits(p, 1); - - last_prio = rq->priotree.priority; + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != last_prio) { + last_prio = rq_prio(rq); + p = lookup_priolist(engine, &rq->priotree, last_prio); } list_add(&rq->priotree.link, &p->requests); @@ -397,10 +410,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n", engine->name, n, port[n].context_id, count, - rq->global_seqno); + rq->global_seqno, + rq_prio(rq)); } else { GEM_BUG_ON(!n); desc = 0; @@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine) _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)); + /* + * Switch to our empty preempt context so + * the state of the GPU is known (idle). 
+ */ GEM_TRACE("%s\n", engine->name); for (n = execlists_num_ports(&engine->execlists); --n; ) elsp_write(0, engine->execlists.elsp); elsp_write(ce->lrc_desc, engine->execlists.elsp); execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); + execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); } static void execlists_dequeue(struct intel_engine_cs *engine) @@ -495,8 +514,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - if (!rb) - goto unlock; if (last) { /* @@ -519,54 +536,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) goto unlock; - if (engine->i915->preempt_context && - rb_entry(rb, struct i915_priolist, node)->priority > - max(last->priotree.priority, 0)) { - /* - * Switch to our empty preempt context so - * the state of the GPU is known (idle). - */ + if (need_preempt(engine, last, execlists->queue_priority)) { inject_preempt_context(engine); - execlists_set_active(execlists, - EXECLISTS_ACTIVE_PREEMPT); goto unlock; - } else { - /* - * In theory, we could coalesce more requests onto - * the second port (the first port is active, with - * no preemptions pending). However, that means we - * then have to deal with the possible lite-restore - * of the second port (as we submit the ELSP, there - * may be a context-switch) but also we may complete - * the resubmission before the context-switch. Ergo, - * coalescing onto the second port will cause a - * preemption event, but we cannot predict whether - * that will affect port[0] or port[1]. - * - * If the second port is already active, we can wait - * until the next context-switch before contemplating - * new requests. The GPU will be busy and we should be - * able to resubmit the new ELSP before it idles, - * avoiding pipeline bubbles (momentary pauses where - * the driver is unable to keep up the supply of new - * work). - */ - if (port_count(&port[1])) - goto unlock; - - /* WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent - * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_breadcrumb() for - * where we prepare the padding after the - * end of the request. - */ - last->tail = last->wa_tail; } + + /* + * In theory, we could coalesce more requests onto + * the second port (the first port is active, with + * no preemptions pending). However, that means we + * then have to deal with the possible lite-restore + * of the second port (as we submit the ELSP, there + * may be a context-switch) but also we may complete + * the resubmission before the context-switch. Ergo, + * coalescing onto the second port will cause a + * preemption event, but we cannot predict whether + * that will affect port[0] or port[1]. + * + * If the second port is already active, we can wait + * until the next context-switch before contemplating + * new requests. The GPU will be busy and we should be + * able to resubmit the new ELSP before it idles, + * avoiding pipeline bubbles (momentary pauses where + * the driver is unable to keep up the supply of new + * work). However, we have to double check that the + * priorities of the ports haven't been switch. + */ + if (port_count(&port[1])) + goto unlock; + + /* + * WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == rq:TAIL as we resubmit the + * request. 
See gen8_emit_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; } - do { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + while (rb) { + struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { @@ -628,8 +639,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } while (rb); + } done: + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; execlists->first = rb; if (submit) port_assign(port, last); @@ -690,7 +702,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Flush the queued requests to the timeline list (for retiring). */ rb = execlists->first; while (rb) { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + struct i915_priolist *p = to_priolist(rb); list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { INIT_LIST_HEAD(&rq->priotree.link); @@ -708,7 +720,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - + execlists->queue_priority = INT_MIN; execlists->queue = RB_ROOT; execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); @@ -864,10 +876,11 @@ static void execlists_submission_tasklet(unsigned long data) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", engine->name, port->context_id, count, - rq ? rq->global_seqno : 0); + rq ? rq->global_seqno : 0, + rq ? rq_prio(rq) : 0); /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); @@ -912,15 +925,19 @@ static void execlists_submission_tasklet(unsigned long data) intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); } -static void insert_request(struct intel_engine_cs *engine, - struct i915_priotree *pt, - int prio) +static void queue_request(struct intel_engine_cs *engine, + struct i915_priotree *pt, + int prio) { - struct i915_priolist *p = lookup_priolist(engine, pt, prio); + list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); +} - list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); - if (ptr_unmask_bits(p, 1)) +static void submit_queue(struct intel_engine_cs *engine, int prio) +{ + if (prio > engine->execlists.queue_priority) { + engine->execlists.queue_priority = prio; tasklet_hi_schedule(&engine->execlists.tasklet); + } } static void execlists_submit_request(struct i915_request *request) @@ -931,7 +948,8 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences. 
*/ spin_lock_irqsave(&engine->timeline->lock, flags); - insert_request(engine, &request->priotree, request->priotree.priority); + queue_request(engine, &request->priotree, rq_prio(request)); + submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); GEM_BUG_ON(list_empty(&request->priotree.link)); @@ -987,7 +1005,7 @@ static void execlists_schedule(struct i915_request *request, int prio) * static void update_priorities(struct i915_priotree *pt, prio) { * list_for_each_entry(dep, &pt->signalers_list, signal_link) * update_priorities(dep->signal, prio) - * insert_request(pt); + * queue_request(pt); * } * but that may have unlimited recursion depth and so runs a very * real risk of overunning the kernel stack. Instead, we build @@ -1050,8 +1068,9 @@ static void execlists_schedule(struct i915_request *request, int prio) pt->priority = prio; if (!list_empty(&pt->link)) { __list_del_entry(&pt->link); - insert_request(engine, pt, prio); + queue_request(engine, pt, prio); } + submit_queue(engine, prio); } spin_unlock_irq(&engine->timeline->lock); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a9b83bf7e837..bbacf4d0f4cb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -257,6 +257,16 @@ struct intel_engine_execlists { */ unsigned int port_mask; + /** + * @queue_priority: Highest pending priority. + * + * When we add requests into the queue, or adjust the priority of + * executing requests, we compute the maximum priority of those + * pending requests. We can then use this value to determine if + * we need to preempt the executing requests to service the queue. + */ + int queue_priority; + /** * @queue: queue of requests, in priority lists */ -- cgit v1.2.3 From ba1c06a5727b59b4ac118a91cc9fe82c123acbab Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Mon, 26 Feb 2018 19:11:15 -0800 Subject: drm/i915/dp: Fix the order of platforms for setting DP source rates The usual if ladder order should be from newest to oldest platform. However the CNL conditional statement was misplaced. This patch sets the DP source for platforms starting from the newest to oldest. 
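For illustration, the resulting ladder (abridged from the diff below; sizes and max-rate handling omitted) reads newest first:

	if (IS_CANNONLAKE(dev_priv))
		source_rates = cnl_rates;	/* newest platform checked first */
	else if (IS_GEN9_LP(dev_priv))
		source_rates = bxt_rates;
	else if (IS_GEN9_BC(dev_priv))
		source_rates = skl_rates;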
Suggested-by: Jani Nikula Cc: Rodrigo Vivi Cc: Jani Nikula Signed-off-by: Manasi Navare Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1519701075-9894-1-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 475a19d76a49..81ca13e32cc7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -276,13 +276,13 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) /* This should only be done once */ WARN_ON(intel_dp->source_rates || intel_dp->num_source_rates); - if (IS_GEN9_LP(dev_priv)) { - source_rates = bxt_rates; - size = ARRAY_SIZE(bxt_rates); - } else if (IS_CANNONLAKE(dev_priv)) { + if (IS_CANNONLAKE(dev_priv)) { source_rates = cnl_rates; size = ARRAY_SIZE(cnl_rates); max_rate = cnl_max_source_rate(intel_dp); + } else if (IS_GEN9_LP(dev_priv)) { + source_rates = bxt_rates; + size = ARRAY_SIZE(bxt_rates); } else if (IS_GEN9_BC(dev_priv)) { source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); -- cgit v1.2.3 From 5028a4fb7d8d93438e2c6c1b15140ca44b1f647d Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Mon, 26 Feb 2018 17:37:59 +0100 Subject: drm/i915/guc: Fill preempt context once at init time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we're inhibiting context save of preempt context, we're no longer tracking the position of HEAD/TAIL. With GuC, we're adding a new breadcrumb for each preemption, which means that the HW will do more and more breadcrumb writes. Eventually the ring is filled, and we're submitting the preemption context with HEAD==TAIL==0, which won't result in breadcrumb write, but will trigger hangcheck instead. Instead of writing a new preempt breadcrumb for each preemption, let's just fill the ring once at init time (which also saves a couple of instructions in the tasklet). v2: Assert that context save restore is inhibited, don't assert on ring alignment. (Chris) v3: Cleanup checkpatch. 
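For a sense of scale (the ring size below is an assumed example, not taken from this patch): each injected breadcrumb used to consume GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32) = 8 * 4 = 32 bytes, so a hypothetical 4 KiB preempt ring would wrap back to HEAD == TAIL == 0 after 4096 / 32 = 128 preemptions, at which point no breadcrumb is written and hangcheck fires. With the ring filled once at init, the work queue item can instead always point at the constant tail GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64).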
Fixes: 517aaffe0c1b ("drm/i915/execlists: Inhibit context save/restore for the fake preempt context") Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Cc: Michel Thierry Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180226163800.21745-1-michal.winiarski@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_guc_submission.c | 96 +++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 586dde579903..8a8ad2fe158d 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -26,8 +26,14 @@ #include #include "intel_guc_submission.h" +#include "intel_lrc_reg.h" #include "i915_drv.h" +#define GUC_PREEMPT_FINISHED 0x1 +#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 +#define GUC_PREEMPT_BREADCRUMB_BYTES \ + (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS) + /** * DOC: GuC-based command submission * @@ -535,8 +541,6 @@ static void flush_ggtt_writes(struct i915_vma *vma) POSTING_READ_FW(GUC_STATUS); } -#define GUC_PREEMPT_FINISHED 0x1 -#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 static void inject_preempt_context(struct work_struct *work) { struct guc_preempt_work *preempt_work = @@ -546,37 +550,17 @@ static void inject_preempt_context(struct work_struct *work) preempt_work[engine->id]); struct intel_guc_client *client = guc->preempt_client; struct guc_stage_desc *stage_desc = __get_stage_desc(client); - struct intel_ring *ring = client->owner->engine[engine->id].ring; u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner, engine)); - u32 *cs = ring->vaddr + ring->tail; u32 data[7]; - if (engine->id == RCS) { - cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED, - intel_hws_preempt_done_address(engine)); - } else { - cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED, - intel_hws_preempt_done_address(engine)); - *cs++ = MI_NOOP; - *cs++ = MI_NOOP; - } - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - GEM_BUG_ON(!IS_ALIGNED(ring->size, - GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32))); - GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) != - GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)); - - ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32); - ring->tail &= (ring->size - 1); - - flush_ggtt_writes(ring->vma); - + /* + * The ring contains commands to write GUC_PREEMPT_FINISHED into HWSP. + * See guc_fill_preempt_context(). 
+ */ spin_lock_irq(&client->wq_lock); guc_wq_item_append(client, engine->guc_id, ctx_desc, - ring->tail / sizeof(u64), 0); + GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64), 0); spin_unlock_irq(&client->wq_lock); /* @@ -972,6 +956,62 @@ static void guc_client_free(struct intel_guc_client *client) kfree(client); } +static inline bool ctx_save_restore_disabled(struct intel_context *ce) +{ + u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1]; + +#define SR_DISABLED \ + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \ + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) + + return (sr & SR_DISABLED) == SR_DISABLED; + +#undef SR_DISABLED +} + +static void guc_fill_preempt_context(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_guc_client *client = guc->preempt_client; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) { + struct intel_context *ce = &client->owner->engine[id]; + u32 addr = intel_hws_preempt_done_address(engine); + u32 *cs; + + GEM_BUG_ON(!ce->pin_count); + + /* + * We rely on this context image *not* being saved after + * preemption. This ensures that the RING_HEAD / RING_TAIL + * remain pointing at initial values forever. + */ + GEM_BUG_ON(!ctx_save_restore_disabled(ce)); + + cs = ce->ring->vaddr; + if (id == RCS) { + cs = gen8_emit_ggtt_write_rcs(cs, + GUC_PREEMPT_FINISHED, + addr); + } else { + cs = gen8_emit_ggtt_write(cs, + GUC_PREEMPT_FINISHED, + addr); + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + GEM_BUG_ON((void *)cs - ce->ring->vaddr != + GUC_PREEMPT_BREADCRUMB_BYTES); + + flush_ggtt_writes(ce->ring->vma); + } +} + static int guc_clients_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -1002,6 +1042,8 @@ static int guc_clients_create(struct intel_guc *guc) return PTR_ERR(client); } guc->preempt_client = client; + + guc_fill_preempt_context(guc); } return 0; -- cgit v1.2.3 From b891d5e46c96b0a41a5b09024794c631ee77c37b Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Fri, 23 Feb 2018 14:15:15 -0800 Subject: drm/i915/psr: New power domain for AUX IO. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PSR on CNL requires AUX IO wells to be kept on and the existing AUX domain for AUX-A enables DC_OFF well too. This is not required, so add a new AUX_IO_A domain for AUX-A to allow DC states to remain enabled. Other AUX channels re-use the existing AUX domains. v4: Reword comment (Rodrigo and Ville) Rename _get and _put functions to include aux_io substring(Rodrigo) Remove unnecessary diff that got included. v3: Extract aux domain selection into a function (Ville) v2: Add AUX IO domain only for AUX-A Rebased on top of Ville's AUX series. 
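A sketch of the resulting domain selection, derived from psr_aux_domain() in the diff below:

	AUX_CH_A       -> POWER_DOMAIN_AUX_IO_A      /* new; DC states stay enabled */
	AUX_CH_B/C/D/F -> POWER_DOMAIN_AUX_{B,C,D,F} /* existing; power well 2 + DC_OFF already up */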
Cc: Imre Deak Cc: Rodrigo Vivi Cc: Ville Syrjälä Suggested-by: Imre Deak Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180223221520.18464-1-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_display.h | 1 + drivers/gpu/drm/i915/intel_psr.c | 41 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_runtime_pm.c | 3 +++ 3 files changed, 45 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h index f5733a2576e7..4e7418b345bc 100644 --- a/drivers/gpu/drm/i915/intel_display.h +++ b/drivers/gpu/drm/i915/intel_display.h @@ -186,6 +186,7 @@ enum intel_display_power_domain { POWER_DOMAIN_AUX_C, POWER_DOMAIN_AUX_D, POWER_DOMAIN_AUX_F, + POWER_DOMAIN_AUX_IO_A, POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 2ef374f936b9..04430d4c99c9 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -56,6 +56,43 @@ #include "intel_drv.h" #include "i915_drv.h" +static inline enum intel_display_power_domain +psr_aux_domain(struct intel_dp *intel_dp) +{ + /* CNL HW requires corresponding AUX IOs to be powered up for PSR. + * However, for non-A AUX ports the corresponding non-EDP transcoders + * would have already enabled power well 2 and DC_OFF. This means we can + * acquire a wider POWER_DOMAIN_AUX_{B,C,D,F} reference instead of a + * specific AUX_IO reference without powering up any extra wells. + * Note that PSR is enabled only on Port A even though this function + * returns the correct domain for other ports too. + */ + return intel_dp->aux_ch == AUX_CH_A ? POWER_DOMAIN_AUX_IO_A : + intel_dp->aux_power_domain; +} + +static void psr_aux_io_power_get(struct intel_dp *intel_dp) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + + if (INTEL_GEN(dev_priv) < 10) + return; + + intel_display_power_get(dev_priv, psr_aux_domain(intel_dp)); +} + +static void psr_aux_io_power_put(struct intel_dp *intel_dp) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + + if (INTEL_GEN(dev_priv) < 10) + return; + + intel_display_power_put(dev_priv, psr_aux_domain(intel_dp)); +} + static bool vlv_is_psr_active_on_pipe(struct drm_device *dev, int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -459,6 +496,8 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp, enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 chicken; + psr_aux_io_power_get(intel_dp); + if (dev_priv->psr.psr2_support) { chicken = PSR2_VSC_ENABLE_PROG_HEADER; if (dev_priv->psr.y_cord_support) @@ -617,6 +656,8 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, else WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); } + + psr_aux_io_power_put(intel_dp); } /** diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index b7924feb9f27..53ea564f971e 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -130,6 +130,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "AUX_D"; case POWER_DOMAIN_AUX_F: return "AUX_F"; + case POWER_DOMAIN_AUX_IO_A: + return "AUX_IO_A"; case POWER_DOMAIN_GMBUS: return "GMBUS"; case 
POWER_DOMAIN_INIT: @@ -1853,6 +1855,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_AUX_A_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_IO_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_AUX_B_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ -- cgit v1.2.3 From 51e98eb851616ecd9ae5017d606c5e4e8e5be79d Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Fri, 23 Feb 2018 14:15:16 -0800 Subject: drm/i915/frontbuffer: Mark frontbuffer flush and invalidate with might_sleep() Frontbuffer flush and invalidate call psr, fbc and drrs functions that use mutexes, yet they can themselves be called from atomic context via the fbdev path. The point where the spinlocks are acquired is further up the call stack and not entirely easy to spot, so annotate the functions with might_sleep(). Cc: Rodrigo Vivi Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180223221520.18464-2-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_frontbuffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index fcfc217e754e..3a8d3d06c26a 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -79,6 +79,7 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, spin_unlock(&dev_priv->fb_tracking.lock); } + might_sleep(); intel_psr_invalidate(dev_priv, frontbuffer_bits); intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits); intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin); @@ -108,6 +109,7 @@ static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, if (!frontbuffer_bits) return; + might_sleep(); intel_edp_drrs_flush(dev_priv, frontbuffer_bits); intel_psr_flush(dev_priv, frontbuffer_bits, origin); intel_fbc_flush(dev_priv, frontbuffer_bits, origin); -- cgit v1.2.3 From 77fe36ff04707bc03aeb88f110e76283d570a631 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Fri, 23 Feb 2018 14:15:17 -0800 Subject: drm/i915/psr: Extract PSR DPCD initialization and move it to intel_psr.c intel_edp_init_dpcd() is cluttered with PSR-specific DPCD checks and intel_dp.c is huge. No functional change intended. v2: Rebased. 
Cc: Rodrigo Vivi Signed-off-by: Dhinakaran Pandiyan Reviewed-by: David Weinehall Acked-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180223221520.18464-3-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 64 +------------------------------------ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_psr.c | 68 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 81ca13e32cc7..62fcf77f0551 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3182,35 +3182,6 @@ intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_ DP_LINK_STATUS_SIZE) == DP_LINK_STATUS_SIZE; } -static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp) -{ - uint8_t psr_caps = 0; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps) != 1) - return false; - return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED; -} - -static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) -{ - uint8_t dprx = 0; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST, - &dprx) != 1) - return false; - return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED; -} - -static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) -{ - uint8_t alpm_caps = 0; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, - &alpm_caps) != 1) - return false; - return alpm_caps & DP_ALPM_CAP; -} - /* These are source-specific values. */ uint8_t intel_dp_voltage_max(struct intel_dp *intel_dp) @@ -3761,40 +3732,7 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] & DP_NO_AUX_HANDSHAKE_LINK_TRAINING; - /* Check if the panel supports PSR */ - drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, - intel_dp->psr_dpcd, - sizeof(intel_dp->psr_dpcd)); - if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { - dev_priv->psr.sink_support = true; - DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); - } - - if (INTEL_GEN(dev_priv) >= 9 && - (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { - uint8_t frame_sync_cap; - - dev_priv->psr.sink_support = true; - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, - &frame_sync_cap) != 1) - frame_sync_cap = 0; - dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; - /* PSR2 needs frame sync as well */ - dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; - DRM_DEBUG_KMS("PSR2 %s on sink", - dev_priv->psr.psr2_support ? "supported" : "not supported"); - - if (dev_priv->psr.psr2_support) { - dev_priv->psr.y_cord_support = - intel_dp_get_y_cord_status(intel_dp); - dev_priv->psr.colorimetry_support = - intel_dp_get_colorimetry_status(intel_dp); - dev_priv->psr.alpm = - intel_dp_get_alpm_status(intel_dp); - } - - } + intel_psr_init_dpcd(intel_dp); /* * Read the eDP display control registers. 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 95e2b699f253..e4b1e7dbac99 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1772,6 +1772,7 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con /* intel_psr.c */ #define CAN_PSR(dev_priv) (HAS_PSR(dev_priv) && dev_priv->psr.sink_support) +void intel_psr_init_dpcd(struct intel_dp *intel_dp); void intel_psr_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); void intel_psr_disable(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 04430d4c99c9..8f8bcffd8d49 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -93,6 +93,74 @@ static void psr_aux_io_power_put(struct intel_dp *intel_dp) intel_display_power_put(dev_priv, psr_aux_domain(intel_dp)); } +static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp) +{ + uint8_t psr_caps = 0; + + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps) != 1) + return false; + return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED; +} + +static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) +{ + uint8_t dprx = 0; + + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST, + &dprx) != 1) + return false; + return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED; +} + +static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) +{ + uint8_t alpm_caps = 0; + + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, + &alpm_caps) != 1) + return false; + return alpm_caps & DP_ALPM_CAP; +} + +void intel_psr_init_dpcd(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = + to_i915(dp_to_dig_port(intel_dp)->base.base.dev); + + drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, + sizeof(intel_dp->psr_dpcd)); + + if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { + dev_priv->psr.sink_support = true; + DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); + } + + if (INTEL_GEN(dev_priv) >= 9 && + (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { + uint8_t frame_sync_cap; + + dev_priv->psr.sink_support = true; + if (drm_dp_dpcd_readb(&intel_dp->aux, + DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, + &frame_sync_cap) != 1) + frame_sync_cap = 0; + dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; + /* PSR2 needs frame sync as well */ + dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; + DRM_DEBUG_KMS("PSR2 %s on sink", + dev_priv->psr.psr2_support ? "supported" : "not supported"); + + if (dev_priv->psr.psr2_support) { + dev_priv->psr.y_cord_support = + intel_dp_get_y_cord_status(intel_dp); + dev_priv->psr.colorimetry_support = + intel_dp_get_colorimetry_status(intel_dp); + dev_priv->psr.alpm = + intel_dp_get_alpm_status(intel_dp); + } + } +} + static bool vlv_is_psr_active_on_pipe(struct drm_device *dev, int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); -- cgit v1.2.3 From e2770e2e0509e8b1189e2471af3012d68ca511a2 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Fri, 23 Feb 2018 14:15:18 -0800 Subject: drm/i915/psr: Check for the specific AUX_FRAME_SYNC cap bit. The cap check should be specifically for bit 0 instead of any bit. 
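Concretely, with DP_AUX_FRAME_SYNC_CAP being bit 0 (the 0x02 value is purely illustrative):

	frame_sync_cap = 0x02;			/* only a reserved bit set */
	frame_sync_cap ? true : false;		/* old check: true (wrong)  */
	frame_sync_cap & DP_AUX_FRAME_SYNC_CAP;	/* new check: 0    (right)  */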
Cc: Rodrigo Vivi Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Rodrigo Vivi Fixes: 474d1ec4a3d7 ("drm/i915/skl: Enabling PSR2 SU with frame sync") Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180223221520.18464-4-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 8f8bcffd8d49..b7cc6dd45c9e 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -144,7 +144,7 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, &frame_sync_cap) != 1) frame_sync_cap = 0; - dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; + dev_priv->psr.aux_frame_sync = frame_sync_cap & DP_AUX_FRAME_SYNC_CAP; /* PSR2 needs frame sync as well */ dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; DRM_DEBUG_KMS("PSR2 %s on sink", -- cgit v1.2.3 From 62d5ac27f4562b3a69dc42107200f315561bdc30 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Fri, 23 Feb 2018 14:15:19 -0800 Subject: drm/i915/dp: Remove redundant sleep after AUX transaction length check. The core already takes care of the delay before retrying. The delay now changes to (500, 600)us instead of (500 + 1000, 600 + 1500)us. Cc: Rodrigo Vivi Signed-off-by: Dhinakaran Pandiyan Reviewed-by: David Weinehall Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180223221520.18464-5-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 62fcf77f0551..41b0e27a157c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1205,14 +1205,6 @@ done: if (recv_bytes == 0 || recv_bytes > 20) { DRM_DEBUG_KMS("Forbidden recv_bytes = %d on aux transaction\n", recv_bytes); - /* - * FIXME: This patch was created on top of a series that - * organize the retries at drm level. There EBUSY should - * also take care for 1ms wait before retrying. - * That aux retries re-org is still needed and after that is - * merged we remove this sleep from here. - */ - usleep_range(1000, 1500); ret = -EBUSY; goto out; } -- cgit v1.2.3 From 3975f0aaa30371d711dc8d572f679314c415a58c Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Fri, 23 Feb 2018 14:15:20 -0800 Subject: drm/i915/dp: Move comment about hw timeout to the right place. No functional change. 
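In brief, the control flow after the move (taken from the diff below):

	if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR)
		continue;			/* HW timeout already waited >= 400us */
	if (status & DP_AUX_CH_CTL_RECEIVE_ERROR) {
		usleep_range(400, 500);		/* DP CTS 1.2: 400us before retry */
		continue;
	}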
Signed-off-by: Dhinakaran Pandiyan Reviewed-by: David Weinehall Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180223221520.18464-6-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 41b0e27a157c..909311823595 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1152,14 +1152,14 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, DP_AUX_CH_CTL_TIME_OUT_ERROR | DP_AUX_CH_CTL_RECEIVE_ERROR); - if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR) - continue; - /* DP CTS 1.2 Core Rev 1.1, 4.2.1.1 & 4.2.1.2 * 400us delay required for errors and timeouts * Timeout errors from the HW already meet this * requirement so skip to next iteration */ + if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR) + continue; + if (status & DP_AUX_CH_CTL_RECEIVE_ERROR) { usleep_range(400, 500); continue; -- cgit v1.2.3 From 06d058e1a008e202addc3bff9ab025fbcb23040f Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Mon, 26 Feb 2018 19:27:23 -0800 Subject: drm/i915/psr: Check for power state control capability. eDP spec says - "If PSR/PSR2 is supported, the SET_POWER_CAPABLE bit in the EDP_GENERAL_CAPABILITY_1 register (DPCD Address 00701h, bit d7) must be set to 1." Reject PSR on panels without this cap bit set as such panels cannot be controlled via SET_POWER & SET_DP_PWR_VOLTAGE register and the DP source needs to be able to do that for PSR. Thanks to Nathan for debugging this. Panel cap checks like this can be done just once, let's fix this when PSR dpcd init movement lands. Cc: Nathan D Ciobanu Cc: Rodrigo Vivi Signed-off-by: Dhinakaran Pandiyan Tested-by: Nathan Ciobanu Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180227032723.15474-1-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index b7cc6dd45c9e..1f77633fe809 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -508,6 +508,11 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, return; } + if (!(intel_dp->edp_dpcd[1] & DP_EDP_SET_POWER_CAP)) { + DRM_DEBUG_KMS("PSR condition failed: panel lacks power state control\n"); + return; + } + /* * FIXME psr2_support is messed up. It's both computed * dynamically during PSR enable, and extracted from sink -- cgit v1.2.3 From 8cef3e5c0d1927926a95a6a72dc00d39048ecb12 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 27 Feb 2018 13:29:12 -0800 Subject: drm/i915/psr2: Fix max resolution supported. According to spec: "PSR2 is supported for pipe active sizes up to 3640 pixels wide and 2304 lines tall." 
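For example (modes chosen for illustration): a 3840x2160 4K mode fails the width check (crtc_hdisplay 3840 > 3640) and keeps PSR2 disabled, while a 3440x1440 panel passes both limits (3440 <= 3640 and 1440 <= 2304).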
BSpec: 7713 Cc: Dhinakaran Pandiyan Signed-off-by: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20180227212913.14083-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 1f77633fe809..2f685beac21b 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -523,9 +523,9 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, return; } - /* PSR2 is restricted to work with panel resolutions upto 3200x2000 */ - if (adjusted_mode->crtc_hdisplay > 3200 || - adjusted_mode->crtc_vdisplay > 2000) { + /* PSR2 is restricted to work with panel resolutions up to 3640x2304 */ + if (adjusted_mode->crtc_hdisplay > 3640 || + adjusted_mode->crtc_vdisplay > 2304) { DRM_DEBUG_KMS("PSR2 disabled, panel resolution too big\n"); return; } -- cgit v1.2.3 From c4932d7956d8226e20c0c44b04fe9a2cbfcd8f51 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 27 Feb 2018 13:29:13 -0800 Subject: drm/i915/psr: Don't avoid PSR when PSR2 conditions are not met. We can still use PSR1 when PSR2 conditions are not met. So, let's split the check in a way that we make sure has_psr gets set independently of PSR2 criteria. v2: Duh! Handle proper return to avoid breaking PSR2. v3: (DK): - better name for psr2 conditions check function - Don't remove FIXME block and psr2.support check. - Add a debug message to show us what PSR or PSR2 is getting enabled now we have ways to enabled PSR on PSR2 panels. - s/PSR2 disabled/PSR2 not enabled Cc: Dhinakaran Pandiyan Signed-off-by: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20180227212913.14083-2-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 64 +++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 2f685beac21b..05770790a4e9 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -446,6 +446,41 @@ static void hsw_psr_activate(struct intel_dp *intel_dp) hsw_activate_psr1(intel_dp); } +static bool intel_psr2_config_valid(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + + /* + * FIXME psr2_support is messed up. It's both computed + * dynamically during PSR enable, and extracted from sink + * caps during eDP detection. + */ + if (!dev_priv->psr.psr2_support) + return false; + + /* PSR2 is restricted to work with panel resolutions up to 3640x2304 */ + if (adjusted_mode->crtc_hdisplay > 3640 || + adjusted_mode->crtc_vdisplay > 2304) { + DRM_DEBUG_KMS("PSR2 not enabled, panel resolution too big\n"); + return false; + } + + /* + * FIXME:enable psr2 only for y-cordinate psr2 panels + * After gtc implementation , remove this restriction. 
+ */ + if (!dev_priv->psr.y_cord_support) { + DRM_DEBUG_KMS("PSR2 not enabled, panel does not support Y coordinate\n"); + return false; + } + + return true; +} + void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -513,34 +548,9 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, return; } - /* - * FIXME psr2_support is messed up. It's both computed - * dynamically during PSR enable, and extracted from sink - * caps during eDP detection. - */ - if (!dev_priv->psr.psr2_support) { - crtc_state->has_psr = true; - return; - } - - /* PSR2 is restricted to work with panel resolutions up to 3640x2304 */ - if (adjusted_mode->crtc_hdisplay > 3640 || - adjusted_mode->crtc_vdisplay > 2304) { - DRM_DEBUG_KMS("PSR2 disabled, panel resolution too big\n"); - return; - } - - /* - * FIXME:enable psr2 only for y-cordinate psr2 panels - * After gtc implementation , remove this restriction. - */ - if (!dev_priv->psr.y_cord_support) { - DRM_DEBUG_KMS("PSR2 disabled, panel does not support Y coordinate\n"); - return; - } - crtc_state->has_psr = true; - crtc_state->has_psr2 = true; + crtc_state->has_psr2 = intel_psr2_config_valid(intel_dp, crtc_state); + DRM_DEBUG_KMS("Enabling PSR%s\n", crtc_state->has_psr2 ? "2" : ""); } static void intel_psr_activate(struct intel_dp *intel_dp) -- cgit v1.2.3 From d66047e4a582103d4c6a884692f402b905032f26 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 22 Feb 2018 12:05:35 -0800 Subject: drm/i915/cnl: Add WaRsDisableCoarsePowerGating Old Wa added now forever on CNL all steppings. With CPU P states enabled along with RC6, dispatcher hangs can happen. Cc: Rafael Antognolli Signed-off-by: Rodrigo Vivi Reviewed-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20180222200535.9290-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 5 +++-- drivers/gpu/drm/i915/intel_guc.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9143d0d6be5a..2711149189f1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2788,9 +2788,10 @@ intel_info(const struct drm_i915_private *dev_priv) /* Early gen2 have a totally busted CS tlb and require pinned batches. 
*/ #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) -/* WaRsDisableCoarsePowerGating:skl,bxt */ +/* WaRsDisableCoarsePowerGating:skl,cnl */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) + (IS_CANNONLAKE(dev_priv) || \ + IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 21140ccd7a97..e6512cccef75 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -370,7 +370,7 @@ int intel_guc_sample_forcewake(struct intel_guc *guc) u32 action[2]; action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; - /* WaRsDisableCoarsePowerGating:skl,bxt */ + /* WaRsDisableCoarsePowerGating:skl,cnl */ if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) action[1] = 0; else diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 21dac6ebc202..3c1499687d13 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6715,7 +6715,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) /* * 3b: Enable Coarse Power Gating only when RC6 is enabled. - * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. + * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6. */ if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) I915_WRITE(GEN9_PG_ENABLE, 0); -- cgit v1.2.3 From 128326a10c8d098d82af6a5d1c903dbe4b041ff4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Feb 2018 21:18:16 +0000 Subject: drm/i915: Repeat the GEM_BUG_ON message in the ftrace log As the ftrace log is overflowing the pstore capture, we lose the last gasps from dmesg, which include the GEM_BUG_ON function:line and condition that failed. Vital information for tracking down the bug, so append it to the ftrace log as well. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180227211816.5546-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index e920dab7f1b8..d9f0709973d1 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -29,7 +29,10 @@ #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ - printk(KERN_ERR "GEM_BUG_ON(%s)\n", __stringify(condition)); \ + pr_err("%s:%d GEM_BUG_ON(%s)\n", \ + __func__, __LINE__, __stringify(condition)); \ + GEM_TRACE("%s:%d GEM_BUG_ON(%s)\n", \ + __func__, __LINE__, __stringify(condition)); \ BUG(); \ } \ } while(0) -- cgit v1.2.3 From 73c0fcac97bf7f4a6a61b825b205d1cf127cfca7 Mon Sep 17 00:00:00 2001 From: Mustamin B Mustaffa Date: Tue, 27 Feb 2018 11:07:34 +0800 Subject: drm/i915: Enable VBT based BL control for DP Currently, BXT_PP is hardcoded with value '0'. In practice this disabled the eDP backlight on the MRB (BXT) platform. This patch selects which BXT_PP registers (there are two sets of PP_CONTROL in the spec) are to be used as defined in the VBT (Video BIOS Table), and this will enable the eDP backlight controller on the MRB (BXT) platform. v2: - Remove unnecessary information in commit message. - Assign vbt.backlight.controller to a backlight_controller variable and return the variable value. 
v3: - Rebased to latest code base. - updated commit title. Signed-off-by: Mustamin B Mustaffa Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180227030734.37901-1-mustamin.b.mustaffa@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 909311823595..2a79f7113776 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -654,19 +654,15 @@ static int bxt_power_sequencer_idx(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + int backlight_controller = dev_priv->vbt.backlight.controller; lockdep_assert_held(&dev_priv->pps_mutex); /* We should never land here with regular DP ports */ WARN_ON(!intel_dp_is_edp(intel_dp)); - /* - * TODO: BXT has 2 PPS instances. The correct port->PPS instance - * mapping needs to be retrieved from VBT, for now just hard-code to - * use instance #0 always. - */ if (!intel_dp->pps_reset) - return 0; + return backlight_controller; intel_dp->pps_reset = false; @@ -676,7 +672,7 @@ bxt_power_sequencer_idx(struct intel_dp *intel_dp) */ intel_dp_init_panel_power_sequencer_registers(intel_dp, false); - return 0; + return backlight_controller; } typedef bool (*vlv_pipe_check)(struct drm_i915_private *dev_priv, -- cgit v1.2.3 From db61d160b3ed36b9fc2f65b416e02cc453043fef Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 23:42:30 +0200 Subject: drm/i915: Remove the pointless 1:1 matrix copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we don't have to frob with the user provided ctm matrix there's no point in copying it over. Just point at the user ctm directly. Also the matrix gets fully populated by ctm_mult_by_limited() so no need to zero initialize it. Cc: Johnson Lin Cc: Uma Shankar Cc: Shashank Sharma Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222214232.6064-2-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/intel_color.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index a383d993b844..c9af260be113 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -86,7 +86,7 @@ static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) * When using limited range, multiply the matrix given by userspace by * the matrix that we would use for the limited range. 
*/ -static void ctm_mult_by_limited(u64 *result, const u64 *input) +static u64 *ctm_mult_by_limited(u64 *result, const u64 *input) { int i; @@ -104,6 +104,8 @@ static void ctm_mult_by_limited(u64 *result, const u64 *input) result[i] = mul_u32_u32(limited_coeff, abs_coeff) >> 30; result[i] |= user_coeff & CTM_COEFF_SIGN; } + + return result; } static void i9xx_load_ycbcr_conversion_matrix(struct intel_crtc *intel_crtc) @@ -146,14 +148,13 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) } else if (crtc_state->ctm) { struct drm_color_ctm *ctm = (struct drm_color_ctm *)crtc_state->ctm->data; - uint64_t input[9] = { 0, }; + const u64 *input; + u64 temp[9]; - if (intel_crtc_state->limited_color_range) { - ctm_mult_by_limited(input, ctm->matrix); - } else { - for (i = 0; i < ARRAY_SIZE(input); i++) - input[i] = ctm->matrix[i]; - } + if (intel_crtc_state->limited_color_range) + input = ctm_mult_by_limited(temp, ctm->matrix); + else + input = ctm->matrix; /* * Convert fixed point S31.32 input to format supported by the -- cgit v1.2.3 From c35e8a25dfda24b7fc4c2c48cddecd4a5e3297d8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 23:42:31 +0200 Subject: drm/i915: Rename pipe CSC to use ilk_ prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pipe CSC was introduced by ILK, so change everything related to use ilk_ as the prefix. Cc: Johnson Lin Cc: Uma Shankar Cc: Shashank Sharma Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222214232.6064-3-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/intel_color.c | 39 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index c9af260be113..af1e61d3bacd 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -66,13 +66,13 @@ * of the CTM coefficient and we write the value from bit 3. We also round the * value. */ -#define I9XX_CSC_COEFF_FP(coeff, fbits) \ +#define ILK_CSC_COEFF_FP(coeff, fbits) \ (clamp_val(((coeff) >> (32 - (fbits) - 3)) + 4, 0, 0xfff) & 0xff8) -#define I9XX_CSC_COEFF_LIMITED_RANGE \ - I9XX_CSC_COEFF_FP(CTM_COEFF_LIMITED_RANGE, 9) -#define I9XX_CSC_COEFF_1_0 \ - ((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) +#define ILK_CSC_COEFF_LIMITED_RANGE \ + ILK_CSC_COEFF_FP(CTM_COEFF_LIMITED_RANGE, 9) +#define ILK_CSC_COEFF_1_0 \ + ((7 << 12) | ILK_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) { @@ -108,7 +108,7 @@ static u64 *ctm_mult_by_limited(u64 *result, const u64 *input) return result; } -static void i9xx_load_ycbcr_conversion_matrix(struct intel_crtc *intel_crtc) +static void ilk_load_ycbcr_conversion_matrix(struct intel_crtc *intel_crtc) { int pipe = intel_crtc->pipe; struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); @@ -132,8 +132,7 @@ static void i9xx_load_ycbcr_conversion_matrix(struct intel_crtc *intel_crtc) I915_WRITE(PIPE_CSC_MODE(pipe), 0); } -/* Set up the pipe CSC unit. 
*/ -static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) +static void ilk_load_csc_matrix(struct drm_crtc_state *crtc_state) { struct drm_crtc *crtc = crtc_state->crtc; struct drm_i915_private *dev_priv = to_i915(crtc->dev); @@ -143,7 +142,7 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) struct intel_crtc_state *intel_crtc_state = to_intel_crtc_state(crtc_state); if (intel_crtc_state->ycbcr420) { - i9xx_load_ycbcr_conversion_matrix(intel_crtc); + ilk_load_ycbcr_conversion_matrix(intel_crtc); return; } else if (crtc_state->ctm) { struct drm_color_ctm *ctm = @@ -175,21 +174,21 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) if (abs_coeff < CTM_COEFF_0_125) coeffs[i] |= (3 << 12) | - I9XX_CSC_COEFF_FP(abs_coeff, 12); + ILK_CSC_COEFF_FP(abs_coeff, 12); else if (abs_coeff < CTM_COEFF_0_25) coeffs[i] |= (2 << 12) | - I9XX_CSC_COEFF_FP(abs_coeff, 11); + ILK_CSC_COEFF_FP(abs_coeff, 11); else if (abs_coeff < CTM_COEFF_0_5) coeffs[i] |= (1 << 12) | - I9XX_CSC_COEFF_FP(abs_coeff, 10); + ILK_CSC_COEFF_FP(abs_coeff, 10); else if (abs_coeff < CTM_COEFF_1_0) - coeffs[i] |= I9XX_CSC_COEFF_FP(abs_coeff, 9); + coeffs[i] |= ILK_CSC_COEFF_FP(abs_coeff, 9); else if (abs_coeff < CTM_COEFF_2_0) coeffs[i] |= (7 << 12) | - I9XX_CSC_COEFF_FP(abs_coeff, 8); + ILK_CSC_COEFF_FP(abs_coeff, 8); else coeffs[i] |= (6 << 12) | - I9XX_CSC_COEFF_FP(abs_coeff, 7); + ILK_CSC_COEFF_FP(abs_coeff, 7); } } else { /* @@ -203,9 +202,9 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state) for (i = 0; i < 3; i++) { if (intel_crtc_state->limited_color_range) coeffs[i * 3 + i] = - I9XX_CSC_COEFF_LIMITED_RANGE; + ILK_CSC_COEFF_LIMITED_RANGE; else - coeffs[i * 3 + i] = I9XX_CSC_COEFF_1_0; + coeffs[i * 3 + i] = ILK_CSC_COEFF_1_0; } } @@ -651,14 +650,14 @@ void intel_color_init(struct drm_crtc *crtc) dev_priv->display.load_csc_matrix = cherryview_load_csc_matrix; dev_priv->display.load_luts = cherryview_load_luts; } else if (IS_HASWELL(dev_priv)) { - dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = haswell_load_luts; } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { - dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; } else if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { - dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = glk_load_luts; } else { dev_priv->display.load_luts = i9xx_load_luts; -- cgit v1.2.3 From 5857c0d4256a0eba999d1760a91b0e6a968da6fe Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 23:42:32 +0200 Subject: drm/i915: Don't mangle the CTM on pre-HSW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On pre-HSW we have dedicated hardware for the RGB limited range handling, and so we don't want to compress with the CSC matrix. Toss in a FIXME about gamma LUT vs. limited range using the CSC. 
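A sketch of the numbers involved, assuming CTM_COEFF_LIMITED_RANGE encodes the usual (235 - 16) / 255 scale factor: limited range compresses each full-range channel by 219/255 ~= 0.859 and adds a +16/255 post offset, which is exactly the job of the dedicated range hardware on pre-HSW; folding the compression into the CSC matrix there would scale the output twice.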
Cc: Johnson Lin Cc: Uma Shankar Cc: Shashank Sharma Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222214232.6064-4-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/intel_color.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index af1e61d3bacd..89ab0f70aa22 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -140,6 +140,14 @@ static void ilk_load_csc_matrix(struct drm_crtc_state *crtc_state) int i, pipe = intel_crtc->pipe; uint16_t coeffs[9] = { 0, }; struct intel_crtc_state *intel_crtc_state = to_intel_crtc_state(crtc_state); + bool limited_color_range = false; + + /* + * FIXME if there's a gamma LUT after the CSC, we should + * do the range compression using the gamma LUT instead. + */ + if (INTEL_GEN(dev_priv) >= 8 || IS_HASWELL(dev_priv)) + limited_color_range = intel_crtc_state->limited_color_range; if (intel_crtc_state->ycbcr420) { ilk_load_ycbcr_conversion_matrix(intel_crtc); @@ -150,7 +158,7 @@ static void ilk_load_csc_matrix(struct drm_crtc_state *crtc_state) const u64 *input; u64 temp[9]; - if (intel_crtc_state->limited_color_range) + if (limited_color_range) input = ctm_mult_by_limited(temp, ctm->matrix); else input = ctm->matrix; @@ -200,7 +208,7 @@ static void ilk_load_csc_matrix(struct drm_crtc_state *crtc_state) * into consideration. */ for (i = 0; i < 3; i++) { - if (intel_crtc_state->limited_color_range) + if (limited_color_range) coeffs[i * 3 + i] = ILK_CSC_COEFF_LIMITED_RANGE; else @@ -224,7 +232,7 @@ static void ilk_load_csc_matrix(struct drm_crtc_state *crtc_state) if (INTEL_GEN(dev_priv) > 6) { uint16_t postoff = 0; - if (intel_crtc_state->limited_color_range) + if (limited_color_range) postoff = (16 * (1 << 12) / 255) & 0x1fff; I915_WRITE(PIPE_CSC_POSTOFF_HI(pipe), postoff); @@ -235,7 +243,7 @@ static void ilk_load_csc_matrix(struct drm_crtc_state *crtc_state) } else { uint32_t mode = CSC_MODE_YUV_TO_RGB; - if (intel_crtc_state->limited_color_range) + if (limited_color_range) mode |= CSC_BLACK_SCREEN_OFFSET; I915_WRITE(PIPE_CSC_MODE(pipe), mode); -- cgit v1.2.3 From 367a35a6c6c7ea84fe3f47825668ca017cb566ed Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 28 Feb 2018 09:47:32 +0000 Subject: drm/i915: Don't deref request->ctx inside unlocked print_request() Although we protect the request itself, we don't lock inside intel_engine_dump() and so the request maybe retired as we peek into it. One consequence is that the request->ctx may be freed before we dereference it, leading to a use-after-free. Replace the hw_id we are peeking from inside request->ctx with the request->fence.context, with which we can still track from which context the request originated (although to tie to HW reports requires a little more legwork, but is good enough to follow the GEM traces). 
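The crux in two lines (fields as used in the diff below):

	rq->ctx->hw_id;    /* chases a pointer into a context that may already be freed */
	rq->fence.context; /* embedded in the request we are holding, safe to read */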
[52640.729670] general protection fault: 0000 [#2] SMP [52640.729694] Dumping ftrace buffer: [52640.729701] (ftrace buffer empty) [52640.729705] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic x86_pkg_\ temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul snd_hda_intel snd_hda_codec snd_hwdep gha\ sh_clmulni_intel snd_hda_core snd_pcm mei_me mei i915 r8169 mii prime_numbers i2c_hid [52640.729748] CPU: 2 PID: 4335 Comm: gem_exec_schedu Tainted: G UD W 4.16.0-rc3+ #7 [52640.729759] Hardware name: Acer Aspire E5-575G/Ironman_SK , BIOS V1.12 08/02/2016 [52640.729803] RIP: 0010:print_request+0x2b/0xb0 [i915] [52640.729811] RSP: 0018:ffffc90001453c18 EFLAGS: 00010206 [52640.729820] RAX: 6b6b6b6b6b6b6b6b RBX: ffff8801e0292d40 RCX: 0000000000000006 [52640.729829] RDX: ffffc90001453c60 RSI: ffff8801e0292d40 RDI: 0000000000000003 [52640.729838] RBP: ffffc90001453d80 R08: 0000000000000000 R09: 0000000000000001 [52640.729847] R10: ffffc90001453bd0 R11: ffffc90001453c73 R12: ffffc90001453c60 [52640.729856] R13: ffffc90001453d80 R14: ffff8801d5a683c8 R15: ffff8801e0292d40 [52640.729866] FS: 00007f1ee50548c0(0000) GS:ffff8801e8200000(0000) knlGS:0000000000000000 [52640.729876] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [52640.729884] CR2: 00007f1ee5077000 CR3: 00000001d9411004 CR4: 00000000003606e0 [52640.729893] Call Trace: [52640.729922] intel_engine_print_registers+0x623/0x890 [i915] [52640.729948] intel_engine_dump+0x4a3/0x590 [i915] [52640.729957] ? seq_printf+0x3a/0x50 [52640.729977] i915_engine_info+0xb8/0xe0 [i915] [52640.729984] ? drm_mode_gamma_get_ioctl+0xf0/0xf0 [52640.729990] seq_read+0xd5/0x410 [52640.729997] full_proxy_read+0x4b/0x70 [52640.730004] __vfs_read+0x1e/0x120 [52640.730009] ? do_sys_open+0x134/0x220 [52640.730015] ? kmem_cache_free+0x174/0x2b0 [52640.730021] vfs_read+0xa1/0x150 [52640.730026] SyS_read+0x40/0xa0 [52640.730032] do_syscall_64+0x65/0x1a0 [52640.730038] entry_SYSCALL_64_after_hwframe+0x42/0xb7 Reported-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180228094732.28462-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ce7fcf55ba18..3e1107ecb6ee 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1668,10 +1668,10 @@ static void print_request(struct drm_printer *m, struct i915_request *rq, const char *prefix) { - drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %dms: %s\n", prefix, + drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix, rq->global_seqno, i915_request_completed(rq) ? "!" 
: "", - rq->ctx->hw_id, rq->fence.seqno, + rq->fence.context, rq->fence.seqno, rq->priotree.priority, jiffies_to_msecs(jiffies - rq->emitted_jiffies), rq->timeline->common->name); -- cgit v1.2.3 From 449059a9692c5f01305bffcca72befc4621f6dbb Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 20:10:33 +0200 Subject: drm/i915: Consult aux_ch instead of port in ->get_aux_clock_divider() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While it seems totally unlikely that any system would mix a cpu/north aux channel with a pch/south port (or vice versa) we should still consult intel_dp->aux_ch rather than encoder->port when figuring out which clock is actually used by the aux ch. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222181036.15251-5-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson #irc --- drivers/gpu/drm/i915/intel_dp.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2a79f7113776..0e98b36d6ee8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -950,8 +950,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) static uint32_t g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (index) return 0; @@ -965,8 +964,7 @@ static uint32_t g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (index) return 0; @@ -976,7 +974,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * like to run at 2MHz. So, take the cdclk or PCH rawclk value and * divide by 2000 and use that */ - if (intel_dig_port->base.port == PORT_A) + if (intel_dp->aux_ch == AUX_CH_A) return DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.cdclk, 2000); else return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); @@ -984,10 +982,9 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) static uint32_t hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - if (intel_dig_port->base.port != PORT_A && HAS_PCH_LPT_H(dev_priv)) { + if (intel_dp->aux_ch != AUX_CH_A && HAS_PCH_LPT_H(dev_priv)) { /* Workaround for non-ULT HSW */ switch (index) { case 0: return 63; -- cgit v1.2.3 From 229675d5c0384c170c665387c2fec75a2ddcb61a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 27 Feb 2018 12:59:11 +0200 Subject: drm/i915/dp: move link rate arrays where they're used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Localize link rate arrays by moving them to the functions where they're used. Further clarify the distinction between source and sink capabilities. 
Split pre and post Haswell arrays, and get rid of the array size arithmetics. Use a direct rate value in the paranoia case of no common rates find. Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180227105911.4485-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 44 +++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0e98b36d6ee8..afb49b906d71 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -94,15 +94,6 @@ static const struct dp_link_dpll chv_dpll[] = { { .p1 = 2, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } } }; -static const int bxt_rates[] = { 162000, 216000, 243000, 270000, - 324000, 432000, 540000 }; -static const int skl_rates[] = { 162000, 216000, 270000, - 324000, 432000, 540000 }; -static const int cnl_rates[] = { 162000, 216000, 270000, - 324000, 432000, 540000, - 648000, 810000 }; -static const int default_rates[] = { 162000, 270000, 540000 }; - /** * intel_dp_is_edp - is the given port attached to an eDP panel (either CPU or PCH) * @intel_dp: DP struct @@ -142,14 +133,17 @@ static void intel_dp_unset_edid(struct intel_dp *intel_dp); /* update sink rates from dpcd */ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { + static const int dp_rates[] = { + 162000, 270000, 540000 + }; int i, max_rate; max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); - for (i = 0; i < ARRAY_SIZE(default_rates); i++) { - if (default_rates[i] > max_rate) + for (i = 0; i < ARRAY_SIZE(dp_rates); i++) { + if (dp_rates[i] > max_rate) break; - intel_dp->sink_rates[i] = default_rates[i]; + intel_dp->sink_rates[i] = dp_rates[i]; } intel_dp->num_sink_rates = i; @@ -266,6 +260,22 @@ static int cnl_max_source_rate(struct intel_dp *intel_dp) static void intel_dp_set_source_rates(struct intel_dp *intel_dp) { + /* The values must be in increasing order */ + static const int cnl_rates[] = { + 162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000 + }; + static const int bxt_rates[] = { + 162000, 216000, 243000, 270000, 324000, 432000, 540000 + }; + static const int skl_rates[] = { + 162000, 216000, 270000, 324000, 432000, 540000 + }; + static const int hsw_rates[] = { + 162000, 270000, 540000 + }; + static const int g4x_rates[] = { + 162000, 270000 + }; struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); const struct ddi_vbt_port_info *info = @@ -288,11 +298,11 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) size = ARRAY_SIZE(skl_rates); } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || IS_BROADWELL(dev_priv)) { - source_rates = default_rates; - size = ARRAY_SIZE(default_rates); + source_rates = hsw_rates; + size = ARRAY_SIZE(hsw_rates); } else { - source_rates = default_rates; - size = ARRAY_SIZE(default_rates) - 1; + source_rates = g4x_rates; + size = ARRAY_SIZE(g4x_rates); } if (max_rate && vbt_max_rate) @@ -354,7 +364,7 @@ static void intel_dp_set_common_rates(struct intel_dp *intel_dp) /* Paranoia, there should always be something in common. 
*/ if (WARN_ON(intel_dp->num_common_rates == 0)) { - intel_dp->common_rates[0] = default_rates[0]; + intel_dp->common_rates[0] = 162000; intel_dp->num_common_rates = 1; } } -- cgit v1.2.3 From c71b53cc66c5053ff3524a6132f8fc8199d618c3 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Wed, 28 Feb 2018 14:31:50 -0800 Subject: drm/i915/dp: Add HBR3 rate (8.1 Gbps) to dp_rates array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dp_rates[] array is a superset of all the link rates supported by sink devices. DP 1.3 specification adds HBR3 (8.1Gbps) link rate to the set of link rates supported by sink. This patch adds this rate to dp_rates[] array that gets used to populate the sink_rates[] array limited by max rate obtained from DP_MAX_LINK_RATE DPCD register. v2: * Rebased on top of Jani's localized rates patch Cc: Jani Nikula Cc: Ville Syrjälä Signed-off-by: Manasi Navare Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1519857110-26916-1-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index afb49b906d71..be67106a7759 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -134,7 +134,7 @@ static void intel_dp_unset_edid(struct intel_dp *intel_dp); static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { static const int dp_rates[] = { - 162000, 270000, 540000 + 162000, 270000, 540000, 810000 }; int i, max_rate; -- cgit v1.2.3 From 022d3093a9102a8b8b7a3796a8aba5a9c4e40ec7 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 28 Feb 2018 12:11:52 +0200 Subject: drm/i915/icl: Prepare for more rings Gen11 will add more VCS and VECS rings so prepare the infrastructure to support that. Bspec: 7021 v2: Rebase. v3: Rebase. v4: Rebase. v5: Rebase. v6: - Update for POR changes. (Daniele Ceraolo Spurio) - Add provisional guc engine ids - to be checked and confirmed. v7: - Rebased. - Added the new ring masks. - Added the new HW ids. 
v8: - Introduce I915_MAX_VCS/VECS to avoid magic numbers (Michal) v9: increase MAX_ENGINE_INSTANCE to 3 Cc: Michal Wajdeczko Signed-off-by: Tvrtko Ursulin Signed-off-by: Rodrigo Vivi Signed-off-by: Oscar Mateo Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Oscar Mateo Signed-off-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180228101153.7224-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_gem.h | 2 +- drivers/gpu/drm/i915/i915_reg.h | 5 ++++- drivers/gpu/drm/i915/intel_device_info.c | 3 +++ drivers/gpu/drm/i915/intel_device_info.h | 4 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 9 ++++++++- 6 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7bbec5546d12..10c9e5e619ab 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2746,6 +2746,9 @@ intel_info(const struct drm_i915_private *dev_priv) #define BLT_RING ENGINE_MASK(BCS) #define VEBOX_RING ENGINE_MASK(VECS) #define BSD2_RING ENGINE_MASK(VCS2) +#define BSD3_RING ENGINE_MASK(VCS3) +#define BSD4_RING ENGINE_MASK(VCS4) +#define VEBOX2_RING ENGINE_MASK(VECS2) #define ALL_ENGINES (~0) #define HAS_ENGINE(dev_priv, id) \ diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index d9f0709973d1..f54c4ff74ded 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -57,6 +57,6 @@ #define GEM_TRACE(...) do { } while (0) #endif -#define I915_NUM_ENGINES 5 +#define I915_NUM_ENGINES 8 #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index eea5b2c537d4..95a2e51ecbb0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -178,6 +178,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define BCS_HW 2 #define VECS_HW 3 #define VCS2_HW 4 +#define VCS3_HW 6 +#define VCS4_HW 7 +#define VECS2_HW 12 /* Engine class */ @@ -188,7 +191,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OTHER_CLASS 4 #define MAX_ENGINE_CLASS 4 -#define MAX_ENGINE_INSTANCE 1 +#define MAX_ENGINE_INSTANCE 3 /* PCI config space */ diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 1c780cc4cd48..cadc5f81ed72 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -542,6 +542,9 @@ void intel_device_info_runtime_init(struct intel_device_info *info) info->num_scalers[PIPE_C] = 1; } + BUILD_BUG_ON(I915_NUM_ENGINES > + sizeof(intel_ring_mask_t) * BITS_PER_BYTE); + /* * Skylake and Broxton currently don't expose the topmost plane as its * use is exclusive with the legacy cursor and we only want to expose diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 8904ad87bf37..ab5bfd305477 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -125,6 +125,8 @@ struct sseu_dev_info { u8 has_eu_pg:1; }; +typedef u8 intel_ring_mask_t; + struct intel_device_info { u16 device_id; u16 gen_mask; @@ -132,7 +134,7 @@ struct intel_device_info { u8 gen; u8 gt; /* GT number, 0 if undefined */ u8 num_rings; - u8 ring_mask; /* Rings supported by the HW */ + intel_ring_mask_t ring_mask; /* Rings supported by the HW */ enum intel_platform platform; u32 platform_mask; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h index bbacf4d0f4cb..90e4380cbdd5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -160,6 +160,9 @@ struct i915_ctx_workarounds { struct i915_request; +#define I915_MAX_VCS 4 +#define I915_MAX_VECS 2 + /* * Engine IDs definitions. * Keep instances of the same type engine together. @@ -169,8 +172,12 @@ enum intel_engine_id { BCS, VCS, VCS2, + VCS3, + VCS4, #define _VCS(n) (VCS + (n)) - VECS + VECS, + VECS2 +#define _VECS(n) (VECS + (n)) }; struct i915_priolist { -- cgit v1.2.3 From 51951ae7ed0088cd1c6eb71f39217ac1b1aa9c5d Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 28 Feb 2018 12:11:53 +0200 Subject: drm/i915/icl: Interrupt handling v2: Rebase. v3: * Remove DPF, it has been removed from SKL+. * Fix -internal rebase wrt. execlists interrupt handling. v4: Rebase. v5: * Updated for POR changes. (Daniele Ceraolo Spurio) * Merged with irq handling fixes by Daniele Ceraolo Spurio: * Simplify the code by using gen8_cs_irq_handler. * Fix interrupt handling for the upstream kernel. v6: * Remove early bringup debug messages (Tvrtko) * Add NB about arbitrary spin wait timeout (Tvrtko) v7 (from Paulo): * Don't try to write RO bits to registers. * Don't check for PCH types that don't exist. PCH interrupts are not here yet. v9: * squashed in selector and shared register handling (Daniele) * skip writing of irq if data is not valid (Daniele) * use time_after32 (Chris) * use I915_MAX_VCS and I915_MAX_VECS (Daniele) * remove fake pm interrupt handling for later patch (Mika) v10: * Direct processing of banks. clear banks early (Chris) * remove poll on valid bit, only clear valid bit (Mika) * use raw accessors, better naming (Chris) v11: * adapt to raw_reg_[read|write] * bring back polling the valid bit (Daniele) v12: * continue if unset intr_dw (Daniele) * comment the usage of gen8_de_irq_handler bits (Daniele) Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Oscar Mateo Signed-off-by: Tvrtko Ursulin Signed-off-by: Rodrigo Vivi Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Oscar Mateo Signed-off-by: Paulo Zanoni Signed-off-by: Mika Kuoppala Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20180228101153.7224-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/i915_irq.c | 235 ++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 7 +- 2 files changed, 241 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0a7ed990a8d1..ce16003ef048 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -415,6 +415,9 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) if (READ_ONCE(rps->interrupts_enabled)) return; + if (WARN_ON_ONCE(IS_GEN11(dev_priv))) + return; + spin_lock_irq(&dev_priv->irq_lock); WARN_ON_ONCE(rps->pm_iir); WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); @@ -431,6 +434,9 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) if (!READ_ONCE(rps->interrupts_enabled)) return; + if (WARN_ON_ONCE(IS_GEN11(dev_priv))) + return; + spin_lock_irq(&dev_priv->irq_lock); rps->interrupts_enabled = false; @@ -2755,6 +2761,156 @@ static void __fini_wedge(struct wedge_me *w) (W)->i915; \ __fini_wedge((W))) +static __always_inline void +gen11_cs_irq_handler(struct intel_engine_cs * const engine, const u32 iir) +{ + gen8_cs_irq_handler(engine, iir, 0); +} + +static void 
+gen11_gt_engine_irq_handler(struct drm_i915_private * const i915, + const unsigned int bank, + const unsigned int engine_n, + const u16 iir) +{ + struct intel_engine_cs ** const engine = i915->engine; + + switch (bank) { + case 0: + switch (engine_n) { + + case GEN11_RCS0: + return gen11_cs_irq_handler(engine[RCS], iir); + + case GEN11_BCS: + return gen11_cs_irq_handler(engine[BCS], iir); + } + case 1: + switch (engine_n) { + + case GEN11_VCS(0): + return gen11_cs_irq_handler(engine[_VCS(0)], iir); + case GEN11_VCS(1): + return gen11_cs_irq_handler(engine[_VCS(1)], iir); + case GEN11_VCS(2): + return gen11_cs_irq_handler(engine[_VCS(2)], iir); + case GEN11_VCS(3): + return gen11_cs_irq_handler(engine[_VCS(3)], iir); + + case GEN11_VECS(0): + return gen11_cs_irq_handler(engine[_VECS(0)], iir); + case GEN11_VECS(1): + return gen11_cs_irq_handler(engine[_VECS(1)], iir); + } + } +} + +static u32 +gen11_gt_engine_intr(struct drm_i915_private * const i915, + const unsigned int bank, const unsigned int bit) +{ + void __iomem * const regs = i915->regs; + u32 timeout_ts; + u32 ident; + + raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit)); + + /* + * NB: Specs do not specify how long to spin wait, + * so we do ~100us as an educated guess. + */ + timeout_ts = (local_clock() >> 10) + 100; + do { + ident = raw_reg_read(regs, GEN11_INTR_IDENTITY_REG(bank)); + } while (!(ident & GEN11_INTR_DATA_VALID) && + !time_after32(local_clock() >> 10, timeout_ts)); + + if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) { + DRM_ERROR("INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", + bank, bit, ident); + return 0; + } + + raw_reg_write(regs, GEN11_INTR_IDENTITY_REG(bank), + GEN11_INTR_DATA_VALID); + + return ident & GEN11_INTR_ENGINE_MASK; +} + +static void +gen11_gt_irq_handler(struct drm_i915_private * const i915, + const u32 master_ctl) +{ + void __iomem * const regs = i915->regs; + unsigned int bank; + + for (bank = 0; bank < 2; bank++) { + unsigned long intr_dw; + unsigned int bit; + + if (!(master_ctl & GEN11_GT_DW_IRQ(bank))) + continue; + + intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); + + if (unlikely(!intr_dw)) { + DRM_ERROR("GT_INTR_DW%u blank!\n", bank); + continue; + } + + for_each_set_bit(bit, &intr_dw, 32) { + const u16 iir = gen11_gt_engine_intr(i915, bank, bit); + + if (unlikely(!iir)) + continue; + + gen11_gt_engine_irq_handler(i915, bank, bit, iir); + } + + /* Clear must be after shared has been served for engine */ + raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw); + } +} + +static irqreturn_t gen11_irq_handler(int irq, void *arg) +{ + struct drm_i915_private * const i915 = to_i915(arg); + void __iomem * const regs = i915->regs; + u32 master_ctl; + + if (!intel_irqs_enabled(i915)) + return IRQ_NONE; + + master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ); + master_ctl &= ~GEN11_MASTER_IRQ; + if (!master_ctl) + return IRQ_NONE; + + /* Disable interrupts. */ + raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, 0); + + /* Find, clear, then process each source of interrupt. */ + gen11_gt_irq_handler(i915, master_ctl); + + /* IRQs are synced during runtime_suspend, we don't require a wakeref */ + if (master_ctl & GEN11_DISPLAY_IRQ) { + const u32 disp_ctl = raw_reg_read(regs, GEN11_DISPLAY_INT_CTL); + + disable_rpm_wakeref_asserts(i915); + /* + * GEN11_DISPLAY_INT_CTL has same format as GEN8_MASTER_IRQ + * for the display related bits. + */ + gen8_de_irq_handler(i915, disp_ctl); + enable_rpm_wakeref_asserts(i915); + } + + /* Acknowledge and enable interrupts. 
*/ + raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ | master_ctl); + + return IRQ_HANDLED; +} + /** * i915_reset_device - do process context error handling work * @dev_priv: i915 device private @@ -3180,6 +3336,42 @@ static void gen8_irq_reset(struct drm_device *dev) ibx_irq_reset(dev_priv); } +static void gen11_gt_irq_reset(struct drm_i915_private *dev_priv) +{ + /* Disable RCS, BCS, VCS and VECS class engines. */ + I915_WRITE(GEN11_RENDER_COPY_INTR_ENABLE, 0); + I915_WRITE(GEN11_VCS_VECS_INTR_ENABLE, 0); + + /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ + I915_WRITE(GEN11_RCS0_RSVD_INTR_MASK, ~0); + I915_WRITE(GEN11_BCS_RSVD_INTR_MASK, ~0); + I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK, ~0); + I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~0); + I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~0); +} + +static void gen11_irq_reset(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; + + I915_WRITE(GEN11_GFX_MSTR_IRQ, 0); + POSTING_READ(GEN11_GFX_MSTR_IRQ); + + gen11_gt_irq_reset(dev_priv); + + I915_WRITE(GEN11_DISPLAY_INT_CTL, 0); + + for_each_pipe(dev_priv, pipe) + if (intel_display_power_is_enabled(dev_priv, + POWER_DOMAIN_PIPE(pipe))) + GEN8_IRQ_RESET_NDX(DE_PIPE, pipe); + + GEN3_IRQ_RESET(GEN8_DE_PORT_); + GEN3_IRQ_RESET(GEN8_DE_MISC_); + GEN3_IRQ_RESET(GEN8_PCU_); +} + void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, u8 pipe_mask) { @@ -3677,6 +3869,41 @@ static int gen8_irq_postinstall(struct drm_device *dev) return 0; } +static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv) +{ + const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT; + + BUILD_BUG_ON(irqs & 0xffff0000); + + /* Enable RCS, BCS, VCS and VECS class interrupts. */ + I915_WRITE(GEN11_RENDER_COPY_INTR_ENABLE, irqs << 16 | irqs); + I915_WRITE(GEN11_VCS_VECS_INTR_ENABLE, irqs << 16 | irqs); + + /* Unmask irqs on RCS, BCS, VCS and VECS engines. 
*/ + I915_WRITE(GEN11_RCS0_RSVD_INTR_MASK, ~(irqs << 16)); + I915_WRITE(GEN11_BCS_RSVD_INTR_MASK, ~(irqs << 16)); + I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK, ~(irqs | irqs << 16)); + I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~(irqs | irqs << 16)); + I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~(irqs | irqs << 16)); + + dev_priv->pm_imr = 0xffffffff; /* TODO */ +} + +static int gen11_irq_postinstall(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + gen11_gt_irq_postinstall(dev_priv); + gen8_de_irq_postinstall(dev_priv); + + I915_WRITE(GEN11_DISPLAY_INT_CTL, GEN11_DISPLAY_IRQ_ENABLE); + + I915_WRITE(GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ); + POSTING_READ(GEN11_GFX_MSTR_IRQ); + + return 0; +} + static int cherryview_irq_postinstall(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -4125,6 +4352,14 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev->driver->enable_vblank = i965_enable_vblank; dev->driver->disable_vblank = i965_disable_vblank; dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; + } else if (INTEL_GEN(dev_priv) >= 11) { + dev->driver->irq_handler = gen11_irq_handler; + dev->driver->irq_preinstall = gen11_irq_reset; + dev->driver->irq_postinstall = gen11_irq_postinstall; + dev->driver->irq_uninstall = gen11_irq_reset; + dev->driver->enable_vblank = gen8_enable_vblank; + dev->driver->disable_vblank = gen8_disable_vblank; + dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup; } else if (INTEL_GEN(dev_priv) >= 8) { dev->driver->irq_handler = gen8_irq_handler; dev->driver->irq_preinstall = gen8_irq_reset; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3c1499687d13..3e60279f18b1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8026,7 +8026,10 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); - gen6_reset_rps_interrupts(dev_priv); + if (INTEL_GEN(dev_priv) < 11) + gen6_reset_rps_interrupts(dev_priv); + else + WARN_ON_ONCE(1); } static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) @@ -8139,6 +8142,8 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv) cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); + } else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) { + /* TODO */ } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rps(dev_priv); } else if (IS_BROADWELL(dev_priv)) { -- cgit v1.2.3 From 41d3fdcd15d5ecf29cc73e8b79c2327ebb54b960 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 1 Mar 2018 11:06:13 +0000 Subject: drm/i915/perf: fix perf stream opening lock We're seeing on CI that some contexts don't have the programmed OA period timer that directs the OA unit on how often to write reports. The issue is that we're not holding the drm lock from when we edit the context images down to when we set the exclusive_stream variable. This leaves a window for the deferred context allocation to call i915_oa_init_reg_state() that will not program the expected OA timer value, because we haven't set the exclusive_stream yet. 
v2: Drop need_lock from gen8_configure_all_contexts() (Matt) Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Reviewed-by: Chris Wilson Fixes: 701f8231a2f ("drm/i915/perf: prune OA configs") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102254 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103715 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103755 Link: https://patchwork.freedesktop.org/patch/msgid/20180301110613.1737-1-lionel.g.landwerlin@intel.com Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Cc: # v4.14+ --- drivers/gpu/drm/i915/i915_perf.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2741b1bc7095..abaca6edeb71 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1303,9 +1303,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) */ mutex_lock(&dev_priv->drm.struct_mutex); dev_priv->perf.oa.exclusive_stream = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); - dev_priv->perf.oa.ops.disable_metric_set(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); free_oa_buffer(dev_priv); @@ -1756,22 +1755,13 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr * Note: it's only the RCS/Render context that has any OA state. */ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, - const struct i915_oa_config *oa_config, - bool interruptible) + const struct i915_oa_config *oa_config) { struct i915_gem_context *ctx; int ret; unsigned int wait_flags = I915_WAIT_LOCKED; - if (interruptible) { - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - return ret; - - wait_flags |= I915_WAIT_INTERRUPTIBLE; - } else { - mutex_lock(&dev_priv->drm.struct_mutex); - } + lockdep_assert_held(&dev_priv->drm.struct_mutex); /* Switch away from any user context. */ ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config); @@ -1819,8 +1809,6 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, } out: - mutex_unlock(&dev_priv->drm.struct_mutex); - return ret; } @@ -1863,7 +1851,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = gen8_configure_all_contexts(dev_priv, oa_config, true); + ret = gen8_configure_all_contexts(dev_priv, oa_config); if (ret) return ret; @@ -1878,7 +1866,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) { /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(dev_priv, NULL, false); + gen8_configure_all_contexts(dev_priv, NULL); I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & ~GT_NOA_ENABLE)); @@ -1888,7 +1876,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) { /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(dev_priv, NULL, false); + gen8_configure_all_contexts(dev_priv, NULL); /* Make sure we disable noa to save power. 
*/ I915_WRITE(RPM_CONFIG1, @@ -2138,6 +2126,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (ret) goto err_oa_buf_alloc; + ret = i915_mutex_lock_interruptible(&dev_priv->drm); + if (ret) + goto err_lock; + ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv, stream->oa_config); if (ret) @@ -2145,23 +2137,17 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->ops = &i915_oa_stream_ops; - /* Lock device for exclusive_stream access late because - * enable_metric_set() might lock as well on gen8+. - */ - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - goto err_lock; - dev_priv->perf.oa.exclusive_stream = stream; mutex_unlock(&dev_priv->drm.struct_mutex); return 0; -err_lock: +err_enable: dev_priv->perf.oa.ops.disable_metric_set(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); -err_enable: +err_lock: free_oa_buffer(dev_priv); err_oa_buf_alloc: -- cgit v1.2.3 From ffed7bd23641b1a724f7fa49aa3781aa0ccbd9a0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Mar 2018 10:33:38 +0000 Subject: drm/i915: Replace open-coded wait-for loop Now that we can pass arbitrary commands into the base __wait_for() macro, we can reimplement the open-coded wait-for inside i915_gem_idle_work_handler() using the new macro. This means that instead of using ktime, we now use jiffies, and benefit from the exponential sleep backoff that allows a fast response if the HW settles quickly. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180301103338.5380-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 14c855b1a3a4..c29b1a1cbe96 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3417,25 +3417,22 @@ i915_gem_idle_work_handler(struct work_struct *work) container_of(work, typeof(*dev_priv), gt.idle_work.work); unsigned int epoch = I915_EPOCH_INVALID; bool rearm_hangcheck; - ktime_t end; if (!READ_ONCE(dev_priv->gt.awake)) return; /* * Wait for last execlists context complete, but bail out in case a - * new request is submitted. + * new request is submitted. As we don't trust the hardware, we + * continue on if the wait times out. This is necessary to allow + * the machine to suspend even if the hardware dies, and we will + * try to recover in resume (after depriving the hardware of power, + * it may be in a better mmod). */ - end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT); - do { - if (new_requests_since_last_retire(dev_priv)) - return; - - if (intel_engines_are_idle(dev_priv)) - break; - - usleep_range(100, 500); - } while (ktime_before(ktime_get(), end)); + __wait_for(if (new_requests_since_last_retire(dev_priv)) return, + intel_engines_are_idle(dev_priv), + I915_IDLE_ENGINES_TIMEOUT * 1000, + 10, 500); rearm_hangcheck = cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); -- cgit v1.2.3 From 8c58f73c48e517f8be7349ef915da871a95641b9 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 21 Feb 2018 10:28:08 +0100 Subject: drm/i915: Check for I915_MODE_FLAG_INHERITED before drm_atomic_helper_check_modeset Moving the check upwards will mean we we no longer have to add planes and connectors manually, because everything is handled correctly by drm_atomic_helper_check_modeset() as intended. 
[applied with whitespace changes to make sparse happy] Signed-off-by: Maarten Lankhorst Cc: Lyude Paul Cc: Daniel Vetter Reviewed-by: Daniel Vetter Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180221092808.30060-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 65c8487be7c7..90f0fc8cc2bd 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12050,6 +12050,14 @@ static int intel_atomic_check(struct drm_device *dev, int ret, i; bool any_ms = false; + /* Catch I915_MODE_FLAG_INHERITED */ + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, + crtc_state, i) { + if (crtc_state->mode.private_flags != + old_crtc_state->mode.private_flags) + crtc_state->mode_changed = true; + } + ret = drm_atomic_helper_check_modeset(dev, state); if (ret) return ret; @@ -12058,10 +12066,6 @@ static int intel_atomic_check(struct drm_device *dev, struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc_state); - /* Catch I915_MODE_FLAG_INHERITED */ - if (crtc_state->mode.private_flags != old_crtc_state->mode.private_flags) - crtc_state->mode_changed = true; - if (!needs_modeset(crtc_state)) continue; @@ -12070,13 +12074,6 @@ static int intel_atomic_check(struct drm_device *dev, continue; } - /* FIXME: For only active_changed we shouldn't need to do any - * state recomputation at all. */ - - ret = drm_atomic_add_affected_connectors(state, crtc); - if (ret) - return ret; - ret = intel_modeset_pipe_config(crtc, pipe_config); if (ret) { intel_dump_pipe_config(to_intel_crtc(crtc), @@ -12095,10 +12092,6 @@ static int intel_atomic_check(struct drm_device *dev, if (needs_modeset(crtc_state)) any_ms = true; - ret = drm_atomic_add_affected_planes(state, crtc); - if (ret) - return ret; - intel_dump_pipe_config(to_intel_crtc(crtc), pipe_config, needs_modeset(crtc_state) ? "[modeset]" : "[fastset]"); -- cgit v1.2.3 From 57312eaacd9b6c560032b9c9a755e0165f48ce52 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Thu, 1 Mar 2018 22:15:45 +0530 Subject: drm/i915/uc: Make GuC/HuC fw fetch and loading functions/file structure symmetric GuC load function is named intel_guc_fw_upload() and HuC load function is named intel_huc_init_hw(). Make them consistent intel_*_fw_upload. Also move HuC fw loading functions and declarations to separate files intel_huc_fw.c|h like GuC. While at this, do below changes 1. Update kernel-doc comment for intel_*_fw_upload() functions 2. s/huc_ucode_xfer/huc_fw_xfer 3. Introduce intel_huc_fw_init_early() v2: Changed patch to update HuC functions instead of changing guc_fw_upload and update file structure. (Michal Wajdeczko) v3: Added SPDX License identifier to huc_fw.c|h. 
(Michal Wajdeczko) Signed-off-by: Sagar Arun Kamble Cc: Michal Winiarski Cc: Michal Wajdeczko Cc: Chris Wilson Cc: Anusha Srivatsa Reviewed-by: Michal Wajdeczko Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1519922745-25441-1-git-send-email-sagar.a.kamble@intel.com --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/intel_guc_fw.c | 10 +-- drivers/gpu/drm/i915/intel_huc.c | 154 +-------------------------------- drivers/gpu/drm/i915/intel_huc.h | 2 +- drivers/gpu/drm/i915/intel_huc_fw.c | 166 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_huc_fw.h | 15 ++++ drivers/gpu/drm/i915/intel_uc.c | 2 +- 7 files changed, 191 insertions(+), 161 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_huc_fw.c create mode 100644 drivers/gpu/drm/i915/intel_huc_fw.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 881d7124c597..1bd9bc5b8c5c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -89,7 +89,8 @@ i915-y += intel_uc.o \ intel_guc_fw.o \ intel_guc_log.o \ intel_guc_submission.o \ - intel_huc.o + intel_huc.o \ + intel_huc_fw.o # autogenerated null render state i915-y += intel_renderstate_gen6.o \ diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 3b0932942857..d07f2b985f1c 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -269,15 +269,15 @@ static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) } /** - * intel_guc_fw_upload() - finish preparing the GuC for activity + * intel_guc_fw_upload() - load GuC uCode to device * @guc: intel_guc structure * - * Called during driver loading and also after a GPU reset. + * Called from intel_uc_init_hw() during driver load, resume from sleep and + * after a GPU reset. * - * The main action required here it to load the GuC uCode into the device. * The firmware image should have already been fetched into memory by the - * earlier call to intel_guc_init(), so here we need only check that - * worked, and then transfer the image to the h/w. + * earlier call to intel_uc_init_fw(), so here we need to only check that + * fetch succeeded, and then transfer the image to the h/w. * * Return: non-zero code on error */ diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index ef9a05d8e5a9..e37f58e760d8 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -27,161 +27,9 @@ #include "intel_huc.h" #include "i915_drv.h" -/** - * DOC: HuC Firmware - * - * Motivation: - * GEN9 introduces a new dedicated firmware for usage in media HEVC (High - * Efficiency Video Coding) operations. Userspace can use the firmware - * capabilities by adding HuC specific commands to batch buffers. - * - * Implementation: - * The same firmware loader is used as the GuC. However, the actual - * loading to HW is deferred until GEM initialization is done. - * - * Note that HuC firmware loading must be done before GuC loading. 
- */ - -#define BXT_HUC_FW_MAJOR 01 -#define BXT_HUC_FW_MINOR 07 -#define BXT_BLD_NUM 1398 - -#define SKL_HUC_FW_MAJOR 01 -#define SKL_HUC_FW_MINOR 07 -#define SKL_BLD_NUM 1398 - -#define KBL_HUC_FW_MAJOR 02 -#define KBL_HUC_FW_MINOR 00 -#define KBL_BLD_NUM 1810 - -#define HUC_FW_PATH(platform, major, minor, bld_num) \ - "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ - __stringify(minor) "_" __stringify(bld_num) ".bin" - -#define I915_SKL_HUC_UCODE HUC_FW_PATH(skl, SKL_HUC_FW_MAJOR, \ - SKL_HUC_FW_MINOR, SKL_BLD_NUM) -MODULE_FIRMWARE(I915_SKL_HUC_UCODE); - -#define I915_BXT_HUC_UCODE HUC_FW_PATH(bxt, BXT_HUC_FW_MAJOR, \ - BXT_HUC_FW_MINOR, BXT_BLD_NUM) -MODULE_FIRMWARE(I915_BXT_HUC_UCODE); - -#define I915_KBL_HUC_UCODE HUC_FW_PATH(kbl, KBL_HUC_FW_MAJOR, \ - KBL_HUC_FW_MINOR, KBL_BLD_NUM) -MODULE_FIRMWARE(I915_KBL_HUC_UCODE); - -static void huc_fw_select(struct intel_uc_fw *huc_fw) -{ - struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); - struct drm_i915_private *dev_priv = huc_to_i915(huc); - - GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); - - if (!HAS_HUC(dev_priv)) - return; - - if (i915_modparams.huc_firmware_path) { - huc_fw->path = i915_modparams.huc_firmware_path; - huc_fw->major_ver_wanted = 0; - huc_fw->minor_ver_wanted = 0; - } else if (IS_SKYLAKE(dev_priv)) { - huc_fw->path = I915_SKL_HUC_UCODE; - huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; - } else if (IS_BROXTON(dev_priv)) { - huc_fw->path = I915_BXT_HUC_UCODE; - huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - huc_fw->path = I915_KBL_HUC_UCODE; - huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; - } else { - DRM_WARN("%s: No firmware known for this platform!\n", - intel_uc_fw_type_repr(huc_fw->type)); - } -} - -/** - * intel_huc_init_early() - initializes HuC struct - * @huc: intel_huc struct - * - * On platforms with HuC selects firmware for uploading - */ void intel_huc_init_early(struct intel_huc *huc) { - struct intel_uc_fw *huc_fw = &huc->fw; - - intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC); - huc_fw_select(huc_fw); -} - -/** - * huc_ucode_xfer() - DMA's the firmware - * @huc_fw: the firmware descriptor - * @vma: the firmware image (bound into the GGTT) - * - * Transfer the firmware image to RAM for execution by the microcontroller. - * - * Return: 0 on success, non-zero on failure - */ -static int huc_ucode_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) -{ - struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); - struct drm_i915_private *dev_priv = huc_to_i915(huc); - unsigned long offset = 0; - u32 size; - int ret; - - GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); - - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - - /* Set the source address for the uCode */ - offset = guc_ggtt_offset(vma) + huc_fw->header_offset; - I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); - I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); - - /* Hardware doesn't look at destination address for HuC. Set it to 0, - * but still program the correct address space. 
- */ - I915_WRITE(DMA_ADDR_1_LOW, 0); - I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); - - size = huc_fw->header_size + huc_fw->ucode_size; - I915_WRITE(DMA_COPY_SIZE, size); - - /* Start the DMA */ - I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA)); - - /* Wait for DMA to finish */ - ret = intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, 100); - - DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret); - - /* Disable the bits once DMA is over */ - I915_WRITE(DMA_CTRL, _MASKED_BIT_DISABLE(HUC_UKERNEL)); - - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - - return ret; -} - -/** - * intel_huc_init_hw() - load HuC uCode to device - * @huc: intel_huc structure - * - * Called from intel_uc_init_hw() during driver loading and also after a GPU - * reset. Be note that HuC loading must be done before GuC loading. - * - * The firmware image should have already been fetched into memory by the - * earlier call to intel_uc_init_fw(), so here we need only check that - * is succeeded, and then transfer the image to the h/w. - * - */ -int intel_huc_init_hw(struct intel_huc *huc) -{ - return intel_uc_fw_upload(&huc->fw, huc_ucode_xfer); + intel_huc_fw_init_early(huc); } /** diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index 40039db59e04..5d6e804f9771 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -26,6 +26,7 @@ #define _INTEL_HUC_H_ #include "intel_uc_fw.h" +#include "intel_huc_fw.h" struct intel_huc { /* Generic uC firmware management */ @@ -35,7 +36,6 @@ struct intel_huc { }; void intel_huc_init_early(struct intel_huc *huc); -int intel_huc_init_hw(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); #endif diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c new file mode 100644 index 000000000000..c66afa9b989a --- /dev/null +++ b/drivers/gpu/drm/i915/intel_huc_fw.c @@ -0,0 +1,166 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#include "intel_huc_fw.h" +#include "i915_drv.h" + +/** + * DOC: HuC Firmware + * + * Motivation: + * GEN9 introduces a new dedicated firmware for usage in media HEVC (High + * Efficiency Video Coding) operations. Userspace can use the firmware + * capabilities by adding HuC specific commands to batch buffers. + * + * Implementation: + * The same firmware loader is used as the GuC. However, the actual + * loading to HW is deferred until GEM initialization is done. + * + * Note that HuC firmware loading must be done before GuC loading. 
+ */ + +#define BXT_HUC_FW_MAJOR 01 +#define BXT_HUC_FW_MINOR 07 +#define BXT_BLD_NUM 1398 + +#define SKL_HUC_FW_MAJOR 01 +#define SKL_HUC_FW_MINOR 07 +#define SKL_BLD_NUM 1398 + +#define KBL_HUC_FW_MAJOR 02 +#define KBL_HUC_FW_MINOR 00 +#define KBL_BLD_NUM 1810 + +#define HUC_FW_PATH(platform, major, minor, bld_num) \ + "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ + __stringify(minor) "_" __stringify(bld_num) ".bin" + +#define I915_SKL_HUC_UCODE HUC_FW_PATH(skl, SKL_HUC_FW_MAJOR, \ + SKL_HUC_FW_MINOR, SKL_BLD_NUM) +MODULE_FIRMWARE(I915_SKL_HUC_UCODE); + +#define I915_BXT_HUC_UCODE HUC_FW_PATH(bxt, BXT_HUC_FW_MAJOR, \ + BXT_HUC_FW_MINOR, BXT_BLD_NUM) +MODULE_FIRMWARE(I915_BXT_HUC_UCODE); + +#define I915_KBL_HUC_UCODE HUC_FW_PATH(kbl, KBL_HUC_FW_MAJOR, \ + KBL_HUC_FW_MINOR, KBL_BLD_NUM) +MODULE_FIRMWARE(I915_KBL_HUC_UCODE); + +static void huc_fw_select(struct intel_uc_fw *huc_fw) +{ + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct drm_i915_private *dev_priv = huc_to_i915(huc); + + GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); + + if (!HAS_HUC(dev_priv)) + return; + + if (i915_modparams.huc_firmware_path) { + huc_fw->path = i915_modparams.huc_firmware_path; + huc_fw->major_ver_wanted = 0; + huc_fw->minor_ver_wanted = 0; + } else if (IS_SKYLAKE(dev_priv)) { + huc_fw->path = I915_SKL_HUC_UCODE; + huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; + } else if (IS_BROXTON(dev_priv)) { + huc_fw->path = I915_BXT_HUC_UCODE; + huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + huc_fw->path = I915_KBL_HUC_UCODE; + huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; + } else { + DRM_WARN("%s: No firmware known for this platform!\n", + intel_uc_fw_type_repr(huc_fw->type)); + } +} + +/** + * intel_huc_fw_init_early() - initializes HuC firmware struct + * @huc: intel_huc struct + * + * On platforms with HuC selects firmware for uploading + */ +void intel_huc_fw_init_early(struct intel_huc *huc) +{ + struct intel_uc_fw *huc_fw = &huc->fw; + + intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC); + huc_fw_select(huc_fw); +} + +/** + * huc_fw_xfer() - DMA's the firmware + * @huc_fw: the firmware descriptor + * @vma: the firmware image (bound into the GGTT) + * + * Transfer the firmware image to RAM for execution by the microcontroller. + * + * Return: 0 on success, non-zero on failure + */ +static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) +{ + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct drm_i915_private *dev_priv = huc_to_i915(huc); + unsigned long offset = 0; + u32 size; + int ret; + + GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* Set the source address for the uCode */ + offset = guc_ggtt_offset(vma) + huc_fw->header_offset; + I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); + I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); + + /* Hardware doesn't look at destination address for HuC. Set it to 0, + * but still program the correct address space. 
+ */ + I915_WRITE(DMA_ADDR_1_LOW, 0); + I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); + + size = huc_fw->header_size + huc_fw->ucode_size; + I915_WRITE(DMA_COPY_SIZE, size); + + /* Start the DMA */ + I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA)); + + /* Wait for DMA to finish */ + ret = intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, 100); + + DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret); + + /* Disable the bits once DMA is over */ + I915_WRITE(DMA_CTRL, _MASKED_BIT_DISABLE(HUC_UKERNEL)); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + return ret; +} + +/** + * intel_huc_fw_upload() - load HuC uCode to device + * @huc: intel_huc structure + * + * Called from intel_uc_init_hw() during driver load, resume from sleep and + * after a GPU reset. Note that HuC must be loaded before GuC. + * + * The firmware image should have already been fetched into memory by the + * earlier call to intel_uc_init_fw(), so here we need to only check that + * fetch succeeded, and then transfer the image to the h/w. + * + * Return: non-zero code on error + */ +int intel_huc_fw_upload(struct intel_huc *huc) +{ + return intel_uc_fw_upload(&huc->fw, huc_fw_xfer); +} diff --git a/drivers/gpu/drm/i915/intel_huc_fw.h b/drivers/gpu/drm/i915/intel_huc_fw.h new file mode 100644 index 000000000000..8a00a0ebddc5 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_huc_fw.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef _INTEL_HUC_FW_H_ +#define _INTEL_HUC_FW_H_ + +struct intel_huc; + +void intel_huc_fw_init_early(struct intel_huc *huc); +int intel_huc_fw_upload(struct intel_huc *huc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 9f1bac6398fb..8e25474b435c 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -361,7 +361,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_out; if (USES_HUC(dev_priv)) { - ret = intel_huc_init_hw(huc); + ret = intel_huc_fw_upload(huc); if (ret) goto err_out; } -- cgit v1.2.3 From c27557ab56bc943c59ea353bfcb6fc37a6f7b728 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 28 Feb 2018 17:18:44 +0000 Subject: drm/i915: Wedged engine mask makes more sense in hex In decimal its just a weird big number, while in hex can actually log which engines were requested to be wedged. 
Signed-off-by: Tvrtko Ursulin Reviewed-by: Michel Thierry Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180228171844.20006-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 33fbf3965309..e838c765b251 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3977,7 +3977,8 @@ i915_wedged_set(void *data, u64 val) engine->hangcheck.stalled = true; } - i915_handle_error(i915, val, "Manually setting wedged to %llu", val); + i915_handle_error(i915, val, "Manually set wedged engine mask = %llx", + val); wait_on_bit(&i915->gpu_error.flags, I915_RESET_HANDOFF, -- cgit v1.2.3 From fee0fddc1dc114039bf9aed9fdeeecdc1949cdc7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 1 Mar 2018 15:44:57 +0200 Subject: drm/i915/gen9, gen10: Disable FBC on planes with a misaligned Y-offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enabling FBC on a plane having a Y-offset that isn't divisible by 4 may cause pipe FIFO underruns and flickers, so disable FBC on such a config. I tried the followings to work around the issue: - enable each HW work around in ILK_DPFC_CHICKEN - disable each compression algorithm in ILK_DPFC_CONTROL - disable low-power watermarks None of the above got rid of the problem. I haven't found this issue in the Bspec/WA database either. Besides the igt testcase below (yet to be merged) an easy way to reproduce the issue is to enable a plane with FBC and a plane Y-offset not aligned to 4 and then just enable/disable FBC in a loop, keeping the plane enabled. I could trigger the problem on BXT/GLK/SKL/CNL, so assume for now that it's only present on GEN9 and GEN10. v2: (Ville) - Run the test/apply the WA on CNL as well. - Use IS_GEN() instead of INTEL_GEN(). - Fix spelling. Cc: Paulo Zanoni Cc: Ville Syrjälä Testcase: igt/kms_plane/plane-clipping-pipe-A-planes Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180301134457.13974-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_fbc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 38b036c499d9..38a5535a5c63 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -859,6 +859,17 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } + /* + * Work around a problem on GEN9+ HW, where enabling FBC on a plane + * having a Y offset that isn't divisible by 4 causes FIFO underrun + * and screen flicker. + */ + if (IS_GEN(dev_priv, 9, 10) && + (fbc->state_cache.plane.adjusted_y & 3)) { + fbc->no_fbc_reason = "plane Y offset is misaligned"; + return false; + } + return true; } -- cgit v1.2.3 From f76062651823bf7318f0bc9c104f86853c1d9046 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 20:10:34 +0200 Subject: drm/i915: s/intel_dp_aux_ch/intel_dp_aux_xfer/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename intel_dp_aux_ch() to intel_dp_aux_xfer() to better convey what it actually does. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222181036.15251-6-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson #irc --- drivers/gpu/drm/i915/intel_dp.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index aba2f45819d8..10a0e9f149b5 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1085,9 +1085,9 @@ static uint32_t intel_dp_get_aux_send_ctl(struct intel_dp *intel_dp, } static int -intel_dp_aux_ch(struct intel_dp *intel_dp, - const uint8_t *send, int send_bytes, - uint8_t *recv, int recv_size, bool aksv_write) +intel_dp_aux_xfer(struct intel_dp *intel_dp, + const uint8_t *send, int send_bytes, + uint8_t *recv, int recv_size, bool aksv_write) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = @@ -1284,8 +1284,8 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) if (msg->buffer) memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); - ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize, - false); + ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, + rxbuf, rxsize, false); if (ret > 0) { msg->reply = rxbuf[0] >> 4; @@ -1307,8 +1307,8 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) if (WARN_ON(rxsize > 20)) return -E2BIG; - ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize, - false); + ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, + rxbuf, rxsize, false); if (ret > 0) { msg->reply = rxbuf[0] >> 4; /* @@ -5045,8 +5045,8 @@ int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, txbuf[2] = DP_AUX_HDCP_AKSV & 0xff; txbuf[3] = DRM_HDCP_KSV_LEN - 1; - ret = intel_dp_aux_ch(intel_dp, txbuf, sizeof(txbuf), rxbuf, - sizeof(rxbuf), true); + ret = intel_dp_aux_xfer(intel_dp, txbuf, sizeof(txbuf), + rxbuf, sizeof(rxbuf), true); if (ret < 0) { DRM_ERROR("Write Aksv over DP/AUX failed (%d)\n", ret); return ret; -- cgit v1.2.3 From 8159c796b6d7919bd77c84b8b9a4aa2e76f45491 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 23:27:32 +0200 Subject: drm/i915: Keep the AKSV details in intel_dp_hdcp_write_an_aksv() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's try to keep the details on the AKSV stuff concentrated in one place. So move the control bit and +5 data size handling there. 
v2: Increase txbuf[] to include the payload which intel_dp_aux_xfer() will still load into the registers even though the hardware will ignore it Cc: Sean Paul Cc: Ramalingam C Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222212732.4665-1-ville.syrjala@linux.intel.com Reviewed-by: Ramalingam C --- drivers/gpu/drm/i915/intel_dp.c | 42 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 10a0e9f149b5..a30303c836c9 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1065,29 +1065,11 @@ static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, DP_AUX_CH_CTL_SYNC_PULSE_SKL(32); } -static uint32_t intel_dp_get_aux_send_ctl(struct intel_dp *intel_dp, - bool has_aux_irq, - int send_bytes, - uint32_t aux_clock_divider, - bool aksv_write) -{ - uint32_t val = 0; - - if (aksv_write) { - send_bytes += 5; - val |= DP_AUX_CH_CTL_AUX_AKSV_SELECT; - } - - return val | intel_dp->get_aux_send_ctl(intel_dp, - has_aux_irq, - send_bytes, - aux_clock_divider); -} - static int intel_dp_aux_xfer(struct intel_dp *intel_dp, const uint8_t *send, int send_bytes, - uint8_t *recv, int recv_size, bool aksv_write) + uint8_t *recv, int recv_size, + u32 aux_send_ctl_flags) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = @@ -1151,11 +1133,12 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, } while ((aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, clock++))) { - u32 send_ctl = intel_dp_get_aux_send_ctl(intel_dp, - has_aux_irq, - send_bytes, - aux_clock_divider, - aksv_write); + u32 send_ctl = intel_dp->get_aux_send_ctl(intel_dp, + has_aux_irq, + send_bytes, + aux_clock_divider); + + send_ctl |= aux_send_ctl_flags; /* Must try at least 3 times according to DP spec */ for (try = 0; try < 5; try++) { @@ -1285,7 +1268,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, - rxbuf, rxsize, false); + rxbuf, rxsize, 0); if (ret > 0) { msg->reply = rxbuf[0] >> 4; @@ -1308,7 +1291,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) return -E2BIG; ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, - rxbuf, rxsize, false); + rxbuf, rxsize, 0); if (ret > 0) { msg->reply = rxbuf[0] >> 4; /* @@ -5021,7 +5004,7 @@ int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, u8 *an) { struct intel_dp *intel_dp = enc_to_intel_dp(&intel_dig_port->base.base); - uint8_t txbuf[4], rxbuf[2], reply = 0; + uint8_t txbuf[4+5] = {}, rxbuf[2], reply = 0; ssize_t dpcd_ret; int ret; @@ -5046,7 +5029,8 @@ int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, txbuf[3] = DRM_HDCP_KSV_LEN - 1; ret = intel_dp_aux_xfer(intel_dp, txbuf, sizeof(txbuf), - rxbuf, sizeof(rxbuf), true); + rxbuf, sizeof(rxbuf), + DP_AUX_CH_CTL_AUX_AKSV_SELECT); if (ret < 0) { DRM_ERROR("Write Aksv over DP/AUX failed (%d)\n", ret); return ret; -- cgit v1.2.3 From 32078b727ded0231b6265c6694ff49b0f76c27a0 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 22 Feb 2018 23:28:02 +0200 Subject: drm/i915: Deduplicate the code to fill the aux message header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have two instances of the code to fill out the header for the aux message. 
Pull it into a small helper. v2: Rebase due to txbuf[] changes Cc: Sean Paul Cc: Ramalingam C Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180222212802.4826-1-ville.syrjala@linux.intel.com Reviewed-by: Ramalingam C --- drivers/gpu/drm/i915/intel_dp.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index a30303c836c9..c722a6750e90 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1238,6 +1238,17 @@ out: #define BARE_ADDRESS_SIZE 3 #define HEADER_SIZE (BARE_ADDRESS_SIZE + 1) + +static void +intel_dp_aux_header(u8 txbuf[HEADER_SIZE], + const struct drm_dp_aux_msg *msg) +{ + txbuf[0] = (msg->request << 4) | ((msg->address >> 16) & 0xf); + txbuf[1] = (msg->address >> 8) & 0xff; + txbuf[2] = msg->address & 0xff; + txbuf[3] = msg->size - 1; +} + static ssize_t intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -1246,11 +1257,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) size_t txsize, rxsize; int ret; - txbuf[0] = (msg->request << 4) | - ((msg->address >> 16) & 0xf); - txbuf[1] = (msg->address >> 8) & 0xff; - txbuf[2] = msg->address & 0xff; - txbuf[3] = msg->size - 1; + intel_dp_aux_header(txbuf, msg); switch (msg->request & ~DP_AUX_I2C_MOT) { case DP_AUX_NATIVE_WRITE: @@ -5004,7 +5011,12 @@ int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, u8 *an) { struct intel_dp *intel_dp = enc_to_intel_dp(&intel_dig_port->base.base); - uint8_t txbuf[4+5] = {}, rxbuf[2], reply = 0; + static const struct drm_dp_aux_msg msg = { + .request = DP_AUX_NATIVE_WRITE, + .address = DP_AUX_HDCP_AKSV, + .size = DRM_HDCP_KSV_LEN, + }; + uint8_t txbuf[HEADER_SIZE + DRM_HDCP_KSV_LEN] = {}, rxbuf[2], reply = 0; ssize_t dpcd_ret; int ret; @@ -5022,13 +5034,9 @@ int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, * we were writing the data, and then tickle the hardware to output the * data once the header is sent out. */ - txbuf[0] = (DP_AUX_NATIVE_WRITE << 4) | - ((DP_AUX_HDCP_AKSV >> 16) & 0xf); - txbuf[1] = (DP_AUX_HDCP_AKSV >> 8) & 0xff; - txbuf[2] = DP_AUX_HDCP_AKSV & 0xff; - txbuf[3] = DRM_HDCP_KSV_LEN - 1; + intel_dp_aux_header(txbuf, &msg); - ret = intel_dp_aux_xfer(intel_dp, txbuf, sizeof(txbuf), + ret = intel_dp_aux_xfer(intel_dp, txbuf, HEADER_SIZE + msg.size, rxbuf, sizeof(rxbuf), DP_AUX_CH_CTL_AUX_AKSV_SELECT); if (ret < 0) { -- cgit v1.2.3 From 963ddd63c314e9b5d9cd999873d473a93aed5380 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Mar 2018 11:33:24 +0000 Subject: drm/i915: Suspend submission tasklets around wedging After staring hard at sequences like [ 28.199013] systemd-1 2..s. 26062228us : execlists_submission_tasklet: rcs0 cs-irq head=0 [0?], tail=1 [1?] [ 28.199095] systemd-1 2..s. 26062229us : execlists_submission_tasklet: rcs0 csb[1]: status=0x00000018:0x00000000, active=0x1 [ 28.199177] systemd-1 2..s. 26062230us : execlists_submission_tasklet: rcs0 out[0]: ctx=0.1, seqno=3, prio=-1024 [ 28.199258] systemd-1 2..s. 26062231us : execlists_submission_tasklet: rcs0 completed ctx=0 [ 28.199340] gem_eio-829 1..s1 26066853us : execlists_submission_tasklet: rcs0 in[0]: ctx=1.1, seqno=1, prio=0 [ 28.199421] -0 2..s. 26066863us : execlists_submission_tasklet: rcs0 cs-irq head=1 [1?], tail=2 [2?] [ 28.199503] -0 2..s. 
26066865us : execlists_submission_tasklet: rcs0 csb[2]: status=0x00000001:0x00000000, active=0x1 [ 28.199585] gem_eio-829 1..s1 26067077us : execlists_submission_tasklet: rcs0 in[1]: ctx=3.1, seqno=2, prio=0 [ 28.199667] gem_eio-829 1..s1 26067078us : execlists_submission_tasklet: rcs0 in[0]: ctx=1.2, seqno=1, prio=0 [ 28.199749] -0 2..s. 26067084us : execlists_submission_tasklet: rcs0 cs-irq head=2 [2?], tail=3 [3?] [ 28.199830] -0 2..s. 26067085us : execlists_submission_tasklet: rcs0 csb[3]: status=0x00008002:0x00000001, active=0x1 [ 28.199912] -0 2..s. 26067086us : execlists_submission_tasklet: rcs0 out[0]: ctx=1.2, seqno=1, prio=0 [ 28.199994] gem_eio-829 2..s. 28246084us : execlists_submission_tasklet: rcs0 cs-irq head=3 [3?], tail=4 [4?] [ 28.200096] gem_eio-829 2..s. 28246088us : execlists_submission_tasklet: rcs0 csb[4]: status=0x00000014:0x00000001, active=0x5 [ 28.200178] gem_eio-829 2..s. 28246089us : execlists_submission_tasklet: rcs0 out[0]: ctx=0.0, seqno=0, prio=0 [ 28.200260] gem_eio-829 2..s. 28246127us : execlists_submission_tasklet: execlists_submission_tasklet:886 GEM_BUG_ON(buf[2 * head + 1] != port->context_id) the conclusion is that the only place where the ports are reset to zero, is from engine->cancel_requests called during i915_gem_set_wedged(). The race is horrible as it results from calling set-wedged on active HW (the GPU reset failed) and as such we need to be careful as the HW state changes beneath us. Fortunately, it's the same scary conditions as affect normal reset, so we can reuse the same machinery to disable state tracking as we clobber it. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104945 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Michel Thierry Fixes: af7a8ffad9c5 ("drm/i915: Use rcu instead of stop_machine in set_wedged") Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180302113324.23189-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 6 +++++- drivers/gpu/drm/i915/intel_lrc.c | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c29b1a1cbe96..dcdcc09240b9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3212,8 +3212,10 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) * rolling the global seqno forward (since this would complete requests * for which we haven't set the fence error to EIO yet). */ - for_each_engine(engine, i915, id) + for_each_engine(engine, i915, id) { + i915_gem_reset_prepare_engine(engine); engine->submit_request = nop_submit_request; + } /* * Make sure no one is running the old callback before we proceed with @@ -3255,6 +3257,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) intel_engine_init_global_seqno(engine, intel_engine_last_submit(engine)); spin_unlock_irqrestore(&engine->timeline->lock, flags); + + i915_gem_reset_finish_engine(engine); } wake_up_all(&i915->gpu_error.reset_queue); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 14288743909f..c1a3636e94fc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -687,6 +687,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) struct rb_node *rb; unsigned long flags; + GEM_TRACE("%s\n", engine->name); + spin_lock_irqsave(&engine->timeline->lock, flags); /* Cancel the requests on the HW and clear the ELSP tracker. 
*/ @@ -733,6 +735,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + /* Mark all CS interrupts as complete */ + execlists->active = 0; + spin_unlock_irqrestore(&engine->timeline->lock, flags); } -- cgit v1.2.3 From aebbc2d7b3887202c9b02e69e2a470901d0eda71 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Mar 2018 13:12:46 +0000 Subject: drm/i915/execlists: Move irq state manipulation inside irq disabled region Although this state (execlists->active and engine->irq_posted) itself is not protected by the engine->timeline spinlock, it does conveniently ensure that irqs are disabled. We can use this to protect our manipulation of the state and so ensure that the next IRQ to arrive sees consistent state and (hopefully) ignores the reset engine. Suggested-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Michel Thierry Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180302131246.22036-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c1a3636e94fc..0482e54c94f0 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1618,10 +1618,10 @@ static void reset_common_ring(struct intel_engine_cs *engine, GEM_TRACE("%s seqno=%x\n", engine->name, request ? request->global_seqno : 0); - reset_irq(engine); - spin_lock_irqsave(&engine->timeline->lock, flags); + reset_irq(engine); + /* * Catch up with any missed context-switch interrupts. * @@ -1636,11 +1636,11 @@ static void reset_common_ring(struct intel_engine_cs *engine, /* Push back any incomplete requests for replay after the reset. */ __unwind_incomplete_requests(engine); - spin_unlock_irqrestore(&engine->timeline->lock, flags); - /* Mark all CS interrupts as complete */ execlists->active = 0; + spin_unlock_irqrestore(&engine->timeline->lock, flags); + /* If the request was innocent, we leave the request in the ELSP * and will try to replay it on restarting. The context image may * have been corrupted by the reset, in which case we may have -- cgit v1.2.3 From a3e3883646c05828f96369f7809436f1b3247234 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Mar 2018 14:32:45 +0000 Subject: drm/i915/execlists: Split spinlock from its irq disabling side-effect During reset/wedging, we have to clean up the requests on the timeline and flush the pending interrupt state. Currently, we are abusing the irq disabling of the timeline spinlock to protect the irq state in conjunction to the engine's timeline requests, but this is accidental and conflates the spinlock with the irq state. A baffling state of affairs for the reader. Instead, explicitly disable irqs over the critical section, and separate modifying the irq state from the timeline's requests. 
Suggested-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20180302143246.2579-4-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0482e54c94f0..36b376e4b105 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -689,11 +689,27 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) GEM_TRACE("%s\n", engine->name); - spin_lock_irqsave(&engine->timeline->lock, flags); + /* + * Before we call engine->cancel_requests(), we should have exclusive + * access to the submission state. This is arranged for us by the + * caller disabling the interrupt generation, the tasklet and other + * threads that may then access the same state, giving us a free hand + * to reset state. However, we still need to let lockdep be aware that + * we know this state may be accessed in hardirq context, so we + * disable the irq around this manipulation and we want to keep + * the spinlock focused on its duties and not accidentally conflate + * coverage to the submission's irq state. (Similarly, although we + * shouldn't need to disable irq around the manipulation of the + * submission's irq state, we also wish to remind ourselves that + * it is irq state.) + */ + local_irq_save(flags); /* Cancel the requests on the HW and clear the ELSP tracker. */ execlists_cancel_port_requests(execlists); + spin_lock(&engine->timeline->lock); + /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->timeline->requests, link) { GEM_BUG_ON(!rq->global_seqno); @@ -727,6 +743,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); + spin_unlock(&engine->timeline->lock); + /* * The port is checked prior to scheduling a tasklet, but * just in case we have suspended the tasklet to do the @@ -738,7 +756,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Mark all CS interrupts as complete */ execlists->active = 0; - spin_unlock_irqrestore(&engine->timeline->lock, flags); + local_irq_restore(flags); } /* @@ -1618,7 +1636,8 @@ static void reset_common_ring(struct intel_engine_cs *engine, GEM_TRACE("%s seqno=%x\n", engine->name, request ? request->global_seqno : 0); - spin_lock_irqsave(&engine->timeline->lock, flags); + /* See execlists_cancel_requests() for the irq/spinlock split. */ + local_irq_save(flags); reset_irq(engine); @@ -1634,14 +1653,17 @@ static void reset_common_ring(struct intel_engine_cs *engine, execlists_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ + spin_lock(&engine->timeline->lock); __unwind_incomplete_requests(engine); + spin_unlock(&engine->timeline->lock); /* Mark all CS interrupts as complete */ execlists->active = 0; - spin_unlock_irqrestore(&engine->timeline->lock, flags); + local_irq_restore(flags); - /* If the request was innocent, we leave the request in the ELSP + /* + * If the request was innocent, we leave the request in the ELSP * and will try to replay it on restarting. 
The context image may * have been corrupted by the reset, in which case we may have * to service a new GPU hang, but more likely we can continue on @@ -1654,7 +1676,8 @@ static void reset_common_ring(struct intel_engine_cs *engine, if (!request || request->fence.error != -EIO) return; - /* We want a simple context + ring to execute the breadcrumb update. + /* + * We want a simple context + ring to execute the breadcrumb update. * We cannot rely on the context being intact across the GPU hang, * so clear it and rebuild just what we need for the breadcrumb. * All pending requests for this context will be zapped, and any -- cgit v1.2.3 From 7cfca4afd656f0aa6f136d8a1260d994abc96a0c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 2 Mar 2018 11:15:49 +0000 Subject: drm/i915/uc: Introduce intel_uc_suspend|resume We want to use higher level 'uc' functions as the main entry points to the GuC/HuC code to hide some details and keep code layered. While here, move call to disable_guc_interrupts after sending suspend action to the GuC to allow it work also with CTB as comm mechanism. v2: update commit msg (Sagar) Signed-off-by: Michal Wajdeczko Cc: Sagar Arun Kamble Cc: Chris Wilson Reviewed-by: Sagar Arun Kamble Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180302111550.21328-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 6 +++--- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/intel_guc.c | 42 +++++++++++++------------------------ drivers/gpu/drm/i915/intel_guc.h | 4 ++-- drivers/gpu/drm/i915/intel_uc.c | 45 ++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uc.h | 2 ++ 6 files changed, 68 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index aaa861b51024..d61b51c0bf0b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2575,7 +2575,7 @@ static int intel_runtime_suspend(struct device *kdev) */ i915_gem_runtime_suspend(dev_priv); - intel_guc_suspend(dev_priv); + intel_uc_suspend(dev_priv); intel_runtime_pm_disable_interrupts(dev_priv); @@ -2597,7 +2597,7 @@ static int intel_runtime_suspend(struct device *kdev) intel_runtime_pm_enable_interrupts(dev_priv); - intel_guc_resume(dev_priv); + intel_uc_resume(dev_priv); i915_gem_init_swizzling(dev_priv); i915_gem_restore_fences(dev_priv); @@ -2683,7 +2683,7 @@ static int intel_runtime_resume(struct device *kdev) intel_runtime_pm_enable_interrupts(dev_priv); - intel_guc_resume(dev_priv); + intel_uc_resume(dev_priv); /* * No point of rolling back things in case of an error, as the best diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dcdcc09240b9..a5bd07338b46 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4920,7 +4920,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) i915_gem_contexts_lost(dev_priv); mutex_unlock(&dev->struct_mutex); - intel_guc_suspend(dev_priv); + intel_uc_suspend(dev_priv); cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_delayed_work_sync(&dev_priv->gt.retire_work); @@ -4987,7 +4987,7 @@ void i915_gem_resume(struct drm_i915_private *i915) if (i915_gem_init_hw(i915)) goto err_wedged; - intel_guc_resume(i915); + intel_uc_resume(i915); /* Always reload a context for powersaving. 
*/ if (i915_gem_switch_to_kernel_context(i915)) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index e6512cccef75..ff08ea0ebf49 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -403,22 +403,15 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) /** * intel_guc_suspend() - notify GuC entering suspend state - * @dev_priv: i915 device private + * @guc: the guc */ -int intel_guc_suspend(struct drm_i915_private *dev_priv) +int intel_guc_suspend(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - gen9_disable_guc_interrupts(dev_priv); - - data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; - /* any value greater than GUC_POWER_D0 */ - data[1] = GUC_POWER_D1; - data[2] = guc_ggtt_offset(guc->shared_data); + u32 data[] = { + INTEL_GUC_ACTION_ENTER_S_STATE, + GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */ + guc_ggtt_offset(guc->shared_data) + }; return intel_guc_send(guc, data, ARRAY_SIZE(data)); } @@ -448,22 +441,15 @@ int intel_guc_reset_engine(struct intel_guc *guc, /** * intel_guc_resume() - notify GuC resuming from suspend state - * @dev_priv: i915 device private + * @guc: the guc */ -int intel_guc_resume(struct drm_i915_private *dev_priv) +int intel_guc_resume(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - if (i915_modparams.guc_log_level) - gen9_enable_guc_interrupts(dev_priv); - - data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; - data[1] = GUC_POWER_D0; - data[2] = guc_ggtt_offset(guc->shared_data); + u32 data[] = { + INTEL_GUC_ACTION_EXIT_S_STATE, + GUC_POWER_D0, + guc_ggtt_offset(guc->shared_data) + }; return intel_guc_send(guc, data, ARRAY_SIZE(data)); } diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 52856a97477d..b9424ac644ac 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -127,8 +127,8 @@ int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_sample_forcewake(struct intel_guc *guc); int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); -int intel_guc_suspend(struct drm_i915_private *dev_priv); -int intel_guc_resume(struct drm_i915_private *dev_priv); +int intel_guc_suspend(struct intel_guc *guc); +int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 8e25474b435c..e5bf0d37bf43 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -445,3 +445,48 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) if (USES_GUC_SUBMISSION(dev_priv)) gen9_disable_guc_interrupts(dev_priv); } + +int intel_uc_suspend(struct drm_i915_private *i915) +{ + struct intel_guc *guc = &i915->guc; + int err; + + if (!USES_GUC(i915)) + return 0; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + err = intel_guc_suspend(guc); + if (err) { + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + return err; + } + + gen9_disable_guc_interrupts(i915); + + return 0; +} + +int intel_uc_resume(struct drm_i915_private *i915) +{ + struct intel_guc *guc = &i915->guc; + int err; 
+ + if (!USES_GUC(i915)) + return 0; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + if (i915_modparams.guc_log_level) + gen9_enable_guc_interrupts(i915); + + err = intel_guc_resume(guc); + if (err) { + DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err); + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index f2984e01e257..f76d51d1ce70 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -39,6 +39,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_uc_init(struct drm_i915_private *dev_priv); void intel_uc_fini(struct drm_i915_private *dev_priv); +int intel_uc_suspend(struct drm_i915_private *dev_priv); +int intel_uc_resume(struct drm_i915_private *dev_priv); static inline bool intel_uc_is_using_guc(void) { -- cgit v1.2.3 From 7b026763cff902debe1184ef4e0609dc148338ad Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 2 Mar 2018 13:37:17 +0000 Subject: drm/i915/huc: Mark firmware as failed on auth failure If we fail to authenticate HuC firmware, we should change its load status to FAIL. While around, print HUC_STATUS on firmware verification failure. v2: keep the variables sorted by length (Chris) Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Anusha Srivatsa Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180302133718.1260-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index e37f58e760d8..65e2afb9b955 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -48,6 +48,7 @@ int intel_huc_auth(struct intel_huc *huc) struct drm_i915_private *i915 = huc_to_i915(huc); struct intel_guc *guc = &i915->guc; struct i915_vma *vma; + u32 status; int ret; if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) @@ -58,28 +59,35 @@ int intel_huc_auth(struct intel_huc *huc) if (IS_ERR(vma)) { ret = PTR_ERR(vma); DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); - return ret; + goto fail; } ret = intel_guc_auth_huc(guc, guc_ggtt_offset(vma) + huc->fw.rsa_offset); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); - goto out; + goto fail_unpin; } /* Check authentication status, it should be done by now */ - ret = intel_wait_for_register(i915, - HUC_STATUS2, - HUC_FW_VERIFIED, - HUC_FW_VERIFIED, - 50); + ret = __intel_wait_for_register(i915, + HUC_STATUS2, + HUC_FW_VERIFIED, + HUC_FW_VERIFIED, + 2, 50, &status); if (ret) { - DRM_ERROR("HuC: Authentication failed %d\n", ret); - goto out; + DRM_ERROR("HuC: Firmware not verified %#x\n", status); + goto fail_unpin; } -out: i915_vma_unpin(vma); + return 0; + +fail_unpin: + i915_vma_unpin(vma); +fail: + huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL; + + DRM_ERROR("HuC: Authentication failed %d\n", ret); return ret; } -- cgit v1.2.3 From 1f267a572b573b0b155022750cba93001f4367a8 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Mon, 5 Mar 2018 11:56:15 +0200 Subject: drm/i915: Update DRIVER_DATE to 20180305 Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 10c9e5e619ab..7eec99d7fad4 100644 --- 
a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -83,8 +83,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20180221" -#define DRIVER_TIMESTAMP 1519219289 +#define DRIVER_DATE "20180305" +#define DRIVER_TIMESTAMP 1520243775 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions -- cgit v1.2.3 From 3d2011cfa41faf4981f72b050f0891c528c09f52 Mon Sep 17 00:00:00 2001 From: Mahesh Kumar Date: Tue, 6 Feb 2018 11:38:55 +0530 Subject: drm/i915/icl: remove port A/E lane sharing limitation. Platforms before Gen11 were sharing lanes between port-A & port-E. This limitation is no more there. Changes since V1: - optimize the code (Shashank/Jani) - create helper function to get max lanes (ville) Changes since V2: - Include BIOS fail fix-up in same helper function (ville) Changes since V3: - remove confusing if/else (jani) - group intel_encoder initialization Signed-off-by: Mahesh Kumar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180206060855.30026-1-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 85 ++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 8ca376aca8bd..bfdaa5d86861 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2842,39 +2842,45 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dport) return false; } +static int +intel_ddi_max_lanes(struct intel_digital_port *intel_dport) +{ + struct drm_i915_private *dev_priv = to_i915(intel_dport->base.base.dev); + enum port port = intel_dport->base.port; + int max_lanes = 4; + + if (INTEL_GEN(dev_priv) >= 11) + return max_lanes; + + if (port == PORT_A || port == PORT_E) { + if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) + max_lanes = port == PORT_A ? 4 : 0; + else + /* Both A and E share 2 lanes */ + max_lanes = 2; + } + + /* + * Some BIOS might fail to set this bit on port A if eDP + * wasn't lit up at boot. Force this bit set when needed + * so we use the proper lane count for our calculations. 
+ */ + if (intel_ddi_a_force_4_lanes(intel_dport)) { + DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n"); + intel_dport->saved_port_bits |= DDI_A_4_LANES; + max_lanes = 4; + } + + return max_lanes; +} + void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *intel_dig_port; struct intel_encoder *intel_encoder; struct drm_encoder *encoder; bool init_hdmi, init_dp, init_lspcon = false; - int max_lanes; - if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) { - switch (port) { - case PORT_A: - max_lanes = 4; - break; - case PORT_E: - max_lanes = 0; - break; - default: - max_lanes = 4; - break; - } - } else { - switch (port) { - case PORT_A: - max_lanes = 2; - break; - case PORT_E: - max_lanes = 2; - break; - default: - max_lanes = 4; - break; - } - } init_hdmi = (dev_priv->vbt.ddi_port_info[port].supports_dvi || dev_priv->vbt.ddi_port_info[port].supports_hdmi); @@ -2920,10 +2926,17 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->get_config = intel_ddi_get_config; intel_encoder->suspend = intel_dp_encoder_suspend; intel_encoder->get_power_domains = intel_ddi_get_power_domains; + intel_encoder->type = INTEL_OUTPUT_DDI; + intel_encoder->power_domain = intel_port_to_power_domain(port); + intel_encoder->port = port; + intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->cloneable = 0; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); + intel_dig_port->dp.output_reg = INVALID_MMIO_REG; + intel_dig_port->max_lanes = intel_ddi_max_lanes(intel_dig_port); switch (port) { case PORT_A: @@ -2954,26 +2967,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) MISSING_CASE(port); } - /* - * Some BIOS might fail to set this bit on port A if eDP - * wasn't lit up at boot. Force this bit set when needed - * so we use the proper lane count for our calculations. - */ - if (intel_ddi_a_force_4_lanes(intel_dig_port)) { - DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n"); - intel_dig_port->saved_port_bits |= DDI_A_4_LANES; - max_lanes = 4; - } - - intel_dig_port->dp.output_reg = INVALID_MMIO_REG; - intel_dig_port->max_lanes = max_lanes; - - intel_encoder->type = INTEL_OUTPUT_DDI; - intel_encoder->power_domain = intel_port_to_power_domain(port); - intel_encoder->port = port; - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); - intel_encoder->cloneable = 0; - intel_infoframe_init(intel_dig_port); if (init_dp) { -- cgit v1.2.3 From 7509702bd8bd0975cc42a7272221446a897be950 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 5 Mar 2018 10:33:12 +0000 Subject: drm/i915: Unwind vma pinning for intel_pin_and_fence_fb_obj error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we fail to acquire a fence when we must, we must unwind before reporting the error. Otherwise, we lose tracking of the vma pinning and eventually hit a bug like <3>[ 46.163202] i915_vma_unpin:333 GEM_BUG_ON(!i915_vma_is_pinned(vma)) <4>[ 46.163424] ------------[ cut here ]------------ <2>[ 46.163429] kernel BUG at drivers/gpu/drm/i915/i915_vma.h:333! <4>[ 46.163444] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI <0>[ 46.163451] Dumping ftrace buffer: <0>[ 46.163457] --------------------------------- <0>[ 46.163630] <...>-84 1.... 
46260767us : i915_gem_object_unpin_from_display_plane: i915_vma_unpin:333 GEM_BUG_ON(!i915_vma_is_pinned(vma)) <0>[ 46.163635] --------------------------------- <4>[ 46.163638] Modules linked in: vgem i915 snd_hda_codec_analog snd_hda_codec_generic coretemp snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm lpc_ich mei_me e1000e mei prime_numbers <4>[ 46.163667] CPU: 1 PID: 84 Comm: kworker/u16:1 Tainted: G U 4.16.0-rc3-gc07ef2c77d14-kasan_18+ #1 <4>[ 46.163671] Hardware name: Dell Inc. OptiPlex 755 /0PU052, BIOS A08 02/19/2008 <4>[ 46.163743] Workqueue: events_unbound intel_atomic_commit_work [i915] <4>[ 46.163809] RIP: 0010:i915_gem_object_unpin_from_display_plane+0x253/0x2f0 [i915] <4>[ 46.163813] RSP: 0018:ffff8800624cfb48 EFLAGS: 00010286 <4>[ 46.163818] RAX: 000000000000000c RBX: ffff880064446c40 RCX: ffff8800653135b8 <4>[ 46.163822] RDX: dffffc0000000000 RSI: 0000000000000054 RDI: ffff8800651e30d0 <4>[ 46.163825] RBP: 00000000000003d0 R08: 0000000000000001 R09: ffff8800651e3158 <4>[ 46.163829] R10: 0000000000000000 R11: ffff8800651e30f0 R12: 0000000000000001 <4>[ 46.163832] R13: ffff880054c58620 R14: 0000000000000000 R15: dffffc0000000000 <4>[ 46.163836] FS: 0000000000000000(0000) GS:ffff880066040000(0000) knlGS:0000000000000000 <4>[ 46.163840] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 46.163843] CR2: 00007f1fc6fb0000 CR3: 00000000526fe000 CR4: 00000000000006e0 <4>[ 46.163846] Call Trace: <4>[ 46.163918] intel_unpin_fb_vma+0xbd/0x300 [i915] <4>[ 46.163990] intel_cleanup_plane_fb+0x99/0xc0 [i915] <4>[ 46.163998] drm_atomic_helper_cleanup_planes+0x166/0x280 <4>[ 46.164071] intel_atomic_commit_tail+0x1594/0x33a0 [i915] <4>[ 46.164081] ? process_one_work+0x66e/0x1460 <4>[ 46.164151] ? skl_update_crtcs+0x9c0/0x9c0 [i915] <4>[ 46.164157] ? lock_acquire+0x13d/0x390 <4>[ 46.164161] ? lock_acquire+0x13d/0x390 <4>[ 46.164169] process_one_work+0x71a/0x1460 <4>[ 46.164175] ? __schedule+0x838/0x1e50 <4>[ 46.164182] ? pwq_dec_nr_in_flight+0x2b0/0x2b0 <4>[ 46.164188] ? _raw_spin_lock_irq+0xa/0x40 <4>[ 46.164194] worker_thread+0xdf/0xf60 <4>[ 46.164204] ? process_one_work+0x1460/0x1460 <4>[ 46.164209] kthread+0x2cf/0x3c0 <4>[ 46.164213] ? 
_kthread_create_on_node+0xa0/0xa0 <4>[ 46.164218] ret_from_fork+0x3a/0x50 <4>[ 46.164227] Code: e8 78 d9 cd e8 48 8b 35 cc 9e 47 00 49 c7 c0 c0 31 84 c0 b9 4d 01 00 00 48 c7 c2 e0 80 84 c0 48 c7 c7 0e bb 57 c0 e8 5d 4b df e8 <0f> 0b 48 c7 c1 c0 30 84 c0 ba 4e 01 00 00 48 c7 c6 e0 80 84 c0 <1>[ 46.164368] RIP: i915_gem_object_unpin_from_display_plane+0x253/0x2f0 [i915] RSP: ffff8800624cfb48 Fixes: 85798ac9b35f ("drm/i915: Fail if we can't get a fence for gen2/3 tiled scanout") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180305103312.29492-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 90f0fc8cc2bd..ceed0821b37d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2152,6 +2152,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, */ ret = i915_vma_pin_fence(vma); if (ret != 0 && INTEL_GEN(dev_priv) < 4) { + i915_gem_object_unpin_from_display_plane(vma); vma = ERR_PTR(ret); goto err; } -- cgit v1.2.3 From d4ccceb055912c2e5e15ec488cee3378ee4ca73c Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 2 Mar 2018 18:14:56 +0200 Subject: drm/i915/icl: Ringbuffer interrupt handling On Gen11 interrupt masks need to be clear to allow C6 entry. We keep them all enabled knowing that we generate extra interrupts. v2: Rebase. v3: Remove gen 11 extra check in logical_render_ring_init. v4: Rebase fixes. v5: Rebase/refactor. v6: Rebase. v7: Rebase. v8: Update comment and commit message (Daniele) Signed-off-by: Tvrtko Ursulin Signed-off-by: Rodrigo Vivi Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180302161501.28594-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 16 ++++++++++------ drivers/gpu/drm/i915/intel_lrc.c | 13 +++++++++++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index a83690642aab..094f010908b8 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -168,17 +168,21 @@ static void irq_enable(struct intel_engine_cs *engine) set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); + if (engine->irq_enable) { + spin_lock(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->i915->irq_lock); + } } static void irq_disable(struct intel_engine_cs *engine) { /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); + if (engine->irq_disable) { + spin_lock(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->i915->irq_lock); + } } void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 36b376e4b105..75d2daa4f6c1 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2037,8 +2037,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->set_default_submission = execlists_set_default_submission; - engine->irq_enable = 
gen8_logical_ring_enable_irq; - engine->irq_disable = gen8_logical_ring_disable_irq; + if (INTEL_GEN(engine->i915) < 11) { + engine->irq_enable = gen8_logical_ring_enable_irq; + engine->irq_disable = gen8_logical_ring_disable_irq; + } else { + /* + * TODO: On Gen11 interrupt masks need to be clear + * to allow C6 entry. Keep interrupts enabled at + * and take the hit of generating extra interrupts + * until a more refined solution exists. + */ + } engine->emit_bb_start = gen8_emit_bb_start; } -- cgit v1.2.3 From ede9d0cfcb789b6fd86ecb71b4721a19c53956e6 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:40 +0800 Subject: drm/i915/gvt: Rework shadow graphic memory management code This is a big one and the GVT shadow graphic memory management code is heavily refined. The new code is more straightforward with less code. The struct intel_vgpu_mm is restructured to be clearly defined, use accurate names and some of the original fields are removed which are really redundant. Now we only manage ppgtt mm object with mm->ppgtt_mm.lru_list. No need to mix ppgtt and ggtt together, since one vGPU only has one ggtt object. v4: Don't invoke ppgtt_free_all_shadow_page before intel_vgpu_destroy_all_ppgtt_mm. v3: Add GVT_RING_CTX_NR_PDPS to avoid confusing about the PDPs. v2: Split some changes into small standalone patches. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 520 +++++++++++++++-------------------- drivers/gpu/drm/i915/gvt/gtt.h | 80 +++--- drivers/gpu/drm/i915/gvt/handlers.c | 15 +- drivers/gpu/drm/i915/gvt/mmio.c | 5 +- drivers/gpu/drm/i915/gvt/scheduler.c | 27 +- drivers/gpu/drm/i915/gvt/trace.h | 8 +- 6 files changed, 299 insertions(+), 356 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 8d5317d0122d..bd55fbb7910d 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -481,7 +481,10 @@ int intel_vgpu_mm_get_entry(struct intel_vgpu_mm *mm, struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; int ret; - e->type = mm->page_table_entry_type; + if (mm->type == INTEL_GVT_MM_PPGTT) + e->type = mm->ppgtt_mm.root_entry_type; + else + e->type = GTT_TYPE_GGTT_PTE; ret = ops->get_entry(page_table, e, index, false, 0, mm->vgpu); if (ret) @@ -782,7 +785,7 @@ static int ppgtt_write_protection_handler(void *data, u64 pa, return ret; } -static int reclaim_one_mm(struct intel_gvt *gvt); +static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page( struct intel_vgpu *vgpu, int type, unsigned long gfn) @@ -793,7 +796,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page( retry: spt = alloc_spt(GFP_KERNEL | __GFP_ZERO); if (!spt) { - if (reclaim_one_mm(vgpu->gvt)) + if (reclaim_one_ppgtt_mm(vgpu->gvt)) goto retry; gvt_vgpu_err("fail to allocate ppgtt shadow page\n"); @@ -1445,111 +1448,37 @@ static int ppgtt_handle_guest_write_page_table_bytes( return 0; } -/* - * mm page table allocation policy for bdw+ - * - for ggtt, only virtual page table will be allocated. - * - for ppgtt, dedicated virtual/shadow page table will be allocated. 
- */ -static int gen8_mm_alloc_page_table(struct intel_vgpu_mm *mm) -{ - struct intel_vgpu *vgpu = mm->vgpu; - struct intel_gvt *gvt = vgpu->gvt; - const struct intel_gvt_device_info *info = &gvt->device_info; - void *mem; - - if (mm->type == INTEL_GVT_MM_PPGTT) { - mm->page_table_entry_cnt = 4; - mm->page_table_entry_size = mm->page_table_entry_cnt * - info->gtt_entry_size; - mem = kzalloc(mm->has_shadow_page_table ? - mm->page_table_entry_size * 2 - : mm->page_table_entry_size, GFP_KERNEL); - if (!mem) - return -ENOMEM; - mm->virtual_page_table = mem; - if (!mm->has_shadow_page_table) - return 0; - mm->shadow_page_table = mem + mm->page_table_entry_size; - } else if (mm->type == INTEL_GVT_MM_GGTT) { - mm->page_table_entry_cnt = - (gvt_ggtt_gm_sz(gvt) >> I915_GTT_PAGE_SHIFT); - mm->page_table_entry_size = mm->page_table_entry_cnt * - info->gtt_entry_size; - mem = vzalloc(mm->page_table_entry_size); - if (!mem) - return -ENOMEM; - mm->virtual_page_table = mem; - } - return 0; -} - -static void gen8_mm_free_page_table(struct intel_vgpu_mm *mm) -{ - if (mm->type == INTEL_GVT_MM_PPGTT) { - kfree(mm->virtual_page_table); - } else if (mm->type == INTEL_GVT_MM_GGTT) { - if (mm->virtual_page_table) - vfree(mm->virtual_page_table); - } - mm->virtual_page_table = mm->shadow_page_table = NULL; -} - -static void invalidate_mm(struct intel_vgpu_mm *mm) +static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) { struct intel_vgpu *vgpu = mm->vgpu; struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_gtt *gtt = &gvt->gtt; struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; struct intel_gvt_gtt_entry se; - int i; + int index; - if (WARN_ON(!mm->has_shadow_page_table || !mm->shadowed)) + if (!mm->ppgtt_mm.shadowed) return; - for (i = 0; i < mm->page_table_entry_cnt; i++) { - ppgtt_get_shadow_root_entry(mm, &se, i); + for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) { + ppgtt_get_shadow_root_entry(mm, &se, index); + if (!ops->test_present(&se)) continue; - ppgtt_invalidate_shadow_page_by_shadow_entry( - vgpu, &se); + + ppgtt_invalidate_shadow_page_by_shadow_entry(vgpu, &se); se.val64 = 0; - ppgtt_set_shadow_root_entry(mm, &se, i); + ppgtt_set_shadow_root_entry(mm, &se, index); trace_gpt_change(vgpu->id, "destroy root pointer", - NULL, se.type, se.val64, i); + NULL, se.type, se.val64, index); } - mm->shadowed = false; -} -/** - * intel_vgpu_destroy_mm - destroy a mm object - * @mm: a kref object - * - * This function is used to destroy a mm object for vGPU - * - */ -void intel_vgpu_destroy_mm(struct kref *mm_ref) -{ - struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); - struct intel_vgpu *vgpu = mm->vgpu; - struct intel_gvt *gvt = vgpu->gvt; - struct intel_gvt_gtt *gtt = &gvt->gtt; - - if (!mm->initialized) - goto out; - - list_del(&mm->list); - list_del(&mm->lru_list); - - if (mm->has_shadow_page_table) - invalidate_mm(mm); - - gtt->mm_free_page_table(mm); -out: - kfree(mm); + mm->ppgtt_mm.shadowed = false; } -static int shadow_mm(struct intel_vgpu_mm *mm) + +static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) { struct intel_vgpu *vgpu = mm->vgpu; struct intel_gvt *gvt = vgpu->gvt; @@ -1557,21 +1486,21 @@ static int shadow_mm(struct intel_vgpu_mm *mm) struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; struct intel_vgpu_ppgtt_spt *spt; struct intel_gvt_gtt_entry ge, se; - int i; - int ret; + int index, ret; - if (WARN_ON(!mm->has_shadow_page_table || mm->shadowed)) + if (mm->ppgtt_mm.shadowed) return 0; - mm->shadowed = true; + mm->ppgtt_mm.shadowed = true; + + for (index 
= 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) { + ppgtt_get_guest_root_entry(mm, &ge, index); - for (i = 0; i < mm->page_table_entry_cnt; i++) { - ppgtt_get_guest_root_entry(mm, &ge, i); if (!ops->test_present(&ge)) continue; trace_gpt_change(vgpu->id, __func__, NULL, - ge.type, ge.val64, i); + ge.type, ge.val64, index); spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); if (IS_ERR(spt)) { @@ -1580,96 +1509,132 @@ static int shadow_mm(struct intel_vgpu_mm *mm) goto fail; } ppgtt_generate_shadow_entry(&se, spt, &ge); - ppgtt_set_shadow_root_entry(mm, &se, i); + ppgtt_set_shadow_root_entry(mm, &se, index); trace_gpt_change(vgpu->id, "populate root pointer", - NULL, se.type, se.val64, i); + NULL, se.type, se.val64, index); } + return 0; fail: - invalidate_mm(mm); + invalidate_ppgtt_mm(mm); return ret; } +static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu) +{ + struct intel_vgpu_mm *mm; + + mm = kzalloc(sizeof(*mm), GFP_KERNEL); + if (!mm) + return NULL; + + mm->vgpu = vgpu; + kref_init(&mm->ref); + atomic_set(&mm->pincount, 0); + + return mm; +} + +static void vgpu_free_mm(struct intel_vgpu_mm *mm) +{ + kfree(mm); +} + /** - * intel_vgpu_create_mm - create a mm object for a vGPU + * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU * @vgpu: a vGPU - * @mm_type: mm object type, should be PPGTT or GGTT - * @virtual_page_table: page table root pointers. Could be NULL if user wants - * to populate shadow later. - * @page_table_level: describe the page table level of the mm object - * @pde_base_index: pde root pointer base in GGTT MMIO. + * @root_entry_type: ppgtt root entry type + * @pdps: guest pdps. * - * This function is used to create a mm object for a vGPU. + * This function is used to create a ppgtt mm object for a vGPU. * * Returns: * Zero on success, negative error code in pointer if failed. 
*/ -struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu, - int mm_type, void *virtual_page_table, int page_table_level, - u32 pde_base_index) +struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, + intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) { struct intel_gvt *gvt = vgpu->gvt; - struct intel_gvt_gtt *gtt = &gvt->gtt; struct intel_vgpu_mm *mm; int ret; - mm = kzalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) { - ret = -ENOMEM; - goto fail; - } + mm = vgpu_alloc_mm(vgpu); + if (!mm) + return ERR_PTR(-ENOMEM); - mm->type = mm_type; + mm->type = INTEL_GVT_MM_PPGTT; - if (page_table_level == 1) - mm->page_table_entry_type = GTT_TYPE_GGTT_PTE; - else if (page_table_level == 3) - mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; - else if (page_table_level == 4) - mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; - else { - WARN_ON(1); - ret = -EINVAL; - goto fail; - } + GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY && + root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY); + mm->ppgtt_mm.root_entry_type = root_entry_type; - mm->page_table_level = page_table_level; - mm->pde_base_index = pde_base_index; + INIT_LIST_HEAD(&mm->ppgtt_mm.list); + INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list); - mm->vgpu = vgpu; - mm->has_shadow_page_table = !!(mm_type == INTEL_GVT_MM_PPGTT); - - kref_init(&mm->ref); - atomic_set(&mm->pincount, 0); - INIT_LIST_HEAD(&mm->list); - INIT_LIST_HEAD(&mm->lru_list); - list_add_tail(&mm->list, &vgpu->gtt.mm_list_head); + if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) + mm->ppgtt_mm.guest_pdps[0] = pdps[0]; + else + memcpy(mm->ppgtt_mm.guest_pdps, pdps, + sizeof(mm->ppgtt_mm.guest_pdps)); - ret = gtt->mm_alloc_page_table(mm); + ret = shadow_ppgtt_mm(mm); if (ret) { - gvt_vgpu_err("fail to allocate page table for mm\n"); - goto fail; + gvt_vgpu_err("failed to shadow ppgtt mm\n"); + vgpu_free_mm(mm); + return ERR_PTR(ret); } - mm->initialized = true; + list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head); + list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head); + return mm; +} - if (virtual_page_table) - memcpy(mm->virtual_page_table, virtual_page_table, - mm->page_table_entry_size); +static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) +{ + struct intel_vgpu_mm *mm; + unsigned long nr_entries; - if (mm->has_shadow_page_table) { - ret = shadow_mm(mm); - if (ret) - goto fail; - list_add_tail(&mm->lru_list, &gvt->gtt.mm_lru_list_head); + mm = vgpu_alloc_mm(vgpu); + if (!mm) + return ERR_PTR(-ENOMEM); + + mm->type = INTEL_GVT_MM_GGTT; + + nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT; + mm->ggtt_mm.virtual_ggtt = vzalloc(nr_entries * + vgpu->gvt->device_info.gtt_entry_size); + if (!mm->ggtt_mm.virtual_ggtt) { + vgpu_free_mm(mm); + return ERR_PTR(-ENOMEM); } + return mm; -fail: - gvt_vgpu_err("fail to create mm\n"); - if (mm) - intel_gvt_mm_unreference(mm); - return ERR_PTR(ret); +} + +/** + * intel_vgpu_destroy_mm - destroy a mm object + * @mm_ref: a kref object + * + * This function is used to destroy a mm object for vGPU + * + */ +void intel_vgpu_destroy_mm(struct kref *mm_ref) +{ + struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); + + if (GEM_WARN_ON(atomic_read(&mm->pincount))) + gvt_err("vgpu mm pin count bug detected\n"); + + if (mm->type == INTEL_GVT_MM_PPGTT) { + list_del(&mm->ppgtt_mm.list); + list_del(&mm->ppgtt_mm.lru_list); + invalidate_ppgtt_mm(mm); + } else { + vfree(mm->ggtt_mm.virtual_ggtt); + } + + vgpu_free_mm(mm); 
} /** @@ -1680,9 +1645,6 @@ fail: */ void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm) { - if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT)) - return; - atomic_dec(&mm->pincount); } @@ -1701,36 +1663,34 @@ int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) { int ret; - if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT)) - return 0; + atomic_inc(&mm->pincount); - if (!mm->shadowed) { - ret = shadow_mm(mm); + if (mm->type == INTEL_GVT_MM_PPGTT) { + ret = shadow_ppgtt_mm(mm); if (ret) return ret; + + list_move_tail(&mm->ppgtt_mm.lru_list, + &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head); + } - atomic_inc(&mm->pincount); - list_del_init(&mm->lru_list); - list_add_tail(&mm->lru_list, &mm->vgpu->gvt->gtt.mm_lru_list_head); return 0; } -static int reclaim_one_mm(struct intel_gvt *gvt) +static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt) { struct intel_vgpu_mm *mm; struct list_head *pos, *n; - list_for_each_safe(pos, n, &gvt->gtt.mm_lru_list_head) { - mm = container_of(pos, struct intel_vgpu_mm, lru_list); + list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list); - if (mm->type != INTEL_GVT_MM_PPGTT) - continue; if (atomic_read(&mm->pincount)) continue; - list_del_init(&mm->lru_list); - invalidate_mm(mm); + list_del_init(&mm->ppgtt_mm.lru_list); + invalidate_ppgtt_mm(mm); return 1; } return 0; @@ -1746,9 +1706,6 @@ static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm, struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; - if (WARN_ON(!mm->has_shadow_page_table)) - return -EINVAL; - s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); if (!s) return -ENXIO; @@ -1780,78 +1737,65 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) unsigned long gpa = INTEL_GVT_INVALID_ADDR; unsigned long gma_index[4]; struct intel_gvt_gtt_entry e; - int i, index; + int i, levels = 0; int ret; - if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT) - return INTEL_GVT_INVALID_ADDR; + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT && + mm->type != INTEL_GVT_MM_PPGTT); if (mm->type == INTEL_GVT_MM_GGTT) { if (!vgpu_gmadr_is_valid(vgpu, gma)) goto err; - ret = ggtt_get_guest_entry(mm, &e, - gma_ops->gma_to_ggtt_pte_index(gma)); - if (ret) - goto err; + ggtt_get_guest_entry(mm, &e, + gma_ops->gma_to_ggtt_pte_index(gma)); + gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + (gma & ~I915_GTT_PAGE_MASK); trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa); - return gpa; - } - - switch (mm->page_table_level) { - case 4: - ret = ppgtt_get_shadow_root_entry(mm, &e, 0); - if (ret) - goto err; - gma_index[0] = gma_ops->gma_to_pml4_index(gma); - gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma); - gma_index[2] = gma_ops->gma_to_pde_index(gma); - gma_index[3] = gma_ops->gma_to_pte_index(gma); - index = 4; - break; - case 3: - ret = ppgtt_get_shadow_root_entry(mm, &e, - gma_ops->gma_to_l3_pdp_index(gma)); - if (ret) - goto err; - gma_index[0] = gma_ops->gma_to_pde_index(gma); - gma_index[1] = gma_ops->gma_to_pte_index(gma); - index = 2; - break; - case 2: - ret = ppgtt_get_shadow_root_entry(mm, &e, - gma_ops->gma_to_pde_index(gma)); - if (ret) - goto err; - gma_index[0] = gma_ops->gma_to_pte_index(gma); - index = 1; - break; - default: - WARN_ON(1); - goto err; - } + } else { + switch (mm->ppgtt_mm.root_entry_type) { + case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: + ppgtt_get_shadow_root_entry(mm, &e, 0); + + gma_index[0] = gma_ops->gma_to_pml4_index(gma); + gma_index[1] = 
gma_ops->gma_to_l4_pdp_index(gma); + gma_index[2] = gma_ops->gma_to_pde_index(gma); + gma_index[3] = gma_ops->gma_to_pte_index(gma); + levels = 4; + break; + case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: + ppgtt_get_shadow_root_entry(mm, &e, + gma_ops->gma_to_l3_pdp_index(gma)); + + gma_index[0] = gma_ops->gma_to_pde_index(gma); + gma_index[1] = gma_ops->gma_to_pte_index(gma); + levels = 2; + break; + default: + GEM_BUG_ON(1); + } - /* walk into the shadow page table and get gpa from guest entry */ - for (i = 0; i < index; i++) { - ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i], - (i == index - 1)); - if (ret) - goto err; + /* walk the shadow page table and get gpa from guest entry */ + for (i = 0; i < levels; i++) { + ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i], + (i == levels - 1)); + if (ret) + goto err; - if (!pte_ops->test_present(&e)) { - gvt_dbg_core("GMA 0x%lx is not present\n", gma); - goto err; + if (!pte_ops->test_present(&e)) { + gvt_dbg_core("GMA 0x%lx is not present\n", gma); + goto err; + } } - } - gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) - + (gma & ~I915_GTT_PAGE_MASK); + gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + + (gma & ~I915_GTT_PAGE_MASK); + trace_gma_translate(vgpu->id, "ppgtt", 0, + mm->ppgtt_mm.root_entry_type, gma, gpa); + } - trace_gma_translate(vgpu->id, "ppgtt", 0, - mm->page_table_level, gma, gpa); return gpa; err: gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma); @@ -2131,43 +2075,48 @@ err: int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) { struct intel_vgpu_gtt *gtt = &vgpu->gtt; - struct intel_vgpu_mm *ggtt_mm; hash_init(gtt->tracked_guest_page_hash_table); hash_init(gtt->shadow_page_hash_table); - INIT_LIST_HEAD(>t->mm_list_head); + INIT_LIST_HEAD(>t->ppgtt_mm_list_head); INIT_LIST_HEAD(>t->oos_page_list_head); INIT_LIST_HEAD(>t->post_shadow_list_head); - intel_vgpu_reset_ggtt(vgpu); - - ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT, - NULL, 1, 0); - if (IS_ERR(ggtt_mm)) { + gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu); + if (IS_ERR(gtt->ggtt_mm)) { gvt_vgpu_err("fail to create mm for ggtt.\n"); - return PTR_ERR(ggtt_mm); + return PTR_ERR(gtt->ggtt_mm); } - gtt->ggtt_mm = ggtt_mm; + intel_vgpu_reset_ggtt(vgpu); return create_scratch_page_tree(vgpu); } -static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) +static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) { struct list_head *pos, *n; struct intel_vgpu_mm *mm; - list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { - mm = container_of(pos, struct intel_vgpu_mm, list); - if (mm->type == type) { - vgpu->gvt->gtt.mm_free_page_table(mm); - list_del(&mm->list); - list_del(&mm->lru_list); - kfree(mm); - } + list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); + intel_vgpu_destroy_mm(&mm->ref); } + + if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head))) + gvt_err("vgpu ppgtt mm is not fully destoried\n"); + + if (GEM_WARN_ON(!hlist_empty(vgpu->gtt.shadow_page_hash_table))) { + gvt_err("Why we still has spt not freed?\n"); + ppgtt_free_all_shadow_page(vgpu); + } +} + +static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu) +{ + intel_vgpu_destroy_mm(&vgpu->gtt.ggtt_mm->ref); + vgpu->gtt.ggtt_mm = NULL; } /** @@ -2182,11 +2131,9 @@ static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) */ void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) { - ppgtt_free_all_shadow_page(vgpu); + intel_vgpu_destroy_all_ppgtt_mm(vgpu); + 
intel_vgpu_destroy_ggtt_mm(vgpu); release_scratch_page_tree(vgpu); - - intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); - intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT); } static void clean_spt_oos(struct intel_gvt *gvt) @@ -2248,32 +2195,26 @@ fail: * pointer to mm object on success, NULL if failed. */ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level, void *root_entry) + u64 pdps[]) { - struct list_head *pos; struct intel_vgpu_mm *mm; - u64 *src, *dst; - - list_for_each(pos, &vgpu->gtt.mm_list_head) { - mm = container_of(pos, struct intel_vgpu_mm, list); - if (mm->type != INTEL_GVT_MM_PPGTT) - continue; - - if (mm->page_table_level != page_table_level) - continue; + struct list_head *pos; - src = root_entry; - dst = mm->virtual_page_table; + list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); - if (page_table_level == 3) { - if (src[0] == dst[0] - && src[1] == dst[1] - && src[2] == dst[2] - && src[3] == dst[3]) + switch (mm->ppgtt_mm.root_entry_type) { + case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: + if (pdps[0] == mm->ppgtt_mm.guest_pdps[0]) return mm; - } else { - if (src[0] == dst[0]) + break; + case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: + if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps, + sizeof(mm->ppgtt_mm.guest_pdps))) return mm; + break; + default: + GEM_BUG_ON(1); } } return NULL; @@ -2283,7 +2224,8 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, * intel_vgpu_g2v_create_ppgtt_mm - create a PPGTT mm object from * g2v notification * @vgpu: a vGPU - * @page_table_level: PPGTT page table level + * @root_entry_type: ppgtt root entry type + * @pdps: guest pdps * * This function is used to create a PPGTT mm object from a guest to GVT-g * notification. @@ -2292,20 +2234,15 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, * Zero on success, negative error code if failed. */ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level) + intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) { - u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); struct intel_vgpu_mm *mm; - if (WARN_ON((page_table_level != 4) && (page_table_level != 3))) - return -EINVAL; - - mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp); + mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); if (mm) { intel_gvt_mm_reference(mm); } else { - mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT, - pdp, page_table_level, 0); + mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps); if (IS_ERR(mm)) { gvt_vgpu_err("fail to create mm\n"); return PTR_ERR(mm); @@ -2318,7 +2255,7 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, * intel_vgpu_g2v_destroy_ppgtt_mm - destroy a PPGTT mm object from * g2v notification * @vgpu: a vGPU - * @page_table_level: PPGTT page table level + * @pdps: guest pdps * * This function is used to create a PPGTT mm object from a guest to GVT-g * notification. @@ -2327,15 +2264,11 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, * Zero on success, negative error code if failed. 
*/ int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level) + u64 pdps[]) { - u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); struct intel_vgpu_mm *mm; - if (WARN_ON((page_table_level != 4) && (page_table_level != 3))) - return -EINVAL; - - mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp); + mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); if (!mm) { gvt_vgpu_err("fail to find ppgtt instance.\n"); return -EINVAL; @@ -2367,8 +2300,6 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) || IS_KABYLAKE(gvt->dev_priv)) { gvt->gtt.pte_ops = &gen8_gtt_pte_ops; gvt->gtt.gma_ops = &gen8_gtt_gma_ops; - gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table; - gvt->gtt.mm_free_page_table = gen8_mm_free_page_table; } else { return -ENODEV; } @@ -2399,7 +2330,7 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) return ret; } } - INIT_LIST_HEAD(&gvt->gtt.mm_lru_list_head); + INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head); return 0; } @@ -2471,13 +2402,10 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) */ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) { - ppgtt_free_all_shadow_page(vgpu); - /* Shadow pages are only created when there is no page * table tracking data, so remove page tracking data after * removing the shadow pages. */ - intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); - + intel_vgpu_destroy_all_ppgtt_mm(vgpu); intel_vgpu_reset_ggtt(vgpu); } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 4cc13b5934f1..037dcbd1c11b 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -84,17 +84,12 @@ struct intel_gvt_gtt { void (*mm_free_page_table)(struct intel_vgpu_mm *mm); struct list_head oos_page_use_list_head; struct list_head oos_page_free_list_head; - struct list_head mm_lru_list_head; + struct list_head ppgtt_mm_lru_list_head; struct page *scratch_page; unsigned long scratch_mfn; }; -enum { - INTEL_GVT_MM_GGTT = 0, - INTEL_GVT_MM_PPGTT, -}; - typedef enum { GTT_TYPE_INVALID = -1, @@ -125,26 +120,39 @@ typedef enum { GTT_TYPE_MAX, } intel_gvt_gtt_type_t; -struct intel_vgpu_mm { - int type; - bool initialized; - bool shadowed; +enum intel_gvt_mm_type { + INTEL_GVT_MM_GGTT, + INTEL_GVT_MM_PPGTT, +}; - int page_table_entry_type; - u32 page_table_entry_size; - u32 page_table_entry_cnt; - void *virtual_page_table; - void *shadow_page_table; +#define GVT_RING_CTX_NR_PDPS GEN8_3LVL_PDPES - int page_table_level; - bool has_shadow_page_table; - u32 pde_base_index; +struct intel_vgpu_mm { + enum intel_gvt_mm_type type; + struct intel_vgpu *vgpu; - struct list_head list; struct kref ref; atomic_t pincount; - struct list_head lru_list; - struct intel_vgpu *vgpu; + + union { + struct { + intel_gvt_gtt_type_t root_entry_type; + /* + * The 4 PDPs in ring context. For 48bit addressing, + * only PDP0 is valid and point to PML4. For 32it + * addressing, all 4 are used as true PDPs. 
+ */ + u64 guest_pdps[GVT_RING_CTX_NR_PDPS]; + u64 shadow_pdps[GVT_RING_CTX_NR_PDPS]; + bool shadowed; + + struct list_head list; + struct list_head lru_list; + } ppgtt_mm; + struct { + void *virtual_ggtt; + } ggtt_mm; + }; }; extern int intel_vgpu_mm_get_entry( @@ -158,32 +166,31 @@ extern int intel_vgpu_mm_set_entry( unsigned long index); #define ggtt_get_guest_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->virtual_page_table, e, index) + intel_vgpu_mm_get_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index) #define ggtt_set_guest_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->virtual_page_table, e, index) + intel_vgpu_mm_set_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index) #define ggtt_get_shadow_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->shadow_page_table, e, index) + intel_vgpu_mm_get_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index) #define ggtt_set_shadow_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->shadow_page_table, e, index) + intel_vgpu_mm_set_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index) #define ppgtt_get_guest_root_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->virtual_page_table, e, index) + intel_vgpu_mm_get_entry(mm, mm->ppgtt_mm.guest_pdps, e, index) #define ppgtt_set_guest_root_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->virtual_page_table, e, index) + intel_vgpu_mm_set_entry(mm, mm->ppgtt_mm.guest_pdps, e, index) #define ppgtt_get_shadow_root_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->shadow_page_table, e, index) + intel_vgpu_mm_get_entry(mm, mm->ppgtt_mm.shadow_pdps, e, index) #define ppgtt_set_shadow_root_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->shadow_page_table, e, index) + intel_vgpu_mm_set_entry(mm, mm->ppgtt_mm.shadow_pdps, e, index) -extern struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu, - int mm_type, void *virtual_page_table, int page_table_level, - u32 pde_base_index); +struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, + intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); extern void intel_vgpu_destroy_mm(struct kref *mm_ref); struct intel_vgpu_guest_page; @@ -196,7 +203,7 @@ struct intel_vgpu_scratch_pt { struct intel_vgpu_gtt { struct intel_vgpu_mm *ggtt_mm; unsigned long active_ppgtt_mm_bitmap; - struct list_head mm_list_head; + struct list_head ppgtt_mm_list_head; DECLARE_HASHTABLE(shadow_page_hash_table, INTEL_GVT_GTT_HASH_BITS); DECLARE_HASHTABLE(tracked_guest_page_hash_table, INTEL_GVT_GTT_HASH_BITS); atomic_t n_tracked_guest_page; @@ -294,13 +301,12 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma); struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level, void *root_entry); + u64 pdps[]); int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level); + intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); -int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, - int page_table_level); +int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]); int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 9be639aa3b55..c51a5bd4e109 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1139,20 +1139,27 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, static int handle_g2v_notification(struct intel_vgpu *vgpu, int 
notification) { + u64 *pdps; int ret = 0; + pdps = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); + switch (notification) { case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE: - ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, 3); + ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, + GTT_TYPE_PPGTT_ROOT_L3_ENTRY, + pdps); break; case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY: - ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 3); + ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, pdps); break; case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE: - ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, 4); + ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, + GTT_TYPE_PPGTT_ROOT_L4_ENTRY, + pdps); break; case VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY: - ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 4); + ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, pdps); break; case VGT_G2V_EXECLIST_CONTEXT_CREATE: case VGT_G2V_EXECLIST_CONTEXT_DESTROY: diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 5c869e3fdf3b..b8118cbeafe2 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -76,10 +76,9 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, else intel_vgpu_default_mmio_write(vgpu, offset, p_data, bytes); - } else if (reg_is_gtt(gvt, offset) && - vgpu->gtt.ggtt_mm->virtual_page_table) { + } else if (reg_is_gtt(gvt, offset)) { offset -= gvt->device_info.gtt_start_offset; - pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset; + pt = vgpu->gtt.ggtt_mm->ggtt_mm.virtual_ggtt + offset; if (read) memcpy(p_data, pt, bytes); else diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 92df1b44fe1d..5668c3d0f542 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -113,7 +113,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) #undef COPY_REG set_context_pdp_root_pointer(shadow_ring_context, - workload->shadow_mm->shadow_page_table); + (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps); intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa + @@ -1181,27 +1181,30 @@ static int prepare_mm(struct intel_vgpu_workload *workload) struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; struct intel_vgpu_mm *mm; struct intel_vgpu *vgpu = workload->vgpu; - int page_table_level; - u32 pdp[8]; + intel_gvt_gtt_type_t root_entry_type; + u64 pdps[GVT_RING_CTX_NR_PDPS]; - if (desc->addressing_mode == 1) { /* legacy 32-bit */ - page_table_level = 3; - } else if (desc->addressing_mode == 3) { /* legacy 64 bit */ - page_table_level = 4; - } else { + switch (desc->addressing_mode) { + case 1: /* legacy 32-bit */ + root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; + break; + case 3: /* legacy 64-bit */ + root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; + break; + default: gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); return -EINVAL; } - read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp); + read_guest_pdps(workload->vgpu, workload->ring_context_gpa, (void *)pdps); - mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp); + mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, pdps); if (mm) { intel_gvt_mm_reference(mm); } else { - mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT, - pdp, page_table_level, 0); + mm = intel_vgpu_create_ppgtt_mm(workload->vgpu, root_entry_type, + pdps); if (IS_ERR(mm)) { gvt_vgpu_err("fail to create mm object.\n"); return PTR_ERR(mm); diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h 
index 7a2511538f34..5a060dacdb26 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -113,10 +113,10 @@ TRACE_EVENT(gma_index, ); TRACE_EVENT(gma_translate, - TP_PROTO(int id, char *type, int ring_id, int pt_level, + TP_PROTO(int id, char *type, int ring_id, int root_entry_type, unsigned long gma, unsigned long gpa), - TP_ARGS(id, type, ring_id, pt_level, gma, gpa), + TP_ARGS(id, type, ring_id, root_entry_type, gma, gpa), TP_STRUCT__entry( __array(char, buf, MAX_BUF_LEN) @@ -124,8 +124,8 @@ TRACE_EVENT(gma_translate, TP_fast_assign( snprintf(__entry->buf, MAX_BUF_LEN, - "VM%d %s ring %d pt_level %d gma 0x%lx -> gpa 0x%lx\n", - id, type, ring_id, pt_level, gma, gpa); + "VM%d %s ring %d root_entry_type %d gma 0x%lx -> gpa 0x%lx\n", + id, type, ring_id, root_entry_type, gma, gpa); ), TP_printk("%s", __entry->buf) -- cgit v1.2.3 From 1bc258519dc72070f21291cdd37aeaa192082abd Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:41 +0800 Subject: drm/i915/gvt: Refine the intel_vgpu_mm reference management If we manage an object with a reference count, then its life cycle must flow the reference count operations. Meanwhile, change the operation functions to generic name *put* and *get*. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 12 ++++++------ drivers/gpu/drm/i915/gvt/gtt.h | 28 +++++++++++++++++----------- drivers/gpu/drm/i915/gvt/scheduler.c | 4 ++-- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index bd55fbb7910d..353c92d287ff 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1613,13 +1613,13 @@ static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) } /** - * intel_vgpu_destroy_mm - destroy a mm object + * _intel_vgpu_mm_release - destroy a mm object * @mm_ref: a kref object * * This function is used to destroy a mm object for vGPU * */ -void intel_vgpu_destroy_mm(struct kref *mm_ref) +void _intel_vgpu_mm_release(struct kref *mm_ref) { struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); @@ -2101,7 +2101,7 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); - intel_vgpu_destroy_mm(&mm->ref); + intel_vgpu_destroy_mm(mm); } if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head))) @@ -2115,7 +2115,7 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu) { - intel_vgpu_destroy_mm(&vgpu->gtt.ggtt_mm->ref); + intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm); vgpu->gtt.ggtt_mm = NULL; } @@ -2240,7 +2240,7 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); if (mm) { - intel_gvt_mm_reference(mm); + intel_vgpu_mm_get(mm); } else { mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps); if (IS_ERR(mm)) { @@ -2273,7 +2273,7 @@ int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, gvt_vgpu_err("fail to find ppgtt instance.\n"); return -EINVAL; } - intel_gvt_mm_unreference(mm); + intel_vgpu_mm_put(mm); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 037dcbd1c11b..b5ac094ddbcb 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -191,7 +191,23 @@ extern int intel_vgpu_mm_set_entry( struct 
intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); -extern void intel_vgpu_destroy_mm(struct kref *mm_ref); + +static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm) +{ + kref_get(&mm->ref); +} + +void _intel_vgpu_mm_release(struct kref *mm_ref); + +static inline void intel_vgpu_mm_put(struct intel_vgpu_mm *mm) +{ + kref_put(&mm->ref, _intel_vgpu_mm_release); +} + +static inline void intel_vgpu_destroy_mm(struct intel_vgpu_mm *mm) +{ + intel_vgpu_mm_put(mm); +} struct intel_vgpu_guest_page; @@ -283,16 +299,6 @@ int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu); int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu); -static inline void intel_gvt_mm_reference(struct intel_vgpu_mm *mm) -{ - kref_get(&mm->ref); -} - -static inline void intel_gvt_mm_unreference(struct intel_vgpu_mm *mm) -{ - kref_put(&mm->ref, intel_vgpu_destroy_mm); -} - int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm); void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 5668c3d0f542..989304ef18e3 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1132,7 +1132,7 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) struct intel_vgpu_submission *s = &workload->vgpu->submission; if (workload->shadow_mm) - intel_gvt_mm_unreference(workload->shadow_mm); + intel_vgpu_mm_put(workload->shadow_mm); kmem_cache_free(s->workloads, workload); } @@ -1200,7 +1200,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload) mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, pdps); if (mm) { - intel_gvt_mm_reference(mm); + intel_vgpu_mm_get(mm); } else { mm = intel_vgpu_create_ppgtt_mm(workload->vgpu, root_entry_type, -- cgit v1.2.3 From 3aff35128025baa58c0676e501eb4597687ca80f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:42 +0800 Subject: drm/i915/gvt: Refine ggtt and ppgtt root entry ops Separate ggtt and ppgtt since they are different. A little more code but straightforward. And move these helpers to gtt.c since that is the only client. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 97 ++++++++++++++++++++++++++++++++---------- drivers/gpu/drm/i915/gvt/gtt.h | 34 --------------- 2 files changed, 75 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 353c92d287ff..862fac5345c3 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -473,35 +473,88 @@ static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p, /* * MM helpers. */ -int intel_vgpu_mm_get_entry(struct intel_vgpu_mm *mm, - void *page_table, struct intel_gvt_gtt_entry *e, - unsigned long index) +static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index, + bool guest) { - struct intel_gvt *gvt = mm->vgpu->gvt; - struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; - int ret; + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; - if (mm->type == INTEL_GVT_MM_PPGTT) - e->type = mm->ppgtt_mm.root_entry_type; - else - e->type = GTT_TYPE_GGTT_PTE; + GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT); - ret = ops->get_entry(page_table, e, index, false, 0, mm->vgpu); - if (ret) - return ret; + entry->type = mm->ppgtt_mm.root_entry_type; + pte_ops->get_entry(guest ? 
mm->ppgtt_mm.guest_pdps : + mm->ppgtt_mm.shadow_pdps, + entry, index, false, 0, mm->vgpu); - ops->test_pse(e); - return 0; + pte_ops->test_pse(entry); } -int intel_vgpu_mm_set_entry(struct intel_vgpu_mm *mm, - void *page_table, struct intel_gvt_gtt_entry *e, - unsigned long index) +static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) { - struct intel_gvt *gvt = mm->vgpu->gvt; - struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; + _ppgtt_get_root_entry(mm, entry, index, true); +} + +static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + _ppgtt_get_root_entry(mm, entry, index, false); +} + +static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index, + bool guest) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; + + pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps : + mm->ppgtt_mm.shadow_pdps, + entry, index, false, 0, mm->vgpu); +} + +static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + _ppgtt_set_root_entry(mm, entry, index, true); +} + +static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + _ppgtt_set_root_entry(mm, entry, index, false); +} + +static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; + + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); + + entry->type = GTT_TYPE_GGTT_PTE; + pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index, + false, 0, mm->vgpu); +} + +static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; + + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); + + pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index, + false, 0, mm->vgpu); +} + +static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; + + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); - return ops->set_entry(page_table, e, index, false, 0, mm->vgpu); + pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu); } /* @@ -1897,7 +1950,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, } out: - ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index); + ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); gtt_invalidate(gvt->dev_priv); ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); return 0; diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index b5ac094ddbcb..1d414792e72e 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -155,40 +155,6 @@ struct intel_vgpu_mm { }; }; -extern int intel_vgpu_mm_get_entry( - struct intel_vgpu_mm *mm, - void *page_table, struct intel_gvt_gtt_entry *e, - unsigned long index); - -extern int intel_vgpu_mm_set_entry( - struct intel_vgpu_mm *mm, - void *page_table, struct intel_gvt_gtt_entry *e, - unsigned long index); - -#define ggtt_get_guest_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index) - -#define ggtt_set_guest_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, 
mm->ggtt_mm.virtual_ggtt, e, index) - -#define ggtt_get_shadow_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index) - -#define ggtt_set_shadow_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index) - -#define ppgtt_get_guest_root_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->ppgtt_mm.guest_pdps, e, index) - -#define ppgtt_set_guest_root_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->ppgtt_mm.guest_pdps, e, index) - -#define ppgtt_get_shadow_root_entry(mm, e, index) \ - intel_vgpu_mm_get_entry(mm, mm->ppgtt_mm.shadow_pdps, e, index) - -#define ppgtt_set_shadow_root_entry(mm, e, index) \ - intel_vgpu_mm_set_entry(mm, mm->ppgtt_mm.shadow_pdps, e, index) - struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); -- cgit v1.2.3 From b0c766bf2911ad5d16affa0d34cf00c246a3c47a Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:43 +0800 Subject: drm/i915/gvt: Refine ggtt_set_shadow_entry Less code and use existed helper ggtt_set_host_entry. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 862fac5345c3..82454fb1d566 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2421,26 +2421,23 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; struct drm_i915_private *dev_priv = gvt->dev_priv; - struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; + struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; u32 index; - u32 offset; u32 num_entries; - struct intel_gvt_gtt_entry e; - memset(&e, 0, sizeof(struct intel_gvt_gtt_entry)); - e.type = GTT_TYPE_GGTT_PTE; - ops->set_pfn(&e, gvt->gtt.scratch_mfn); - e.val64 |= _PAGE_PRESENT; + pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn); + pte_ops->set_present(&entry); index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; - for (offset = 0; offset < num_entries; offset++) - ops->set_entry(NULL, &e, index + offset, false, 0, vgpu); + while (num_entries--) + ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; - for (offset = 0; offset < num_entries; offset++) - ops->set_entry(NULL, &e, index + offset, false, 0, vgpu); + while (num_entries--) + ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); gtt_invalidate(dev_priv); } -- cgit v1.2.3 From bc37ab56790fdd57da36fd98aca2dacfb6453d3d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:44 +0800 Subject: drm/i915/gvt: Add verbose gtt shadow logs This add a new macro gvt_vdbg_mm() to print more verbose logs for gtt shadowing. The added verbose logs are very useful for debugging. gvt_vdbg_mm() only comes into effect if VERBOSE_DEBUG is defined by the developer. 
Signed-off-by: Changbin Du Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 82454fb1d566..8ce82a294bea 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -38,6 +38,12 @@ #include "i915_pvinfo.h" #include "trace.h" +#if defined(VERBOSE_DEBUG) +#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args) +#else +#define gvt_vdbg_mm(fmt, args...) +#endif + static bool enable_out_of_sync = false; static int preallocated_oos_pages = 8192; @@ -582,6 +588,9 @@ static inline int ppgtt_spt_get_entry( return ret; ops->test_pse(e); + + gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", + type, e->type, index, e->val64); return 0; } @@ -597,6 +606,9 @@ static inline int ppgtt_spt_set_entry( if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) return -EINVAL; + gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", + type, e->type, index, e->val64); + return ops->set_entry(page_table, e, index, guest, spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT, spt->vgpu); @@ -1109,6 +1121,9 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, se->val64, index); + gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n", + se->type, index, se->val64); + if (!ops->test_present(se)) return 0; @@ -1147,6 +1162,9 @@ static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt, trace_gpt_change(spt->vgpu->id, "add", spt, sp->type, we->val64, index); + gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n", + we->type, index, we->val64); + if (gtt_type_is_pt(get_next_pt_type(we->type))) { s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, we); if (IS_ERR(s)) { -- cgit v1.2.3 From a143cef7dbefc1cb9853d990c18b16347ecceb39 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:45 +0800 Subject: drm/i915/gvt: Rename ggtt related functions to be more specific Accurate names help to avoid confusing so improve readability. Signed-off-by: Changbin Du Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 22 +++++++++++----------- drivers/gpu/drm/i915/gvt/gtt.h | 4 ++-- drivers/gpu/drm/i915/gvt/mmio.c | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 8ce82a294bea..162daad11ca4 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -270,7 +270,7 @@ static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index) return readq(addr); } -static void gtt_invalidate(struct drm_i915_private *dev_priv) +static void ggtt_invalidate(struct drm_i915_private *dev_priv) { mmio_hw_access_pre(dev_priv); I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); @@ -1873,7 +1873,7 @@ err: return INTEL_GVT_INVALID_ADDR; } -static int emulate_gtt_mmio_read(struct intel_vgpu *vgpu, +static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; @@ -1902,7 +1902,7 @@ static int emulate_gtt_mmio_read(struct intel_vgpu *vgpu, * Returns: * Zero on success, error code if failed. 
*/ -int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, +int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; @@ -1912,11 +1912,11 @@ int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, return -EINVAL; off -= info->gtt_start_offset; - ret = emulate_gtt_mmio_read(vgpu, off, p_data, bytes); + ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes); return ret; } -static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, +static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { struct intel_gvt *gvt = vgpu->gvt; @@ -1969,13 +1969,13 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, out: ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); - gtt_invalidate(gvt->dev_priv); + ggtt_invalidate(gvt->dev_priv); ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); return 0; } /* - * intel_vgpu_emulate_gtt_mmio_write - emulate GTT MMIO register write + * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write * @vgpu: a vGPU * @off: register offset * @p_data: data from guest write @@ -1986,8 +1986,8 @@ out: * Returns: * Zero on success, error code if failed. */ -int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, - void *p_data, unsigned int bytes) +int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, + unsigned int off, void *p_data, unsigned int bytes) { const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; int ret; @@ -1996,7 +1996,7 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, return -EINVAL; off -= info->gtt_start_offset; - ret = emulate_gtt_mmio_write(vgpu, off, p_data, bytes); + ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes); return ret; } @@ -2457,7 +2457,7 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) while (num_entries--) ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); - gtt_invalidate(dev_priv); + ggtt_invalidate(dev_priv); } /** diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 1d414792e72e..3bef5c9fc926 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -280,10 +280,10 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]); -int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, +int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); -int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, +int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index b8118cbeafe2..11b71b33f1c0 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -124,7 +124,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, if (WARN_ON(!reg_is_gtt(gvt, offset + bytes - 1))) goto err; - ret = intel_vgpu_emulate_gtt_mmio_read(vgpu, offset, + ret = intel_vgpu_emulate_ggtt_mmio_read(vgpu, offset, p_data, bytes); if (ret) goto err; @@ -197,7 +197,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, if 
(WARN_ON(!reg_is_gtt(gvt, offset + bytes - 1))) goto err; - ret = intel_vgpu_emulate_gtt_mmio_write(vgpu, offset, + ret = intel_vgpu_emulate_ggtt_mmio_write(vgpu, offset, p_data, bytes); if (ret) goto err; -- cgit v1.2.3 From e6e9c46fd2351a07f31b3bf3101c57170c13aeab Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:46 +0800 Subject: drm/i915/gvt: Factor out intel_vgpu_{get, put}_ppgtt_mm interface Factor out these two interfaces so we can kill some duplicated code in scheduler.c. v2: - rename to intel_vgpu_{get,put}_ppgtt_mm - refine handle_g2v_notification Signed-off-by: Changbin Du Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 23 ++++++++--------------- drivers/gpu/drm/i915/gvt/gtt.h | 4 ++-- drivers/gpu/drm/i915/gvt/handlers.c | 23 ++++++++--------------- drivers/gpu/drm/i915/gvt/scheduler.c | 16 ++++------------ 4 files changed, 22 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 162daad11ca4..a6a84ccdc571 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2292,19 +2292,17 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, } /** - * intel_vgpu_g2v_create_ppgtt_mm - create a PPGTT mm object from - * g2v notification + * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object. * @vgpu: a vGPU * @root_entry_type: ppgtt root entry type * @pdps: guest pdps * - * This function is used to create a PPGTT mm object from a guest to GVT-g - * notification. + * This function is used to find or create a PPGTT mm object from a guest. * * Returns: * Zero on success, negative error code if failed. */ -int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, +struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) { struct intel_vgpu_mm *mm; @@ -2314,28 +2312,23 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, intel_vgpu_mm_get(mm); } else { mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps); - if (IS_ERR(mm)) { + if (IS_ERR(mm)) gvt_vgpu_err("fail to create mm\n"); - return PTR_ERR(mm); - } } - return 0; + return mm; } /** - * intel_vgpu_g2v_destroy_ppgtt_mm - destroy a PPGTT mm object from - * g2v notification + * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object. * @vgpu: a vGPU * @pdps: guest pdps * - * This function is used to create a PPGTT mm object from a guest to GVT-g - * notification. + * This function is used to find a PPGTT mm object from a guest and destroy it. * * Returns: * Zero on success, negative error code if failed. 
*/ -int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, - u64 pdps[]) +int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]) { struct intel_vgpu_mm *mm; diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 3bef5c9fc926..652a76ef6706 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -275,10 +275,10 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]); -int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, +struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); -int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]); +int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]); int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index c51a5bd4e109..fbb908e797c4 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1139,28 +1139,21 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) { + intel_gvt_gtt_type_t root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; + struct intel_vgpu_mm *mm; u64 *pdps; - int ret = 0; pdps = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); switch (notification) { case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE: - ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, - GTT_TYPE_PPGTT_ROOT_L3_ENTRY, - pdps); - break; - case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY: - ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, pdps); - break; + root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE: - ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, - GTT_TYPE_PPGTT_ROOT_L4_ENTRY, - pdps); - break; + mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps); + return PTR_ERR_OR_ZERO(mm); + case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY: case VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY: - ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, pdps); - break; + return intel_vgpu_put_ppgtt_mm(vgpu, pdps); case VGT_G2V_EXECLIST_CONTEXT_CREATE: case VGT_G2V_EXECLIST_CONTEXT_DESTROY: case 1: /* Remove this in guest driver. 
*/ @@ -1168,7 +1161,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) default: gvt_vgpu_err("Invalid PV notification %d\n", notification); } - return ret; + return 0; } static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 989304ef18e3..f4765ed4e92a 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1198,18 +1198,10 @@ static int prepare_mm(struct intel_vgpu_workload *workload) read_guest_pdps(workload->vgpu, workload->ring_context_gpa, (void *)pdps); - mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, pdps); - if (mm) { - intel_vgpu_mm_get(mm); - } else { - - mm = intel_vgpu_create_ppgtt_mm(workload->vgpu, root_entry_type, - pdps); - if (IS_ERR(mm)) { - gvt_vgpu_err("fail to create mm object.\n"); - return PTR_ERR(mm); - } - } + mm = intel_vgpu_get_ppgtt_mm(workload->vgpu, root_entry_type, pdps); + if (IS_ERR(mm)) + return PTR_ERR(mm); + workload->shadow_mm = mm; return 0; } -- cgit v1.2.3 From d861ca237df8f1ef7b6380cd61f403edfcfc2be1 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:47 +0800 Subject: drm/i915/gvt: Use standard pte bit definition GTT entry has similar format with the CPU PTE. We'd prefer named macro instead of hardcode. Signed-off-by: Changbin Du Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index a6a84ccdc571..b15b9e55a997 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -346,11 +346,11 @@ static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) unsigned long pfn; if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) - pfn = (e->val64 & ADDR_1G_MASK) >> 12; + pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT; else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) - pfn = (e->val64 & ADDR_2M_MASK) >> 12; + pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT; else - pfn = (e->val64 & ADDR_4K_MASK) >> 12; + pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT; return pfn; } @@ -358,16 +358,16 @@ static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) { if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { e->val64 &= ~ADDR_1G_MASK; - pfn &= (ADDR_1G_MASK >> 12); + pfn &= (ADDR_1G_MASK >> PAGE_SHIFT); } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) { e->val64 &= ~ADDR_2M_MASK; - pfn &= (ADDR_2M_MASK >> 12); + pfn &= (ADDR_2M_MASK >> PAGE_SHIFT); } else { e->val64 &= ~ADDR_4K_MASK; - pfn &= (ADDR_4K_MASK >> 12); + pfn &= (ADDR_4K_MASK >> PAGE_SHIFT); } - e->val64 |= (pfn << 12); + e->val64 |= (pfn << PAGE_SHIFT); } static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) @@ -377,7 +377,7 @@ static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) return false; e->type = get_entry_type(e->type); - if (!(e->val64 & BIT(7))) + if (!(e->val64 & _PAGE_PSE)) return false; e->type = get_pse_type(e->type); @@ -395,17 +395,17 @@ static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) return (e->val64 != 0); else - return (e->val64 & BIT(0)); + return (e->val64 & _PAGE_PRESENT); } static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) { - e->val64 &= ~BIT(0); + e->val64 &= ~_PAGE_PRESENT; } static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) { - e->val64 |= BIT(0); + 
e->val64 |= _PAGE_PRESENT; } /* -- cgit v1.2.3 From 72f03d7ea16794c3ac4b7ae945510cf0015d3c3c Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:48 +0800 Subject: drm/i915/gvt: Refine pte shadowing process Make the shadow PTE population code clear. Later we will add huge gtt support based on this. v2: - rebase to latest code. Signed-off-by: Changbin Du Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 173 ++++++++++++++++++++++------------------- 1 file changed, 94 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index b15b9e55a997..7b4a345a0d52 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -453,29 +453,6 @@ static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = { .gma_to_pml4_index = gen8_gma_to_pml4_index, }; -static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p, - struct intel_gvt_gtt_entry *m) -{ - struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - unsigned long gfn, mfn; - - *m = *p; - - if (!ops->test_present(p)) - return 0; - - gfn = ops->get_pfn(p); - - mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn); - if (mfn == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("fail to translate gfn: 0x%lx\n", gfn); - return -ENXIO; - } - - ops->set_pfn(m, mfn); - return 0; -} - /* * MM helpers. */ @@ -943,8 +920,7 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, struct intel_vgpu_ppgtt_spt *s; intel_gvt_gtt_type_t cur_pt_type; - if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type)))) - return -EINVAL; + GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type))); if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { @@ -982,14 +958,26 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) goto release; for_each_present_shadow_entry(spt, &e, index) { - if (!gtt_type_is_pt(get_next_pt_type(e.type))) { - gvt_vgpu_err("GVT doesn't support pse bit for now\n"); - return -EINVAL; + switch (e.type) { + case GTT_TYPE_PPGTT_PTE_4K_ENTRY: + gvt_vdbg_mm("invalidate 4K entry\n"); + continue; + case GTT_TYPE_PPGTT_PTE_2M_ENTRY: + case GTT_TYPE_PPGTT_PTE_1G_ENTRY: + WARN(1, "GVT doesn't support 2M/1GB page\n"); + continue; + case GTT_TYPE_PPGTT_PML4_ENTRY: + case GTT_TYPE_PPGTT_PDP_ENTRY: + case GTT_TYPE_PPGTT_PDE_ENTRY: + gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n"); + ret = ppgtt_invalidate_shadow_page_by_shadow_entry( + spt->vgpu, &e); + if (ret) + goto fail; + break; + default: + GEM_BUG_ON(1); } - ret = ppgtt_invalidate_shadow_page_by_shadow_entry( - spt->vgpu, &e); - if (ret) - goto fail; } release: trace_spt_change(spt->vgpu->id, "release", spt, @@ -1013,10 +1001,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( struct intel_vgpu_page_track *t; int ret; - if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(we->type)))) { - ret = -EINVAL; - goto fail; - } + GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type))); t = intel_vgpu_find_tracked_page(vgpu, ops->get_pfn(we)); if (t) { @@ -1062,6 +1047,41 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, ops->set_pfn(se, s->shadow_page.mfn); } +static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, + struct intel_vgpu_ppgtt_spt *spt, unsigned long index, + struct intel_gvt_gtt_entry *ge) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; + struct intel_gvt_gtt_entry se = *ge; + unsigned long gfn, mfn; + + 
if (!pte_ops->test_present(ge)) + return 0; + + gfn = pte_ops->get_pfn(ge); + + switch (ge->type) { + case GTT_TYPE_PPGTT_PTE_4K_ENTRY: + gvt_vdbg_mm("shadow 4K gtt entry\n"); + break; + case GTT_TYPE_PPGTT_PTE_2M_ENTRY: + case GTT_TYPE_PPGTT_PTE_1G_ENTRY: + gvt_vgpu_err("GVT doesn't support 2M/1GB entry\n"); + return -EINVAL; + default: + GEM_BUG_ON(1); + }; + + /* direct shadow */ + mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn); + if (mfn == INTEL_GVT_INVALID_ADDR) + return -ENXIO; + + pte_ops->set_pfn(&se, mfn); + ppgtt_set_shadow_entry(spt, &se, index); + return 0; +} + static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu *vgpu = spt->vgpu; @@ -1075,32 +1095,29 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) trace_spt_change(spt->vgpu->id, "born", spt, spt->guest_page.track.gfn, spt->shadow_page.type); - if (gtt_type_is_pte_pt(spt->shadow_page.type)) { - for_each_present_guest_entry(spt, &ge, i) { + for_each_present_guest_entry(spt, &ge, i) { + if (gtt_type_is_pt(get_next_pt_type(ge.type))) { + s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, + &ge); + if (IS_ERR(s)) { + ret = PTR_ERR(s); + goto fail; + } + ppgtt_get_shadow_entry(spt, &se, i); + ppgtt_generate_shadow_entry(&se, s, &ge); + ppgtt_set_shadow_entry(spt, &se, i); + } else { gfn = ops->get_pfn(&ge); - if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn) || - gtt_entry_p2m(vgpu, &ge, &se)) + if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) { ops->set_pfn(&se, gvt->gtt.scratch_mfn); - ppgtt_set_shadow_entry(spt, &se, i); - } - return 0; - } - - for_each_present_guest_entry(spt, &ge, i) { - if (!gtt_type_is_pt(get_next_pt_type(ge.type))) { - gvt_vgpu_err("GVT doesn't support pse bit now\n"); - ret = -EINVAL; - goto fail; - } + ppgtt_set_shadow_entry(spt, &se, i); + continue; + } - s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); - if (IS_ERR(s)) { - ret = PTR_ERR(s); - goto fail; + ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge); + if (ret) + goto fail; } - ppgtt_get_shadow_entry(spt, &se, i); - ppgtt_generate_shadow_entry(&se, s, &ge); - ppgtt_set_shadow_entry(spt, &se, i); } return 0; fail: @@ -1175,10 +1192,9 @@ static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt, ppgtt_generate_shadow_entry(&m, s, we); ppgtt_set_shadow_entry(spt, &m, index); } else { - ret = gtt_entry_p2m(vgpu, we, &m); + ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we); if (ret) goto fail; - ppgtt_set_shadow_entry(spt, &m, index); } return 0; fail: @@ -1195,7 +1211,7 @@ static int sync_oos_page(struct intel_vgpu *vgpu, struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(oos_page->guest_page); - struct intel_gvt_gtt_entry old, new, m; + struct intel_gvt_gtt_entry old, new; int index; int ret; @@ -1219,12 +1235,11 @@ static int sync_oos_page(struct intel_vgpu *vgpu, oos_page->guest_page, spt->guest_page_type, new.val64, index); - ret = gtt_entry_p2m(vgpu, &new, &m); + ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new); if (ret) return ret; ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu); - ppgtt_set_shadow_entry(spt, &m, index); } oos_page->guest_page->write_cnt = 0; @@ -1371,10 +1386,9 @@ static int ppgtt_handle_guest_write_page_table( struct intel_vgpu *vgpu = spt->vgpu; int type = spt->shadow_page.type; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - struct intel_gvt_gtt_entry se; - - int ret; + struct intel_gvt_gtt_entry old_se; int new_present; + int 
ret; new_present = ops->test_present(we); @@ -1383,7 +1397,7 @@ static int ppgtt_handle_guest_write_page_table( * guarantee the ppgtt table is validated during the window between * adding and removal. */ - ppgtt_get_shadow_entry(spt, &se, index); + ppgtt_get_shadow_entry(spt, &old_se, index); if (new_present) { ret = ppgtt_handle_guest_entry_add(gpt, we, index); @@ -1391,13 +1405,13 @@ static int ppgtt_handle_guest_write_page_table( goto fail; } - ret = ppgtt_handle_guest_entry_removal(gpt, &se, index); + ret = ppgtt_handle_guest_entry_removal(gpt, &old_se, index); if (ret) goto fail; if (!new_present) { - ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); - ppgtt_set_shadow_entry(spt, &se, index); + ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_set_shadow_entry(spt, &old_se, index); } return 0; @@ -1407,6 +1421,8 @@ fail: return ret; } + + static inline bool can_do_out_of_sync(struct intel_vgpu_guest_page *gpt) { return enable_out_of_sync @@ -1924,9 +1940,8 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; unsigned long g_gtt_index = off >> info->gtt_entry_size_shift; - unsigned long gma, gfn; + unsigned long gma, gfn, mfn; struct intel_gvt_gtt_entry e, m; - int ret; if (bytes != 4 && bytes != 8) return -EINVAL; @@ -1941,6 +1956,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, bytes); + m = e; if (ops->test_present(&e)) { gfn = ops->get_pfn(&e); @@ -1953,19 +1969,18 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, goto out; } - ret = gtt_entry_p2m(vgpu, &e, &m); - if (ret) { - gvt_vgpu_err("fail to translate guest gtt entry\n"); + mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn); + if (mfn == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("fail to populate guest ggtt entry\n"); /* guest driver may read/write the entry when partial * update the entry in this situation p2m will fail * settting the shadow entry to point to a scratch page */ ops->set_pfn(&m, gvt->gtt.scratch_mfn); - } - } else { - m = e; + } else + ops->set_pfn(&m, mfn); + } else ops->set_pfn(&m, gvt->gtt.scratch_mfn); - } out: ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); -- cgit v1.2.3 From 44b467338094d86586d3ec351d8594a6cef0842a Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:49 +0800 Subject: drm/i915/gvt: Rework shadow page management code This is a another big one and the GVT shadow page management code is heavily refined. The new code only use struct intel_vgpu_ppgtt_spt to represent a vgpu shadow page table - w/ or wo/ a guest page associated with. A pure shadow page (no guest page associated) will be used to shadow splited 2M huge gtt. In this case, the spt.guest_page.gfn should be a zero. To search a existed shadow page table, we have two new interfaces: - intel_vgpu_find_spt_by_gfn(), find a spt by guest gfn. It must not be a pure spt. - intel_vgpu_find_spt_by_mfn, Find the spt using shadow page mfn in shadowed PTE. The oos_page management is remained as what is was. v2: Split some changes into small standalone patches. 
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 470 +++++++++++++++------------------------ drivers/gpu/drm/i915/gvt/gtt.h | 51 ++--- drivers/gpu/drm/i915/gvt/trace.h | 2 +- 3 files changed, 203 insertions(+), 320 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 7b4a345a0d52..2189c45d44fc 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -593,11 +593,11 @@ static inline int ppgtt_spt_set_entry( #define ppgtt_get_guest_entry(spt, e, index) \ ppgtt_spt_get_entry(spt, NULL, \ - spt->guest_page_type, e, index, true) + spt->guest_page.type, e, index, true) #define ppgtt_set_guest_entry(spt, e, index) \ ppgtt_spt_set_entry(spt, NULL, \ - spt->guest_page_type, e, index, true) + spt->guest_page.type, e, index, true) #define ppgtt_get_shadow_entry(spt, e, index) \ ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \ @@ -607,52 +607,29 @@ static inline int ppgtt_spt_set_entry( ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \ spt->shadow_page.type, e, index, false) -/** - * intel_vgpu_init_page_track - init a page track data structure - * @vgpu: a vGPU - * @t: a page track data structure - * @gfn: guest memory page frame number - * @handler: the function will be called when target guest memory page has - * been modified. - * - * This function is called when a user wants to prepare a page track data - * structure to track a guest memory page. - * - * Returns: - * Zero on success, negative error code if failed. - */ -int intel_vgpu_init_page_track(struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t, - unsigned long gfn, - int (*handler)(void *, u64, void *, int), - void *data) +#define page_track_to_ppgtt_spt(ptr) \ + container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page.track) + +static void *alloc_spt(gfp_t gfp_mask) { - INIT_HLIST_NODE(&t->node); + struct intel_vgpu_ppgtt_spt *spt; - t->tracked = false; - t->gfn = gfn; - t->handler = handler; - t->data = data; + spt = kzalloc(sizeof(*spt), gfp_mask); + if (!spt) + return NULL; - hash_add(vgpu->gtt.tracked_guest_page_hash_table, &t->node, t->gfn); - return 0; + spt->shadow_page.page = alloc_page(gfp_mask); + if (!spt->shadow_page.page) { + kfree(spt); + return NULL; + } + return spt; } -/** - * intel_vgpu_clean_page_track - release a page track data structure - * @vgpu: a vGPU - * @t: a page track data structure - * - * This function is called before a user frees a page track data structure. 
- */ -void intel_vgpu_clean_page_track(struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t) +static void free_spt(struct intel_vgpu_ppgtt_spt *spt) { - if (!hlist_unhashed(&t->node)) - hash_del(&t->node); - - if (t->tracked) - intel_gvt_hypervisor_disable_page_track(vgpu, t); + __free_page(spt->shadow_page.page); + kfree(spt); } /** @@ -679,139 +656,53 @@ struct intel_vgpu_page_track *intel_vgpu_find_tracked_page( return NULL; } -static int init_guest_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *p, - unsigned long gfn, - int (*handler)(void *, u64, void *, int), - void *data) -{ - p->oos_page = NULL; - p->write_cnt = 0; - - return intel_vgpu_init_page_track(vgpu, &p->track, gfn, handler, data); -} - static int detach_oos_page(struct intel_vgpu *vgpu, struct intel_vgpu_oos_page *oos_page); -static void clean_guest_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *p) -{ - if (p->oos_page) - detach_oos_page(vgpu, p->oos_page); - - intel_vgpu_clean_page_track(vgpu, &p->track); -} - -static inline int init_shadow_page(struct intel_vgpu *vgpu, - struct intel_vgpu_shadow_page *p, int type, bool hash) -{ - struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; - dma_addr_t daddr; - - daddr = dma_map_page(kdev, p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); - if (dma_mapping_error(kdev, daddr)) { - gvt_vgpu_err("fail to map dma addr\n"); - return -EINVAL; - } - - p->vaddr = page_address(p->page); - p->type = type; - - INIT_HLIST_NODE(&p->node); - - p->mfn = daddr >> I915_GTT_PAGE_SHIFT; - if (hash) - hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn); - return 0; -} - -static inline void clean_shadow_page(struct intel_vgpu *vgpu, - struct intel_vgpu_shadow_page *p) -{ - struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; - - dma_unmap_page(kdev, p->mfn << I915_GTT_PAGE_SHIFT, 4096, - PCI_DMA_BIDIRECTIONAL); - - if (!hlist_unhashed(&p->node)) - hash_del(&p->node); -} - -static inline struct intel_vgpu_shadow_page *find_shadow_page( - struct intel_vgpu *vgpu, unsigned long mfn) +static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt) { - struct intel_vgpu_shadow_page *p; - - hash_for_each_possible(vgpu->gtt.shadow_page_hash_table, - p, node, mfn) { - if (p->mfn == mfn) - return p; - } - return NULL; -} + struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev; -#define page_track_to_guest_page(ptr) \ - container_of(ptr, struct intel_vgpu_guest_page, track) + trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type); -#define guest_page_to_ppgtt_spt(ptr) \ - container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page) + dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096, + PCI_DMA_BIDIRECTIONAL); + if (!hlist_unhashed(&spt->node)) + hash_del(&spt->node); -#define shadow_page_to_ppgtt_spt(ptr) \ - container_of(ptr, struct intel_vgpu_ppgtt_spt, shadow_page) + if (spt->guest_page.oos_page) + detach_oos_page(spt->vgpu, spt->guest_page.oos_page); -static void *alloc_spt(gfp_t gfp_mask) -{ - struct intel_vgpu_ppgtt_spt *spt; + if (!hlist_unhashed(&spt->guest_page.track.node)) + hash_del(&spt->guest_page.track.node); - spt = kzalloc(sizeof(*spt), gfp_mask); - if (!spt) - return NULL; + if (spt->guest_page.track.tracked) + intel_gvt_hypervisor_disable_page_track(spt->vgpu, + &spt->guest_page.track); - spt->shadow_page.page = alloc_page(gfp_mask); - if (!spt->shadow_page.page) { - kfree(spt); - return NULL; - } - return spt; -} - -static void free_spt(struct intel_vgpu_ppgtt_spt *spt) -{ - 
__free_page(spt->shadow_page.page); - kfree(spt); -} - -static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt) -{ - trace_spt_free(spt->vgpu->id, spt, spt->shadow_page.type); - - clean_shadow_page(spt->vgpu, &spt->shadow_page); - clean_guest_page(spt->vgpu, &spt->guest_page); list_del_init(&spt->post_shadow_list); - free_spt(spt); } static void ppgtt_free_all_shadow_page(struct intel_vgpu *vgpu) { struct hlist_node *n; - struct intel_vgpu_shadow_page *sp; + struct intel_vgpu_ppgtt_spt *spt; int i; - hash_for_each_safe(vgpu->gtt.shadow_page_hash_table, i, n, sp, node) - ppgtt_free_shadow_page(shadow_page_to_ppgtt_spt(sp)); + hash_for_each_safe(vgpu->gtt.shadow_page_hash_table, i, n, spt, node) + ppgtt_free_shadow_page(spt); } static int ppgtt_handle_guest_write_page_table_bytes( - struct intel_vgpu_guest_page *gpt, + struct intel_vgpu_ppgtt_spt *spt, u64 pa, void *p_data, int bytes); static int ppgtt_write_protection_handler(void *data, u64 pa, void *p_data, int bytes) { struct intel_vgpu_page_track *t = data; - struct intel_vgpu_guest_page *p = page_track_to_guest_page(t); + struct intel_vgpu_ppgtt_spt *spt = page_track_to_ppgtt_spt(t); int ret; if (bytes != 4 && bytes != 8) @@ -820,20 +711,47 @@ static int ppgtt_write_protection_handler(void *data, u64 pa, if (!t->tracked) return -EINVAL; - ret = ppgtt_handle_guest_write_page_table_bytes(p, + ret = ppgtt_handle_guest_write_page_table_bytes(spt, pa, p_data, bytes); if (ret) return ret; return ret; } +/* Find a spt by guest gfn. */ +static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn( + struct intel_vgpu *vgpu, unsigned long gfn) +{ + struct intel_vgpu_page_track *track; + + track = intel_vgpu_find_tracked_page(vgpu, gfn); + if (track) + return page_track_to_ppgtt_spt(track); + + return NULL; +} + +/* Find the spt by shadow page mfn. */ +static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( + struct intel_vgpu *vgpu, unsigned long mfn) +{ + struct intel_vgpu_ppgtt_spt *spt; + + hash_for_each_possible(vgpu->gtt.shadow_page_hash_table, spt, node, mfn) { + if (spt->shadow_page.mfn == mfn) + return spt; + } + return NULL; +} + static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page( struct intel_vgpu *vgpu, int type, unsigned long gfn) { + struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; struct intel_vgpu_ppgtt_spt *spt = NULL; - int ret; + dma_addr_t daddr; retry: spt = alloc_spt(GFP_KERNEL | __GFP_ZERO); @@ -846,44 +764,39 @@ retry: } spt->vgpu = vgpu; - spt->guest_page_type = type; atomic_set(&spt->refcount, 1); INIT_LIST_HEAD(&spt->post_shadow_list); /* - * TODO: guest page type may be different with shadow page type, - * when we support PSE page in future. + * Init shadow_page. 
*/ - ret = init_shadow_page(vgpu, &spt->shadow_page, type, true); - if (ret) { - gvt_vgpu_err("fail to initialize shadow page for spt\n"); - goto err; - } - - ret = init_guest_page(vgpu, &spt->guest_page, - gfn, ppgtt_write_protection_handler, NULL); - if (ret) { - gvt_vgpu_err("fail to initialize guest page for spt\n"); - goto err; + spt->shadow_page.type = type; + daddr = dma_map_page(kdev, spt->shadow_page.page, + 0, 4096, PCI_DMA_BIDIRECTIONAL); + if (dma_mapping_error(kdev, daddr)) { + gvt_vgpu_err("fail to map dma addr\n"); + free_spt(spt); + return ERR_PTR(-EINVAL); } + spt->shadow_page.vaddr = page_address(spt->shadow_page.page); + spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; - trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); - return spt; -err: - ppgtt_free_shadow_page(spt); - return ERR_PTR(ret); -} + /* + * Init guest_page. + */ + spt->guest_page.type = type; + spt->guest_page.gfn = gfn; -static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page( - struct intel_vgpu *vgpu, unsigned long mfn) -{ - struct intel_vgpu_shadow_page *p = find_shadow_page(vgpu, mfn); + spt->guest_page.track.gfn = gfn; + spt->guest_page.track.handler = ppgtt_write_protection_handler; + hash_add(vgpu->gtt.tracked_guest_page_hash_table, + &spt->guest_page.track.node, gfn); - if (p) - return shadow_page_to_ppgtt_spt(p); + INIT_HLIST_NODE(&spt->node); + hash_add(vgpu->gtt.shadow_page_hash_table, &spt->node, spt->shadow_page.mfn); - gvt_vgpu_err("fail to find ppgtt shadow page: 0x%lx\n", mfn); - return NULL; + trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); + return spt; } #define pt_entry_size_shift(spt) \ @@ -929,7 +842,7 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) return 0; } - s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); + s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); if (!s) { gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", ops->get_pfn(e)); @@ -947,7 +860,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) int v = atomic_read(&spt->refcount); trace_spt_change(spt->vgpu->id, "die", spt, - spt->guest_page.track.gfn, spt->shadow_page.type); + spt->guest_page.gfn, spt->shadow_page.type); trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); @@ -981,7 +894,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) } release: trace_spt_change(spt->vgpu->id, "release", spt, - spt->guest_page.track.gfn, spt->shadow_page.type); + spt->guest_page.gfn, spt->shadow_page.type); ppgtt_free_shadow_page(spt); return 0; fail: @@ -996,43 +909,38 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we) { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - struct intel_vgpu_ppgtt_spt *s = NULL; - struct intel_vgpu_guest_page *g; - struct intel_vgpu_page_track *t; + struct intel_vgpu_ppgtt_spt *spt = NULL; int ret; GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type))); - t = intel_vgpu_find_tracked_page(vgpu, ops->get_pfn(we)); - if (t) { - g = page_track_to_guest_page(t); - s = guest_page_to_ppgtt_spt(g); - ppgtt_get_shadow_page(s); - } else { + spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we)); + if (spt) + ppgtt_get_shadow_page(spt); + else { int type = get_next_pt_type(we->type); - s = ppgtt_alloc_shadow_page(vgpu, type, ops->get_pfn(we)); - if (IS_ERR(s)) { - ret = PTR_ERR(s); + spt = ppgtt_alloc_shadow_page(vgpu, type, 
ops->get_pfn(we)); + if (IS_ERR(spt)) { + ret = PTR_ERR(spt); goto fail; } - ret = intel_gvt_hypervisor_enable_page_track(vgpu, - &s->guest_page.track); + ret = intel_gvt_hypervisor_enable_page_track(vgpu, &spt->guest_page.track); if (ret) goto fail; - ret = ppgtt_populate_shadow_page(s); + ret = ppgtt_populate_shadow_page(spt); if (ret) goto fail; - trace_spt_change(vgpu->id, "new", s, s->guest_page.track.gfn, - s->shadow_page.type); + trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn, + spt->shadow_page.type); } - return s; + return spt; fail: gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", - s, we->val64, we->type); + spt, we->val64, we->type); return ERR_PTR(ret); } @@ -1097,8 +1005,7 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) for_each_present_guest_entry(spt, &ge, i) { if (gtt_type_is_pt(get_next_pt_type(ge.type))) { - s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, - &ge); + s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); if (IS_ERR(s)) { ret = PTR_ERR(s); goto fail; @@ -1126,17 +1033,15 @@ fail: return ret; } -static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, +static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, struct intel_gvt_gtt_entry *se, unsigned long index) { - struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); - struct intel_vgpu_shadow_page *sp = &spt->shadow_page; struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; int ret; - trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, se->val64, - index); + trace_spt_guest_change(spt->vgpu->id, "remove", spt, + spt->shadow_page.type, se->val64, index); gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n", se->type, index, se->val64); @@ -1144,12 +1049,13 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, if (!ops->test_present(se)) return 0; - if (ops->get_pfn(se) == vgpu->gtt.scratch_pt[sp->type].page_mfn) + if (ops->get_pfn(se) == + vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn) return 0; if (gtt_type_is_pt(get_next_pt_type(se->type))) { struct intel_vgpu_ppgtt_spt *s = - ppgtt_find_shadow_page(vgpu, ops->get_pfn(se)); + intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se)); if (!s) { gvt_vgpu_err("fail to find guest page\n"); ret = -ENXIO; @@ -1166,18 +1072,16 @@ fail: return ret; } -static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt, +static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt, struct intel_gvt_gtt_entry *we, unsigned long index) { - struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); - struct intel_vgpu_shadow_page *sp = &spt->shadow_page; struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_entry m; struct intel_vgpu_ppgtt_spt *s; int ret; - trace_gpt_change(spt->vgpu->id, "add", spt, sp->type, - we->val64, index); + trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type, + we->val64, index); gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n", we->type, index, we->val64); @@ -1209,30 +1113,29 @@ static int sync_oos_page(struct intel_vgpu *vgpu, const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; - struct intel_vgpu_ppgtt_spt *spt = - guest_page_to_ppgtt_spt(oos_page->guest_page); + struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; struct intel_gvt_gtt_entry old, new; int index; 
int ret; trace_oos_change(vgpu->id, "sync", oos_page->id, - oos_page->guest_page, spt->guest_page_type); + spt, spt->guest_page.type); - old.type = new.type = get_entry_type(spt->guest_page_type); + old.type = new.type = get_entry_type(spt->guest_page.type); old.val64 = new.val64 = 0; for (index = 0; index < (I915_GTT_PAGE_SIZE >> info->gtt_entry_size_shift); index++) { ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); ops->get_entry(NULL, &new, index, true, - oos_page->guest_page->track.gfn << PAGE_SHIFT, vgpu); + spt->guest_page.gfn << PAGE_SHIFT, vgpu); if (old.val64 == new.val64 && !test_and_clear_bit(index, spt->post_shadow_bitmap)) continue; trace_oos_sync(vgpu->id, oos_page->id, - oos_page->guest_page, spt->guest_page_type, + spt, spt->guest_page.type, new.val64, index); ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new); @@ -1242,7 +1145,7 @@ static int sync_oos_page(struct intel_vgpu *vgpu, ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu); } - oos_page->guest_page->write_cnt = 0; + spt->guest_page.write_cnt = 0; list_del_init(&spt->post_shadow_list); return 0; } @@ -1251,15 +1154,14 @@ static int detach_oos_page(struct intel_vgpu *vgpu, struct intel_vgpu_oos_page *oos_page) { struct intel_gvt *gvt = vgpu->gvt; - struct intel_vgpu_ppgtt_spt *spt = - guest_page_to_ppgtt_spt(oos_page->guest_page); + struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; trace_oos_change(vgpu->id, "detach", oos_page->id, - oos_page->guest_page, spt->guest_page_type); + spt, spt->guest_page.type); - oos_page->guest_page->write_cnt = 0; - oos_page->guest_page->oos_page = NULL; - oos_page->guest_page = NULL; + spt->guest_page.write_cnt = 0; + spt->guest_page.oos_page = NULL; + oos_page->spt = NULL; list_del_init(&oos_page->vm_list); list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head); @@ -1267,51 +1169,49 @@ static int detach_oos_page(struct intel_vgpu *vgpu, return 0; } -static int attach_oos_page(struct intel_vgpu *vgpu, - struct intel_vgpu_oos_page *oos_page, - struct intel_vgpu_guest_page *gpt) +static int attach_oos_page(struct intel_vgpu_oos_page *oos_page, + struct intel_vgpu_ppgtt_spt *spt) { - struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt *gvt = spt->vgpu->gvt; int ret; - ret = intel_gvt_hypervisor_read_gpa(vgpu, - gpt->track.gfn << I915_GTT_PAGE_SHIFT, + ret = intel_gvt_hypervisor_read_gpa(spt->vgpu, + spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, oos_page->mem, I915_GTT_PAGE_SIZE); if (ret) return ret; - oos_page->guest_page = gpt; - gpt->oos_page = oos_page; + oos_page->spt = spt; + spt->guest_page.oos_page = oos_page; list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head); - trace_oos_change(vgpu->id, "attach", gpt->oos_page->id, - gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type); + trace_oos_change(spt->vgpu->id, "attach", oos_page->id, + spt, spt->guest_page.type); return 0; } -static int ppgtt_set_guest_page_sync(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *gpt) +static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt) { + struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; int ret; - ret = intel_gvt_hypervisor_enable_page_track(vgpu, &gpt->track); + ret = intel_gvt_hypervisor_enable_page_track(spt->vgpu, &spt->guest_page.track); if (ret) return ret; - trace_oos_change(vgpu->id, "set page sync", gpt->oos_page->id, - gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type); + trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id, + spt, spt->guest_page.type); - 
list_del_init(&gpt->oos_page->vm_list); - return sync_oos_page(vgpu, gpt->oos_page); + list_del_init(&oos_page->vm_list); + return sync_oos_page(spt->vgpu, oos_page); } -static int ppgtt_allocate_oos_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *gpt) +static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt) { - struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt *gvt = spt->vgpu->gvt; struct intel_gvt_gtt *gtt = &gvt->gtt; - struct intel_vgpu_oos_page *oos_page = gpt->oos_page; + struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; int ret; WARN(oos_page, "shadow PPGTT page has already has a oos page\n"); @@ -1319,31 +1219,30 @@ static int ppgtt_allocate_oos_page(struct intel_vgpu *vgpu, if (list_empty(>t->oos_page_free_list_head)) { oos_page = container_of(gtt->oos_page_use_list_head.next, struct intel_vgpu_oos_page, list); - ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page); + ret = ppgtt_set_guest_page_sync(oos_page->spt); if (ret) return ret; - ret = detach_oos_page(vgpu, oos_page); + ret = detach_oos_page(spt->vgpu, oos_page); if (ret) return ret; } else oos_page = container_of(gtt->oos_page_free_list_head.next, struct intel_vgpu_oos_page, list); - return attach_oos_page(vgpu, oos_page, gpt); + return attach_oos_page(oos_page, spt); } -static int ppgtt_set_guest_page_oos(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *gpt) +static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) { - struct intel_vgpu_oos_page *oos_page = gpt->oos_page; + struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n")) return -EINVAL; - trace_oos_change(vgpu->id, "set page out of sync", gpt->oos_page->id, - gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type); + trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id, + spt, spt->guest_page.type); - list_add_tail(&oos_page->vm_list, &vgpu->gtt.oos_page_list_head); - return intel_gvt_hypervisor_disable_page_track(vgpu, &gpt->track); + list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head); + return intel_gvt_hypervisor_disable_page_track(spt->vgpu, &spt->guest_page.track); } /** @@ -1368,7 +1267,7 @@ int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu) list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) { oos_page = container_of(pos, struct intel_vgpu_oos_page, vm_list); - ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page); + ret = ppgtt_set_guest_page_sync(oos_page->spt); if (ret) return ret; } @@ -1379,10 +1278,9 @@ int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu) * The heart of PPGTT shadow page table. 
*/ static int ppgtt_handle_guest_write_page_table( - struct intel_vgpu_guest_page *gpt, + struct intel_vgpu_ppgtt_spt *spt, struct intel_gvt_gtt_entry *we, unsigned long index) { - struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu *vgpu = spt->vgpu; int type = spt->shadow_page.type; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; @@ -1400,12 +1298,12 @@ static int ppgtt_handle_guest_write_page_table( ppgtt_get_shadow_entry(spt, &old_se, index); if (new_present) { - ret = ppgtt_handle_guest_entry_add(gpt, we, index); + ret = ppgtt_handle_guest_entry_add(spt, we, index); if (ret) goto fail; } - ret = ppgtt_handle_guest_entry_removal(gpt, &old_se, index); + ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index); if (ret) goto fail; @@ -1423,12 +1321,11 @@ fail: -static inline bool can_do_out_of_sync(struct intel_vgpu_guest_page *gpt) +static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt) { return enable_out_of_sync - && gtt_type_is_pte_pt( - guest_page_to_ppgtt_spt(gpt)->guest_page_type) - && gpt->write_cnt >= 2; + && gtt_type_is_pte_pt(spt->guest_page.type) + && spt->guest_page.write_cnt >= 2; } static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt, @@ -1468,8 +1365,8 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) GTT_ENTRY_NUM_IN_ONE_PAGE) { ppgtt_get_guest_entry(spt, &ge, index); - ret = ppgtt_handle_guest_write_page_table( - &spt->guest_page, &ge, index); + ret = ppgtt_handle_guest_write_page_table(spt, + &ge, index); if (ret) return ret; clear_bit(index, spt->post_shadow_bitmap); @@ -1480,10 +1377,9 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) } static int ppgtt_handle_guest_write_page_table_bytes( - struct intel_vgpu_guest_page *gpt, + struct intel_vgpu_ppgtt_spt *spt, u64 pa, void *p_data, int bytes) { - struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; @@ -1498,7 +1394,7 @@ static int ppgtt_handle_guest_write_page_table_bytes( ops->test_pse(&we); if (bytes == info->gtt_entry_size) { - ret = ppgtt_handle_guest_write_page_table(gpt, &we, index); + ret = ppgtt_handle_guest_write_page_table(spt, &we, index); if (ret) return ret; } else { @@ -1506,7 +1402,7 @@ static int ppgtt_handle_guest_write_page_table_bytes( int type = spt->shadow_page.type; ppgtt_get_shadow_entry(spt, &se, index); - ret = ppgtt_handle_guest_entry_removal(gpt, &se, index); + ret = ppgtt_handle_guest_entry_removal(spt, &se, index); if (ret) return ret; ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); @@ -1518,17 +1414,17 @@ static int ppgtt_handle_guest_write_page_table_bytes( if (!enable_out_of_sync) return 0; - gpt->write_cnt++; + spt->guest_page.write_cnt++; - if (gpt->oos_page) - ops->set_entry(gpt->oos_page->mem, &we, index, + if (spt->guest_page.oos_page) + ops->set_entry(spt->guest_page.oos_page->mem, &we, index, false, 0, vgpu); - if (can_do_out_of_sync(gpt)) { - if (!gpt->oos_page) - ppgtt_allocate_oos_page(vgpu, gpt); + if (can_do_out_of_sync(spt)) { + if (!spt->guest_page.oos_page) + ppgtt_allocate_oos_page(spt); - ret = ppgtt_set_guest_page_oos(vgpu, gpt); + ret = ppgtt_set_guest_page_oos(spt); if (ret < 0) return ret; } @@ -1557,8 +1453,8 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) se.val64 = 0; ppgtt_set_shadow_root_entry(mm, &se, index); - trace_gpt_change(vgpu->id, "destroy root pointer", - 
NULL, se.type, se.val64, index); + trace_spt_guest_change(vgpu->id, "destroy root pointer", + NULL, se.type, se.val64, index); } mm->ppgtt_mm.shadowed = false; @@ -1586,8 +1482,8 @@ static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) if (!ops->test_present(&ge)) continue; - trace_gpt_change(vgpu->id, __func__, NULL, - ge.type, ge.val64, index); + trace_spt_guest_change(vgpu->id, __func__, NULL, + ge.type, ge.val64, index); spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); if (IS_ERR(spt)) { @@ -1598,8 +1494,8 @@ static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) ppgtt_generate_shadow_entry(&se, spt, &ge); ppgtt_set_shadow_root_entry(mm, &se, index); - trace_gpt_change(vgpu->id, "populate root pointer", - NULL, se.type, se.val64, index); + trace_spt_guest_change(vgpu->id, "populate root pointer", + NULL, se.type, se.val64, index); } return 0; @@ -1793,7 +1689,7 @@ static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm, struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; - s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); + s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); if (!s) return -ENXIO; @@ -2030,7 +1926,7 @@ int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, if (t) { if (unlikely(vgpu->failsafe)) { /* remove write protection to prevent furture traps */ - intel_vgpu_clean_page_track(vgpu, t); + intel_gvt_hypervisor_disable_page_track(vgpu, t); } else { ret = t->handler(t, pa, p_data, bytes); if (ret) { diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 652a76ef6706..a522bfe490f9 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -205,16 +205,6 @@ extern void intel_gvt_clean_gtt(struct intel_gvt *gvt); extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu, int page_table_level, void *root_entry); -struct intel_vgpu_oos_page; - -struct intel_vgpu_shadow_page { - void *vaddr; - struct page *page; - int type; - struct hlist_node node; - unsigned long mfn; -}; - struct intel_vgpu_page_track { struct hlist_node node; bool tracked; @@ -223,14 +213,8 @@ struct intel_vgpu_page_track { void *data; }; -struct intel_vgpu_guest_page { - struct intel_vgpu_page_track track; - unsigned long write_cnt; - struct intel_vgpu_oos_page *oos_page; -}; - struct intel_vgpu_oos_page { - struct intel_vgpu_guest_page *guest_page; + struct intel_vgpu_ppgtt_spt *spt; struct list_head list; struct list_head vm_list; int id; @@ -239,28 +223,31 @@ struct intel_vgpu_oos_page { #define GTT_ENTRY_NUM_IN_ONE_PAGE 512 +/* Represent a vgpu shadow page table. 
*/ struct intel_vgpu_ppgtt_spt { - struct intel_vgpu_shadow_page shadow_page; - struct intel_vgpu_guest_page guest_page; - int guest_page_type; atomic_t refcount; struct intel_vgpu *vgpu; + struct hlist_node node; + + struct { + intel_gvt_gtt_type_t type; + void *vaddr; + struct page *page; + unsigned long mfn; + } shadow_page; + + struct { + intel_gvt_gtt_type_t type; + unsigned long gfn; + unsigned long write_cnt; + struct intel_vgpu_page_track track; + struct intel_vgpu_oos_page *oos_page; + } guest_page; + DECLARE_BITMAP(post_shadow_bitmap, GTT_ENTRY_NUM_IN_ONE_PAGE); struct list_head post_shadow_list; }; -int intel_vgpu_init_page_track(struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t, - unsigned long gfn, - int (*handler)(void *gp, u64, void *, int), - void *data); - -void intel_vgpu_clean_page_track(struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t); - -struct intel_vgpu_page_track *intel_vgpu_find_tracked_page( - struct intel_vgpu *vgpu, unsigned long gfn); - int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu); int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 5a060dacdb26..fc7831a62121 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -168,7 +168,7 @@ TRACE_EVENT(spt_change, TP_printk("%s", __entry->buf) ); -TRACE_EVENT(gpt_change, +TRACE_EVENT(spt_guest_change, TP_PROTO(int id, const char *tag, void *spt, int type, u64 v, unsigned long index), -- cgit v1.2.3 From d87f5ff35f3fc10a4abe13db6b1af9613f20519d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:50 +0800 Subject: drm/i915/gvt: Rename shadow_page to short name spt The target structure of some functions is struct intel_vgpu_ppgtt_spt and their names are xxx_shadow_page. It should be xxx_shadow_page_table. Let's use short name 'spt' instead to reduce the length. As well as the hash table name. 
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 56 +++++++++++++++++++++--------------------- drivers/gpu/drm/i915/gvt/gtt.h | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 2189c45d44fc..11177d71434e 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -659,7 +659,7 @@ struct intel_vgpu_page_track *intel_vgpu_find_tracked_page( static int detach_oos_page(struct intel_vgpu *vgpu, struct intel_vgpu_oos_page *oos_page); -static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt) +static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) { struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev; @@ -684,14 +684,14 @@ static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt) free_spt(spt); } -static void ppgtt_free_all_shadow_page(struct intel_vgpu *vgpu) +static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) { struct hlist_node *n; struct intel_vgpu_ppgtt_spt *spt; int i; - hash_for_each_safe(vgpu->gtt.shadow_page_hash_table, i, n, spt, node) - ppgtt_free_shadow_page(spt); + hash_for_each_safe(vgpu->gtt.spt_hash_table, i, n, spt, node) + ppgtt_free_spt(spt); } static int ppgtt_handle_guest_write_page_table_bytes( @@ -737,7 +737,7 @@ static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( { struct intel_vgpu_ppgtt_spt *spt; - hash_for_each_possible(vgpu->gtt.shadow_page_hash_table, spt, node, mfn) { + hash_for_each_possible(vgpu->gtt.spt_hash_table, spt, node, mfn) { if (spt->shadow_page.mfn == mfn) return spt; } @@ -746,7 +746,7 @@ static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); -static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page( +static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( struct intel_vgpu *vgpu, int type, unsigned long gfn) { struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; @@ -793,7 +793,7 @@ retry: &spt->guest_page.track.node, gfn); INIT_HLIST_NODE(&spt->node); - hash_add(vgpu->gtt.shadow_page_hash_table, &spt->node, spt->shadow_page.mfn); + hash_add(vgpu->gtt.spt_hash_table, &spt->node, spt->shadow_page.mfn); trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); return spt; @@ -815,7 +815,7 @@ retry: if (!ppgtt_get_shadow_entry(spt, e, i) && \ spt->vgpu->gvt->gtt.pte_ops->test_present(e)) -static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt) +static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) { int v = atomic_read(&spt->refcount); @@ -824,9 +824,9 @@ static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt) atomic_inc(&spt->refcount); } -static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt); +static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt); -static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, +static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *e) { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; @@ -848,10 +848,10 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, ops->get_pfn(e)); return -ENXIO; } - return ppgtt_invalidate_shadow_page(s); + return ppgtt_invalidate_spt(s); } -static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) +static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_entry e; 
@@ -883,7 +883,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) case GTT_TYPE_PPGTT_PDP_ENTRY: case GTT_TYPE_PPGTT_PDE_ENTRY: gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n"); - ret = ppgtt_invalidate_shadow_page_by_shadow_entry( + ret = ppgtt_invalidate_spt_by_shadow_entry( spt->vgpu, &e); if (ret) goto fail; @@ -895,7 +895,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) release: trace_spt_change(spt->vgpu->id, "release", spt, spt->guest_page.gfn, spt->shadow_page.type); - ppgtt_free_shadow_page(spt); + ppgtt_free_spt(spt); return 0; fail: gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n", @@ -903,9 +903,9 @@ fail: return ret; } -static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt); +static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt); -static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( +static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we) { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; @@ -916,11 +916,11 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we)); if (spt) - ppgtt_get_shadow_page(spt); + ppgtt_get_spt(spt); else { int type = get_next_pt_type(we->type); - spt = ppgtt_alloc_shadow_page(vgpu, type, ops->get_pfn(we)); + spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we)); if (IS_ERR(spt)) { ret = PTR_ERR(spt); goto fail; @@ -930,7 +930,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( if (ret) goto fail; - ret = ppgtt_populate_shadow_page(spt); + ret = ppgtt_populate_spt(spt); if (ret) goto fail; @@ -990,7 +990,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, return 0; } -static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) +static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt *gvt = vgpu->gvt; @@ -1005,7 +1005,7 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) for_each_present_guest_entry(spt, &ge, i) { if (gtt_type_is_pt(get_next_pt_type(ge.type))) { - s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); + s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); if (IS_ERR(s)) { ret = PTR_ERR(s); goto fail; @@ -1061,7 +1061,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, ret = -ENXIO; goto fail; } - ret = ppgtt_invalidate_shadow_page(s); + ret = ppgtt_invalidate_spt(s); if (ret) goto fail; } @@ -1087,7 +1087,7 @@ static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt, we->type, index, we->val64); if (gtt_type_is_pt(get_next_pt_type(we->type))) { - s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, we); + s = ppgtt_populate_spt_by_guest_entry(vgpu, we); if (IS_ERR(s)) { ret = PTR_ERR(s); goto fail; @@ -1449,7 +1449,7 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) if (!ops->test_present(&se)) continue; - ppgtt_invalidate_shadow_page_by_shadow_entry(vgpu, &se); + ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se); se.val64 = 0; ppgtt_set_shadow_root_entry(mm, &se, index); @@ -1485,7 +1485,7 @@ static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) trace_spt_guest_change(vgpu->id, __func__, NULL, ge.type, ge.val64, index); - spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); + spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); if (IS_ERR(spt)) { 
gvt_vgpu_err("fail to populate guest root pointer\n"); ret = PTR_ERR(spt); @@ -2059,7 +2059,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) struct intel_vgpu_gtt *gtt = &vgpu->gtt; hash_init(gtt->tracked_guest_page_hash_table); - hash_init(gtt->shadow_page_hash_table); + hash_init(gtt->spt_hash_table); INIT_LIST_HEAD(>t->ppgtt_mm_list_head); INIT_LIST_HEAD(>t->oos_page_list_head); @@ -2089,9 +2089,9 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head))) gvt_err("vgpu ppgtt mm is not fully destoried\n"); - if (GEM_WARN_ON(!hlist_empty(vgpu->gtt.shadow_page_hash_table))) { + if (GEM_WARN_ON(!hlist_empty(vgpu->gtt.spt_hash_table))) { gvt_err("Why we still has spt not freed?\n"); - ppgtt_free_all_shadow_page(vgpu); + ppgtt_free_all_spt(vgpu); } } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index a522bfe490f9..e4ff3f823c7b 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -186,7 +186,7 @@ struct intel_vgpu_gtt { struct intel_vgpu_mm *ggtt_mm; unsigned long active_ppgtt_mm_bitmap; struct list_head ppgtt_mm_list_head; - DECLARE_HASHTABLE(shadow_page_hash_table, INTEL_GVT_GTT_HASH_BITS); + DECLARE_HASHTABLE(spt_hash_table, INTEL_GVT_GTT_HASH_BITS); DECLARE_HASHTABLE(tracked_guest_page_hash_table, INTEL_GVT_GTT_HASH_BITS); atomic_t n_tracked_guest_page; struct list_head oos_page_list_head; -- cgit v1.2.3 From f66e5ff706038d03e8ef6d012e3aec7824442418 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:51 +0800 Subject: drm/i915/gvt: Rename mpt api {set, unset}_wp_page to {enable, disable}_page_track The kvmgt's implementation of mpt api {set,unset}_wp_page is not real write-protection - the data get written before invoke this two api. As discussed, change the mpt api to match the real behavior. 
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/hypercall.h | 4 ++-- drivers/gpu/drm/i915/gvt/kvmgt.c | 8 ++++---- drivers/gpu/drm/i915/gvt/mpt.h | 9 ++++----- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index f8e77e166246..cb6303e630a4 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -44,8 +44,8 @@ struct intel_gvt_mpt { void (*detach_vgpu)(unsigned long handle); int (*inject_msi)(unsigned long handle, u32 addr, u16 data); unsigned long (*from_virt_to_mfn)(void *p); - int (*set_wp_page)(unsigned long handle, u64 gfn); - int (*unset_wp_page)(unsigned long handle, u64 gfn); + int (*enable_page_track)(unsigned long handle, u64 gfn); + int (*disable_page_track)(unsigned long handle, u64 gfn); int (*read_gpa)(unsigned long handle, unsigned long gpa, void *buf, unsigned long len); int (*write_gpa)(unsigned long handle, unsigned long gpa, void *buf, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 909499b73d03..d86071a32b6a 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1321,7 +1321,7 @@ static void kvmgt_host_exit(struct device *dev, void *gvt) mdev_unregister_device(dev); } -static int kvmgt_write_protect_add(unsigned long handle, u64 gfn) +static int kvmgt_page_track_add(unsigned long handle, u64 gfn) { struct kvmgt_guest_info *info; struct kvm *kvm; @@ -1355,7 +1355,7 @@ out: return 0; } -static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn) +static int kvmgt_page_track_remove(unsigned long handle, u64 gfn) { struct kvmgt_guest_info *info; struct kvm *kvm; @@ -1629,8 +1629,8 @@ struct intel_gvt_mpt kvmgt_mpt = { .detach_vgpu = kvmgt_detach_vgpu, .inject_msi = kvmgt_inject_msi, .from_virt_to_mfn = kvmgt_virt_to_pfn, - .set_wp_page = kvmgt_write_protect_add, - .unset_wp_page = kvmgt_write_protect_remove, + .enable_page_track = kvmgt_page_track_add, + .disable_page_track = kvmgt_page_track_remove, .read_gpa = kvmgt_read_gpa, .write_gpa = kvmgt_write_gpa, .gfn_to_mfn = kvmgt_gfn_to_pfn, diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 81aff4eacbfe..90fd83f98733 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -154,7 +154,7 @@ static inline unsigned long intel_gvt_hypervisor_virt_to_mfn(void *p) } /** - * intel_gvt_hypervisor_enable - set a guest page to write-protected + * intel_gvt_hypervisor_enable_page_track - track a guest page * @vgpu: a vGPU * @t: page track data structure * @@ -170,7 +170,7 @@ static inline int intel_gvt_hypervisor_enable_page_track( if (t->tracked) return 0; - ret = intel_gvt_host.mpt->set_wp_page(vgpu->handle, t->gfn); + ret = intel_gvt_host.mpt->enable_page_track(vgpu->handle, t->gfn); if (ret) return ret; t->tracked = true; @@ -179,8 +179,7 @@ static inline int intel_gvt_hypervisor_enable_page_track( } /** - * intel_gvt_hypervisor_disable_page_track - remove the write-protection of a - * guest page + * intel_gvt_hypervisor_disable_page_track - untrack a guest page * @vgpu: a vGPU * @t: page track data structure * @@ -196,7 +195,7 @@ static inline int intel_gvt_hypervisor_disable_page_track( if (!t->tracked) return 0; - ret = intel_gvt_host.mpt->unset_wp_page(vgpu->handle, t->gfn); + ret = intel_gvt_host.mpt->disable_page_track(vgpu->handle, t->gfn); if (ret) return ret; t->tracked = false; -- cgit v1.2.3 From 
0947572849cb3ca7028d9daa3958158639ae4d69 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:52 +0800 Subject: drm/i915/gvt: Don't extend page_track to mpt layer Don't extend page_track to mpt layer. Keep MPT simple and clean. Meanwhile remove gtt.n_tracked_guest_page which doesn't make much sense. v2: clean up gtt.n_tracked_guest_page. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 54 +++++++++++++++++++++++------------------- drivers/gpu/drm/i915/gvt/mpt.h | 34 +++++--------------------- 2 files changed, 36 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 11177d71434e..b79321f5c9fb 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -678,7 +678,7 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) if (spt->guest_page.track.tracked) intel_gvt_hypervisor_disable_page_track(spt->vgpu, - &spt->guest_page.track); + spt->guest_page.track.gfn); list_del_init(&spt->post_shadow_list); free_spt(spt); @@ -926,10 +926,11 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( goto fail; } - ret = intel_gvt_hypervisor_enable_page_track(vgpu, &spt->guest_page.track); + ret = intel_gvt_hypervisor_enable_page_track(vgpu, spt->guest_page.track.gfn); if (ret) goto fail; + spt->guest_page.track.tracked = true; ret = ppgtt_populate_spt(spt); if (ret) goto fail; @@ -1196,9 +1197,10 @@ static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt) struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; int ret; - ret = intel_gvt_hypervisor_enable_page_track(spt->vgpu, &spt->guest_page.track); + ret = intel_gvt_hypervisor_enable_page_track(spt->vgpu, spt->guest_page.track.gfn); if (ret) return ret; + spt->guest_page.track.tracked = true; trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id, spt, spt->guest_page.type); @@ -1234,6 +1236,7 @@ static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt) static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; + int ret; if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n")) return -EINVAL; @@ -1242,7 +1245,11 @@ static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) spt, spt->guest_page.type); list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head); - return intel_gvt_hypervisor_disable_page_track(spt->vgpu, &spt->guest_page.track); + ret = intel_gvt_hypervisor_disable_page_track(spt->vgpu, spt->guest_page.track.gfn); + if (ret) + return ret; + spt->guest_page.track.tracked = false; + return 0; } /** @@ -1917,29 +1924,28 @@ int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, struct intel_gvt *gvt = vgpu->gvt; int ret = 0; - if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { - struct intel_vgpu_page_track *t; - - mutex_lock(&gvt->lock); - - t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); - if (t) { - if (unlikely(vgpu->failsafe)) { - /* remove write protection to prevent furture traps */ - intel_gvt_hypervisor_disable_page_track(vgpu, t); - } else { - ret = t->handler(t, pa, p_data, bytes); - if (ret) { - gvt_err("guest page write error %d, " - "gfn 0x%lx, pa 0x%llx, " - "var 0x%x, len %d\n", - ret, t->gfn, pa, - *(u32 *)p_data, bytes); - } + struct intel_vgpu_page_track *t; + + mutex_lock(&gvt->lock); + + t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); + if (t) { + if 
(unlikely(vgpu->failsafe)) { + /* remove write protection to prevent furture traps */ + intel_gvt_hypervisor_disable_page_track(vgpu, t->gfn); + } else { + ret = t->handler(t, pa, p_data, bytes); + if (ret) { + gvt_err("guest page write error %d, " + "gfn 0x%lx, pa 0x%llx, " + "var 0x%x, len %d\n", + ret, t->gfn, pa, + *(u32 *)p_data, bytes); } } - mutex_unlock(&gvt->lock); } + mutex_unlock(&gvt->lock); + return ret; } diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 90fd83f98733..78fada9e3241 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -156,51 +156,29 @@ static inline unsigned long intel_gvt_hypervisor_virt_to_mfn(void *p) /** * intel_gvt_hypervisor_enable_page_track - track a guest page * @vgpu: a vGPU - * @t: page track data structure + * @gfn: the gfn of guest * * Returns: * Zero on success, negative error code if failed. */ static inline int intel_gvt_hypervisor_enable_page_track( - struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t) + struct intel_vgpu *vgpu, unsigned long gfn) { - int ret; - - if (t->tracked) - return 0; - - ret = intel_gvt_host.mpt->enable_page_track(vgpu->handle, t->gfn); - if (ret) - return ret; - t->tracked = true; - atomic_inc(&vgpu->gtt.n_tracked_guest_page); - return 0; + return intel_gvt_host.mpt->enable_page_track(vgpu->handle, gfn); } /** * intel_gvt_hypervisor_disable_page_track - untrack a guest page * @vgpu: a vGPU - * @t: page track data structure + * @gfn: the gfn of guest * * Returns: * Zero on success, negative error code if failed. */ static inline int intel_gvt_hypervisor_disable_page_track( - struct intel_vgpu *vgpu, - struct intel_vgpu_page_track *t) + struct intel_vgpu *vgpu, unsigned long gfn) { - int ret; - - if (!t->tracked) - return 0; - - ret = intel_gvt_host.mpt->disable_page_track(vgpu->handle, t->gfn); - if (ret) - return ret; - t->tracked = false; - atomic_dec(&vgpu->gtt.n_tracked_guest_page); - return 0; + return intel_gvt_host.mpt->disable_page_track(vgpu->handle, gfn); } /** -- cgit v1.2.3 From e502a2af4c358d14ecf8fce51bf4988ebb4d10b4 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:53 +0800 Subject: drm/i915/gvt: Provide generic page_track infrastructure for write-protected page This patch provide generic page_track infrastructure for write-protected guest page. The old page_track logic gets rewrote and now stays in a new standalone page_track.c. This page track infrastructure can be both used by vGUC and GTT shadowing. The important change is that it uses radix tree instead of hash table. We don't have a predictable number of pages that will be tracked. Here is some performance data (duration in us) of looking up a element: Before: (aka. intel_vgpu_find_tracked_page) 0.091 0.089 0.090 ... 0.093 0.091 0.087 ... 0.292 0.285 0.292 0.291 After: (aka. intel_vgpu_find_page_track) 0.104 0.105 0.100 0.102 0.102 0.100 ... 0.101 0.101 0.105 0.105 The hash table has good performance at beginning, but turns bad with more pages being tracked even no 3D applications are running. As expected, radix tree has stable duration and very quick. The overall benchmark (tested with Heaven Benchmark) marginally improved since this is not the bottleneck. What we benefit more from this change is scalability. 
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/Makefile | 2 +- drivers/gpu/drm/i915/gvt/gtt.c | 119 +++++----------------- drivers/gpu/drm/i915/gvt/gtt.h | 14 --- drivers/gpu/drm/i915/gvt/gvt.c | 2 +- drivers/gpu/drm/i915/gvt/gvt.h | 2 + drivers/gpu/drm/i915/gvt/page_track.c | 181 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/page_track.h | 56 +++++++++++ drivers/gpu/drm/i915/gvt/vgpu.c | 1 + 8 files changed, 266 insertions(+), 111 deletions(-) create mode 100644 drivers/gpu/drm/i915/gvt/page_track.c create mode 100644 drivers/gpu/drm/i915/gvt/page_track.h diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index 347116faa558..b016dc753db9 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -3,7 +3,7 @@ GVT_DIR := gvt GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ execlist.o scheduler.o sched_policy.o mmio_context.o cmd_parser.o debugfs.o \ - fb_decoder.o dmabuf.o + fb_decoder.o dmabuf.o page_track.o ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index b79321f5c9fb..13eb0572afdb 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -559,7 +559,7 @@ static inline int ppgtt_spt_get_entry( return -EINVAL; ret = ops->get_entry(page_table, e, index, guest, - spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT, + spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, spt->vgpu); if (ret) return ret; @@ -587,7 +587,7 @@ static inline int ppgtt_spt_set_entry( type, e->type, index, e->val64); return ops->set_entry(page_table, e, index, guest, - spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT, + spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, spt->vgpu); } @@ -607,9 +607,6 @@ static inline int ppgtt_spt_set_entry( ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \ spt->shadow_page.type, e, index, false) -#define page_track_to_ppgtt_spt(ptr) \ - container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page.track) - static void *alloc_spt(gfp_t gfp_mask) { struct intel_vgpu_ppgtt_spt *spt; @@ -632,30 +629,6 @@ static void free_spt(struct intel_vgpu_ppgtt_spt *spt) kfree(spt); } -/** - * intel_vgpu_find_tracked_page - find a tracked guest page - * @vgpu: a vGPU - * @gfn: guest memory page frame number - * - * This function is called when the emulation layer wants to figure out if a - * trapped GFN is a tracked guest page. - * - * Returns: - * Pointer to page track data structure, NULL if not found. 
- */ -struct intel_vgpu_page_track *intel_vgpu_find_tracked_page( - struct intel_vgpu *vgpu, unsigned long gfn) -{ - struct intel_vgpu_page_track *t; - - hash_for_each_possible(vgpu->gtt.tracked_guest_page_hash_table, - t, node, gfn) { - if (t->gfn == gfn) - return t; - } - return NULL; -} - static int detach_oos_page(struct intel_vgpu *vgpu, struct intel_vgpu_oos_page *oos_page); @@ -673,12 +646,7 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) if (spt->guest_page.oos_page) detach_oos_page(spt->vgpu, spt->guest_page.oos_page); - if (!hlist_unhashed(&spt->guest_page.track.node)) - hash_del(&spt->guest_page.track.node); - - if (spt->guest_page.track.tracked) - intel_gvt_hypervisor_disable_page_track(spt->vgpu, - spt->guest_page.track.gfn); + intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); list_del_init(&spt->post_shadow_list); free_spt(spt); @@ -698,21 +666,18 @@ static int ppgtt_handle_guest_write_page_table_bytes( struct intel_vgpu_ppgtt_spt *spt, u64 pa, void *p_data, int bytes); -static int ppgtt_write_protection_handler(void *data, u64 pa, - void *p_data, int bytes) +static int ppgtt_write_protection_handler( + struct intel_vgpu_page_track *page_track, + u64 gpa, void *data, int bytes) { - struct intel_vgpu_page_track *t = data; - struct intel_vgpu_ppgtt_spt *spt = page_track_to_ppgtt_spt(t); + struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data; + int ret; if (bytes != 4 && bytes != 8) return -EINVAL; - if (!t->tracked) - return -EINVAL; - - ret = ppgtt_handle_guest_write_page_table_bytes(spt, - pa, p_data, bytes); + ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes); if (ret) return ret; return ret; @@ -724,9 +689,9 @@ static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn( { struct intel_vgpu_page_track *track; - track = intel_vgpu_find_tracked_page(vgpu, gfn); - if (track) - return page_track_to_ppgtt_spt(track); + track = intel_vgpu_find_page_track(vgpu, gfn); + if (track && track->handler == ppgtt_write_protection_handler) + return track->priv_data; return NULL; } @@ -752,6 +717,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; struct intel_vgpu_ppgtt_spt *spt = NULL; dma_addr_t daddr; + int ret; retry: spt = alloc_spt(GFP_KERNEL | __GFP_ZERO); @@ -787,10 +753,13 @@ retry: spt->guest_page.type = type; spt->guest_page.gfn = gfn; - spt->guest_page.track.gfn = gfn; - spt->guest_page.track.handler = ppgtt_write_protection_handler; - hash_add(vgpu->gtt.tracked_guest_page_hash_table, - &spt->guest_page.track.node, gfn); + ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn, + ppgtt_write_protection_handler, spt); + if (ret) { + free_spt(spt); + dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + return ERR_PTR(ret); + } INIT_HLIST_NODE(&spt->node); hash_add(vgpu->gtt.spt_hash_table, &spt->node, spt->shadow_page.mfn); @@ -926,11 +895,10 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( goto fail; } - ret = intel_gvt_hypervisor_enable_page_track(vgpu, spt->guest_page.track.gfn); + ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn); if (ret) goto fail; - spt->guest_page.track.tracked = true; ret = ppgtt_populate_spt(spt); if (ret) goto fail; @@ -1002,7 +970,7 @@ static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt) int ret; trace_spt_change(spt->vgpu->id, "born", spt, - spt->guest_page.track.gfn, spt->shadow_page.type); + spt->guest_page.gfn, spt->shadow_page.type); 
for_each_present_guest_entry(spt, &ge, i) { if (gtt_type_is_pt(get_next_pt_type(ge.type))) { @@ -1197,10 +1165,9 @@ static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt) struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; int ret; - ret = intel_gvt_hypervisor_enable_page_track(spt->vgpu, spt->guest_page.track.gfn); + ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn); if (ret) return ret; - spt->guest_page.track.tracked = true; trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id, spt, spt->guest_page.type); @@ -1236,7 +1203,6 @@ static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt) static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; - int ret; if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n")) return -EINVAL; @@ -1245,11 +1211,7 @@ static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) spt, spt->guest_page.type); list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head); - ret = intel_gvt_hypervisor_disable_page_track(spt->vgpu, spt->guest_page.track.gfn); - if (ret) - return ret; - spt->guest_page.track.tracked = false; - return 0; + return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn); } /** @@ -1918,38 +1880,6 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, return ret; } -int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, - void *p_data, unsigned int bytes) -{ - struct intel_gvt *gvt = vgpu->gvt; - int ret = 0; - - struct intel_vgpu_page_track *t; - - mutex_lock(&gvt->lock); - - t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); - if (t) { - if (unlikely(vgpu->failsafe)) { - /* remove write protection to prevent furture traps */ - intel_gvt_hypervisor_disable_page_track(vgpu, t->gfn); - } else { - ret = t->handler(t, pa, p_data, bytes); - if (ret) { - gvt_err("guest page write error %d, " - "gfn 0x%lx, pa 0x%llx, " - "var 0x%x, len %d\n", - ret, t->gfn, pa, - *(u32 *)p_data, bytes); - } - } - } - mutex_unlock(&gvt->lock); - - return ret; -} - - static int alloc_scratch_pages(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) { @@ -2064,7 +1994,6 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) { struct intel_vgpu_gtt *gtt = &vgpu->gtt; - hash_init(gtt->tracked_guest_page_hash_table); hash_init(gtt->spt_hash_table); INIT_LIST_HEAD(>t->ppgtt_mm_list_head); diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index e4ff3f823c7b..695ab3bd4a69 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -187,8 +187,6 @@ struct intel_vgpu_gtt { unsigned long active_ppgtt_mm_bitmap; struct list_head ppgtt_mm_list_head; DECLARE_HASHTABLE(spt_hash_table, INTEL_GVT_GTT_HASH_BITS); - DECLARE_HASHTABLE(tracked_guest_page_hash_table, INTEL_GVT_GTT_HASH_BITS); - atomic_t n_tracked_guest_page; struct list_head oos_page_list_head; struct list_head post_shadow_list_head; struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX]; @@ -205,14 +203,6 @@ extern void intel_gvt_clean_gtt(struct intel_gvt *gvt); extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu, int page_table_level, void *root_entry); -struct intel_vgpu_page_track { - struct hlist_node node; - bool tracked; - unsigned long gfn; - int (*handler)(void *, u64, void *, int); - void *data; -}; - struct intel_vgpu_oos_page { struct intel_vgpu_ppgtt_spt *spt; struct list_head list; @@ -240,7 +230,6 @@ struct 
intel_vgpu_ppgtt_spt { intel_gvt_gtt_type_t type; unsigned long gfn; unsigned long write_cnt; - struct intel_vgpu_page_track track; struct intel_vgpu_oos_page *oos_page; } guest_page; @@ -273,7 +262,4 @@ int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); -int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, - void *p_data, unsigned int bytes); - #endif /* _GVT_GTT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index fac54f32d33f..61bd14fcb649 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -183,7 +183,7 @@ static const struct intel_gvt_ops intel_gvt_ops = { .get_gvt_attrs = intel_get_gvt_attrs, .vgpu_query_plane = intel_vgpu_query_plane, .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, - .write_protect_handler = intel_vgpu_write_protect_handler, + .write_protect_handler = intel_vgpu_page_track_handler, }; /** diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index c6197d990818..2b28b523376d 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -48,6 +48,7 @@ #include "cmd_parser.h" #include "fb_decoder.h" #include "dmabuf.h" +#include "page_track.h" #define GVT_MAX_VGPU 8 @@ -190,6 +191,7 @@ struct intel_vgpu { struct intel_vgpu_opregion opregion; struct intel_vgpu_display display; struct intel_vgpu_submission submission; + struct radix_tree_root page_track_tree; u32 hws_pga[I915_NUM_ENGINES]; struct dentry *debugfs; diff --git a/drivers/gpu/drm/i915/gvt/page_track.c b/drivers/gpu/drm/i915/gvt/page_track.c new file mode 100644 index 000000000000..09bd56e39ec6 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/page_track.c @@ -0,0 +1,181 @@ +/* + * Copyright(c) 2011-2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "i915_drv.h" +#include "gvt.h" + +/** + * intel_vgpu_find_page_track - find page track rcord of guest page + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * + * Returns: + * A pointer to struct intel_vgpu_page_track if found, else NULL returned. 
+ */ +struct intel_vgpu_page_track *intel_vgpu_find_page_track( + struct intel_vgpu *vgpu, unsigned long gfn) +{ + return radix_tree_lookup(&vgpu->page_track_tree, gfn); +} + +/** + * intel_vgpu_register_page_track - register a guest page to be tacked + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * + * Returns: + * zero on success, negative error code if failed. + */ +int intel_vgpu_register_page_track(struct intel_vgpu *vgpu, unsigned long gfn, + gvt_page_track_handler_t handler, void *priv) +{ + struct intel_vgpu_page_track *track; + int ret; + + track = intel_vgpu_find_page_track(vgpu, gfn); + if (track) + return -EEXIST; + + track = kzalloc(sizeof(*track), GFP_KERNEL); + if (!track) + return -ENOMEM; + + track->handler = handler; + track->priv_data = priv; + + ret = radix_tree_insert(&vgpu->page_track_tree, gfn, track); + if (ret) { + kfree(track); + return ret; + } + + return 0; +} + +/** + * intel_vgpu_unregister_page_track - unregister the tracked guest page + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * + */ +void intel_vgpu_unregister_page_track(struct intel_vgpu *vgpu, + unsigned long gfn) +{ + struct intel_vgpu_page_track *track; + + track = radix_tree_delete(&vgpu->page_track_tree, gfn); + if (track) { + if (track->tracked) + intel_gvt_hypervisor_disable_page_track(vgpu, gfn); + kfree(track); + } +} + +/** + * intel_vgpu_enable_page_track - set write-protection on guest page + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * + * Returns: + * zero on success, negative error code if failed. + */ +int intel_vgpu_enable_page_track(struct intel_vgpu *vgpu, unsigned long gfn) +{ + struct intel_vgpu_page_track *track; + int ret; + + track = intel_vgpu_find_page_track(vgpu, gfn); + if (!track) + return -ENXIO; + + if (track->tracked) + return 0; + + ret = intel_gvt_hypervisor_enable_page_track(vgpu, gfn); + if (ret) + return ret; + track->tracked = true; + return 0; +} + +/** + * intel_vgpu_enable_page_track - cancel write-protection on guest page + * @vgpu: a vGPU + * @gfn: the gfn of guest page + * + * Returns: + * zero on success, negative error code if failed. + */ +int intel_vgpu_disable_page_track(struct intel_vgpu *vgpu, unsigned long gfn) +{ + struct intel_vgpu_page_track *track; + int ret; + + track = intel_vgpu_find_page_track(vgpu, gfn); + if (!track) + return -ENXIO; + + if (!track->tracked) + return 0; + + ret = intel_gvt_hypervisor_disable_page_track(vgpu, gfn); + if (ret) + return ret; + track->tracked = false; + return 0; +} + +/** + * intel_vgpu_page_track_handler - called when write to write-protected page + * @vgpu: a vGPU + * @gpa: the gpa of this write + * @data: the writed data + * @bytes: the length of this write + * + * Returns: + * zero on success, negative error code if failed. + */ +int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, + void *data, unsigned int bytes) +{ + struct intel_gvt *gvt = vgpu->gvt; + struct intel_vgpu_page_track *page_track; + int ret = 0; + + mutex_lock(&gvt->lock); + + page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT); + if (!page_track) + return 0; + + if (unlikely(vgpu->failsafe)) { + /* Remove write protection to prevent furture traps. 
*/ + intel_vgpu_disable_page_track(vgpu, gpa >> PAGE_SHIFT); + } else { + ret = page_track->handler(page_track, gpa, data, bytes); + if (ret) + gvt_err("guest page write error, gpa %llx\n", gpa); + } + + mutex_unlock(&gvt->lock); + return ret; +} diff --git a/drivers/gpu/drm/i915/gvt/page_track.h b/drivers/gpu/drm/i915/gvt/page_track.h new file mode 100644 index 000000000000..fa607a71c3c0 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/page_track.h @@ -0,0 +1,56 @@ +/* + * Copyright(c) 2011-2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _GVT_PAGE_TRACK_H_ +#define _GVT_PAGE_TRACK_H_ + +struct intel_vgpu_page_track; + +typedef int (*gvt_page_track_handler_t)( + struct intel_vgpu_page_track *page_track, + u64 gpa, void *data, int bytes); + +/* Track record for a write-protected guest page. */ +struct intel_vgpu_page_track { + gvt_page_track_handler_t handler; + bool tracked; + void *priv_data; +}; + +struct intel_vgpu_page_track *intel_vgpu_find_page_track( + struct intel_vgpu *vgpu, unsigned long gfn); + +int intel_vgpu_register_page_track(struct intel_vgpu *vgpu, + unsigned long gfn, gvt_page_track_handler_t handler, + void *priv); +void intel_vgpu_unregister_page_track(struct intel_vgpu *vgpu, + unsigned long gfn); + +int intel_vgpu_enable_page_track(struct intel_vgpu *vgpu, unsigned long gfn); +int intel_vgpu_disable_page_track(struct intel_vgpu *vgpu, unsigned long gfn); + +int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, + void *data, unsigned int bytes); + +#endif diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index b87b19d8443c..41f76e86aa1f 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -354,6 +354,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->gvt = gvt; vgpu->sched_ctl.weight = param->weight; INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head); + INIT_RADIX_TREE(&vgpu->page_track_tree, GFP_KERNEL); idr_init(&vgpu->object_idr); intel_vgpu_init_cfg_space(vgpu, param->primary); -- cgit v1.2.3 From b6c126a39345f7286bb25135efd9154419127427 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:54 +0800 Subject: drm/i915/gvt: Manage shadow pages with radix tree We don't know how many page tables will be shadowed. It varies considerably corresponding to guest load. Radix tree is a better choice for us. 
Since Page Frame Number is used as key so most of the bits are common. Here is some performance data (duration in us) of looking up a element: Before: (aka. ppgtt_find_shadow_page) 0.308 0.292 0.246 0.432 0.143 ... 0.311 0.225 0.382 0.199 0.325 After: (aka. intel_vgpu_find_spt_by_mfn) 0.106 0.106 0.107 0.106 0.105 0.107 ... 0.107 0.109 0.105 0.108 This time I didn't get the early data of hash table. The data is measured when desktop is shown. As last change, the overall benchmark almost is not changed, but we get better scalability. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 50 ++++++++++++++++++++++-------------------- drivers/gpu/drm/i915/gvt/gtt.h | 4 +--- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 13eb0572afdb..d204532022bf 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -640,8 +640,8 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096, PCI_DMA_BIDIRECTIONAL); - if (!hlist_unhashed(&spt->node)) - hash_del(&spt->node); + + radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); if (spt->guest_page.oos_page) detach_oos_page(spt->vgpu, spt->guest_page.oos_page); @@ -654,12 +654,14 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) { - struct hlist_node *n; struct intel_vgpu_ppgtt_spt *spt; - int i; + struct radix_tree_iter iter; + void **slot; - hash_for_each_safe(vgpu->gtt.spt_hash_table, i, n, spt, node) + radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) { + spt = radix_tree_deref_slot(slot); ppgtt_free_spt(spt); + } } static int ppgtt_handle_guest_write_page_table_bytes( @@ -697,16 +699,10 @@ static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn( } /* Find the spt by shadow page mfn. 
*/ -static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( +static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( struct intel_vgpu *vgpu, unsigned long mfn) { - struct intel_vgpu_ppgtt_spt *spt; - - hash_for_each_possible(vgpu->gtt.spt_hash_table, spt, node, mfn) { - if (spt->shadow_page.mfn == mfn) - return spt; - } - return NULL; + return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn); } static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); @@ -741,8 +737,8 @@ retry: 0, 4096, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(kdev, daddr)) { gvt_vgpu_err("fail to map dma addr\n"); - free_spt(spt); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto err_free_spt; } spt->shadow_page.vaddr = page_address(spt->shadow_page.page); spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; @@ -755,17 +751,23 @@ retry: ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn, ppgtt_write_protection_handler, spt); - if (ret) { - free_spt(spt); - dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - return ERR_PTR(ret); - } + if (ret) + goto err_unmap_dma; - INIT_HLIST_NODE(&spt->node); - hash_add(vgpu->gtt.spt_hash_table, &spt->node, spt->shadow_page.mfn); + ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt); + if (ret) + goto err_unreg_page_track; trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); return spt; + +err_unreg_page_track: + intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn); +err_unmap_dma: + dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); +err_free_spt: + free_spt(spt); + return ERR_PTR(ret); } #define pt_entry_size_shift(spt) \ @@ -1994,7 +1996,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) { struct intel_vgpu_gtt *gtt = &vgpu->gtt; - hash_init(gtt->spt_hash_table); + INIT_RADIX_TREE(>t->spt_tree, GFP_KERNEL); INIT_LIST_HEAD(>t->ppgtt_mm_list_head); INIT_LIST_HEAD(>t->oos_page_list_head); @@ -2024,7 +2026,7 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head))) gvt_err("vgpu ppgtt mm is not fully destoried\n"); - if (GEM_WARN_ON(!hlist_empty(vgpu->gtt.spt_hash_table))) { + if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) { gvt_err("Why we still has spt not freed?\n"); ppgtt_free_all_spt(vgpu); } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 695ab3bd4a69..e831507e17c3 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -39,7 +39,6 @@ struct intel_vgpu_mm; -#define INTEL_GVT_GTT_HASH_BITS 8 #define INTEL_GVT_INVALID_ADDR (~0UL) struct intel_gvt_gtt_entry { @@ -186,7 +185,7 @@ struct intel_vgpu_gtt { struct intel_vgpu_mm *ggtt_mm; unsigned long active_ppgtt_mm_bitmap; struct list_head ppgtt_mm_list_head; - DECLARE_HASHTABLE(spt_hash_table, INTEL_GVT_GTT_HASH_BITS); + struct radix_tree_root spt_tree; struct list_head oos_page_list_head; struct list_head post_shadow_list_head; struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX]; @@ -217,7 +216,6 @@ struct intel_vgpu_oos_page { struct intel_vgpu_ppgtt_spt { atomic_t refcount; struct intel_vgpu *vgpu; - struct hlist_node node; struct { intel_gvt_gtt_type_t type; -- cgit v1.2.3 From 420fba78d9b10cf81c04bbf7a6048333cadc2658 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 30 Jan 2018 19:19:55 +0800 Subject: drm/i915/gvt: Define PTE addr mask with GENMASK_ULL Define the masks better. 
Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index d204532022bf..8fb4f1023d06 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -337,9 +337,9 @@ static inline int gtt_set_entry64(void *pt, #define GTT_HAW 46 -#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30)) - 1) << 30) -#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21)) - 1) << 21) -#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12)) - 1) << 12) +#define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30) +#define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21) +#define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12) static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) { -- cgit v1.2.3 From f9a651c05d7ae492185027f6acde25e2bc54edd9 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Tue, 13 Feb 2018 13:24:31 +0800 Subject: drm/i915/gvt: add define GEN9_MOCS_SIZE No functional change. This defination will also be used in future patchesi. v4: - refine patch description (Kevin) Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio_context.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 73ad6e90e49d..ca4ba56fd60c 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -50,6 +50,8 @@ #define RING_GFX_MODE(base) _MMIO((base) + 0x29c) #define VF_GUARDBAND _MMIO(0x83a4) +#define GEN9_MOCS_SIZE 64 + /* Raw offset is appened to each line for convenience. */ static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = { {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ @@ -151,8 +153,8 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { static struct { bool initialized; - u32 control_table[I915_NUM_ENGINES][64]; - u32 l3cc_table[32]; + u32 control_table[I915_NUM_ENGINES][GEN9_MOCS_SIZE]; + u32 l3cc_table[GEN9_MOCS_SIZE / 2]; } gen9_render_mocs; static void load_render_mocs(struct drm_i915_private *dev_priv) @@ -169,7 +171,7 @@ static void load_render_mocs(struct drm_i915_private *dev_priv) for (ring_id = 0; ring_id < ARRAY_SIZE(regs); ring_id++) { offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { + for (i = 0; i < GEN9_MOCS_SIZE; i++) { gen9_render_mocs.control_table[ring_id][i] = I915_READ_FW(offset); offset.reg += 4; @@ -177,7 +179,7 @@ static void load_render_mocs(struct drm_i915_private *dev_priv) } offset.reg = 0xb020; - for (i = 0; i < 32; i++) { + for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) { gen9_render_mocs.l3cc_table[i] = I915_READ_FW(offset); offset.reg += 4; @@ -255,7 +257,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, load_render_mocs(dev_priv); offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { + for (i = 0; i < GEN9_MOCS_SIZE; i++) { if (pre) old_v = vgpu_vreg_t(pre, offset); else @@ -273,7 +275,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (ring_id == RCS) { l3_offset.reg = 0xb020; - for (i = 0; i < 32; i++) { + for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) { if (pre) old_v = vgpu_vreg_t(pre, l3_offset); else -- cgit v1.2.3 From 64f46f55bb30aebf146ae3cd2c2a4e2a06bcea04 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Tue, 13 Feb 2018 13:24:32 +0800 Subject: drm/i915/gvt: add interface to check if context is inhibit No functional change, just for 
easy to use. v4: - refine comment (Kevin) Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio_context.c | 24 ++++++++++++++---------- drivers/gpu/drm/i915/gvt/mmio_context.h | 2 ++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index ca4ba56fd60c..1bc1b28eb9e1 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -295,6 +295,16 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, #define CTX_CONTEXT_CONTROL_VAL 0x03 +bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) +{ + u32 *reg_state = ctx->engine[ring_id].lrc_reg_state; + u32 inhibit_mask = + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + + return inhibit_mask == + (reg_state[CTX_CONTEXT_CONTROL_VAL] & inhibit_mask); +} + /* Switch ring mmio values (context). */ static void switch_mmio(struct intel_vgpu *pre, struct intel_vgpu *next, @@ -302,9 +312,6 @@ static void switch_mmio(struct intel_vgpu *pre, { struct drm_i915_private *dev_priv; struct intel_vgpu_submission *s; - u32 *reg_state, ctx_ctrl; - u32 inhibit_mask = - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); struct engine_mmio *mmio; u32 old_v, new_v; @@ -329,16 +336,13 @@ static void switch_mmio(struct intel_vgpu *pre, // restore if (next) { s = &next->submission; - reg_state = - s->shadow_ctx->engine[ring_id].lrc_reg_state; - ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL]; /* - * if it is an inhibit context, load in_context mmio - * into HW by mmio write. If it is not, skip this mmio - * write. + * No need to restore the mmio which is in context state + * image if it's not inhibit context, it will restore + * itself. */ if (mmio->in_context && - (ctx_ctrl & inhibit_mask) != inhibit_mask) + !is_inhibit_context(s->shadow_ctx, ring_id)) continue; if (mmio->mask) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h index ca2c6a745673..4df87c7314c9 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.h +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h @@ -49,4 +49,6 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt); +bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id); + #endif -- cgit v1.2.3 From cd7e61b93d068a80bfe6cb55bf00f17332d831a1 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Fri, 23 Feb 2018 14:46:45 +0800 Subject: drm/i915/gvt: init mmio by lri command in vgpu inhibit context There is one issue relates to Coarse Power Gating(CPG) on KBL NUC in GVT-g, vgpu can't get the correct default context by updating the registers before inhibit context submission. It always get back the hardware default value unless the inhibit context submission happened before the 1st time forcewake put. With this wrong default context, vgpu will run with incorrect state and meet unknown issues. The solution is initialize these mmios by adding lri command in ring buffer of the inhibit context, then gpu hardware has no chance to go down RC6 when lri commands are right being executed, and then vgpu can get correct default context for further use. v3: - fix code fault, use 'for' to loop through mmio render list(Zhenyu) v4: - save the count of engine mmio need to be restored for inhibit context and refine some comments. 
(Kevin) v5: - code rebase Cc: Kevin Tian Cc: Zhenyu Wang Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 5 +- drivers/gpu/drm/i915/gvt/mmio_context.c | 172 +++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/gvt/mmio_context.h | 3 + drivers/gpu/drm/i915/gvt/scheduler.c | 5 + 4 files changed, 181 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 2b28b523376d..9131638e3999 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -310,7 +310,10 @@ struct intel_gvt { wait_queue_head_t service_thread_wq; unsigned long service_request; - struct engine_mmio *engine_mmio_list; + struct { + struct engine_mmio *mmio; + int ctx_mmio_count[I915_NUM_ENGINES]; + } engine_mmio_list; struct dentry *debugfs_root; }; diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 1bc1b28eb9e1..74a9c7b5516e 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -187,6 +187,153 @@ static void load_render_mocs(struct drm_i915_private *dev_priv) gen9_render_mocs.initialized = true; } +static int +restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu, + struct i915_request *req) +{ + u32 *cs; + int ret; + struct engine_mmio *mmio; + struct intel_gvt *gvt = vgpu->gvt; + int ring_id = req->engine->id; + int count = gvt->engine_mmio_list.ctx_mmio_count[ring_id]; + + if (count == 0) + return 0; + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + cs = intel_ring_begin(req, count * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(count); + for (mmio = gvt->engine_mmio_list.mmio; + i915_mmio_reg_valid(mmio->reg); mmio++) { + if (mmio->ring_id != ring_id || + !mmio->in_context) + continue; + + *cs++ = i915_mmio_reg_offset(mmio->reg); + *cs++ = vgpu_vreg_t(vgpu, mmio->reg) | + (mmio->mask << 16); + gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n", + *(cs-2), *(cs-1), vgpu->id, ring_id); + } + + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + return 0; +} + +static int +restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu, + struct i915_request *req) +{ + unsigned int index; + u32 *cs; + + cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE); + + for (index = 0; index < GEN9_MOCS_SIZE; index++) { + *cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index)); + *cs++ = vgpu_vreg_t(vgpu, GEN9_GFX_MOCS(index)); + gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n", + *(cs-2), *(cs-1), vgpu->id, req->engine->id); + + } + + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + return 0; +} + +static int +restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu, + struct i915_request *req) +{ + unsigned int index; + u32 *cs; + + cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE / 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2); + + for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) { + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index)); + *cs++ = vgpu_vreg_t(vgpu, GEN9_LNCFCMOCS(index)); + gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n", + *(cs-2), *(cs-1), vgpu->id, req->engine->id); + + } + + *cs++ = MI_NOOP; + intel_ring_advance(req, 
cs); + + return 0; +} + +/* + * Use lri command to initialize the mmio which is in context state image for + * inhibit context, it contains tracked engine mmio, render_mocs and + * render_mocs_l3cc. + */ +int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu, + struct i915_request *req) +{ + int ret; + u32 *cs; + + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + ret = restore_context_mmio_for_inhibit(vgpu, req); + if (ret) + goto out; + + /* no MOCS register in context except render engine */ + if (req->engine->id != RCS) + goto out; + + ret = restore_render_mocs_control_for_inhibit(vgpu, req); + if (ret) + goto out; + + ret = restore_render_mocs_l3cc_for_inhibit(vgpu, req); + if (ret) + goto out; + +out: + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + return ret; +} + static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -253,6 +400,9 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; + if (IS_KABYLAKE(dev_priv) && ring_id == RCS) + return; + if (!pre && !gen9_render_mocs.initialized) load_render_mocs(dev_priv); @@ -319,10 +469,18 @@ static void switch_mmio(struct intel_vgpu *pre, if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) switch_mocs(pre, next, ring_id); - for (mmio = dev_priv->gvt->engine_mmio_list; + for (mmio = dev_priv->gvt->engine_mmio_list.mmio; i915_mmio_reg_valid(mmio->reg); mmio++) { if (mmio->ring_id != ring_id) continue; + /* + * No need to do save or restore of the mmio which is in context + * state image on kabylake, it's initialized by lri command and + * save or restore with context together. 
+ */ + if (IS_KABYLAKE(dev_priv) && mmio->in_context) + continue; + // save if (pre) { vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg); @@ -411,8 +569,16 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, */ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) { + struct engine_mmio *mmio; + if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) - gvt->engine_mmio_list = gen9_engine_mmio_list; + gvt->engine_mmio_list.mmio = gen9_engine_mmio_list; else - gvt->engine_mmio_list = gen8_engine_mmio_list; + gvt->engine_mmio_list.mmio = gen8_engine_mmio_list; + + for (mmio = gvt->engine_mmio_list.mmio; + i915_mmio_reg_valid(mmio->reg); mmio++) { + if (mmio->in_context) + gvt->engine_mmio_list.ctx_mmio_count[mmio->ring_id]++; + } } diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h index 4df87c7314c9..0439eb8057a8 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.h +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h @@ -51,4 +51,7 @@ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt); bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id); +int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu, + struct i915_request *req); + #endif diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f4765ed4e92a..9b92b4e25a20 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -225,6 +225,11 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) struct intel_vgpu *vgpu = workload->vgpu; void *shadow_ring_buffer_va; u32 *cs; + struct i915_request *req = workload->req; + + if (IS_KABYLAKE(req->i915) && + is_inhibit_context(req->ctx, req->engine->id)) + intel_vgpu_restore_inhibit_context(vgpu, req); /* allocate shadow ring buffer */ cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); -- cgit v1.2.3 From 7e534ac985d419c011190bc1aa14affdff4ce2e2 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 22 Feb 2018 15:16:11 +0800 Subject: drm/i915/gvt: Fix one gvt_vgpu_error() use in dmabuf.c Fix below warning with proper usage. CHECK drivers/gpu/drm/i915//gvt/dmabuf.c drivers/gpu/drm/i915//gvt/dmabuf.c:462 intel_vgpu_get_dmabuf() error: 'vgpu' dereferencing possible ERR_PTR() Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 9a471b0afb15..b555eb26f9ce 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -459,7 +459,7 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id) obj = vgpu_create_gem(dev, dmabuf_obj->info); if (obj == NULL) { - gvt_vgpu_err("create gvt gem obj failed:%d\n", vgpu->id); + gvt_vgpu_err("create gvt gem obj failed\n"); ret = -ENOMEM; goto out; } -- cgit v1.2.3 From 0102d0d9227ae7412e3419973c8cefa69369a9a7 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 22 Feb 2018 15:16:12 +0800 Subject: drm/i915/gvt: remove gvt max port definition Remove GVT-g private max port definition but use i915 one. 
Fix error caused by: drivers/gpu/drm/i915//gvt/handlers.c:871 dp_aux_ch_ctl_mmio_write() error: buffer overflow 'display->ports' 5 <= 5 Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 9131638e3999..1df5a2a2dfdf 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -132,11 +132,9 @@ struct intel_vgpu_opregion { #define vgpu_opregion(vgpu) (&(vgpu->opregion)) -#define INTEL_GVT_MAX_PORT 5 - struct intel_vgpu_display { struct intel_vgpu_i2c_edid i2c_edid; - struct intel_vgpu_port ports[INTEL_GVT_MAX_PORT]; + struct intel_vgpu_port ports[I915_MAX_PORTS]; struct intel_vgpu_sbi sbi; }; -- cgit v1.2.3 From 9803984581de2b09290338687a3d21a7cd16685b Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 22 Feb 2018 15:16:13 +0800 Subject: drm/i915/gvt: Fix vGPU sched timeslice calculation warning Fix below warning by using proper ktime helper to calculate timeslice. CHECK drivers/gpu/drm/i915//gvt/sched_policy.c drivers/gpu/drm/i915//gvt/sched_policy.c:108 gvt_balance_timeslice() debug: sval_binop_signed: invalid divide LLONG_MIN/-1 drivers/gpu/drm/i915//gvt/sched_policy.c:108 gvt_balance_timeslice() debug: sval_binop_signed: invalid divide LLONG_MIN/-1 Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/sched_policy.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index cc1ce361cd76..75b7bc7b344c 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -103,9 +103,8 @@ static void gvt_balance_timeslice(struct gvt_sched_data *sched_data) list_for_each(pos, &sched_data->lru_runq_head) { vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); - fair_timeslice = ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS) * - vgpu_data->sched_ctl.weight / - total_weight; + fair_timeslice = ktime_divns(ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS), + total_weight) * vgpu_data->sched_ctl.weight; vgpu_data->allocated_ts = fair_timeslice; vgpu_data->left_ts = vgpu_data->allocated_ts; -- cgit v1.2.3 From 64c066a911b7ec14654d04ad1d5e1b2b8f2feef3 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 22 Feb 2018 15:16:14 +0800 Subject: drm/i915/gvt: Fix check error of vgpu create failure message Fix check error at CHECK drivers/gpu/drm/i915//gvt/kvmgt.c drivers/gpu/drm/i915//gvt/kvmgt.c:455 intel_vgpu_create() error: we previously assumed 'vgpu' could be null (see line 454) For failed vgpu create, just show error return in failure message. Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index d86071a32b6a..6fce1fae7d55 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -452,7 +452,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) vgpu = intel_gvt_ops->vgpu_create(gvt, type); if (IS_ERR_OR_NULL(vgpu)) { ret = vgpu == NULL ? 
-EFAULT : PTR_ERR(vgpu); - gvt_vgpu_err("failed to create intel vgpu: %d\n", ret); + gvt_err("failed to create intel vgpu: %d\n", ret); goto out; } -- cgit v1.2.3 From c39bca4e0467acce30b46aae4567bf6369be4068 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 22 Feb 2018 15:16:16 +0800 Subject: drm/i915/gvt: Fix check error on fence mmio handler Fix below error with minor code refactor. CHECK drivers/gpu/drm/i915//gvt/handlers.c drivers/gpu/drm/i915//gvt/handlers.c:203 sanitize_fence_mmio_access() error: 'vgpu' dereferencing possible ERR_PTR() Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index fbb908e797c4..415ef4556e67 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -188,7 +188,9 @@ void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, unsigned int fence_num, void *p_data, unsigned int bytes) { - if (fence_num >= vgpu_fence_sz(vgpu)) { + unsigned int max_fence = vgpu_fence_sz(vgpu); + + if (fence_num >= max_fence) { /* When guest access oob fence regs without access * pv_info first, we treat guest not supporting GVT, @@ -201,7 +203,7 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, if (!vgpu->mmio.disable_warn_untrack) { gvt_vgpu_err("found oob fence register access\n"); gvt_vgpu_err("total fence %d, access fence %d\n", - vgpu_fence_sz(vgpu), fence_num); + max_fence, fence_num); } memset(p_data, 0, bytes); return -EINVAL; -- cgit v1.2.3 From 253fe56ea96546bda371d2397443dfe9ee978557 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 22 Feb 2018 15:16:17 +0800 Subject: drm/i915/gvt: Fix one indent error Fix below warning: drivers/gpu/drm/i915//gvt/handlers.c:323 gdrst_mmio_write() warn: inconsistent indenting Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 415ef4556e67..7792711e01e3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -322,7 +322,7 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, intel_gvt_reset_vgpu_locked(vgpu, false, engine_mask); /* sw will wait for the device to ack the reset request */ - vgpu_vreg(vgpu, offset) = 0; + vgpu_vreg(vgpu, offset) = 0; return 0; } -- cgit v1.2.3 From b52646fd5bb40422be4ba8e1c3f46c23de6965a3 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 22 Feb 2018 15:16:18 +0800 Subject: drm/i915/gvt: Fix check error on hws_pga_write() fail message Fix below check error by using proper failure message output. 
drivers/gpu/drm/i915//gvt/handlers.c:1392 hws_pga_write() error: 'vgpu' dereferencing possible ERR_PTR() drivers/gpu/drm/i915//gvt/handlers.c:1402 hws_pga_write() error: 'vgpu' dereferencing possible ERR_PTR() Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 7792711e01e3..112f2ec7c25f 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1391,8 +1391,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) { - gvt_vgpu_err("VM(%d) write invalid HWSP address, reg:0x%x, value:0x%x\n", - vgpu->id, offset, value); + gvt_vgpu_err("write invalid HWSP address, reg:0x%x, value:0x%x\n", + offset, value); return -EINVAL; } /* @@ -1401,8 +1401,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, * support BDW, SKL or other platforms with same HWSP registers. */ if (unlikely(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) { - gvt_vgpu_err("VM(%d) access unknown hardware status page register:0x%x\n", - vgpu->id, offset); + gvt_vgpu_err("access unknown hardware status page register:0x%x\n", + offset); return -EINVAL; } vgpu->hws_pga[ring_id] = value; -- cgit v1.2.3 From cf4ee73fd9b6d31fa7530f72cff5cc97b94f1272 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 1 Mar 2018 15:49:59 +0800 Subject: drm/i915/gvt: Fix guest vGPU hang caused by very high dma setup overhead The implementation of current kvmgt implicitly setup dma mapping at MPT API gfn_to_mfn. First this design against the API's original purpose. Second, there is no unmap hit in this design. The result is that the dma mapping keep growing larger and larger. For mutl-vm case, they will consume IOMMU IOVA low 4GB address space quickly and so tons of rbtree entries crated in the IOMMU IOVA allocator. Finally, single IOVA allocation can take as long as ~70ms. Such latency is intolerable. To address both above issues, this patch introduced two new MPT API: o dma_map_guest_page - setup dma map for guest page o dma_unmap_guest_page - cancel dma map for guest page The kvmgt implements these 2 API. And to reduce dma setup overhead for duplicated pages (eg. scratch pages), two caches are used: one is for mapping gfn to struct gvt_dma, another is for mapping dma addr to struct gvt_dma. With these 2 new API, the gtt now is able to cancel dma mapping when page table is invalidated. The dma mapping is not in a gradual increase now. v2: follow the old logic for VFIO_IOMMU_NOTIFY_DMA_UNMAP at this point. 
Cc: Hang Yuan Cc: Xiong Zhang Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 50 ++++-- drivers/gpu/drm/i915/gvt/gvt.h | 9 +- drivers/gpu/drm/i915/gvt/hypercall.h | 5 + drivers/gpu/drm/i915/gvt/kvmgt.c | 288 ++++++++++++++++++++--------------- drivers/gpu/drm/i915/gvt/mpt.h | 28 ++++ 5 files changed, 246 insertions(+), 134 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 8fb4f1023d06..0a100a288e6d 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -822,6 +822,23 @@ static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, return ppgtt_invalidate_spt(s); } +static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt, + struct intel_gvt_gtt_entry *entry) +{ + struct intel_vgpu *vgpu = spt->vgpu; + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + unsigned long pfn; + int type; + + pfn = ops->get_pfn(entry); + type = spt->shadow_page.type; + + if (pfn == vgpu->gtt.scratch_pt[type].page_mfn) + return; + + intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); +} + static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu *vgpu = spt->vgpu; @@ -838,14 +855,12 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) if (atomic_dec_return(&spt->refcount) > 0) return 0; - if (gtt_type_is_pte_pt(spt->shadow_page.type)) - goto release; - for_each_present_shadow_entry(spt, &e, index) { switch (e.type) { case GTT_TYPE_PPGTT_PTE_4K_ENTRY: gvt_vdbg_mm("invalidate 4K entry\n"); - continue; + ppgtt_invalidate_pte(spt, &e); + break; case GTT_TYPE_PPGTT_PTE_2M_ENTRY: case GTT_TYPE_PPGTT_PTE_1G_ENTRY: WARN(1, "GVT doesn't support 2M/1GB page\n"); @@ -863,7 +878,7 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) GEM_BUG_ON(1); } } -release: + trace_spt_change(spt->vgpu->id, "release", spt, spt->guest_page.gfn, spt->shadow_page.type); ppgtt_free_spt(spt); @@ -932,7 +947,9 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, { struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry se = *ge; - unsigned long gfn, mfn; + unsigned long gfn; + dma_addr_t dma_addr; + int ret; if (!pte_ops->test_present(ge)) return 0; @@ -952,11 +969,11 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, }; /* direct shadow */ - mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn); - if (mfn == INTEL_GVT_INVALID_ADDR) + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr); + if (ret) return -ENXIO; - pte_ops->set_pfn(&se, mfn); + pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT); ppgtt_set_shadow_entry(spt, &se, index); return 0; } @@ -1035,7 +1052,9 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, ret = ppgtt_invalidate_spt(s); if (ret) goto fail; - } + } else + ppgtt_invalidate_pte(spt, se); + return 0; fail: gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", @@ -1807,8 +1826,10 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; unsigned long g_gtt_index = off >> info->gtt_entry_size_shift; - unsigned long gma, gfn, mfn; + unsigned long gma, gfn; struct intel_gvt_gtt_entry e, m; + dma_addr_t dma_addr; + int ret; if (bytes != 4 && bytes != 8) return -EINVAL; @@ -1836,8 +1857,9 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int 
off, goto out; } - mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn); - if (mfn == INTEL_GVT_INVALID_ADDR) { + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, + &dma_addr); + if (ret) { gvt_vgpu_err("fail to populate guest ggtt entry\n"); /* guest driver may read/write the entry when partial * update the entry in this situation p2m will fail @@ -1845,7 +1867,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, */ ops->set_pfn(&m, gvt->gtt.scratch_mfn); } else - ops->set_pfn(&m, mfn); + ops->set_pfn(&m, dma_addr >> PAGE_SHIFT); } else ops->set_pfn(&m, gvt->gtt.scratch_mfn); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 1df5a2a2dfdf..eda41448c196 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -201,8 +201,15 @@ struct intel_vgpu { int num_regions; struct eventfd_ctx *intx_trigger; struct eventfd_ctx *msi_trigger; - struct rb_root cache; + + /* + * Two caches are used to avoid mapping duplicated pages (eg. + * scratch pages). This help to reduce dma setup overhead. + */ + struct rb_root gfn_cache; + struct rb_root dma_addr_cache; struct mutex cache_lock; + struct notifier_block iommu_notifier; struct notifier_block group_notifier; struct kvm *kvm; diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index cb6303e630a4..f6dd9f717888 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -51,6 +51,11 @@ struct intel_gvt_mpt { int (*write_gpa)(unsigned long handle, unsigned long gpa, void *buf, unsigned long len); unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn); + + int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn, + dma_addr_t *dma_addr); + void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); + int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, unsigned long mfn, unsigned int nr, bool map); int (*set_trap_area)(unsigned long handle, u64 start, u64 end, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 6fce1fae7d55..99a8ff3fe75a 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -87,9 +87,12 @@ struct kvmgt_guest_info { }; struct gvt_dma { - struct rb_node node; + struct intel_vgpu *vgpu; + struct rb_node gfn_node; + struct rb_node dma_addr_node; gfn_t gfn; - unsigned long iova; + dma_addr_t dma_addr; + struct kref ref; }; static inline bool handle_valid(unsigned long handle) @@ -101,165 +104,163 @@ static int kvmgt_guest_init(struct mdev_device *mdev); static void intel_vgpu_release_work(struct work_struct *work); static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); -static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn, - unsigned long *iova) +static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, + dma_addr_t *dma_addr) { - struct page *page; struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - dma_addr_t daddr; + struct page *page; + unsigned long pfn; + int ret; - if (unlikely(!pfn_valid(pfn))) - return -EFAULT; + /* Pin the page first. */ + ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1, + IOMMU_READ | IOMMU_WRITE, &pfn); + if (ret != 1) { + gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", + gfn, ret); + return -EINVAL; + } + /* Setup DMA mapping. 
*/ page = pfn_to_page(pfn); - daddr = dma_map_page(dev, page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, daddr)) + *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, *dma_addr)) { + gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn); + vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); return -ENOMEM; + } - *iova = (unsigned long)(daddr >> PAGE_SHIFT); return 0; } -static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova) +static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, + dma_addr_t dma_addr) { struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - dma_addr_t daddr; + int ret; - daddr = (dma_addr_t)(iova << PAGE_SHIFT); - dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); + WARN_ON(ret != 1); } -static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) +static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, + dma_addr_t dma_addr) { - struct rb_node *node = vgpu->vdev.cache.rb_node; - struct gvt_dma *ret = NULL; + struct rb_node *node = vgpu->vdev.dma_addr_cache.rb_node; + struct gvt_dma *itr; while (node) { - struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node); + itr = rb_entry(node, struct gvt_dma, dma_addr_node); - if (gfn < itr->gfn) + if (dma_addr < itr->dma_addr) node = node->rb_left; - else if (gfn > itr->gfn) + else if (dma_addr > itr->dma_addr) node = node->rb_right; - else { - ret = itr; - goto out; - } + else + return itr; } - -out: - return ret; + return NULL; } -static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) +static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) { - struct gvt_dma *entry; - unsigned long iova; + struct rb_node *node = vgpu->vdev.gfn_cache.rb_node; + struct gvt_dma *itr; - mutex_lock(&vgpu->vdev.cache_lock); - - entry = __gvt_cache_find(vgpu, gfn); - iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova; + while (node) { + itr = rb_entry(node, struct gvt_dma, gfn_node); - mutex_unlock(&vgpu->vdev.cache_lock); - return iova; + if (gfn < itr->gfn) + node = node->rb_left; + else if (gfn > itr->gfn) + node = node->rb_right; + else + return itr; + } + return NULL; } -static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, - unsigned long iova) +static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, + dma_addr_t dma_addr) { struct gvt_dma *new, *itr; - struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL; + struct rb_node **link, *parent = NULL; new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); if (!new) return; + new->vgpu = vgpu; new->gfn = gfn; - new->iova = iova; + new->dma_addr = dma_addr; + kref_init(&new->ref); - mutex_lock(&vgpu->vdev.cache_lock); + /* gfn_cache maps gfn to struct gvt_dma. 
*/ + link = &vgpu->vdev.gfn_cache.rb_node; while (*link) { parent = *link; - itr = rb_entry(parent, struct gvt_dma, node); + itr = rb_entry(parent, struct gvt_dma, gfn_node); - if (gfn == itr->gfn) - goto out; - else if (gfn < itr->gfn) + if (gfn < itr->gfn) link = &parent->rb_left; else link = &parent->rb_right; } + rb_link_node(&new->gfn_node, parent, link); + rb_insert_color(&new->gfn_node, &vgpu->vdev.gfn_cache); - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &vgpu->vdev.cache); - mutex_unlock(&vgpu->vdev.cache_lock); - return; + /* dma_addr_cache maps dma addr to struct gvt_dma. */ + parent = NULL; + link = &vgpu->vdev.dma_addr_cache.rb_node; + while (*link) { + parent = *link; + itr = rb_entry(parent, struct gvt_dma, dma_addr_node); -out: - mutex_unlock(&vgpu->vdev.cache_lock); - kfree(new); + if (dma_addr < itr->dma_addr) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + rb_link_node(&new->dma_addr_node, parent, link); + rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache); } static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, struct gvt_dma *entry) { - rb_erase(&entry->node, &vgpu->vdev.cache); + rb_erase(&entry->gfn_node, &vgpu->vdev.gfn_cache); + rb_erase(&entry->dma_addr_node, &vgpu->vdev.dma_addr_cache); kfree(entry); } -static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn) -{ - struct device *dev = mdev_dev(vgpu->vdev.mdev); - struct gvt_dma *this; - unsigned long g1; - int rc; - - mutex_lock(&vgpu->vdev.cache_lock); - this = __gvt_cache_find(vgpu, gfn); - if (!this) { - mutex_unlock(&vgpu->vdev.cache_lock); - return; - } - - g1 = gfn; - gvt_dma_unmap_iova(vgpu, this->iova); - rc = vfio_unpin_pages(dev, &g1, 1); - WARN_ON(rc != 1); - __gvt_cache_remove_entry(vgpu, this); - mutex_unlock(&vgpu->vdev.cache_lock); -} - -static void gvt_cache_init(struct intel_vgpu *vgpu) -{ - vgpu->vdev.cache = RB_ROOT; - mutex_init(&vgpu->vdev.cache_lock); -} - static void gvt_cache_destroy(struct intel_vgpu *vgpu) { struct gvt_dma *dma; struct rb_node *node = NULL; - struct device *dev = mdev_dev(vgpu->vdev.mdev); - unsigned long gfn; for (;;) { mutex_lock(&vgpu->vdev.cache_lock); - node = rb_first(&vgpu->vdev.cache); + node = rb_first(&vgpu->vdev.gfn_cache); if (!node) { mutex_unlock(&vgpu->vdev.cache_lock); break; } - dma = rb_entry(node, struct gvt_dma, node); - gvt_dma_unmap_iova(vgpu, dma->iova); - gfn = dma->gfn; + dma = rb_entry(node, struct gvt_dma, gfn_node); + gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr); __gvt_cache_remove_entry(vgpu, dma); mutex_unlock(&vgpu->vdev.cache_lock); - vfio_unpin_pages(dev, &gfn, 1); } } +static void gvt_cache_init(struct intel_vgpu *vgpu) +{ + vgpu->vdev.gfn_cache = RB_ROOT; + vgpu->vdev.dma_addr_cache = RB_ROOT; + mutex_init(&vgpu->vdev.cache_lock); +} + static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) { hash_init(info->ptable); @@ -489,13 +490,22 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { struct vfio_iommu_type1_dma_unmap *unmap = data; - unsigned long gfn, end_gfn; + struct gvt_dma *entry; + unsigned long iov_pfn, end_iov_pfn; + + iov_pfn = unmap->iova >> PAGE_SHIFT; + end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; - gfn = unmap->iova >> PAGE_SHIFT; - end_gfn = gfn + unmap->size / PAGE_SIZE; + mutex_lock(&vgpu->vdev.cache_lock); + for (; iov_pfn < end_iov_pfn; iov_pfn++) { + entry = __gvt_cache_find_gfn(vgpu, iov_pfn); + if (!entry) + continue; - while (gfn < end_gfn) - 
gvt_cache_remove(vgpu, gfn++); + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr); + __gvt_cache_remove_entry(vgpu, entry); + } + mutex_unlock(&vgpu->vdev.cache_lock); } return NOTIFY_OK; @@ -1527,39 +1537,77 @@ static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) { - unsigned long iova, pfn; struct kvmgt_guest_info *info; - struct device *dev; - struct intel_vgpu *vgpu; - int rc; + kvm_pfn_t pfn; if (!handle_valid(handle)) return INTEL_GVT_INVALID_ADDR; info = (struct kvmgt_guest_info *)handle; - vgpu = info->vgpu; - iova = gvt_cache_find(info->vgpu, gfn); - if (iova != INTEL_GVT_INVALID_ADDR) - return iova; - - pfn = INTEL_GVT_INVALID_ADDR; - dev = mdev_dev(info->vgpu->vdev.mdev); - rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn); - if (rc != 1) { - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", - gfn, rc); - return INTEL_GVT_INVALID_ADDR; - } - /* transfer to host iova for GFX to use DMA */ - rc = gvt_dma_map_iova(info->vgpu, pfn, &iova); - if (rc) { - gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn); - vfio_unpin_pages(dev, &gfn, 1); + + pfn = gfn_to_pfn(info->kvm, gfn); + if (is_error_noslot_pfn(pfn)) return INTEL_GVT_INVALID_ADDR; + + return pfn; +} + +int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, + dma_addr_t *dma_addr) +{ + struct kvmgt_guest_info *info; + struct intel_vgpu *vgpu; + struct gvt_dma *entry; + int ret; + + if (!handle_valid(handle)) + return -EINVAL; + + info = (struct kvmgt_guest_info *)handle; + vgpu = info->vgpu; + + mutex_lock(&info->vgpu->vdev.cache_lock); + + entry = __gvt_cache_find_gfn(info->vgpu, gfn); + if (!entry) { + ret = gvt_dma_map_page(vgpu, gfn, dma_addr); + if (ret) { + mutex_unlock(&info->vgpu->vdev.cache_lock); + return ret; + } + __gvt_cache_add(info->vgpu, gfn, *dma_addr); + } else { + kref_get(&entry->ref); + *dma_addr = entry->dma_addr; } - gvt_cache_add(info->vgpu, gfn, iova); - return iova; + mutex_unlock(&info->vgpu->vdev.cache_lock); + return 0; +} + +static void __gvt_dma_release(struct kref *ref) +{ + struct gvt_dma *entry = container_of(ref, typeof(*entry), ref); + + gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr); + __gvt_cache_remove_entry(entry->vgpu, entry); +} + +void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr) +{ + struct kvmgt_guest_info *info; + struct gvt_dma *entry; + + if (!handle_valid(handle)) + return; + + info = (struct kvmgt_guest_info *)handle; + + mutex_lock(&info->vgpu->vdev.cache_lock); + entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr); + if (entry) + kref_put(&entry->ref, __gvt_dma_release); + mutex_unlock(&info->vgpu->vdev.cache_lock); } static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, @@ -1634,6 +1682,8 @@ struct intel_gvt_mpt kvmgt_mpt = { .read_gpa = kvmgt_read_gpa, .write_gpa = kvmgt_write_gpa, .gfn_to_mfn = kvmgt_gfn_to_pfn, + .dma_map_guest_page = kvmgt_dma_map_guest_page, + .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, .set_opregion = kvmgt_set_opregion, .get_vfio_device = kvmgt_get_vfio_device, .put_vfio_device = kvmgt_put_vfio_device, diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 78fada9e3241..32ffcd566cdd 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -227,6 +227,34 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn( return intel_gvt_host.mpt->gfn_to_mfn(vgpu->handle, gfn); } +/** + * 
intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page + * @vgpu: a vGPU + * @gpfn: guest pfn + * @dma_addr: retrieve allocated dma addr + * + * Returns: + * 0 on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_dma_map_guest_page( + struct intel_vgpu *vgpu, unsigned long gfn, + dma_addr_t *dma_addr) +{ + return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, + dma_addr); +} + +/** + * intel_gvt_hypervisor_dma_unmap_guest_page - cancel dma map for guest page + * @vgpu: a vGPU + * @dma_addr: the mapped dma addr + */ +static inline void intel_gvt_hypervisor_dma_unmap_guest_page( + struct intel_vgpu *vgpu, dma_addr_t dma_addr) +{ + intel_gvt_host.mpt->dma_unmap_guest_page(vgpu->handle, dma_addr); +} + /** * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN * @vgpu: a vGPU -- cgit v1.2.3 From 6846dfeb87a623e0bf31df4b6a7041d70277b0e5 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 5 Mar 2018 15:30:34 +0800 Subject: drm/i915/kvmgt: Add kvmgt debugfs entry nr_cache_entries under vgpu Add a new debugfs entry kvmgt_nr_cache_entries under vgpu which shows the number of entry in dma cache. $ cat /sys/kernel/debug/gvt/vgpu1/kvmgt_nr_cache_entries 10101 v3: fix compiling error for some configuration. (Xiong Zhang ) v2: keep debugfs layout flat. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/kvmgt.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index eda41448c196..efacd8abbedc 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -208,6 +208,7 @@ struct intel_vgpu { */ struct rb_root gfn_cache; struct rb_root dma_addr_cache; + unsigned long nr_cache_entries; struct mutex cache_lock; struct notifier_block iommu_notifier; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 99a8ff3fe75a..8a428678e4b5 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "i915_drv.h" #include "gvt.h" @@ -84,6 +85,7 @@ struct kvmgt_guest_info { #define NR_BKT (1 << 18) struct hlist_head ptable[NR_BKT]; #undef NR_BKT + struct dentry *debugfs_cache_entries; }; struct gvt_dma { @@ -225,6 +227,8 @@ static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, } rb_link_node(&new->dma_addr_node, parent, link); rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache); + + vgpu->vdev.nr_cache_entries++; } static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, @@ -233,6 +237,7 @@ static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, rb_erase(&entry->gfn_node, &vgpu->vdev.gfn_cache); rb_erase(&entry->dma_addr_node, &vgpu->vdev.dma_addr_cache); kfree(entry); + vgpu->vdev.nr_cache_entries--; } static void gvt_cache_destroy(struct intel_vgpu *vgpu) @@ -258,6 +263,7 @@ static void gvt_cache_init(struct intel_vgpu *vgpu) { vgpu->vdev.gfn_cache = RB_ROOT; vgpu->vdev.dma_addr_cache = RB_ROOT; + vgpu->vdev.nr_cache_entries = 0; mutex_init(&vgpu->vdev.cache_lock); } @@ -1493,11 +1499,20 @@ static int kvmgt_guest_init(struct mdev_device *mdev) info->track_node.track_flush_slot = kvmgt_page_track_flush_slot; kvm_page_track_register_notifier(kvm, &info->track_node); + info->debugfs_cache_entries = debugfs_create_ulong( + "kvmgt_nr_cache_entries", + 0444, vgpu->debugfs, + &vgpu->vdev.nr_cache_entries); + if 
(!info->debugfs_cache_entries) + gvt_vgpu_err("Cannot create kvmgt debugfs entry\n"); + return 0; } static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) { + debugfs_remove(info->debugfs_cache_entries); + kvm_page_track_unregister_notifier(info->kvm, &info->track_node); kvm_put_kvm(info->kvm); kvmgt_protect_table_destroy(info); -- cgit v1.2.3 From 7e60946feb4287111dc61a13ee66ea4295f4f6b4 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Tue, 6 Mar 2018 06:07:27 +0800 Subject: drm/i915/gvt: Release gvt->lock at the failure of finding page track page_track_handler take lock at the beginning, the lock should be released at the failure of finding page track. Otherwise deadlock will happen. Fixes: e502a2af4c35 ("drm/i915/gvt: Provide generic page_track infrastructure for write-protected page") Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/page_track.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/page_track.c b/drivers/gpu/drm/i915/gvt/page_track.c index 09bd56e39ec6..2d030545356b 100644 --- a/drivers/gpu/drm/i915/gvt/page_track.c +++ b/drivers/gpu/drm/i915/gvt/page_track.c @@ -165,7 +165,7 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT); if (!page_track) - return 0; + goto out; if (unlikely(vgpu->failsafe)) { /* Remove write protection to prevent furture traps. */ @@ -176,6 +176,7 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, gvt_err("guest page write error, gpa %llx\n", gpa); } +out: mutex_unlock(&gvt->lock); return ret; } -- cgit v1.2.3 From 991ecefbdd4b81719597d6c406df8d26ef5c1546 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Tue, 6 Mar 2018 06:07:28 +0800 Subject: drm/i915/gvt: Return error at the failure of finding page_track In XenGT, ioreq copy is used to trap mmio write and ppgtt write. Both of them are memory write, ioreq handler couldn't distinguish them. So ioreq handler probe the ppgtt write handler, if it is succuess, this ioreq is ppgtt write, otherwise it is mmio write. So ppgtt write handler should return an error at the failure of finding page track, it is fatal to implement ioreq handler in XenGT. Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/page_track.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/page_track.c b/drivers/gpu/drm/i915/gvt/page_track.c index 2d030545356b..53e2bd79c97d 100644 --- a/drivers/gpu/drm/i915/gvt/page_track.c +++ b/drivers/gpu/drm/i915/gvt/page_track.c @@ -164,8 +164,10 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, mutex_lock(&gvt->lock); page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT); - if (!page_track) + if (!page_track) { + ret = -ENXIO; goto out; + } if (unlikely(vgpu->failsafe)) { /* Remove write protection to prevent furture traps. 
*/ -- cgit v1.2.3 From 618d87d783adc86db5989c25eab54780f21314d9 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 5 Mar 2018 14:21:20 -0800 Subject: drm/i915/error: remove unused gen8_engine_sync_index Leftover from Gen8 ringbuffer support removal Cc: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180305222122.3547-1-daniele.ceraolospurio@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gpu_error.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a7933c9b5562..ef29fb48d6d9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1102,27 +1102,6 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv, error->nfence = i; } -static inline u32 -gen8_engine_sync_index(struct intel_engine_cs *engine, - struct intel_engine_cs *other) -{ - int idx; - - /* - * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2; - * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs; - * bcs -> 0 = vecs, 1 = vcs2. 2 = rcs, 3 = vcs; - * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs; - * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs; - */ - - idx = (other - engine) - 1; - if (idx < 0) - idx += I915_NUM_ENGINES; - - return idx; -} - static void gen6_record_semaphore_state(struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { -- cgit v1.2.3 From 53b725c7db127d16d1a83ed5dfd601d65fe976fb Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 5 Mar 2018 14:21:21 -0800 Subject: drm/i915/error: standardize function style in error capture some of the static functions used from capture() have the "i915_" prefix while other don't; most of them take i915 as a parameter, but one of them derives it internally from error->i915. Let's be consistent by avoiding prefix for static functions and by getting i915 from error->i915. While at it, s/dev_priv/i915 in functions that don't perform register reads. 
v2: take i915 from error->i915 (Michal), s/dev_priv/i915, update commit message Cc: Michal Wajdeczko Cc: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Michel Thierry Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180305222122.3547-2-daniele.ceraolospurio@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gpu_error.c | 84 ++++++++++++++++------------------- 1 file changed, 39 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index ef29fb48d6d9..9afb1b9674c0 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1084,9 +1084,9 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, return error_code; } -static void i915_gem_record_fences(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void gem_record_fences(struct i915_gpu_state *error) { + struct drm_i915_private *dev_priv = error->i915; int i; if (INTEL_GEN(dev_priv) >= 6) { @@ -1424,14 +1424,14 @@ capture_object(struct drm_i915_private *dev_priv, } } -static void i915_gem_record_rings(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void gem_record_rings(struct i915_gpu_state *error) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_i915_private *i915 = error->i915; + struct i915_ggtt *ggtt = &i915->ggtt; int i; for (i = 0; i < I915_NUM_ENGINES; i++) { - struct intel_engine_cs *engine = dev_priv->engine[i]; + struct intel_engine_cs *engine = i915->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; struct i915_request *request; @@ -1460,17 +1460,16 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, * by userspace. 
*/ ee->batchbuffer = - i915_error_object_create(dev_priv, - request->batch); + i915_error_object_create(i915, request->batch); - if (HAS_BROKEN_CS_TLB(dev_priv)) + if (HAS_BROKEN_CS_TLB(i915)) ee->wa_batchbuffer = - i915_error_object_create(dev_priv, + i915_error_object_create(i915, engine->scratch); request_record_user_bo(request, ee); ee->ctx = - i915_error_object_create(dev_priv, + i915_error_object_create(i915, request->ctx->engine[i].state); error->simulated |= @@ -1484,27 +1483,24 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->cpu_ring_head = ring->head; ee->cpu_ring_tail = ring->tail; ee->ringbuffer = - i915_error_object_create(dev_priv, ring->vma); + i915_error_object_create(i915, ring->vma); engine_record_requests(engine, request, ee); } ee->hws_page = - i915_error_object_create(dev_priv, + i915_error_object_create(i915, engine->status_page.vma); - ee->wa_ctx = - i915_error_object_create(dev_priv, engine->wa_ctx.vma); + ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma); - ee->default_state = - capture_object(dev_priv, engine->default_state); + ee->default_state = capture_object(i915, engine->default_state); } } -static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - struct i915_address_space *vm, - int idx) +static void gem_capture_vm(struct i915_gpu_state *error, + struct i915_address_space *vm, + int idx) { struct drm_i915_error_buffer *active_bo; struct i915_vma *vma; @@ -1527,8 +1523,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, error->active_bo_count[idx] = count; } -static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_active_buffers(struct i915_gpu_state *error) { int cnt = 0, i, j; @@ -1548,14 +1543,13 @@ static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, for (j = 0; j < i && !found; j++) found = error->engine[j].vm == ee->vm; if (!found) - i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++); + gem_capture_vm(error, ee->vm, cnt++); } } -static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_pinned_buffers(struct i915_gpu_state *error) { - struct i915_address_space *vm = &dev_priv->ggtt.base; + struct i915_address_space *vm = &error->i915->ggtt.base; struct drm_i915_error_buffer *bo; struct i915_vma *vma; int count_inactive, count_active; @@ -1605,9 +1599,9 @@ static void capture_uc_state(struct i915_gpu_state *error) } /* Capture all registers which don't fit into another category. */ -static void i915_capture_reg_state(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_reg_state(struct i915_gpu_state *error) { + struct drm_i915_private *dev_priv = error->i915; int i; /* General organization @@ -1704,24 +1698,25 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, engine_mask ? 
"reset" : "continue"); } -static void i915_capture_gen_state(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_gen_state(struct i915_gpu_state *error) { - error->awake = dev_priv->gt.awake; - error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count); - error->suspended = dev_priv->runtime_pm.suspended; + struct drm_i915_private *i915 = error->i915; + + error->awake = i915->gt.awake; + error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count); + error->suspended = i915->runtime_pm.suspended; error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU error->iommu = intel_iommu_gfx_mapped; #endif - error->reset_count = i915_reset_count(&dev_priv->gpu_error); - error->suspend_count = dev_priv->suspend_count; + error->reset_count = i915_reset_count(&i915->gpu_error); + error->suspend_count = i915->suspend_count; memcpy(&error->device_info, - INTEL_INFO(dev_priv), + INTEL_INFO(i915), sizeof(error->device_info)); - error->driver_caps = dev_priv->caps; + error->driver_caps = i915->caps; } static __always_inline void dup_param(const char *type, void *x) @@ -1749,13 +1744,12 @@ static int capture(void *data) capture_params(error); capture_uc_state(error); - - i915_capture_gen_state(error->i915, error); - i915_capture_reg_state(error->i915, error); - i915_gem_record_fences(error->i915, error); - i915_gem_record_rings(error->i915, error); - i915_capture_active_buffers(error->i915, error); - i915_capture_pinned_buffers(error->i915, error); + capture_gen_state(error); + capture_reg_state(error); + gem_record_fences(error); + gem_record_rings(error); + capture_active_buffers(error); + capture_pinned_buffers(error); error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); -- cgit v1.2.3 From 7cc62d0b8e257fbac8e2972074351bc766b96853 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 5 Mar 2018 14:21:22 -0800 Subject: drm/i915/error: capture uc_state after gen_state error->device_info.has_guc, which we check in capture_uc_state, is set in capture_gen_state, so the latter needs to be performed first. v2: rebased Reported-by: Vinay Belgaumkar Fixes: 7d41ef3479a6 (drm/i915: Add Guc/HuC firmware details to error state) Cc: Vinay Belgaumkar Cc: Michal Wajdeczko Cc: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180305222122.3547-3-daniele.ceraolospurio@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9afb1b9674c0..9e5e9547adb2 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1743,8 +1743,8 @@ static int capture(void *data) error->i915->gt.last_init_time); capture_params(error); - capture_uc_state(error); capture_gen_state(error); + capture_uc_state(error); capture_reg_state(error); gem_record_fences(error); gem_record_rings(error); -- cgit v1.2.3 From cd46c545b7db2a9ac14f6db66944b017cbf21faf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 22 Feb 2018 09:25:44 +0000 Subject: drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list The goal here is to try and reduce the latency of signaling additional requests following the wakeup from interrupt by reducing the list of to-be-signaled requests from an rbtree to a sorted linked list. 
The original choice of using an rbtree was to facilitate random insertions of requests into the signaler while maintaining a sorted list. However, if we assume that most new requests are added when they are submitted, we see those new requests in execution order making an insertion sort fast, and the reduction in overhead of each signaler iteration significant. Since commit 56299fb7d904 ("drm/i915: Signal first fence from irq handler if complete"), we signal most fences directly from notify_ring() in the interrupt handler greatly reducing the amount of work that actually needs to be done by the signaler kthread. All the thread is then required to do is operate as the bottom-half, cleaning up after the interrupt handler and preparing the next waiter. This includes signaling all later completed fences in a saturated system, but on a mostly idle system we only have to rebuild the wait rbtree in time for the next interrupt. With this de-emphasis of the signaler's role, we want to rejig its data structures to reduce the amount of work we require to both set up the signal tree and maintain it on every interrupt. References: 56299fb7d904 ("drm/i915: Signal first fence from irq handler if complete") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180222092545.17216-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.h | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 261 +++++++++++++------------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +- 3 files changed, 116 insertions(+), 151 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 74311fc53e2f..7d6eb82eeb91 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -44,8 +44,8 @@ struct intel_wait { }; struct intel_signal_node { - struct rb_node node; struct intel_wait wait; + struct list_head link; }; struct i915_dependency { diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 094f010908b8..03bbc1dfbc51 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -340,7 +340,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, lockdep_assert_held(&b->rb_lock); GEM_BUG_ON(b->irq_wait == wait); - /* This request is completed, so remove it from the tree, mark it as + /* + * This request is completed, so remove it from the tree, mark it as * complete, and *then* wake up the associated task. N.B. when the * task wakes up, it will find the empty rb_node, discern that it * has already been removed from the tree and skip the serialisation @@ -351,7 +352,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, rb_erase(&wait->node, &b->waiters); RB_CLEAR_NODE(&wait->node); - wake_up_process(wait->tsk); /* implicit smp_wmb() */ + if (wait->tsk->state != TASK_RUNNING) + wake_up_process(wait->tsk); /* implicit smp_wmb() */ } static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, @@ -592,23 +594,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, spin_unlock_irq(&b->rb_lock); } -static bool signal_complete(const struct i915_request *request) -{ - if (!request) - return false; - - /* - * Carefully check if the request is complete, giving time for the - * seqno to be visible or if the GPU hung.
- */ - return __i915_request_irq_complete(request); -} - -static struct i915_request *to_signaler(struct rb_node *rb) -{ - return rb_entry(rb, struct i915_request, signaling.node); -} - static void signaler_set_rtpriority(void) { struct sched_param param = { .sched_priority = 1 }; @@ -616,78 +601,26 @@ static void signaler_set_rtpriority(void) sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); } -static void __intel_engine_remove_signal(struct intel_engine_cs *engine, - struct i915_request *request) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - lockdep_assert_held(&b->rb_lock); - - /* - * Wake up all other completed waiters and select the - * next bottom-half for the next user interrupt. - */ - __intel_engine_remove_wait(engine, &request->signaling.wait); - - /* - * Find the next oldest signal. Note that as we have - * not been holding the lock, another client may - * have installed an even older signal than the one - * we just completed - so double check we are still - * the oldest before picking the next one. - */ - if (request->signaling.wait.seqno) { - if (request == rcu_access_pointer(b->first_signal)) { - struct rb_node *rb = rb_next(&request->signaling.node); - rcu_assign_pointer(b->first_signal, - rb ? to_signaler(rb) : NULL); - } - - rb_erase(&request->signaling.node, &b->signals); - request->signaling.wait.seqno = 0; - } -} - -static struct i915_request * -get_first_signal_rcu(struct intel_breadcrumbs *b) -{ - /* - * See the big warnings for i915_gem_active_get_rcu() and similarly - * for dma_fence_get_rcu_safe() that explain the intricacies involved - * here with defeating CPU/compiler speculation and enforcing - * the required memory barriers. - */ - do { - struct i915_request *request; - - request = rcu_dereference(b->first_signal); - if (request) - request = i915_request_get_rcu(request); - - barrier(); - - if (!request || request == rcu_access_pointer(b->first_signal)) - return rcu_pointer_handoff(request); - - i915_request_put(request); - } while (1); -} - static int intel_breadcrumbs_signaler(void *arg) { struct intel_engine_cs *engine = arg; struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct i915_request *request; + struct i915_request *rq, *n; /* Install ourselves with high priority to reduce signalling latency */ signaler_set_rtpriority(); do { bool do_schedule = true; + LIST_HEAD(list); + u32 seqno; set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&b->signals)) + goto sleep; - /* We are either woken up by the interrupt bottom-half, + /* + * We are either woken up by the interrupt bottom-half, * or by a client adding a new signaller. In both cases, * the GPU seqno may have advanced beyond our oldest signal. * If it has, propagate the signal, remove the waiter and @@ -695,25 +628,45 @@ static int intel_breadcrumbs_signaler(void *arg) * need to wait for a new interrupt from the GPU or for * a new client. 
*/ - rcu_read_lock(); - request = get_first_signal_rcu(b); - rcu_read_unlock(); - if (signal_complete(request)) { - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &request->fence.flags)) { - local_bh_disable(); - dma_fence_signal(&request->fence); - GEM_BUG_ON(!i915_request_completed(request)); - local_bh_enable(); /* kick start the tasklets */ - } + seqno = intel_engine_get_seqno(engine); + + spin_lock_irq(&b->rb_lock); + list_for_each_entry_safe(rq, n, &b->signals, signaling.link) { + u32 this = rq->signaling.wait.seqno; + + GEM_BUG_ON(!rq->signaling.wait.seqno); - if (READ_ONCE(request->signaling.wait.seqno)) { - spin_lock_irq(&b->rb_lock); - __intel_engine_remove_signal(engine, request); - spin_unlock_irq(&b->rb_lock); + if (!i915_seqno_passed(seqno, this)) + break; + + if (likely(this == i915_request_global_seqno(rq))) { + __intel_engine_remove_wait(engine, + &rq->signaling.wait); + + rq->signaling.wait.seqno = 0; + __list_del_entry(&rq->signaling.link); + + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags)) { + list_add_tail(&rq->signaling.link, + &list); + i915_request_get(rq); + } + } + } + spin_unlock_irq(&b->rb_lock); + + if (!list_empty(&list)) { + local_bh_disable(); + list_for_each_entry_safe(rq, n, &list, signaling.link) { + dma_fence_signal(&rq->fence); + GEM_BUG_ON(!i915_request_completed(rq)); + i915_request_put(rq); } + local_bh_enable(); /* kick start the tasklets */ - /* If the engine is saturated we may be continually + /* + * If the engine is saturated we may be continually * processing completed requests. This angers the * NMI watchdog if we never let anything else * have access to the CPU. Let's pretend to be nice @@ -722,9 +675,19 @@ static int intel_breadcrumbs_signaler(void *arg) */ do_schedule = need_resched(); } - i915_request_put(request); if (unlikely(do_schedule)) { + /* Before we sleep, check for a missed seqno */ + if (current->state & TASK_NORMAL && + !list_empty(&b->signals) && + engine->irq_seqno_barrier && + test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted)) { + engine->irq_seqno_barrier(engine); + intel_engine_wakeup(engine); + } + +sleep: if (kthread_should_park()) kthread_parkme(); @@ -739,13 +702,40 @@ static int intel_breadcrumbs_signaler(void *arg) return 0; } +static void insert_signal(struct intel_breadcrumbs *b, + struct i915_request *request, + const u32 seqno) +{ + struct i915_request *iter; + + lockdep_assert_held(&b->rb_lock); + + /* + * A reasonable assumption is that we are called to add signals + * in sequence, as the requests are submitted for execution and + * assigned a global_seqno. This will be the case for the majority + * of internally generated signals (inter-engine signaling). + * + * Out of order waiters triggering random signaling enabling will + * be more problematic, but hopefully rare enough and the list + * small enough that the O(N) insertion sort is not an issue. + */ + + list_for_each_entry_reverse(iter, &b->signals, signaling.link) + if (i915_seqno_passed(seqno, iter->signaling.wait.seqno)) + break; + + list_add(&request->signaling.link, &iter->signaling.link); +} + void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; u32 seqno; - /* Note that we may be called from an interrupt handler on another + /* + * Note that we may be called from an interrupt handler on another * device (e.g. 
nouveau signaling a fence completion causing us * to submit a request, and so enable signaling). As such, * we need to make sure that all other users of b->rb_lock protect @@ -757,17 +747,16 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) lockdep_assert_held(&request->lock); seqno = i915_request_global_seqno(request); - if (!seqno) + if (!seqno) /* will be enabled later upon execution */ return; - spin_lock(&b->rb_lock); - GEM_BUG_ON(request->signaling.wait.seqno); request->signaling.wait.tsk = b->signaler; request->signaling.wait.request = request; request->signaling.wait.seqno = seqno; - /* First add ourselves into the list of waiters, but register our + /* + * Add ourselves into the list of waiters, but registering our * bottom-half as the signaller thread. As per usual, only the oldest * waiter (not just signaller) is tasked as the bottom-half waking * up all completed waiters after the user interrupt. @@ -775,39 +764,9 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) * If we are the oldest waiter, enable the irq (after which we * must double check that the seqno did not complete). */ + spin_lock(&b->rb_lock); + insert_signal(b, request, seqno); wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); - - if (!__i915_request_completed(request, seqno)) { - struct rb_node *parent, **p; - bool first; - - /* Now insert ourselves into the retirement ordered list of - * signals on this engine. We track the oldest seqno as that - * will be the first signal to complete. - */ - parent = NULL; - first = true; - p = &b->signals.rb_node; - while (*p) { - parent = *p; - if (i915_seqno_passed(seqno, - to_signaler(parent)->signaling.wait.seqno)) { - p = &parent->rb_right; - first = false; - } else { - p = &parent->rb_left; - } - } - rb_link_node(&request->signaling.node, parent, p); - rb_insert_color(&request->signaling.node, &b->signals); - if (first) - rcu_assign_pointer(b->first_signal, request); - } else { - __intel_engine_remove_wait(engine, &request->signaling.wait); - request->signaling.wait.seqno = 0; - wakeup = false; - } - spin_unlock(&b->rb_lock); if (wakeup) @@ -816,17 +775,20 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) void intel_engine_cancel_signaling(struct i915_request *request) { + struct intel_engine_cs *engine = request->engine; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&request->lock); - if (READ_ONCE(request->signaling.wait.seqno)) { - struct intel_engine_cs *engine = request->engine; - struct intel_breadcrumbs *b = &engine->breadcrumbs; + if (!READ_ONCE(request->signaling.wait.seqno)) + return; - spin_lock(&b->rb_lock); - __intel_engine_remove_signal(engine, request); - spin_unlock(&b->rb_lock); - } + spin_lock(&b->rb_lock); + __intel_engine_remove_wait(engine, &request->signaling.wait); + if (fetch_and_zero(&request->signaling.wait.seqno)) + __list_del_entry(&request->signaling.link); + spin_unlock(&b->rb_lock); } int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) @@ -840,6 +802,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); + INIT_LIST_HEAD(&b->signals); + /* Spawn a thread to provide a common bottom-half for all signals. 
* As this is an asynchronous interface we cannot steal the current * task for handling the bottom-half to the user interrupt, therefore @@ -899,8 +863,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) /* The engines should be idle and all requests accounted for! */ WARN_ON(READ_ONCE(b->irq_wait)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(rcu_access_pointer(b->first_signal)); - WARN_ON(!RB_EMPTY_ROOT(&b->signals)); + WARN_ON(!list_empty(&b->signals)); if (!IS_ERR_OR_NULL(b->signaler)) kthread_stop(b->signaler); @@ -913,20 +876,22 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; bool busy = false; - spin_lock_irq(&b->rb_lock); - if (b->irq_wait) { - wake_up_process(b->irq_wait->tsk); - busy = true; + spin_lock_irq(&b->irq_lock); + + if (b->irq_wait) { + wake_up_process(b->irq_wait->tsk); + busy = true; + } + + spin_unlock_irq(&b->irq_lock); } - if (rcu_access_pointer(b->first_signal)) { + if (!busy && !list_empty(&b->signals)) { wake_up_process(b->signaler); busy = true; } - spin_unlock_irq(&b->rb_lock); - return busy; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 90e4380cbdd5..e7526a4f05e5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -356,9 +356,9 @@ struct intel_engine_cs { spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ struct rb_root waiters; /* sorted by retirement, priority */ - struct rb_root signals; /* sorted by retirement */ + struct list_head signals; /* sorted by retirement */ struct task_struct *signaler; /* used for fence signalling */ - struct i915_request __rcu *first_signal; + struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ -- cgit v1.2.3 From 9792e213a4c9b16b814c0865ea0cc5d5125e052b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 22 Feb 2018 09:25:45 +0000 Subject: drm/i915/breadcrumbs: Assert all missed breadcrumbs were signaled When parking the engines and their breadcrumbs, if we have waiters left then they missed their wakeup. Verify that each waiter's seqno did complete. 
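For reference, the assertion leans on the driver's wraparound-safe seqno comparison. A minimal standalone sketch of that idiom (the real helper lives in i915_request.h; this form is illustrative only):

	/*
	 * True if seqno a is at or after seqno b. Computing the signed
	 * difference keeps the comparison correct across u32 wraparound,
	 * as long as the two values are within 2^31 of each other.
	 */
	static inline bool seqno_passed(u32 a, u32 b)
	{
		return (s32)(a - b) >= 0;
	}

So a waiter for seqno 3 has passed once the engine reports 5, and a waiter for 0xffffffff has passed once the engine wraps around to 2.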
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180222092545.17216-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 03bbc1dfbc51..6a740618863c 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -247,6 +247,8 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) spin_unlock(&b->irq_lock); rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { + GEM_BUG_ON(!i915_seqno_passed(intel_engine_get_seqno(engine), + wait->seqno)); RB_CLEAR_NODE(&wait->node); wake_up_process(wait->tsk); } -- cgit v1.2.3 From 93eef7d65329b62cf8a6db918fe5ca5d84eedf50 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 6 Mar 2018 13:01:42 +0000 Subject: drm/i915: Stop kicking the signaling thread on seqno wraparound Since commit fd10e2ce9905 ("drm/i915/breadcrumbs: Ignore unsubmitted signalers"), we cancel the signaler when retiring the request and so upon wraparound, where we wait for all requests to be retired, we no longer need to spin waiting for the signaling thread to release its references to the in-flight requests, and so we can assert that the signaler is idle. References: fd10e2ce9905 ("drm/i915/breadcrumbs: Ignore unsubmitted signalers") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180306130143.13312-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 2 ++ drivers/gpu/drm/i915/intel_breadcrumbs.c | 5 ----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2265bb8ff4fa..d34a3e5800b7 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -220,6 +220,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) /* spin until threads are complete */ while (intel_breadcrumbs_busy(engine)) cond_resched(); + + GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); } /* Check we are idle before we fiddle with hw state! */ diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 6a740618863c..bab74c3ee81a 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -889,11 +889,6 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) spin_unlock_irq(&b->irq_lock); } - if (!busy && !list_empty(&b->signals)) { - wake_up_process(b->signaler); - busy = true; - } - return busy; } -- cgit v1.2.3 From f41d19beccfebb84abc729e2d8ece0c368b2152f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 6 Mar 2018 13:01:43 +0000 Subject: drm/i915: Flush waiters on seqno wraparound Previously, we would spin waiting for all waiters to wake up and notice their request had completed before we would reset the seqno upon wraparound. However, we can mark their waits as complete and wake them up directly using the existing machinery for handling the flushing of missed wakeups when idling. 
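For orientation, the signaler being relieved of duty here is an ordinary kthread bottom-half; "kicking" it is just a wake_up_process(). A generic sketch of the sleep/wake pattern such a thread uses (have_work()/do_work() are hypothetical placeholders, not the driver's actual loop):

	/* uses <linux/kthread.h> and <linux/sched.h> */
	static int signaler(void *arg)
	{
		do {
			/*
			 * Publish that we intend to sleep before checking
			 * for work, so a concurrent wake_up_process()
			 * cannot be lost.
			 */
			set_current_state(TASK_INTERRUPTIBLE);

			if (have_work(arg)) {
				__set_current_state(TASK_RUNNING);
				do_work(arg);
			} else {
				schedule(); /* sleep until the next kick */
			}

			if (kthread_should_park())
				kthread_parkme();
		} while (!kthread_should_stop());

		__set_current_state(TASK_RUNNING);
		return 0;
	}

A kick that races with the check merely leaves the task runnable, so kicks are never lost, only potentially redundant, which is exactly what these patches prune.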
Suggested-by: Joonas Lahtinen Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180306130143.13312-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 6 ++---- drivers/gpu/drm/i915/intel_breadcrumbs.c | 19 ------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d34a3e5800b7..d437beac3969 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -217,10 +217,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) struct intel_timeline *tl = engine->timeline; if (!i915_seqno_passed(seqno, tl->seqno)) { - /* spin until threads are complete */ - while (intel_breadcrumbs_busy(engine)) - cond_resched(); - + /* Flush any waiters before we reuse the seqno */ + intel_engine_disarm_breadcrumbs(engine); GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index bab74c3ee81a..1f79e7a47433 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -873,25 +873,6 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); } -bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - bool busy = false; - - if (b->irq_wait) { - spin_lock_irq(&b->irq_lock); - - if (b->irq_wait) { - wake_up_process(b->irq_wait->tsk); - busy = true; - } - - spin_unlock_irq(&b->irq_lock); - } - - return busy; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/intel_breadcrumbs.c" #endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index e7526a4f05e5..26605f39bbfd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -951,7 +951,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -bool intel_breadcrumbs_busy(struct intel_engine_cs *engine); static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { -- cgit v1.2.3 From c90c275c6ff4b7eca4221a302b399499239ae89e Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 6 Mar 2018 12:33:55 -0800 Subject: drm/i915/psr: Update PSR2 resolution check for Cannonlake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In fact, apply the Cannonlake resolution check for all >= Gen-10 platforms to be safe. v3: Update GLK too. (Ville) Longer variable names. if-else in place of ternary operator. 
v2: Use local variables for resolution limits and print them (Ville) Cc: Ville Syrjälä Cc: Rodrigo Vivi Cc: Elio Martinez Monroy Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Ville Syrjälä Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180306203355.29292-1-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 05770790a4e9..23175c5c4a50 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -451,8 +451,9 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - const struct drm_display_mode *adjusted_mode = - &crtc_state->base.adjusted_mode; + int crtc_hdisplay = crtc_state->base.adjusted_mode.crtc_hdisplay; + int crtc_vdisplay = crtc_state->base.adjusted_mode.crtc_vdisplay; + int psr_max_h = 0, psr_max_v = 0; /* * FIXME psr2_support is messed up. It's both computed @@ -462,10 +463,18 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, if (!dev_priv->psr.psr2_support) return false; - /* PSR2 is restricted to work with panel resolutions up to 3640x2304 */ - if (adjusted_mode->crtc_hdisplay > 3640 || - adjusted_mode->crtc_vdisplay > 2304) { - DRM_DEBUG_KMS("PSR2 not enabled, panel resolution too big\n"); + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + psr_max_h = 4096; + psr_max_v = 2304; + } else if (IS_GEN9(dev_priv)) { + psr_max_h = 3640; + psr_max_v = 2304; + } + + if (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v) { + DRM_DEBUG_KMS("PSR2 not enabled, resolution %dx%d > max supported %dx%d\n", + crtc_hdisplay, crtc_vdisplay, + psr_max_h, psr_max_v); return false; } -- cgit v1.2.3 From 14941b6e86873680714aae2fee0b529024c63380 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 5 Mar 2018 17:20:00 -0800 Subject: drm/i915/cnl: document WaVFUnitClockGatingDisable No functional change. The WA is already properly applied, but in different databases it has different names. Let's document all of them to avoid future confusion. Cc: Rafael Antognolli Signed-off-by: Rodrigo Vivi Reviewed-by: Rafael Antognolli Link: https://patchwork.freedesktop.org/patch/msgid/20180306012000.18928-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3e60279f18b1..1ed1abb5b6a0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8523,6 +8523,7 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); /* WaDisableVFclkgate:cnl */ + /* WaVFUnitClockGatingDisable:cnl */ val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE); val |= VFUNIT_CLKGATE_DIS; I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val); -- cgit v1.2.3 From 470e7c6189dbce4c0d1beb8cce7e38a9bd5f5144 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 5 Mar 2018 17:28:12 -0800 Subject: drm/i915/cnp: Document WaSouthDisplayDisablePWMCGEGating No functional change since the WA is already applied. But since it has different names on different databases, let's document it here to avoid future confusion.
Cc: Radhakrishna Sripada Signed-off-by: Rodrigo Vivi Reviewed-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20180306012812.19779-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1ed1abb5b6a0..6cab20ce167a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8492,7 +8492,7 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) if (!HAS_PCH_CNP(dev_priv)) return; - /* Display WA #1181: cnp */ + /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */ I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) | CNP_PWM_CGE_GATING_DISABLE); } -- cgit v1.2.3 From 1b2cb026dc8b6f5cc4043031896a27745ad6f898 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 17 Jan 2018 21:21:45 +0200 Subject: drm/i915: Convert intel_hpd_irq_event() into an encoder hotplug hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow encoders to customize their hotplug processing by moving the intel_hpd_irq_event() code into an encoder hotplug vfunc. Currently only SDVO needs this to re-enable hotplug signalling in the SDVO chip. We'll use this same hook for DP/HDMI link management later. Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180117192149.17760-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_sdvo.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 0c14d1c04cbd..96e213ec202d 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1705,7 +1705,15 @@ static void intel_sdvo_enable_hotplug(struct intel_encoder *encoder) struct intel_sdvo *intel_sdvo = to_sdvo(encoder); intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, - &intel_sdvo->hotplug_active, 2); + &intel_sdvo->hotplug_active, 2); +} + +static bool intel_sdvo_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector) +{ + intel_sdvo_enable_hotplug(encoder); + + return intel_encoder_hotplug(encoder, connector); } static bool @@ -2516,7 +2524,7 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) * Some SDVO devices have one-shot hotplug interrupts. * Ensure that they get re-enabled when an interrupt happens. */ - intel_encoder->hot_plug = intel_sdvo_enable_hotplug; + intel_encoder->hotplug = intel_sdvo_hotplug; intel_sdvo_enable_hotplug(intel_encoder); } else { intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; -- cgit v1.2.3 From dba14b27dd3ca5ce5b3a1d538862e7dce556dba7 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 17 Jan 2018 21:21:46 +0200 Subject: drm/i915: Reinitialize sink scrambling/TMDS clock ratio on HPD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LG 4k TV I have doesn't deassert HPD when I turn the TV off, but when I turn it back on it will pulse the HPD line. By that time it has forgotten everything we told it about scrambling and the clock ratio. Hence if we want to get a picture out of it again we have to tell it whether we're currently sending scrambled data or not. Implement that via the encoder->hotplug() hook.
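The hook implementation in the diff below runs from the hotplug work and must take modeset locks, so it uses the standard drm_modeset_acquire_ctx deadlock-backoff dance. Distilled to its shape, with do_reset_link() standing in for the real intel_hdmi_reset_link():

	struct drm_modeset_acquire_ctx ctx;
	int ret;

	drm_modeset_acquire_init(&ctx, 0);
	for (;;) {
		ret = do_reset_link(encoder, &ctx);
		if (ret == -EDEADLK) {
			/*
			 * Drop every lock held via ctx, wait for the
			 * contended one, then retry the whole sequence.
			 */
			drm_modeset_backoff(&ctx);
			continue;
		}
		break;
	}
	drm_modeset_drop_locks(&ctx);
	drm_modeset_acquire_fini(&ctx);

Any helper called with the ctx must propagate -EDEADLK back to this loop rather than trying to handle it itself.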
v2: Force a full modeset to not follow the HDMI 2.0 spec more closely (Shashank) [pushed with whitespace fixes to make sparse happy] Cc: Shashank Sharma Cc: Maarten Lankhorst Signed-off-by: Ville Syrjälä Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180117192149.17760-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_crt.c | 4 +- drivers/gpu/drm/i915/intel_ddi.c | 146 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_dp.c | 1 + drivers/gpu/drm/i915/intel_drv.h | 6 +- drivers/gpu/drm/i915/intel_hdmi.c | 1 + drivers/gpu/drm/i915/intel_hotplug.c | 25 +++--- 6 files changed, 168 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 391dd69ae0a4..c0a8805b277f 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -956,8 +956,10 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.power_domain = POWER_DOMAIN_PORT_CRT; if (I915_HAS_HOTPLUG(dev_priv) && - !dmi_check_system(intel_spurious_crt_detect)) + !dmi_check_system(intel_spurious_crt_detect)) { crt->base.hpd_pin = HPD_CRT; + crt->base.hotplug = intel_encoder_hotplug; + } if (HAS_DDI(dev_priv)) { crt->base.port = PORT_E; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index bfdaa5d86861..e5b5d21c3c09 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -25,6 +25,7 @@ * */ +#include #include "i915_drv.h" #include "intel_drv.h" @@ -2798,6 +2799,147 @@ intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port) return connector; } +static int modeset_pipe(struct drm_crtc *crtc, + struct drm_modeset_acquire_ctx *ctx) +{ + struct drm_atomic_state *state; + struct drm_crtc_state *crtc_state; + int ret; + + state = drm_atomic_state_alloc(crtc->dev); + if (!state) + return -ENOMEM; + + state->acquire_ctx = ctx; + + crtc_state = drm_atomic_get_crtc_state(state, crtc); + if (IS_ERR(crtc_state)) { + ret = PTR_ERR(crtc_state); + goto out; + } + + crtc_state->mode_changed = true; + + ret = drm_atomic_add_affected_connectors(state, crtc); + if (ret) + goto out; + + ret = drm_atomic_add_affected_planes(state, crtc); + if (ret) + goto out; + + ret = drm_atomic_commit(state); + if (ret) + goto out; + + return 0; + + out: + drm_atomic_state_put(state); + + return ret; +} + +static int intel_hdmi_reset_link(struct intel_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_hdmi *hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_connector *connector = hdmi->attached_connector; + struct i2c_adapter *adapter = + intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus); + struct drm_connector_state *conn_state; + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + u8 config; + int ret; + + if (!connector || connector->base.status != connector_status_connected) + return 0; + + ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, + ctx); + if (ret) + return ret; + + conn_state = connector->base.state; + + crtc = to_intel_crtc(conn_state->crtc); + if (!crtc) + return 0; + + ret = drm_modeset_lock(&crtc->base.mutex, ctx); + if (ret) + return ret; + + crtc_state = to_intel_crtc_state(crtc->base.state); + + WARN_ON(!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)); + + if (!crtc_state->base.active) + return 0; + + if (!crtc_state->hdmi_high_tmds_clock_ratio && + !crtc_state->hdmi_scrambling) + return 0; + + if 
(conn_state->commit && + !try_wait_for_completion(&conn_state->commit->hw_done)) + return 0; + + ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config); + if (ret < 0) { + DRM_ERROR("Failed to read TMDS config: %d\n", ret); + return 0; + } + + if (!!(config & SCDC_TMDS_BIT_CLOCK_RATIO_BY_40) == + crtc_state->hdmi_high_tmds_clock_ratio && + !!(config & SCDC_SCRAMBLING_ENABLE) == + crtc_state->hdmi_scrambling) + return 0; + + /* + * HDMI 2.0 says that one should not send scrambled data + * prior to configuring the sink scrambling, and that + * TMDS clock/data transmission should be suspended when + * changing the TMDS clock rate in the sink. So let's + * just do a full modeset here, even though some sinks + * would be perfectly happy if were to just reconfigure + * the SCDC settings on the fly. + */ + return modeset_pipe(&crtc->base, ctx); +} + +static bool intel_ddi_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector) +{ + struct drm_modeset_acquire_ctx ctx; + bool changed; + int ret; + + changed = intel_encoder_hotplug(encoder, connector); + + drm_modeset_acquire_init(&ctx, 0); + + for (;;) { + ret = intel_hdmi_reset_link(encoder, &ctx); + + if (ret == -EDEADLK) { + drm_modeset_backoff(&ctx); + continue; + } + + break; + } + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + WARN(ret, "Acquiring modeset locks failed with %i\n", ret); + + return changed; +} + static struct intel_connector * intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) { @@ -2914,6 +3056,10 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port)); + if (init_hdmi) + intel_encoder->hotplug = intel_ddi_hotplug; + else + intel_encoder->hotplug = intel_encoder_hotplug; intel_encoder->compute_output_type = intel_ddi_compute_output_type; intel_encoder->compute_config = intel_ddi_compute_config; intel_encoder->enable = intel_enable_ddi; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index c722a6750e90..7cc1720a437d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -6393,6 +6393,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, "DP %c", port_name(port))) goto err_encoder_init; + intel_encoder->hotplug = intel_encoder_hotplug; intel_encoder->compute_config = intel_dp_compute_config; intel_encoder->get_hw_state = intel_dp_get_hw_state; intel_encoder->get_config = intel_dp_get_config; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 652b11e788cc..2ae6d5548171 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -215,7 +215,8 @@ struct intel_encoder { enum intel_output_type type; enum port port; unsigned int cloneable; - void (*hot_plug)(struct intel_encoder *); + bool (*hotplug)(struct intel_encoder *encoder, + struct intel_connector *connector); enum intel_output_type (*compute_output_type)(struct intel_encoder *, struct intel_crtc_state *, struct drm_connector_state *); @@ -1704,7 +1705,8 @@ int intel_dsi_dcs_init_backlight_funcs(struct intel_connector *intel_connector); void intel_dvo_init(struct drm_i915_private *dev_priv); /* intel_hotplug.c */ void intel_hpd_poll_init(struct drm_i915_private *dev_priv); - +bool intel_encoder_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector); /* legacy fbdev emulation in intel_fbdev.c */ #ifdef CONFIG_DRM_FBDEV_EMULATION diff --git 
a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index f5d7bfb43006..1baef4ac7ecb 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -2383,6 +2383,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, "HDMI %c", port_name(port)); + intel_encoder->hotplug = intel_encoder_hotplug; intel_encoder->compute_config = intel_hdmi_compute_config; if (HAS_PCH_SPLIT(dev_priv)) { intel_encoder->disable = pch_disable_hdmi; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index fe28c1ea84a5..0e3d3e89d66a 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -274,24 +274,26 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) intel_runtime_pm_put(dev_priv); } -static bool intel_hpd_irq_event(struct drm_device *dev, - struct drm_connector *connector) +bool intel_encoder_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector) { + struct drm_device *dev = connector->base.dev; enum drm_connector_status old_status; WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); - old_status = connector->status; + old_status = connector->base.status; - connector->status = drm_helper_probe_detect(connector, NULL, false); + connector->base.status = + drm_helper_probe_detect(&connector->base, NULL, false); - if (old_status == connector->status) + if (old_status == connector->base.status) return false; DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n", - connector->base.id, - connector->name, + connector->base.base.id, + connector->base.name, drm_get_connector_status_name(old_status), - drm_get_connector_status_name(connector->status)); + drm_get_connector_status_name(connector->base.status)); return true; } @@ -381,10 +383,9 @@ static void i915_hotplug_work_func(struct work_struct *work) if (hpd_event_bits & (1 << intel_encoder->hpd_pin)) { DRM_DEBUG_KMS("Connector %s (pin %i) received hotplug event.\n", connector->name, intel_encoder->hpd_pin); - if (intel_encoder->hot_plug) - intel_encoder->hot_plug(intel_encoder); - if (intel_hpd_irq_event(dev, connector)) - changed = true; + + changed |= intel_encoder->hotplug(intel_encoder, + intel_connector); } } drm_connector_list_iter_end(&conn_iter); -- cgit v1.2.3 From c85d200e832197e23ceeadfda9745646a242fd46 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 17 Jan 2018 21:21:47 +0200 Subject: drm/i915: Move SST DP link retraining into the ->post_hotplug() hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doing link retraining from the short pulse handler is problematic since that might introduce deadlocks with MST sideband processing. Currently we don't retrain MST links from this code, but we want to change that. So better to move the entire thing to the hotplug work. We can utilize the new encoder->hotplug() hook for this. The only thing we leave in the short pulse handler is the link status check. That one still depends on the link parameters stored under intel_dp, so no locking around that but races should be mostly harmless as the actual retraining code will recheck the link state if we end up there by mistake. 
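Generically, the fix follows the usual split of a cheap check in the notification path with the heavy lifting deferred to a work item. A sketch with hypothetical names (i915 reuses its existing hotplug work rather than adding a new one):

	#include <linux/workqueue.h>

	struct port_state {
		struct work_struct hotplug_work; /* INIT_WORK()ed at init time */
		/* ... link parameters, protected by modeset locks ... */
	};

	static void hotplug_work_fn(struct work_struct *work)
	{
		struct port_state *port =
			container_of(work, struct port_state, hotplug_work);

		/* Process context: safe to take modeset locks and retrain. */
		retrain_link(port);
	}

	static void short_pulse(struct port_state *port)
	{
		/*
		 * Must not take locks that MST sideband processing may
		 * hold, so only peek at the link status and defer.
		 */
		if (link_needs_retrain(port))
			schedule_work(&port->hotplug_work);
	}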
v2: Rebase due to ->post_hotplug() now being just ->hotplug() Check the connector type to figure out if we should do the HDMI thing or the DP think for DDI [pushed with whitespace changes for sparse] Cc: Manasi Navare Cc: Maarten Lankhorst Signed-off-by: Ville Syrjälä Acked-by: Manasi Navare Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180117192149.17760-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 10 +- drivers/gpu/drm/i915/intel_dp.c | 197 ++++++++++++++++++++++----------------- drivers/gpu/drm/i915/intel_drv.h | 2 + 3 files changed, 121 insertions(+), 88 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e5b5d21c3c09..d3cbea2c136c 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2923,7 +2923,10 @@ static bool intel_ddi_hotplug(struct intel_encoder *encoder, drm_modeset_acquire_init(&ctx, 0); for (;;) { - ret = intel_hdmi_reset_link(encoder, &ctx); + if (connector->base.connector_type == DRM_MODE_CONNECTOR_HDMIA) + ret = intel_hdmi_reset_link(encoder, &ctx); + else + ret = intel_dp_retrain_link(encoder, &ctx); if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); @@ -3056,10 +3059,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port)); - if (init_hdmi) - intel_encoder->hotplug = intel_ddi_hotplug; - else - intel_encoder->hotplug = intel_encoder_hotplug; + intel_encoder->hotplug = intel_ddi_hotplug; intel_encoder->compute_output_type = intel_ddi_compute_output_type; intel_encoder->compute_config = intel_ddi_compute_config; intel_encoder->enable = intel_enable_ddi; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 7cc1720a437d..153342cf5898 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4272,12 +4272,84 @@ go_again: return -EINVAL; } -static void -intel_dp_retrain_link(struct intel_dp *intel_dp) +static bool +intel_dp_needs_link_retrain(struct intel_dp *intel_dp) +{ + u8 link_status[DP_LINK_STATUS_SIZE]; + + if (!intel_dp_get_link_status(intel_dp, link_status)) { + DRM_ERROR("Failed to get link status\n"); + return false; + } + + /* + * Validate the cached values of intel_dp->link_rate and + * intel_dp->lane_count before attempting to retrain. + */ + if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate, + intel_dp->lane_count)) + return false; + + /* Retrain if Channel EQ or CR not ok */ + return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count); +} + +/* + * If display is now connected check links status, + * there has been known issues of link loss triggering + * long pulse. + * + * Some sinks (eg. ASUS PB287Q) seem to perform some + * weird HPD ping pong during modesets. So we can apparently + * end up with HPD going low during a modeset, and then + * going back up soon after. And once that happens we must + * retrain the link to get a picture. That's in case no + * userspace component reacted to intermittent HPD dip. 
+ */ +int intel_dp_retrain_link(struct intel_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx) { - struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_connector *connector = intel_dp->attached_connector; + struct drm_connector_state *conn_state; + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int ret; + + /* FIXME handle the MST connectors as well */ + + if (!connector || connector->base.status != connector_status_connected) + return 0; + + ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, + ctx); + if (ret) + return ret; + + conn_state = connector->base.state; + + crtc = to_intel_crtc(conn_state->crtc); + if (!crtc) + return 0; + + ret = drm_modeset_lock(&crtc->base.mutex, ctx); + if (ret) + return ret; + + crtc_state = to_intel_crtc_state(crtc->base.state); + + WARN_ON(!intel_crtc_has_dp_encoder(crtc_state)); + + if (!crtc_state->base.active) + return 0; + + if (conn_state->commit && + !try_wait_for_completion(&conn_state->commit->hw_done)) + return 0; + + if (!intel_dp_needs_link_retrain(intel_dp)) + return 0; /* Suppress underruns caused by re-training */ intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); @@ -4295,51 +4367,49 @@ intel_dp_retrain_link(struct intel_dp *intel_dp) if (crtc->config->has_pch_encoder) intel_set_pch_fifo_underrun_reporting(dev_priv, intel_crtc_pch_transcoder(crtc), true); + + return 0; } -static void -intel_dp_check_link_status(struct intel_dp *intel_dp) +/* + * If display is now connected check links status, + * there has been known issues of link loss triggering + * long pulse. + * + * Some sinks (eg. ASUS PB287Q) seem to perform some + * weird HPD ping pong during modesets. So we can apparently + * end up with HPD going low during a modeset, and then + * going back up soon after. And once that happens we must + * retrain the link to get a picture. That's in case no + * userspace component reacted to intermittent HPD dip. + */ +static bool intel_dp_hotplug(struct intel_encoder *encoder, + struct intel_connector *connector) { - struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; - struct drm_connector_state *conn_state = - intel_dp->attached_connector->base.state; - u8 link_status[DP_LINK_STATUS_SIZE]; - - WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); - - if (!intel_dp_get_link_status(intel_dp, link_status)) { - DRM_ERROR("Failed to get link status\n"); - return; - } + struct drm_modeset_acquire_ctx ctx; + bool changed; + int ret; - if (!conn_state->crtc) - return; + changed = intel_encoder_hotplug(encoder, connector); - WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex)); + drm_modeset_acquire_init(&ctx, 0); - if (!conn_state->crtc->state->active) - return; + for (;;) { + ret = intel_dp_retrain_link(encoder, &ctx); - if (conn_state->commit && - !try_wait_for_completion(&conn_state->commit->hw_done)) - return; + if (ret == -EDEADLK) { + drm_modeset_backoff(&ctx); + continue; + } - /* - * Validate the cached values of intel_dp->link_rate and - * intel_dp->lane_count before attempting to retrain. 
- */ - if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate, - intel_dp->lane_count)) - return; + break; + } - /* Retrain if Channel EQ or CR not ok */ - if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { - DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n", - intel_encoder->base.name); + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + WARN(ret, "Acquiring modeset locks failed with %i\n", ret); - intel_dp_retrain_link(intel_dp); - } + return changed; } /* @@ -4397,7 +4467,9 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } - intel_dp_check_link_status(intel_dp); + /* defer to the hotplug work for link retraining if needed */ + if (intel_dp_needs_link_retrain(intel_dp)) + return false; if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); @@ -4782,20 +4854,6 @@ intel_dp_long_pulse(struct intel_connector *connector) */ status = connector_status_disconnected; goto out; - } else { - /* - * If display is now connected check links status, - * there has been known issues of link loss triggerring - * long pulse. - * - * Some sinks (eg. ASUS PB287Q) seem to perform some - * weird HPD ping pong during modesets. So we can apparently - * end up with HPD going low during a modeset, and then - * going back up soon after. And once that happens we must - * retrain the link to get a picture. That's in case no - * userspace component reacted to intermittent HPD dip. - */ - intel_dp_check_link_status(intel_dp); } /* @@ -5372,37 +5430,10 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) } if (!intel_dp->is_mst) { - struct drm_modeset_acquire_ctx ctx; - struct drm_connector *connector = &intel_dp->attached_connector->base; - struct drm_crtc *crtc; - int iret; - bool handled = false; - - drm_modeset_acquire_init(&ctx, 0); -retry: - iret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, &ctx); - if (iret) - goto err; - - crtc = connector->state->crtc; - if (crtc) { - iret = drm_modeset_lock(&crtc->mutex, &ctx); - if (iret) - goto err; - } + bool handled; handled = intel_dp_short_pulse(intel_dp); -err: - if (iret == -EDEADLK) { - drm_modeset_backoff(&ctx); - goto retry; - } - - drm_modeset_drop_locks(&ctx); - drm_modeset_acquire_fini(&ctx); - WARN(iret, "Acquiring modeset locks failed with %i\n", iret); - /* Short pulse can signify loss of hdcp authentication */ intel_hdcp_check_link(intel_dp->attached_connector); @@ -6393,7 +6424,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, "DP %c", port_name(port))) goto err_encoder_init; - intel_encoder->hotplug = intel_encoder_hotplug; + intel_encoder->hotplug = intel_dp_hotplug; intel_encoder->compute_config = intel_dp_compute_config; intel_encoder->get_hw_state = intel_dp_get_hw_state; intel_encoder->get_config = intel_dp_get_config; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2ae6d5548171..5bc69ce1c1a5 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1626,6 +1626,8 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count); void intel_dp_start_link_train(struct intel_dp *intel_dp); void intel_dp_stop_link_train(struct intel_dp *intel_dp); +int intel_dp_retrain_link(struct intel_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx); void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); void 
intel_dp_encoder_reset(struct drm_encoder *encoder); void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); -- cgit v1.2.3 From 2fed7955bf4c2e87e8b3759939fd0ad961da776e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 17 Jan 2018 21:21:48 +0200 Subject: drm/i915: Nuke intel_dp->channel_eq_status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_dp->channel_eq_status is used in exactly one function, and we don't need it to persist between calls. So just go back to using a local variable instead. Signed-off-by: Ville Syrjälä Reviewed-by: Rodrigo Vivi Reviewed-by: Lyude Paul Reviewed-by: Manasi Navare Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180117192149.17760-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_dp_link_training.c | 6 +++--- drivers/gpu/drm/i915/intel_drv.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index cf8fef8b6f58..ae849952d4b9 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -248,6 +248,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) int tries; u32 training_pattern; uint8_t link_status[DP_LINK_STATUS_SIZE]; + bool channel_eq = false; training_pattern = intel_dp_training_pattern(intel_dp); @@ -259,7 +260,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) return false; } - intel_dp->channel_eq_status = false; for (tries = 0; tries < 5; tries++) { drm_dp_link_train_channel_eq_delay(intel_dp->dpcd); @@ -279,7 +279,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) if (drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { - intel_dp->channel_eq_status = true; + channel_eq = true; DRM_DEBUG_KMS("Channel EQ done. DP Training " "successful\n"); break; @@ -301,7 +301,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) intel_dp_set_idle_link_train(intel_dp); - return intel_dp->channel_eq_status; + return channel_eq; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 5bc69ce1c1a5..dc693b40a884 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1050,7 +1050,6 @@ struct intel_dp { bool link_mst; bool has_audio; bool detect_done; - bool channel_eq_status; bool reset_link_params; enum aux_ch aux_ch; uint8_t dpcd[DP_RECEIVER_CAP_SIZE]; -- cgit v1.2.3 From edb2e5301c4489d8c99b0f3d86a074df27f6f8ff Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 17 Jan 2018 21:21:49 +0200 Subject: drm/i915: Track whether the DP link is trained or not MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LSPCON likes to throw short HPDs during the enable sequence prior to the link being trained. These obviously result in the channel CR/EQ check failing and thus we schedule a pointless hotplug work to retrain the link. Avoid that by ignoring the bad CR/EQ status until we've actually initially trained the link. I've not actually investigated to see what LSPCON is trying to signal with the short pulse. But as long as it signals anything I think we're supposed to check the link status anyway, so I don't really see other good ways to solve this. I've not seen these short pulses being generated by normal DP sinks.
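In outline, the lifecycle of the new flag (field and helper names as in the diff that follows):

	intel_dp->link_trained = false;      /* on disable and set_link_params() */
	...
	intel_dp_start_link_train(intel_dp);
	intel_dp_stop_link_train(intel_dp);  /* sets link_trained = true */

	/* intel_dp_needs_link_retrain() then discards CR/EQ results from
	 * stray short pulses until training has actually completed: */
	if (!intel_dp->link_trained)
		return false;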
Signed-off-by: Ville Syrjälä Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180117192149.17760-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 2 ++ drivers/gpu/drm/i915/intel_dp.c | 10 +++++++--- drivers/gpu/drm/i915/intel_dp_link_training.c | 2 ++ drivers/gpu/drm/i915/intel_drv.h | 1 + 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index d3cbea2c136c..ac8fc2a44ac6 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2508,6 +2508,8 @@ static void intel_disable_ddi_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp->link_trained = false; + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 153342cf5898..9a4a51e79fa1 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1913,6 +1913,7 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count, bool link_mst) { + intel_dp->link_trained = false; intel_dp->link_rate = link_rate; intel_dp->lane_count = lane_count; intel_dp->link_mst = link_mst; @@ -2761,6 +2762,8 @@ static void intel_disable_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + intel_dp->link_trained = false; + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); @@ -4277,10 +4280,11 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp) { u8 link_status[DP_LINK_STATUS_SIZE]; - if (!intel_dp_get_link_status(intel_dp, link_status)) { - DRM_ERROR("Failed to get link status\n"); + if (!intel_dp->link_trained) + return false; + + if (!intel_dp_get_link_status(intel_dp, link_status)) return false; - } /* * Validate the cached values of intel_dp->link_rate and diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index ae849952d4b9..f59b59bb0a21 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -307,6 +307,8 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) void intel_dp_stop_link_train(struct intel_dp *intel_dp) { + intel_dp->link_trained = true; + intel_dp_set_link_train(intel_dp, DP_TRAINING_PATTERN_DISABLE); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index dc693b40a884..37d5412af8f5 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1048,6 +1048,7 @@ struct intel_dp { uint8_t lane_count; uint8_t sink_count; bool link_mst; + bool link_trained; bool has_audio; bool detect_done; bool reset_link_params; -- cgit v1.2.3 From 949f7c7d5fc4277e1ca91dbee32ceb469bbb1349 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 5 Mar 2018 13:36:08 +0100 Subject: drm/i915: Handle changing enable_fbc parameter at runtime better. If i915.enable_fbc is cleared at runtime, but FBC was previously enabled then we don't disable FBC until the next time the crtc is disabled. Make sure that if the module param is changed, we disable FBC in intel_fbc_post_update so we never have to worry about disabling. 
Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20180305123608.20665-1-maarten.lankhorst@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_fbc.c | 62 +++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 38a5535a5c63..707d49c12638 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -960,6 +960,30 @@ unlock: mutex_unlock(&fbc->lock); } +/** + * __intel_fbc_disable - disable FBC + * @dev_priv: i915 device instance + * + * This is the low level function that actually disables FBC. Callers should + * grab the FBC lock. + */ +static void __intel_fbc_disable(struct drm_i915_private *dev_priv) +{ + struct intel_fbc *fbc = &dev_priv->fbc; + struct intel_crtc *crtc = fbc->crtc; + + WARN_ON(!mutex_is_locked(&fbc->lock)); + WARN_ON(!fbc->enabled); + WARN_ON(fbc->active); + + DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe)); + + __intel_fbc_cleanup_cfb(dev_priv); + + fbc->enabled = false; + fbc->crtc = NULL; +} + static void __intel_fbc_post_update(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -971,6 +995,13 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc) if (!fbc->enabled || fbc->crtc != crtc) return; + if (!i915_modparams.enable_fbc) { + intel_fbc_deactivate(dev_priv, "disabled at runtime per module param"); + __intel_fbc_disable(dev_priv); + + return; + } + if (!intel_fbc_can_activate(crtc)) { WARN_ON(fbc->active); return; @@ -1174,31 +1205,6 @@ out: mutex_unlock(&fbc->lock); } -/** - * __intel_fbc_disable - disable FBC - * @dev_priv: i915 device instance - * - * This is the low level function that actually disables FBC. Callers should - * grab the FBC lock. - */ -static void __intel_fbc_disable(struct drm_i915_private *dev_priv) -{ - struct intel_fbc *fbc = &dev_priv->fbc; - struct intel_crtc *crtc = fbc->crtc; - - WARN_ON(!mutex_is_locked(&fbc->lock)); - WARN_ON(!fbc->enabled); - WARN_ON(fbc->active); - WARN_ON(crtc->active); - - DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe)); - - __intel_fbc_cleanup_cfb(dev_priv); - - fbc->enabled = false; - fbc->crtc = NULL; -} - /** * intel_fbc_disable - disable FBC if it's associated with crtc * @crtc: the CRTC @@ -1213,6 +1219,8 @@ void intel_fbc_disable(struct intel_crtc *crtc) if (!fbc_supported(dev_priv)) return; + WARN_ON(crtc->active); + mutex_lock(&fbc->lock); if (fbc->crtc == crtc) __intel_fbc_disable(dev_priv); @@ -1235,8 +1243,10 @@ void intel_fbc_global_disable(struct drm_i915_private *dev_priv) return; mutex_lock(&fbc->lock); - if (fbc->enabled) + if (fbc->enabled) { + WARN_ON(fbc->crtc->active); __intel_fbc_disable(dev_priv); + } mutex_unlock(&fbc->lock); cancel_work_sync(&fbc->work.work); -- cgit v1.2.3 From 4e9a8bef20c973f62358fbd888a38f1bd1669200 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 5 Mar 2018 10:41:05 +0000 Subject: drm/i915: Assert that the request is indeed complete when signaled from irq After we call dma_fence_signal(), confirm that the request was indeed complete. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180305104105.8296-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ce16003ef048..633c18785c1e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1123,6 +1123,7 @@ static void notify_ring(struct intel_engine_cs *engine) if (rq) { dma_fence_signal(&rq->fence); + GEM_BUG_ON(!i915_request_completed(rq)); i915_request_put(rq); } -- cgit v1.2.3 From 5f79e7c6754249dd71f3124c9c7604aab2880c20 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Fri, 2 Mar 2018 18:14:57 +0200 Subject: drm/i915/icl: Correctly initialize the Gen11 engines Gen11 has up to 4 VCS and up to 2 VECS engines, this patch adds mmio base definitions for all of them. Bspec: 20944 Bspec: 7021 v2: Set the correct mmio_base in intel_engines_init_mmio; updating the base mmio values any later would cause incorrect reads in i915_gem_sanitize (Michel). Cc: Tvrtko Ursulin Cc: Ceraolo Spurio, Daniele Signed-off-by: Oscar Mateo Signed-off-by: Michel Thierry Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20180302161501.28594-2-mika.kuoppala@linux.intel.com Signed-off-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_reg.h | 6 +++++ drivers/gpu/drm/i915/intel_engine_cs.c | 44 +++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 95a2e51ecbb0..d7023f15f0ed 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2345,7 +2345,13 @@ enum i915_power_well_id { #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 #define GEN8_BSD2_RING_BASE 0x1c000 +#define GEN11_BSD_RING_BASE 0x1c0000 +#define GEN11_BSD2_RING_BASE 0x1c4000 +#define GEN11_BSD3_RING_BASE 0x1d0000 +#define GEN11_BSD4_RING_BASE 0x1d4000 #define VEBOX_RING_BASE 0x1a000 +#define GEN11_VEBOX_RING_BASE 0x1c8000 +#define GEN11_VEBOX2_RING_BASE 0x1d8000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base) _MMIO((base)+0x30) #define RING_HEAD(base) _MMIO((base)+0x34) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 3e1107ecb6ee..911fc08658c5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -123,6 +123,22 @@ static const struct engine_info intel_engines[] = { .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, }, + [VCS3] = { + .hw_id = VCS3_HW, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 2, + .mmio_base = GEN11_BSD3_RING_BASE, + .irq_shift = 0, /* not used */ + }, + [VCS4] = { + .hw_id = VCS4_HW, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 3, + .mmio_base = GEN11_BSD4_RING_BASE, + .irq_shift = 0, /* not used */ + }, [VECS] = { .hw_id = VECS_HW, .uabi_id = I915_EXEC_VEBOX, @@ -131,6 +147,14 @@ static const struct engine_info intel_engines[] = { .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, }, + [VECS2] = { + .hw_id = VECS2_HW, + .uabi_id = I915_EXEC_VEBOX, + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 1, + .mmio_base = GEN11_VEBOX2_RING_BASE, + .irq_shift = 0, /* not used */ + }, }; /** @@ -230,7 +254,25 @@ intel_engine_setup(struct drm_i915_private *dev_priv, class_info->name, info->instance) >= 
sizeof(engine->name)); engine->hw_id = engine->guc_id = info->hw_id; - engine->mmio_base = info->mmio_base; + if (INTEL_GEN(dev_priv) >= 11) { + switch (engine->id) { + case VCS: + engine->mmio_base = GEN11_BSD_RING_BASE; + break; + case VCS2: + engine->mmio_base = GEN11_BSD2_RING_BASE; + break; + case VECS: + engine->mmio_base = GEN11_VEBOX_RING_BASE; + break; + default: + /* take the original value for all other engines */ + engine->mmio_base = info->mmio_base; + break; + } + } else { + engine->mmio_base = info->mmio_base; + } engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; -- cgit v1.2.3 From ac52da6af826d05f02c03fcde4a0651d070783b2 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 2 Mar 2018 18:14:58 +0200 Subject: drm/i915/icl: new context descriptor support Starting from Gen11 the context descriptor format has been updated in the HW. The hw_id field has been considerably reduced in size and engine class and instance fields have been added. There is a slight name clashing issue because the field that we call hw_id is actually called SW Context ID in the specs for Gen11+. With the current size of the hw_id field we can have a maximum of 2k contexts at any time, but we could use the sw_counter field (which is sw defined) to increase that because the HW requirement is that engine_id + sw id + sw_counter is a unique number. GuC uses a similar method to support more contexts but does its tracking at lrc level. To avoid doing an implementation that will need to be reworked once GuC support lands, defer it for now and mark it as TODO. v2: rebased, add documentation, fix GEN11_ENGINE_INSTANCE_SHIFT v3: rebased, bring back lost code from i915_gem_context.c v4: make TODO comment more generic v5: be consistent with bit ordering, add extra checks (Chris) Cc: Oscar Mateo Cc: Chris Wilson Cc: Mika Kuoppala Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Oscar Mateo Link: https://patchwork.freedesktop.org/patch/msgid/20180302161501.28594-3-mika.kuoppala@linux.intel.com Signed-off-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_context.c | 11 ++++++++-- drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ drivers/gpu/drm/i915/intel_engine_cs.c | 3 +++ drivers/gpu/drm/i915/intel_lrc.c | 36 +++++++++++++++++++++++++++++++-- 5 files changed, 53 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7eec99d7fad4..78dd318df18e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2103,6 +2103,7 @@ struct drm_i915_private { */ struct ida hw_ida; #define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ } contexts; u32 fdi_rx_config; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a73340ae9419..f2cbea7cf940 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -211,9 +211,15 @@ static void context_close(struct i915_gem_context *ctx) static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) { int ret; + unsigned int max; + + if (INTEL_GEN(dev_priv) >= 11) + max = GEN11_MAX_CONTEXT_HW_ID; + else + max = MAX_CONTEXT_HW_ID; ret = ida_simple_get(&dev_priv->contexts.hw_ida, - 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + 0, max, GFP_KERNEL); if (ret < 0) { /* Contexts are only released when no longer active. 
* Flush any pending retires to hopefully release some @@ -221,7 +227,7 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) */ i915_retire_requests(dev_priv); ret = ida_simple_get(&dev_priv->contexts.hw_ida, - 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + 0, max, GFP_KERNEL); if (ret < 0) return ret; } @@ -463,6 +469,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) /* Using the simple ida interface, the max is limited by sizeof(int) */ BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->contexts.hw_ida); /* lowest priority; idle task */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d7023f15f0ed..a778b93f60d2 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3912,6 +3912,12 @@ enum { #define GEN8_CTX_ID_SHIFT 32 #define GEN8_CTX_ID_WIDTH 21 +#define GEN11_SW_CTX_ID_SHIFT 37 +#define GEN11_SW_CTX_ID_WIDTH 11 +#define GEN11_ENGINE_CLASS_SHIFT 61 +#define GEN11_ENGINE_CLASS_WIDTH 3 +#define GEN11_ENGINE_INSTANCE_SHIFT 48 +#define GEN11_ENGINE_INSTANCE_WIDTH 6 #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 911fc08658c5..4ba139c27fba 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -234,6 +234,9 @@ intel_engine_setup(struct drm_i915_private *dev_priv, GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); class_info = &intel_engine_classes[info->class]; + BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); + BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 75d2daa4f6c1..69838f668862 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -204,6 +204,18 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * bits 32-52: ctx ID, a globally unique tag * bits 53-54: mbz, reserved for use by hardware * bits 55-63: group ID, currently unused and set to 0 + * + * Starting from Gen11, the upper dword of the descriptor has a new format: + * + * bits 32-36: reserved + * bits 37-47: SW context ID + * bits 48:53: engine instance + * bit 54: mbz, reserved for use by hardware + * bits 55-60: SW counter + * bits 61-63: engine class + * + * engine info, SW context ID and SW counter need to form a unique number + * (Context ID) per lrc. 
*/ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, @@ -212,12 +224,32 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, struct intel_context *ce = &ctx->engine[engine->id]; u64 desc; - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1< (BIT(GEN8_CTX_ID_WIDTH))); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); desc = ctx->desc_template; /* bits 0-11 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 12)); + desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; /* bits 12-31 */ - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + GEM_BUG_ON(desc & GENMASK_ULL(63, 32)); + + if (INTEL_GEN(ctx->i915) >= 11) { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; + /* bits 37-47 */ + + desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; + /* bits 48-53 */ + + /* TODO: decide what to do with SW counter (bits 55-60) */ + + desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; + /* bits 61-63 */ + } else { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); + desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ + } ce->lrc_desc = desc; } -- cgit v1.2.3 From 05f0addd9b108d4dfa6853122f453a445f241c2b Mon Sep 17 00:00:00 2001 From: Thomas Daniel Date: Fri, 2 Mar 2018 18:14:59 +0200 Subject: drm/i915/icl: Enhanced execution list support Enhanced Execlists is an upgraded version of execlists which supports up to 8 ports. The lrcs to be submitted are written to a submit queue (the ExecLists Submission Queue - ELSQ), which is then loaded on the HW. When writing to the ELSP register, the lrcs are written cyclically in the queue from position 0 to position 7. Alternatively, it is possible to write directly in the individual positions of the queue using the ELSQC registers. To be able to re-use all the existing code we're using the latter method and we're currently limiting ourself to only using 2 elements. v2: Rebase. v3: Switch from !IS_GEN11 to GEN < 11 (Daniele Ceraolo Spurio). v4: Use the elsq registers instead of elsp. 
(Daniele Ceraolo Spurio) v5: Reword commit, rename regs to be closer to specs, turn off preemption (Daniele), reuse engine->execlists.elsp (Chris) v6: use has_logical_ring_elsq to differentiate the new paths v7: add preemption support, rename els to submit_reg (Chris) v8: save the ctrl register inside the execlists struct, drop CSB handling updates (superseded by preempt_complete_status) (Chris) v9: s/drm_i915_gem_request/i915_request (Mika) v10: resolved conflict in inject_preempt_context (Mika) Cc: Chris Wilson Cc: Mika Kuoppala Signed-off-by: Thomas Daniel Signed-off-by: Rodrigo Vivi Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180302161501.28594-4-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 3 +- drivers/gpu/drm/i915/intel_device_info.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 58 ++++++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_lrc.h | 3 ++ drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++++-- 6 files changed, 62 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 78dd318df18e..604389d0b6a3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2772,6 +2772,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ ((dev_priv)->info.has_logical_ring_contexts) +#define HAS_LOGICAL_RING_ELSQ(dev_priv) \ + ((dev_priv)->info.has_logical_ring_elsq) #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \ ((dev_priv)->info.has_logical_ring_preemption) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 26e8f5c13231..062e91b39085 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -594,7 +594,8 @@ static const struct intel_device_info intel_cannonlake_info = { GEN10_FEATURES, \ GEN(11), \ .ddb_size = 2048, \ - .has_csr = 0 + .has_csr = 0, \ + .has_logical_ring_elsq = 1 static const struct intel_device_info intel_icelake_11_info = { GEN11_FEATURES, diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index ab5bfd305477..7cc5a8e649b5 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -96,6 +96,7 @@ enum intel_platform { func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ + func(has_logical_ring_elsq); \ func(has_logical_ring_preemption); \ func(has_overlay); \ func(has_pooled_eu); \ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 69838f668862..835cc6149bfe 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -417,18 +417,30 @@ static u64 execlists_update_context(struct i915_request *rq) return ce->lrc_desc; } -static inline void elsp_write(u64 desc, u32 __iomem *elsp) +static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) { - writel(upper_32_bits(desc), elsp); - writel(lower_32_bits(desc), elsp); + if (execlists->ctrl_reg) { + writel(lower_32_bits(desc), execlists->submit_reg + port * 2); + writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); + } else { + writel(upper_32_bits(desc), execlists->submit_reg); + writel(lower_32_bits(desc), execlists->submit_reg); + } } static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct execlist_port *port = engine->execlists.port; + 
struct intel_engine_execlists *execlists = &engine->execlists; + struct execlist_port *port = execlists->port; unsigned int n; - for (n = execlists_num_ports(&engine->execlists); n--; ) { + /* + * ELSQ note: the submit queue is not cleared after being submitted + * to the HW so we need to make sure we always clean it up. This is + * currently ensured by the fact that we always write the same number + * of elsq entries, keep this in mind before changing the loop below. + */ + for (n = execlists_num_ports(execlists); n--; ) { struct i915_request *rq; unsigned int count; u64 desc; @@ -452,9 +464,14 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = 0; } - elsp_write(desc, engine->execlists.elsp); + write_desc(execlists, desc, n); } - execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } static bool ctx_single_port_submission(const struct i915_gem_context *ctx) @@ -487,11 +504,12 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) static void inject_preempt_context(struct intel_engine_cs *engine) { + struct intel_engine_execlists *execlists = &engine->execlists; struct intel_context *ce = &engine->i915->preempt_context->engine[engine->id]; unsigned int n; - GEM_BUG_ON(engine->execlists.preempt_complete_status != + GEM_BUG_ON(execlists->preempt_complete_status != upper_32_bits(ce->lrc_desc)); GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] & _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | @@ -504,10 +522,15 @@ static void inject_preempt_context(struct intel_engine_cs *engine) * the state of the GPU is known (idle). 
*/ GEM_TRACE("%s\n", engine->name); - for (n = execlists_num_ports(&engine->execlists); --n; ) - elsp_write(0, engine->execlists.elsp); + for (n = execlists_num_ports(execlists); --n; ) + write_desc(execlists, 0, n); + + write_desc(execlists, ce->lrc_desc, n); + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); - elsp_write(ce->lrc_desc, engine->execlists.elsp); execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); } @@ -2131,8 +2154,15 @@ static int logical_ring_init(struct intel_engine_cs *engine) if (ret) goto error; - engine->execlists.elsp = - engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + if (HAS_LOGICAL_RING_ELSQ(engine->i915)) { + engine->execlists.submit_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine)); + engine->execlists.ctrl_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine)); + } else { + engine->execlists.submit_reg = engine->i915->regs + + i915_mmio_reg_offset(RING_ELSP(engine)); + } engine->execlists.preempt_complete_status = ~0u; if (engine->i915->preempt_context) @@ -2401,7 +2431,7 @@ populate_lr_context(struct i915_gem_context *ctx, if (!engine->default_state) regs[CTX_CONTEXT_CONTROL + 1] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (ctx == ctx->i915->preempt_context) + if (ctx == ctx->i915->preempt_context && INTEL_GEN(engine->i915) < 11) regs[CTX_CONTEXT_CONTROL + 1] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 636ced41225d..59d7b86012e9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -42,6 +42,9 @@ #define RING_CONTEXT_STATUS_BUF_LO(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8) #define RING_CONTEXT_STATUS_BUF_HI(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8 + 4) #define RING_CONTEXT_STATUS_PTR(engine) _MMIO((engine)->mmio_base + 0x3a0) +#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510) +#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550) +#define EL_CTRL_LOAD (1 << 0) /* The docs specify that the write pointer wraps around after 5h, "After status * is written out to the last available status QW at offset 5h, this pointer diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 26605f39bbfd..d8ddea0174ca 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -209,9 +209,17 @@ struct intel_engine_execlists { bool no_priolist; /** - * @elsp: the ExecList Submission Port register + * @submit_reg: gen-specific execlist submission register + * set to the ExecList Submission Port (elsp) register pre-Gen11 and to + * the ExecList Submission Queue Contents register array for Gen11+ */ - u32 __iomem *elsp; + u32 __iomem *submit_reg; + + /** + * @ctrl_reg: the enhanced execlists control register, used to load the + * submit queue on the HW and to request preemptions to idle + */ + u32 __iomem *ctrl_reg; /** * @port: execlist port states -- cgit v1.2.3 From fd034c77b52703c19b7b60b1bf4eb129ebfffb31 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 2 Mar 2018 18:15:00 +0200 Subject: drm/i915/icl: Add Indirect Context Offset for Gen11 v2: rebased to intel_lr_indirect_ctx_offset v3: rebase, move 
define to intel_lrc_reg.h BSpec: 11740 Signed-off-by: Michel Thierry Signed-off-by: Rodrigo Vivi Signed-off-by: Michal Wajdeczko Reviewed-by: Oscar Mateo Link: https://patchwork.freedesktop.org/patch/msgid/20180302161501.28594-5-mika.kuoppala@linux.intel.com Signed-off-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 4 ++++ drivers/gpu/drm/i915/intel_lrc_reg.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 835cc6149bfe..999d5f2539d4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2272,6 +2272,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) default: MISSING_CASE(INTEL_GEN(engine->i915)); /* fall through */ + case 11: + indirect_ctx_offset = + GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + break; case 10: indirect_ctx_offset = GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h index a53336e2fc97..169a2239d6c7 100644 --- a/drivers/gpu/drm/i915/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/intel_lrc_reg.h @@ -63,5 +63,6 @@ #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 #define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 +#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A #endif /* _INTEL_LRC_REG_H_ */ -- cgit v1.2.3 From a89a70a8b50d67263b83274a50d1c77deded03ee Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 2 Mar 2018 18:15:01 +0200 Subject: drm/i915/icl: Gen11 forcewake support The main difference with previous GENs is that starting from Gen11 each VCS and VECS engine has its own power well, which only exist if the related engine exists in the HW. The fallback forcewake request workaround is only needed on gen9 according to the HSDES WA entry (1604254524), so we can go back to using the simpler fw_domains_get/put functions. BSpec: 18331 v2: fix fwtable, use array to test shadow tables, create new accessors to avoid check on every access (Tvrtko) v3 (from Paulo): Rebase. v4: - Range 09400-097FF should be FORCEWAKE_ALL (Daniele) - Use the BIT macro for forcewake domains (Daniele) - Add a comment about the range ordering (Oscar) - Updated commit message (Oscar) v5: Rebased v6: Use I915_MAX_VCS/VECS (Michal) v7: translate FORCEWAKE_ALL to available domains v8: rebase, add clarification on fallback ack in commit message. 
v9: fix rebase issue, change check in fw_domains_init from IS_GEN11 to GEN >= 11 v10: Generate is_genX_shadowed with a macro (Daniele) Include gen11_fw_ranges in the selftest (Michel) v11: Simplify FORCEWAKE_ALL, new line between NEEDS_FORCEWAKEs (Tvrtko) Cc: Michal Wajdeczko Cc: Tvrtko Ursulin Cc: Paulo Zanoni Acked-by: Michel Thierry Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Oscar Mateo Signed-off-by: Michel Thierry Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180302161501.28594-6-mika.kuoppala@linux.intel.com Signed-off-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_reg.h | 4 + drivers/gpu/drm/i915/intel_uncore.c | 157 ++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_uncore.h | 23 +++- drivers/gpu/drm/i915/selftests/intel_uncore.c | 31 +++-- 4 files changed, 189 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a778b93f60d2..10580826319e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7998,9 +7998,13 @@ enum { #define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7) #define FORCEWAKE_MT _MMIO(0xa188) /* multi-threaded */ #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) +#define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4) +#define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4) #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) #define FORCEWAKE_BLITTER_GEN9 _MMIO(0xa188) #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) +#define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n) _MMIO(0x0D50 + (n) * 4) +#define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n) _MMIO(0x0D70 + (n) * 4) #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) #define FORCEWAKE_KERNEL BIT(0) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 5ae9a62712ca..4df7c2ef8576 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -37,6 +37,12 @@ static const char * const forcewake_domain_names[] = { "render", "blitter", "media", + "vdbox0", + "vdbox1", + "vdbox2", + "vdbox3", + "vebox0", + "vebox1", }; const char * @@ -774,6 +780,9 @@ void assert_forcewakes_active(struct drm_i915_private *dev_priv, /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000) +#define GEN11_NEEDS_FORCE_WAKE(reg) \ + ((reg) < 0x40000 || ((reg) >= 0x1c0000 && (reg) < 0x1dc000)) + #define __gen6_reg_read_fw_domains(offset) \ ({ \ enum forcewake_domains __fwd; \ @@ -826,6 +835,14 @@ find_fw_domain(struct drm_i915_private *dev_priv, u32 offset) if (!entry) return 0; + /* + * The list of FW domains depends on the SKU in gen11+ so we + * can't determine it statically. We use FORCEWAKE_ALL and + * translate it here to the list of available domains. + */ + if (entry->domains == FORCEWAKE_ALL) + return dev_priv->uncore.fw_domains; + WARN(entry->domains & ~dev_priv->uncore.fw_domains, "Uninitialized forcewake domain(s) 0x%x accessed at 0x%x\n", entry->domains & ~dev_priv->uncore.fw_domains, offset); @@ -860,6 +877,14 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { __fwd; \ }) +#define __gen11_fwtable_reg_read_fw_domains(offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + if (GEN11_NEEDS_FORCE_WAKE((offset))) \ + __fwd = find_fw_domain(dev_priv, offset); \ + __fwd; \ +}) + /* *Must* be sorted by offset! See intel_shadow_table_check(). 
*/ static const i915_reg_t gen8_shadowed_regs[] = { RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ @@ -871,6 +896,20 @@ static const i915_reg_t gen8_shadowed_regs[] = { /* TODO: Other registers are not yet used */ }; +static const i915_reg_t gen11_shadowed_regs[] = { + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + /* TODO: Other registers are not yet used */ +}; + static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) { u32 offset = i915_mmio_reg_offset(*reg); @@ -883,14 +922,17 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) return 0; } -static bool is_gen8_shadowed(u32 offset) -{ - const i915_reg_t *regs = gen8_shadowed_regs; - - return BSEARCH(offset, regs, ARRAY_SIZE(gen8_shadowed_regs), - mmio_reg_cmp); +#define __is_genX_shadowed(x) \ +static bool is_gen##x##_shadowed(u32 offset) \ +{ \ + const i915_reg_t *regs = gen##x##_shadowed_regs; \ + return BSEARCH(offset, regs, ARRAY_SIZE(gen##x##_shadowed_regs), \ + mmio_reg_cmp); \ } +__is_genX_shadowed(8) +__is_genX_shadowed(11) + #define __gen8_reg_write_fw_domains(offset) \ ({ \ enum forcewake_domains __fwd; \ @@ -929,6 +971,14 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { __fwd; \ }) +#define __gen11_fwtable_reg_write_fw_domains(offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + if (GEN11_NEEDS_FORCE_WAKE((offset)) && !is_gen11_shadowed(offset)) \ + __fwd = find_fw_domain(dev_priv, offset); \ + __fwd; \ +}) + /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), @@ -965,6 +1015,40 @@ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA), }; +/* *Must* be sorted by offset ranges! See intel_fw_table_check(). 
*/ +static const struct intel_forcewake_range __gen11_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */ + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8160, 0x82ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8500, 0x8bff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8c00, 0x8cff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8d00, 0x93ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL), + GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xb480, 0xdfff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xe000, 0xe8ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xe900, 0x243ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x40000, 0x1bffff, 0), + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), + GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), + GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), + GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), + GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1) +}; + static void ilk_dummy_write(struct drm_i915_private *dev_priv) { @@ -1095,7 +1179,12 @@ func##_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { } #define __gen6_read(x) __gen_read(gen6, x) #define __fwtable_read(x) __gen_read(fwtable, x) +#define __gen11_fwtable_read(x) __gen_read(gen11_fwtable, x) +__gen11_fwtable_read(8) +__gen11_fwtable_read(16) +__gen11_fwtable_read(32) +__gen11_fwtable_read(64) __fwtable_read(8) __fwtable_read(16) __fwtable_read(32) @@ -1105,6 +1194,7 @@ __gen6_read(16) __gen6_read(32) __gen6_read(64) +#undef __gen11_fwtable_read #undef __fwtable_read #undef __gen6_read #undef GEN6_READ_FOOTER @@ -1181,7 +1271,11 @@ func##_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, boo } #define __gen8_write(x) __gen_write(gen8, x) #define __fwtable_write(x) __gen_write(fwtable, x) +#define __gen11_fwtable_write(x) __gen_write(gen11_fwtable, x) +__gen11_fwtable_write(8) +__gen11_fwtable_write(16) +__gen11_fwtable_write(32) __fwtable_write(8) __fwtable_write(16) __fwtable_write(32) @@ -1192,6 +1286,7 @@ __gen6_write(8) __gen6_write(16) __gen6_write(32) +#undef __gen11_fwtable_write #undef __fwtable_write #undef __gen8_write #undef __gen6_write @@ -1240,6 +1335,13 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER)); BUILD_BUG_ON(FORCEWAKE_BLITTER != (1 << FW_DOMAIN_ID_BLITTER)); BUILD_BUG_ON(FORCEWAKE_MEDIA != (1 << FW_DOMAIN_ID_MEDIA)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX0)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX1)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX2)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX3)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX0)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << 
FW_DOMAIN_ID_MEDIA_VEBOX1)); + d->mask = BIT(domain_id); @@ -1267,7 +1369,34 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) dev_priv->uncore.fw_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); } - if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 11) { + int i; + + dev_priv->uncore.funcs.force_wake_get = fw_domains_get; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, + FORCEWAKE_RENDER_GEN9, + FORCEWAKE_ACK_RENDER_GEN9); + fw_domain_init(dev_priv, FW_DOMAIN_ID_BLITTER, + FORCEWAKE_BLITTER_GEN9, + FORCEWAKE_ACK_BLITTER_GEN9); + for (i = 0; i < I915_MAX_VCS; i++) { + if (!HAS_ENGINE(dev_priv, _VCS(i))) + continue; + + fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA_VDBOX0 + i, + FORCEWAKE_MEDIA_VDBOX_GEN11(i), + FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i)); + } + for (i = 0; i < I915_MAX_VECS; i++) { + if (!HAS_ENGINE(dev_priv, _VECS(i))) + continue; + + fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA_VEBOX0 + i, + FORCEWAKE_MEDIA_VEBOX_GEN11(i), + FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i)); + } + } else if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_fallback; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; @@ -1422,10 +1551,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8); ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6); } - } else { + } else if (IS_GEN(dev_priv, 9, 10)) { ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); + } else { + ASSIGN_FW_DOMAINS_TABLE(__gen11_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen11_fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen11_fwtable); } iosf_mbi_register_pmic_bus_access_notifier( @@ -1994,7 +2127,9 @@ intel_uncore_forcewake_for_read(struct drm_i915_private *dev_priv, u32 offset = i915_mmio_reg_offset(reg); enum forcewake_domains fw_domains; - if (HAS_FWTABLE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 11) { + fw_domains = __gen11_fwtable_reg_read_fw_domains(offset); + } else if (HAS_FWTABLE(dev_priv)) { fw_domains = __fwtable_reg_read_fw_domains(offset); } else if (INTEL_GEN(dev_priv) >= 6) { fw_domains = __gen6_reg_read_fw_domains(offset); @@ -2015,7 +2150,9 @@ intel_uncore_forcewake_for_write(struct drm_i915_private *dev_priv, u32 offset = i915_mmio_reg_offset(reg); enum forcewake_domains fw_domains; - if (HAS_FWTABLE(dev_priv) && !IS_VALLEYVIEW(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 11) { + fw_domains = __gen11_fwtable_reg_write_fw_domains(offset); + } else if (HAS_FWTABLE(dev_priv) && !IS_VALLEYVIEW(dev_priv)) { fw_domains = __fwtable_reg_write_fw_domains(offset); } else if (IS_GEN8(dev_priv)) { fw_domains = __gen8_reg_write_fw_domains(offset); diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 53ef77d0c97c..dfdf444e4bcc 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -37,17 +37,28 @@ enum forcewake_domain_id { FW_DOMAIN_ID_RENDER = 0, FW_DOMAIN_ID_BLITTER, FW_DOMAIN_ID_MEDIA, + FW_DOMAIN_ID_MEDIA_VDBOX0, + FW_DOMAIN_ID_MEDIA_VDBOX1, + FW_DOMAIN_ID_MEDIA_VDBOX2, + FW_DOMAIN_ID_MEDIA_VDBOX3, + FW_DOMAIN_ID_MEDIA_VEBOX0, + FW_DOMAIN_ID_MEDIA_VEBOX1, FW_DOMAIN_ID_COUNT }; enum forcewake_domains { - FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), - FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), - FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), - FORCEWAKE_ALL = (FORCEWAKE_RENDER | - 
FORCEWAKE_BLITTER | - FORCEWAKE_MEDIA) + FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), + FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), + FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), + FORCEWAKE_MEDIA_VDBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX0), + FORCEWAKE_MEDIA_VDBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX1), + FORCEWAKE_MEDIA_VDBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX2), + FORCEWAKE_MEDIA_VDBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX3), + FORCEWAKE_MEDIA_VEBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX0), + FORCEWAKE_MEDIA_VEBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX1), + + FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1 }; struct intel_uncore_funcs { diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 2f6367643171..f76f2597df5c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -61,20 +61,30 @@ static int intel_fw_table_check(const struct intel_forcewake_range *ranges, static int intel_shadow_table_check(void) { - const i915_reg_t *reg = gen8_shadowed_regs; - unsigned int i; + struct { + const i915_reg_t *regs; + unsigned int size; + } reg_lists[] = { + { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, + { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, + }; + const i915_reg_t *reg; + unsigned int i, j; s32 prev; - for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { - u32 offset = i915_mmio_reg_offset(*reg); + for (j = 0; j < ARRAY_SIZE(reg_lists); ++j) { + reg = reg_lists[j].regs; + for (i = 0, prev = -1; i < reg_lists[j].size; i++, reg++) { + u32 offset = i915_mmio_reg_offset(*reg); - if (prev >= (s32)offset) { - pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", - __func__, i, offset, prev); - return -EINVAL; - } + if (prev >= (s32)offset) { + pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", + __func__, i, offset, prev); + return -EINVAL; + } - prev = offset; + prev = offset; + } } return 0; @@ -90,6 +100,7 @@ int intel_uncore_mock_selftests(void) { __vlv_fw_ranges, ARRAY_SIZE(__vlv_fw_ranges), false }, { __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false }, { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, + { __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true }, }; int err, i; -- cgit v1.2.3 From a4713c5a8d612b4a3445874a9eb8af5a92c08bd0 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 7 Mar 2018 14:09:12 -0800 Subject: drm/i915/cnl: Add Wa_2201832410 "Clock gating bug in GWL may not clear barrier state when an EOT is received, causing a hang the next time that barrier is used." 
HSDES: 2201832410 Cc: Rafael Antognolli Signed-off-by: Rodrigo Vivi Reviewed-by: Rafael Antognolli Link: https://patchwork.freedesktop.org/patch/msgid/20180307220912.3681-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_pm.c | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 10580826319e..9e765462ca44 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3965,6 +3965,9 @@ enum { #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) +#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) +#define GWUNIT_CLKGATE_DIS (1 << 16) + #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS (1 << 20) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6cab20ce167a..b8da4dcdd584 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8522,6 +8522,11 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) val |= SARBUNIT_CLKGATE_DIS; I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); + /* Wa_2201832410:cnl */ + val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE); + val |= GWUNIT_CLKGATE_DIS; + I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val); + /* WaDisableVFclkgate:cnl */ /* WaVFUnitClockGatingDisable:cnl */ val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE); -- cgit v1.2.3 From 401d0ae326c92185f1727b0f12834197536265ae Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 7 Mar 2018 16:13:33 -0800 Subject: drm/i915/guc: work around gcc-4.4.4 union initializer issue gcc-4.4.4 has problems with initalizers of anon unions. drivers/gpu/drm/i915/intel_guc_log.c: In function 'guc_log_control': drivers/gpu/drm/i915/intel_guc_log.c:64: error: unknown field 'logging_enabled' specified in initializer Work around this. Fixes: 35fe703c3161 ("drm/i915/guc: Change values for i915_guc_log_control") Cc: Michal Wajdeczko Cc: Sagar Arun Kamble Cc: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Chris Wilson Signed-off-by: Andrew Morton Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180308001333.rI2vrNRTY%akpm@linux-foundation.org --- drivers/gpu/drm/i915/intel_guc_log.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 7b5074e2120c..c0c2e7d1c7d7 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -61,8 +61,10 @@ static int guc_log_flush(struct intel_guc *guc) static int guc_log_control(struct intel_guc *guc, bool enable, u32 verbosity) { union guc_log_control control_val = { - .logging_enabled = enable, - .verbosity = verbosity, + { + .logging_enabled = enable, + .verbosity = verbosity, + }, }; u32 action[] = { INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING, -- cgit v1.2.3 From 8cc7669355136f8952779e6f60053c1284d59c4d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 6 Mar 2018 12:28:52 +0000 Subject: drm/i915: store all subslice masks Up to now, subslice mask was assumed to be uniform across slices. But starting with Cannonlake, slices can be asymmetric (for example slice0 has different number of subslices as slice1+). This change stores all subslices masks for all slices rather than having a single mask that applies to all slices. 
v2: Rework how we store total numbers in sseu_dev_info (Tvrtko) Fix CHV eu masks, was reading disabled as enabled (Tvrtko) Readability changes (Tvrtko) Add EU index helper (Tvrtko) v3: Turn ALIGN(v, 8) / 8 into DIV_ROUND_UP(v, BITS_PER_BYTE) (Tvrtko) Reuse sseu_eu_idx() for setting eu_mask on CHV (Tvrtko) Reformat debug prints for subslices (Tvrtko) v4: Change eu_mask helper into sseu_set_eus() (Tvrtko) v5: With Haswell reporting masks & counts, bump sseu_*_eus() functions to use u16 (Lionel) v6: Fix sseu_get_eus() for > 8 EUs per subslice (Lionel) v7: Change debugfs enabels for number of subslices per slice, will need a small igt/pm_sseu change (Lionel) Drop subslice_total field from sseu_dev_info, rely on sseu_subslice_total() to recompute the value instead (Lionel) v8: Remove unused function compute_subslice_total() (Lionel) Signed-off-by: Lionel Landwerlin Reviewed-by: Tvrtko Ursulin Acked-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-2-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 28 +++-- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/intel_device_info.c | 208 +++++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_device_info.h | 62 ++++++++- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 6 files changed, 237 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e838c765b251..229d4d605cd9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4323,7 +4323,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask = BIT(0); - sseu->subslice_mask |= BIT(ss); + sseu->subslice_mask[0] |= BIT(ss); eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) + @@ -4370,7 +4370,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask |= BIT(s); - sseu->subslice_mask = info->sseu.subslice_mask; + sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; for (ss = 0; ss < ss_max; ss++) { unsigned int eu_cnt; @@ -4425,8 +4425,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); if (IS_GEN9_BC(dev_priv)) - sseu->subslice_mask = - INTEL_INFO(dev_priv)->sseu.subslice_mask; + sseu->subslice_mask[s] = + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; for (ss = 0; ss < ss_max; ss++) { unsigned int eu_cnt; @@ -4436,7 +4436,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, /* skip disabled subslice */ continue; - sseu->subslice_mask |= BIT(ss); + sseu->subslice_mask[s] |= BIT(ss); } eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & @@ -4458,9 +4458,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; if (sseu->slice_mask) { - sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; sseu->eu_per_subslice = INTEL_INFO(dev_priv)->sseu.eu_per_subslice; + for (s = 0; s < fls(sseu->slice_mask); s++) { + sseu->subslice_mask[s] = + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; + } sseu->eu_total = sseu->eu_per_subslice * sseu_subslice_total(sseu); @@ -4479,6 +4482,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, { struct drm_i915_private *dev_priv = node_to_i915(m->private); const char *type = is_available_info ? 
"Available" : "Enabled"; + int s; seq_printf(m, " %s Slice Mask: %04x\n", type, sseu->slice_mask); @@ -4486,10 +4490,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, hweight8(sseu->slice_mask)); seq_printf(m, " %s Subslice Total: %u\n", type, sseu_subslice_total(sseu)); - seq_printf(m, " %s Subslice Mask: %04x\n", type, - sseu->subslice_mask); - seq_printf(m, " %s Subslice Per Slice: %u\n", type, - hweight8(sseu->subslice_mask)); + for (s = 0; s < fls(sseu->slice_mask); s++) { + seq_printf(m, " %s Slice%i subslices: %u\n", type, + s, hweight8(sseu->subslice_mask[s])); + } seq_printf(m, " %s EU Total: %u\n", type, sseu->eu_total); seq_printf(m, " %s EU Per Subslice: %u\n", type, @@ -4523,6 +4527,10 @@ static int i915_sseu_status(struct seq_file *m, void *unused) seq_puts(m, "SSEU Device Status\n"); memset(&sseu, 0, sizeof(sseu)); + sseu.max_slices = INTEL_INFO(dev_priv)->sseu.max_slices; + sseu.max_subslices = INTEL_INFO(dev_priv)->sseu.max_subslices; + sseu.max_eus_per_subslice = + INTEL_INFO(dev_priv)->sseu.max_eus_per_subslice; intel_runtime_pm_get(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d61b51c0bf0b..c594ff5e57d0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -428,7 +428,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_SUBSLICE_MASK: - value = INTEL_INFO(dev_priv)->sseu.subslice_mask; + value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0]; if (!value) return -ENODEV; break; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index cadc5f81ed72..b29a35d441b4 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -81,12 +81,16 @@ void intel_device_info_dump_flags(const struct intel_device_info *info, static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) { + int s; + drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); - drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask); - drm_printf(p, "subslice per slice: %u\n", - hweight8(sseu->subslice_mask)); + for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) { + drm_printf(p, "slice%d %u subslices mask=%04x\n", + s, hweight8(sseu->subslice_mask[s]), + sseu->subslice_mask[s]); + } drm_printf(p, "EU total: %u\n", sseu->eu_total); drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); drm_printf(p, "has slice power gating: %s\n", @@ -120,22 +124,76 @@ void intel_device_info_dump(const struct intel_device_info *info, intel_device_info_dump_flags(info, p); } +static u16 compute_eu_total(const struct sseu_dev_info *sseu) +{ + u16 i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) + total += hweight8(sseu->eu_mask[i]); + + return total; +} + static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; const u32 fuse2 = I915_READ(GEN8_FUSE2); + int s, ss; + const int eu_mask = 0xff; + u32 subslice_mask, eu_en; sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> GEN10_F2_S_ENA_SHIFT; - sseu->subslice_mask = (1 << 4) - 1; - sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); + sseu->max_slices = 6; + sseu->max_subslices = 4; + sseu->max_eus_per_subslice = 8; - sseu->eu_total = 
hweight32(~I915_READ(GEN8_EU_DISABLE0)); - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1)); - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2)); - sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) & - GEN10_EU_DIS_SS_MASK)); + subslice_mask = (1 << 4) - 1; + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> + GEN10_F2_SS_DIS_SHIFT); + + /* + * Slice0 can have up to 3 subslices, but there are only 2 in + * slice1/2. + */ + sseu->subslice_mask[0] = subslice_mask; + for (s = 1; s < sseu->max_slices; s++) + sseu->subslice_mask[s] = subslice_mask & 0x3; + + /* Slice0 */ + eu_en = ~I915_READ(GEN8_EU_DISABLE0); + for (ss = 0; ss < sseu->max_subslices; ss++) + sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); + /* Slice1 */ + sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN8_EU_DISABLE1); + sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); + /* Slice2 */ + sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); + sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); + /* Slice3 */ + sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN8_EU_DISABLE2); + sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); + /* Slice4 */ + sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); + sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); + /* Slice5 */ + sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN10_EU_DISABLE3); + sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); + + /* Do a second pass where we mark the subslices disabled if all their + * eus are off. + */ + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + if (sseu_get_eus(sseu, s, ss) == 0) + sseu->subslice_mask[s] &= ~BIT(ss); + } + } + + sseu->eu_total = compute_eu_total(sseu); /* * CNL is expected to always have a uniform distribution @@ -156,26 +214,39 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; - u32 fuse, eu_dis; + u32 fuse; fuse = I915_READ(CHV_FUSE_GT); sseu->slice_mask = BIT(0); + sseu->max_slices = 1; + sseu->max_subslices = 2; + sseu->max_eus_per_subslice = 8; if (!(fuse & CHV_FGT_DISABLE_SS0)) { - sseu->subslice_mask |= BIT(0); - eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK | - CHV_FGT_EU_DIS_SS0_R1_MASK); - sseu->eu_total += 8 - hweight32(eu_dis); + u8 disabled_mask = + ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> + CHV_FGT_EU_DIS_SS0_R0_SHIFT) | + (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> + CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); + + sseu->subslice_mask[0] |= BIT(0); + sseu_set_eus(sseu, 0, 0, ~disabled_mask); } if (!(fuse & CHV_FGT_DISABLE_SS1)) { - sseu->subslice_mask |= BIT(1); - eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK | - CHV_FGT_EU_DIS_SS1_R1_MASK); - sseu->eu_total += 8 - hweight32(eu_dis); + u8 disabled_mask = + ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> + CHV_FGT_EU_DIS_SS1_R0_SHIFT) | + (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> + CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); + + sseu->subslice_mask[0] |= BIT(1); + sseu_set_eus(sseu, 0, 1, ~disabled_mask); } + sseu->eu_total = compute_eu_total(sseu); + /* * CHV expected to always have a uniform distribution of EU * across subslices. 
@@ -197,41 +268,52 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) { struct intel_device_info *info = mkwrite_device_info(dev_priv); struct sseu_dev_info *sseu = &info->sseu; - int s_max = 3, ss_max = 4, eu_max = 8; int s, ss; - u32 fuse2, eu_disable; - u8 eu_mask = 0xff; + u32 fuse2, eu_disable, subslice_mask; + const u8 eu_mask = 0xff; fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + /* BXT has a single slice and at most 3 subslices. */ + sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3; + sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4; + sseu->max_eus_per_subslice = 8; + /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. */ - sseu->subslice_mask = (1 << ss_max) - 1; - sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> - GEN9_F2_SS_DIS_SHIFT); + subslice_mask = (1 << sseu->max_subslices) - 1; + subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> + GEN9_F2_SS_DIS_SHIFT); /* * Iterate through enabled slices and subslices to * count the total enabled EU. */ - for (s = 0; s < s_max; s++) { + for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; + sseu->subslice_mask[s] = subslice_mask; + eu_disable = I915_READ(GEN9_EU_DISABLE(s)); - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { int eu_per_ss; + u8 eu_disabled_mask; - if (!(sseu->subslice_mask & BIT(ss))) + if (!(sseu->subslice_mask[s] & BIT(ss))) /* skip disabled subslice */ continue; - eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) & - eu_mask); + eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask; + + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + + eu_per_ss = sseu->max_eus_per_subslice - + hweight8(eu_disabled_mask); /* * Record which subslice(s) has(have) 7 EUs. we @@ -240,11 +322,11 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) */ if (eu_per_ss == 7) sseu->subslice_7eu[s] |= BIT(ss); - - sseu->eu_total += eu_per_ss; } } + sseu->eu_total = compute_eu_total(sseu); + /* * SKL is expected to always have a uniform distribution * of EU across subslices with the exception that any one @@ -270,8 +352,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = sseu->eu_per_subslice > 2; if (IS_GEN9_LP(dev_priv)) { -#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) - info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; +#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss))) + info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3; sseu->min_eu_in_pool = 0; if (info->has_pooled_eu) { @@ -289,19 +371,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; - const int s_max = 3, ss_max = 3, eu_max = 8; int s, ss; - u32 fuse2, eu_disable[3]; /* s_max */ + u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */ fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + sseu->max_slices = 3; + sseu->max_subslices = 3; + sseu->max_eus_per_subslice = 8; + /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. 
*/ - sseu->subslice_mask = GENMASK(ss_max - 1, 0); - sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> - GEN8_F2_SS_DIS_SHIFT); + subslice_mask = GENMASK(sseu->max_subslices - 1, 0); + subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> + GEN8_F2_SS_DIS_SHIFT); eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK; eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) | @@ -315,30 +400,38 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * Iterate through enabled slices and subslices to * count the total enabled EU. */ - for (s = 0; s < s_max; s++) { + for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; - for (ss = 0; ss < ss_max; ss++) { + sseu->subslice_mask[s] = subslice_mask; + + for (ss = 0; ss < sseu->max_subslices; ss++) { + u8 eu_disabled_mask; u32 n_disabled; - if (!(sseu->subslice_mask & BIT(ss))) + if (!(sseu->subslice_mask[ss] & BIT(ss))) /* skip disabled subslice */ continue; - n_disabled = hweight8(eu_disable[s] >> (ss * eu_max)); + eu_disabled_mask = + eu_disable[s] >> (ss * sseu->max_eus_per_subslice); + + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + + n_disabled = hweight8(eu_disabled_mask); /* * Record which subslices have 7 EUs. */ - if (eu_max - n_disabled == 7) + if (sseu->max_eus_per_subslice - n_disabled == 7) sseu->subslice_7eu[s] |= 1 << ss; - - sseu->eu_total += eu_max - n_disabled; } } + sseu->eu_total = compute_eu_total(sseu); + /* * BDW is expected to always have a uniform distribution of EU across * subslices with the exception that any one EU in any one subslice may @@ -362,6 +455,7 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) struct intel_device_info *info = mkwrite_device_info(dev_priv); struct sseu_dev_info *sseu = &info->sseu; u32 fuse1; + int s, ss; /* * There isn't a register to tell us how many slices/subslices. We @@ -373,18 +467,22 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) /* fall through */ case 1: sseu->slice_mask = BIT(0); - sseu->subslice_mask = BIT(0); + sseu->subslice_mask[0] = BIT(0); break; case 2: sseu->slice_mask = BIT(0); - sseu->subslice_mask = BIT(0) | BIT(1); + sseu->subslice_mask[0] = BIT(0) | BIT(1); break; case 3: sseu->slice_mask = BIT(0) | BIT(1); - sseu->subslice_mask = BIT(0) | BIT(1); + sseu->subslice_mask[0] = BIT(0) | BIT(1); + sseu->subslice_mask[1] = BIT(0) | BIT(1); break; } + sseu->max_slices = hweight8(sseu->slice_mask); + sseu->max_subslices = hweight8(sseu->subslice_mask[0]); + fuse1 = I915_READ(HSW_PAVP_FUSE1); switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) { default: @@ -401,8 +499,16 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->eu_per_subslice = 6; break; } + sseu->max_eus_per_subslice = sseu->eu_per_subslice; + + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + sseu_set_eus(sseu, s, ss, + (1UL << sseu->eu_per_subslice) - 1); + } + } - sseu->eu_total = sseu_subslice_total(sseu) * sseu->eu_per_subslice; + sseu->eu_total = compute_eu_total(sseu); /* No powergating for you. 
*/ sseu->has_slice_pg = 0; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 7cc5a8e649b5..4bc7b06a789e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -113,10 +113,13 @@ enum intel_platform { func(supports_tv); \ func(has_ipc); +#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SUBSLICES (7) + struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask; - u8 eu_total; + u8 subslice_mask[GEN_MAX_SUBSLICES]; + u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ @@ -124,6 +127,17 @@ struct sseu_dev_info { u8 has_slice_pg:1; u8 has_subslice_pg:1; u8 has_eu_pg:1; + + /* Topology fields */ + u8 max_slices; + u8 max_subslices; + u8 max_eus_per_subslice; + + /* We don't have more than 8 eus per subslice at the moment and as we + * store eus enabled using bits, no need to multiply by eus per + * subslice. + */ + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; typedef u8 intel_ring_mask_t; @@ -176,7 +190,49 @@ struct intel_driver_caps { static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) { - return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); + unsigned int i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) + total += hweight8(sseu->subslice_mask[i]); + + return total; +} + +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu, + int slice, int subslice) +{ + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, + BITS_PER_BYTE); + int slice_stride = sseu->max_subslices * subslice_stride; + + return slice * slice_stride + subslice * subslice_stride; +} + +static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu, + int slice, int subslice) +{ + int i, offset = sseu_eu_idx(sseu, slice, subslice); + u16 eu_mask = 0; + + for (i = 0; + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { + eu_mask |= ((u16) sseu->eu_mask[offset + i]) << + (i * BITS_PER_BYTE); + } + + return eu_mask; +} + +static inline void sseu_set_eus(struct sseu_dev_info *sseu, + int slice, int subslice, u16 eu_mask) +{ + int i, offset = sseu_eu_idx(sseu, slice, subslice); + + for (i = 0; + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { + sseu->eu_mask[offset + i] = + (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; + } } const char *intel_platform_name(enum intel_platform platform); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 999d5f2539d4..d763dfb51190 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2248,7 +2248,7 @@ make_rpcs(struct drm_i915_private *dev_priv) if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { rpcs |= GEN8_RPCS_SS_CNT_ENABLE; - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << GEN8_RPCS_SS_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index d8ddea0174ca..0320c2c4cfba 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -92,7 +92,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) #define instdone_subslice_mask(dev_priv__) \ (INTEL_GEN(dev_priv__) == 7 ? 
\ - 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) + 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0]) #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ for ((slice__) = 0, (subslice__) = 0; \ -- cgit v1.2.3 From b3e7f866927985b11f2e483ba02404d9a3a8685c Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 6 Mar 2018 12:28:53 +0000 Subject: drm/i915/debugfs: reuse max slice/subslices already stored in sseu Now that we have that information in topology fields, let's just reuse it. v2: Style tweaks (Tvrtko) Signed-off-by: Lionel Landwerlin Reviewed-by: Tvrtko Ursulin Acked-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-3-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 27 +++++++++++---------------- drivers/gpu/drm/i915/intel_device_info.c | 2 +- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 229d4d605cd9..bd5f98c60b5b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4338,11 +4338,11 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { const struct intel_device_info *info = INTEL_INFO(dev_priv); - int s_max = 6, ss_max = 4; int s, ss; - u32 s_reg[s_max], eu_reg[2 * s_max], eu_mask[2]; + u32 s_reg[info->sseu.max_slices]; + u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2]; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { /* * FIXME: Valid SS Mask respects the spec and read * only valid bits for those registers, excluding reserverd @@ -4364,7 +4364,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, GEN9_PGCTL_SSB_EU210_ACK | GEN9_PGCTL_SSB_EU311_ACK; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) /* skip disabled slice */ continue; @@ -4372,7 +4372,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) @@ -4392,17 +4392,12 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { - int s_max = 3, ss_max = 4; + const struct intel_device_info *info = INTEL_INFO(dev_priv); int s, ss; - u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2]; - - /* BXT has a single slice and at most 3 subslices. 
*/ - if (IS_GEN9_LP(dev_priv)) { - s_max = 1; - ss_max = 3; - } + u32 s_reg[info->sseu.max_slices]; + u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2]; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s)); eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s)); eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s)); @@ -4417,7 +4412,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, GEN9_PGCTL_SSB_EU210_ACK | GEN9_PGCTL_SSB_EU311_ACK; - for (s = 0; s < s_max; s++) { + for (s = 0; s < info->sseu.max_slices; s++) { if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) /* skip disabled slice */ continue; @@ -4428,7 +4423,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->subslice_mask[s] = INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; if (IS_GEN9_LP(dev_priv)) { diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index b29a35d441b4..14f2c72fdcff 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -308,7 +308,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) /* skip disabled subslice */ continue; - eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask; + eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask; sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); -- cgit v1.2.3 From 79e9cd5f14dda3a6a47b2f0ac288e9c52d6435a9 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 6 Mar 2018 12:28:54 +0000 Subject: drm/i915/debugfs: add rcs topology entry While the end goal is to make this information available to userspace through a new ioctl, there is no reason we can't display it in a human readable fashion through debugfs. 
slice0: 3 subslice(s) (0x7): subslice0: 8 EUs (0xff) subslice1: 8 EUs (0xff) subslice2: 8 EUs (0xff) subslice3: 0 EUs (0x0) slice1: 3 subslice(s) (0x7): subslice0: 8 EUs (0xff) subslice1: 8 EUs (0xff) subslice2: 8 EUs (0xff) subslice3: 0 EUs (0x0) slice2: 3 subslice(s) (0x7): subslice0: 8 EUs (0xff) subslice1: 8 EUs (0xff) subslice2: 8 EUs (0xff) subslice3: 0 EUs (0x0) v2: Reformat debugfs printing (Tvrtko) Use the new EU mask helper (Tvrtko) v3: Move printing code to intel_device_info.c to be shared with error state (Michal) v4: Bump u8 to u16 when using sseu_get_eus() (Lionel) Suggested-by: Chris Wilson Signed-off-by: Lionel Landwerlin Reviewed-by: Tvrtko Ursulin Acked-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-4-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 11 +++++++++++ drivers/gpu/drm/i915/intel_device_info.c | 24 ++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_device_info.h | 2 ++ 3 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bd5f98c60b5b..89f7ff2c652e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3201,6 +3201,16 @@ static int i915_engine_info(struct seq_file *m, void *unused) return 0; } +static int i915_rcs_topology(struct seq_file *m, void *unused) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + intel_device_info_dump_topology(&INTEL_INFO(dev_priv)->sseu, &p); + + return 0; +} + static int i915_shrinker_info(struct seq_file *m, void *unused) { struct drm_i915_private *i915 = node_to_i915(m->private); @@ -4733,6 +4743,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_dmc_info", i915_dmc_info, 0}, {"i915_display_info", i915_display_info, 0}, {"i915_engine_info", i915_engine_info, 0}, + {"i915_rcs_topology", i915_rcs_topology, 0}, {"i915_shrinker_info", i915_shrinker_info, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0}, diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 14f2c72fdcff..3dd350f7b8e6 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -124,6 +124,30 @@ void intel_device_info_dump(const struct intel_device_info *info, intel_device_info_dump_flags(info, p); } +void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p) +{ + int s, ss; + + if (sseu->max_slices == 0) { + drm_printf(p, "Unavailable\n"); + return; + } + + for (s = 0; s < sseu->max_slices; s++) { + drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n", + s, hweight8(sseu->subslice_mask[s]), + sseu->subslice_mask[s]); + + for (ss = 0; ss < sseu->max_subslices; ss++) { + u16 enabled_eus = sseu_get_eus(sseu, s, ss); + + drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n", + ss, hweight16(enabled_eus), enabled_eus); + } + } +} + static u16 compute_eu_total(const struct sseu_dev_info *sseu) { u16 i, total = 0; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 4bc7b06a789e..0835752c8b22 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -244,6 +244,8 @@ void intel_device_info_dump_flags(const struct intel_device_info *info, struct drm_printer *p); void intel_device_info_dump_runtime(const struct intel_device_info *info, struct 
drm_printer *p); +void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p); void intel_driver_caps_print(const struct intel_driver_caps *caps, struct drm_printer *p); -- cgit v1.2.3 From cac6cfaa2fe4f9be753daeefd523ebcc0f46e331 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 6 Mar 2018 12:28:55 +0000 Subject: drm/i915: add rcs topology to error state This might be useful information for developers looking at an error state. v2: Place topology towards the end of the error state (Chris) v3: Reuse common printing code (Michal) v4: Make this a one-liner (Chris) Signed-off-by: Lionel Landwerlin Reviewed-by: Tvrtko Ursulin Acked-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-5-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9e5e9547adb2..f89ac7a8f95f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -586,6 +586,7 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m, intel_device_info_dump_flags(info, &p); intel_driver_caps_print(caps, &p); + intel_device_info_dump_topology(&info->sseu, &p); } static void err_print_params(struct drm_i915_error_state_buf *m, -- cgit v1.2.3 From a446ae2c6e6555048301f2339cfd97b8eed6d0b7 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 6 Mar 2018 12:28:56 +0000 Subject: drm/i915: add query uAPI There is a range of information readable from hardware registers that we would like to make accessible to userspace. One particular example is the topology of the execution units (how execution units are grouped into subslices and slices, and also which ones have been fused off for die recovery). At the moment the GET_PARAM ioctl covers some basic needs, but generally it is only able to return a single value for each defined parameter. This is a bit problematic with topology descriptions, which are arrays/maps of available units. This change introduces a new ioctl that can deal with requests to fill structures of potentially variable lengths. The user is expected to fill in a query with the length fields set to 0 on the first call; the kernel then sets the length fields to their expected values. A second call to the kernel with the length fields at their expected values will trigger a copy of the data to the pointed-to memory locations. The scope of this uAPI is only to provide information to userspace, not to allow configuration of the device. v2: Simplify dispatcher code iteration (Tvrtko) Tweak uapi drm_i915_query_item structure (Tvrtko) v3: Rename pad fields into flags (Chris) Return error on flags field != 0 (Chris) Only copy length back to userspace in drm_i915_query_item (Chris) v4: Use array of functions instead of switch (Chris) v5: More comments in uapi (Tvrtko) Return query item errors in length field (All) v6: Tweak uapi comments style to match the coding style (Lionel) v7: Add i915_query.h (Joonas) v8: (Lionel) Change the behavior of the item iterator to report invalid queries into the query item rather than stopping the iteration. This enables userspace applications to query newer items on older kernels and only have failure on the items that are not supported.
v9: Edit copyright headers (Joonas) v10: Typos & comments in uapi (Joonas) Signed-off-by: Lionel Landwerlin Reviewed-by: Tvrtko Ursulin Acked-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/i915_query.c | 50 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_query.h | 15 ++++++++++++ include/uapi/drm/i915_drm.h | 46 ++++++++++++++++++++++++++++++++--- 5 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_query.c create mode 100644 drivers/gpu/drm/i915/i915_query.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 1bd9bc5b8c5c..4eee91a3a236 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -69,6 +69,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ + i915_query.o \ i915_request.o \ i915_trace_points.o \ i915_vma.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c594ff5e57d0..d7c4de45644d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -49,6 +49,7 @@ #include "i915_drv.h" #include "i915_trace.h" #include "i915_pmu.h" +#include "i915_query.h" #include "i915_vgpu.h" #include "intel_drv.h" #include "intel_uc.h" @@ -2832,6 +2833,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), }; static struct drm_driver driver = { diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c new file mode 100644 index 000000000000..5582e6c3234a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_query.c @@ -0,0 +1,50 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_query.h" +#include <uapi/drm/i915_drm.h> + +static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { +}; + +int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_query *args = data; + struct drm_i915_query_item __user *user_item_ptr = + u64_to_user_ptr(args->items_ptr); + u32 i; + + if (args->flags != 0) + return -EINVAL; + + for (i = 0; i < args->num_items; i++, user_item_ptr++) { + struct drm_i915_query_item item; + u64 func_idx; + int ret; + + if (copy_from_user(&item, user_item_ptr, sizeof(item))) + return -EFAULT; + + if (item.query_id == 0) + return -EINVAL; + + func_idx = item.query_id - 1; + + if (func_idx < ARRAY_SIZE(i915_query_funcs)) + ret = i915_query_funcs[func_idx](dev_priv, &item); + else + ret = -EINVAL; + + /* Only write the length back to userspace if they differ.
*/ + if (ret != item.length && put_user(ret, &user_item_ptr->length)) + return -EFAULT; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_query.h b/drivers/gpu/drm/i915/i915_query.h new file mode 100644 index 000000000000..31dcef181f63 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_query.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef _I915_QUERY_H_ +#define _I915_QUERY_H_ + +struct drm_device; +struct drm_file; + +int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 29fa48e4755d..eedd5a23a944 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -318,6 +318,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_PERF_OPEN 0x36 #define DRM_I915_PERF_ADD_CONFIG 0x37 #define DRM_I915_PERF_REMOVE_CONFIG 0x38 +#define DRM_I915_QUERY 0x39 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -375,6 +376,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) +#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -1606,15 +1608,53 @@ struct drm_i915_perf_oa_config { __u32 n_flex_regs; /* - * These fields are pointers to tuples of u32 values (register - * address, value). For example the expected length of the buffer - * pointed by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). + * These fields are pointers to tuples of u32 values (register address, + * value). For example the expected length of the buffer pointed by + * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). */ __u64 mux_regs_ptr; __u64 boolean_regs_ptr; __u64 flex_regs_ptr; }; +struct drm_i915_query_item { + __u64 query_id; + + /* + * When set to zero by userspace, this is filled with the size of the + * data to be written at the data_ptr pointer. The kernel sets this + * value to a negative value to signal an error on a particular query + * item. + */ + __s32 length; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * Data will be written at the location pointed by data_ptr when the + * value of length matches the length of the data to be written by the + * kernel. + */ + __u64 data_ptr; +}; + +struct drm_i915_query { + __u32 num_items; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * This points to an array of num_items drm_i915_query_item structures. + */ + __u64 items_ptr; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From c822e059185585f79b2007b1d2cafacf4264e610 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 6 Mar 2018 12:28:57 +0000 Subject: drm/i915: expose rcs topology through query uAPI With the introduction of asymmetric slices in CNL, we cannot rely on the previous SUBSLICE_MASK getparam to tell userspace what subslices are available. Here we introduce a more detailed way of querying the Gen's GPU topology that doesn't aggregate numbers. 
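To illustrate the intended flow, here is a minimal, hypothetical userspace sketch (not part of this patch; error handling mostly elided; it assumes libdrm's i915_drm.h carries the definitions added by this series). It performs the two ioctl calls and decodes only the subslice masks, following the formulas documented in i915_drm.h:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* fd is assumed to be an open i915 DRM file descriptor. */
static void print_topology(int fd)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	struct drm_i915_query_topology_info *topo;
	int s, ss;

	/* First call: item.length == 0, the kernel writes back the size. */
	if (ioctl(fd, DRM_IOCTL_I915_QUERY, &query) < 0 || item.length <= 0)
		return;

	topo = calloc(1, item.length);
	if (!topo)
		return;
	item.data_ptr = (uintptr_t)topo;

	/* Second call: lengths match, the kernel copies the data out. */
	if (ioctl(fd, DRM_IOCTL_I915_QUERY, &query) == 0) {
		for (s = 0; s < topo->max_slices; s++) {
			/* Slice availability: (data[X / 8] >> (X % 8)) & 1 */
			if (!((topo->data[s / 8] >> (s % 8)) & 1))
				continue;
			for (ss = 0; ss < topo->max_subslices; ss++) {
				int byte = topo->subslice_offset +
					   s * topo->subslice_stride + ss / 8;
				printf("slice%d subslice%d: %s\n", s, ss,
				       (topo->data[byte] >> (ss % 8)) & 1 ?
				       "enabled" : "fused off");
			}
		}
	}

	free(topo);
}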
Such detailed topology information is essential for monitoring parts of the GPU with the OA unit, because counters need to be normalized to the number of EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do not give us sufficient information. The Mesa series making use of this API is: https://patchwork.freedesktop.org/series/38795/ As a bonus we can draw representations of the GPU: https://imgur.com/a/vuqpa v2: Rename uapi struct s/_mask/_info/ (Tvrtko) Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko) Add uapi macros to read data from *_info structs (Tvrtko) v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko) v4: factorize query item writing (Tvrtko) tweak uapi struct/define names (Tvrtko) v5: Replace ALIGN() macro (Chris) v6: Updated uapi comments (Tvrtko) Moved flags != 0 checks into vfuncs (Tvrtko) v7: Use access_ok() before copying anything, to avoid overflows (Chris) Switch BUG_ON() to GEM_WARN_ON() (Tvrtko) v8: Tweak uapi comments style to match the coding style (Lionel) v9: Fix error in comment about computation of enabled subslice (Tvrtko) v10: Fix/update comments in uAPI (Sagar) v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single drm_i915_query_topology_info (Joonas) v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas) v13: Fix comment in uAPI (Joonas) Signed-off-by: Lionel Landwerlin Acked-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_query.c | 75 +++++++++++++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 62 ++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 5582e6c3234a..3ace929dd90f 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -8,8 +8,83 @@ #include "i915_query.h" #include <uapi/drm/i915_drm.h> +static int query_topology_info(struct drm_i915_private *dev_priv, + struct drm_i915_query_item *query_item) +{ + const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu; + struct drm_i915_query_topology_info topo; + u32 slice_length, subslice_length, eu_length, total_length; + + if (query_item->flags != 0) + return -EINVAL; + + if (sseu->max_slices == 0) + return -ENODEV; + + BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask)); + + slice_length = sizeof(sseu->slice_mask); + subslice_length = sseu->max_slices * + DIV_ROUND_UP(sseu->max_subslices, + sizeof(sseu->subslice_mask[0]) * BITS_PER_BYTE); + eu_length = sseu->max_slices * sseu->max_subslices * + DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); + + total_length = sizeof(topo) + slice_length + subslice_length + eu_length; + + if (query_item->length == 0) + return total_length; + + if (query_item->length < total_length) + return -EINVAL; + + if (copy_from_user(&topo, u64_to_user_ptr(query_item->data_ptr), + sizeof(topo))) + return -EFAULT; + + if (topo.flags != 0) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(query_item->data_ptr), + total_length)) + return -EFAULT; + + memset(&topo, 0, sizeof(topo)); + topo.max_slices = sseu->max_slices; + topo.max_subslices = sseu->max_subslices; + topo.max_eus_per_subslice = sseu->max_eus_per_subslice; + + topo.subslice_offset = slice_length; + topo.subslice_stride = DIV_ROUND_UP(sseu->max_subslices, BITS_PER_BYTE); + topo.eu_offset = slice_length + subslice_length; + topo.eu_stride =
DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr), + &topo, sizeof(topo))) + return -EFAULT; + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)), + &sseu->slice_mask, slice_length)) + return -EFAULT; + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + + sizeof(topo) + slice_length), + sseu->subslice_mask, subslice_length)) + return -EFAULT; + + if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + + sizeof(topo) + + slice_length + subslice_length), + sseu->eu_mask, eu_length)) + return -EFAULT; + + return total_length; +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { + query_topology_info, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index eedd5a23a944..7f5634ce8e88 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1619,6 +1619,7 @@ struct drm_i915_perf_oa_config { struct drm_i915_query_item { __u64 query_id; +#define DRM_I915_QUERY_TOPOLOGY_INFO 1 /* * When set to zero by userspace, this is filled with the size of the @@ -1655,6 +1656,67 @@ struct drm_i915_query { __u64 items_ptr; }; +/* + * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO : + * + * data: contains the 3 pieces of information : + * + * - the slice mask with one bit per slice telling whether a slice is + * available. The availability of slice X can be queried with the following + * formula : + * + * (data[X / 8] >> (X % 8)) & 1 + * + * - the subslice mask for each slice with one bit per subslice telling + * whether a subslice is available. The availability of subslice Y in slice + * X can be queried with the following formula : + * + * (data[subslice_offset + + * X * subslice_stride + + * Y / 8] >> (Y % 8)) & 1 + * + * - the EU mask for each subslice in each slice with one bit per EU telling + * whether an EU is available. The availability of EU Z in subslice Y in + * slice X can be queried with the following formula : + * + * (data[eu_offset + + * (X * max_subslices + Y) * eu_stride + + * Z / 8] >> (Z % 8)) & 1 + */ +struct drm_i915_query_topology_info { + /* + * Unused for now. Must be cleared to zero. + */ + __u16 flags; + + __u16 max_slices; + __u16 max_subslices; + __u16 max_eus_per_subslice; + + /* + * Offset in data[] at which the subslice masks are stored. + */ + __u16 subslice_offset; + + /* + * Stride at which each of the subslice masks for each slice are + * stored. + */ + __u16 subslice_stride; + + /* + * Offset in data[] at which the EU masks are stored. + */ + __u16 eu_offset; + + /* + * Stride at which each of the EU masks for each subslice are stored. + */ + __u16 eu_stride; + + __u8 data[]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 702791f7f20473813e613b7899d2c5ba5dc86b04 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Tue, 6 Mar 2018 10:15:57 +0800 Subject: drm/i915: add schedule out notification of preempted but completed request There is one corner case where the schedule out notification of the preempted request is missed: if the preempted request has just completed when the preemption happens, it will be canceled and won't be resubmitted later, so GVT-g will lose the schedule out notification. Add the schedule out notification for the case where the preempted request is found to have already completed. v2: - refine description, add completed check and notification in execlists_cancel_port_requests.
(Chris) v3: - use ternary conditional, remove local variable. (Tvrtko) Cc: Chris Wilson Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang Reviewed-by: Tvrtko Ursulin Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/1520302557-25079-1-git-send-email-weinan.z.li@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d763dfb51190..3a69b367e565 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -727,7 +727,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) GEM_BUG_ON(!execlists->active); intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); + + execlists_context_status_change(rq, + i915_request_completed(rq) ? + INTEL_CONTEXT_SCHEDULE_OUT : + INTEL_CONTEXT_SCHEDULE_PREEMPTED); + + i915_request_put(rq); memset(port, 0, sizeof(*port)); -- cgit v1.2.3 From cf07a60f03f06d6298f4e70b3865bd5faac21c3b Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 8 Mar 2018 14:49:39 +0200 Subject: drm/i915: Update DRIVER_DATE to 20180308 Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 604389d0b6a3..6e740f6fe33f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -83,8 +83,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20180305" -#define DRIVER_TIMESTAMP 1520243775 +#define DRIVER_DATE "20180308" +#define DRIVER_TIMESTAMP 1520513379 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions -- cgit v1.2.3