diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2016-10-28 13:58:44 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2016-10-28 20:53:50 +0100 |
commit | d07f0e59b2c762584478920cd2d11fba2980a94a (patch) | |
tree | a9fc0cade87570bdec356493e6744ee5223bdaaf /drivers/gpu/drm/i915/i915_drv.h | |
parent | f0cd518206e1a47e57bc251e1faba9d38eadcc59 (diff) | |
download | lwn-d07f0e59b2c762584478920cd2d11fba2980a94a.tar.gz lwn-d07f0e59b2c762584478920cd2d11fba2980a94a.zip |
drm/i915: Move GEM activity tracking into a common struct reservation_object
In preparation to support many distinct timelines, we need to expand the
activity tracking on the GEM object to handle more than just a request
per engine. We already use the struct reservation_object on the dma-buf
to handle many fence contexts, so integrating that into the GEM object
itself is the preferred solution. (For example, we can now share the same
reservation_object between every consumer/producer using this buffer and
skip the manual import/export via dma-buf.)
v2: Reimplement busy-ioctl (by walking the reservation object), postpone
the ABI change for another day. Similarly use the reservation object to
find the last_write request (if active and from i915) for choosing
display CS flips.
Caveats:
* busy-ioctl: busy-ioctl only reports on the native fences, it will not
warn of stalls (in set-domain-ioctl, pread/pwrite etc) if the object is
being rendered to by external fences. It also will not report the same
busy state as wait-ioctl (or polling on the dma-buf) in the same
circumstances. On the plus side, it does retain reporting of which
*i915* engines are engaged with this object.
* non-blocking atomic modesets take a step backwards as the wait for
render completion blocks the ioctl. This is fixed in a subsequent
patch to use a fence instead for awaiting on the rendering, see
"drm/i915: Restore nonblocking awaits for modesetting"
* dynamic array manipulation for shared-fences in reservation is slower
than the previous lockless static assignment (e.g. gem_exec_lut_handle
runtime on ivb goes from 42s to 66s), mainly due to atomic operations
(maintaining the fence refcounts).
* loss of object-level retirement callbacks, emulated by VMA retirement
tracking.
* minor loss of object-level last activity information from debugfs,
could be replaced with per-vma information if desired
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-21-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_drv.h')
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 62 |
1 files changed, 24 insertions, 38 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c3c49bc4d2ac..693ee69a4715 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -41,6 +41,7 @@ #include <linux/intel-iommu.h> #include <linux/kref.h> #include <linux/pm_qos.h> +#include <linux/reservation.h> #include <linux/shmem_fs.h> #include <drm/drmP.h> @@ -2246,21 +2247,12 @@ struct drm_i915_gem_object { struct list_head batch_pool_link; unsigned long flags; - /** - * This is set if the object is on the active lists (has pending - * rendering and so a non-zero seqno), and is not set if it i s on - * inactive (ready to be unbound) list. - */ -#define I915_BO_ACTIVE_SHIFT 0 -#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1) -#define __I915_BO_ACTIVE(bo) \ - ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK) /** * Have we taken a reference for the object for incomplete GPU * activity? */ -#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES) +#define I915_BO_ACTIVE_REF 0 /* * Is the object to be mapped as read-only to the GPU @@ -2281,6 +2273,7 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ unsigned int bind_count; + unsigned int active_count; unsigned int pin_display; struct { @@ -2320,8 +2313,7 @@ struct drm_i915_gem_object { * read request. This allows for the CPU to read from an active * buffer by only waiting for the write to complete. */ - struct i915_gem_active last_read[I915_NUM_ENGINES]; - struct i915_gem_active last_write; + struct reservation_object *resv; /** References from framebuffers, locks out tiling changes. */ unsigned long framebuffer_references; @@ -2340,6 +2332,8 @@ struct drm_i915_gem_object { /** for phys allocated objects */ struct drm_dma_handle *phys_handle; + + struct reservation_object __builtin_resv; }; static inline struct drm_i915_gem_object * @@ -2425,35 +2419,10 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; } -static inline unsigned long -i915_gem_object_get_active(const struct drm_i915_gem_object *obj) -{ - return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK; -} - static inline bool i915_gem_object_is_active(const struct drm_i915_gem_object *obj) { - return i915_gem_object_get_active(obj); -} - -static inline void -i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine) -{ - obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT); -} - -static inline void -i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine) -{ - obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT); -} - -static inline bool -i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj, - int engine) -{ - return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT); + return obj->active_count; } static inline bool @@ -2496,6 +2465,23 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj) return obj->tiling_and_stride & STRIDE_MASK; } +static inline struct intel_engine_cs * +i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) +{ + struct intel_engine_cs *engine = NULL; + struct dma_fence *fence; + + rcu_read_lock(); + fence = reservation_object_get_excl_rcu(obj->resv); + rcu_read_unlock(); + + if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + dma_fence_put(fence); + + return engine; +} + static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) { i915_gem_object_get(vma->obj); |