diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_exec_queue.c')
| -rw-r--r-- | drivers/gpu/drm/xe/xe_exec_queue.c | 1056 |
1 files changed, 966 insertions, 90 deletions
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7e1abbbfba12..071b8c41df43 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -10,21 +10,95 @@ #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> +#include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> +#include "xe_bo.h" +#include "xe_dep_scheduler.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_sriov_pf.h" +#include "xe_gt_sriov_vf.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_hw_engine_group.h" -#include "xe_hw_fence.h" #include "xe_irq.h" #include "xe_lrc.h" #include "xe_macros.h" #include "xe_migrate.h" #include "xe_pm.h" -#include "xe_ring_ops_types.h" #include "xe_trace.h" #include "xe_vm.h" +#include "xe_pxp.h" + +/** + * DOC: Execution Queue + * + * An Execution queue is an interface for the HW context of execution. + * The user creates an execution queue, submits the GPU jobs through those + * queues and in the end destroys them. + * + * Execution queues can also be created by XeKMD itself for driver internal + * operations like object migration etc. + * + * An execution queue is associated with a specified HW engine or a group of + * engines (belonging to the same tile and engine class) and any GPU job + * submitted on the queue will be run on one of these engines. + * + * An execution queue is tied to an address space (VM). It holds a reference + * of the associated VM and the underlying Logical Ring Context/s (LRC/s) + * until the queue is destroyed. + * + * The execution queue sits on top of the submission backend. It opaquely + * handles the GuC and Execlist backends whichever the platform uses, and + * the ring operations the different engine classes support. + */ + +/** + * DOC: Multi Queue Group + * + * Multi Queue Group is another mode of execution supported by the compute + * and blitter copy command streamers (CCS and BCS, respectively). It is + * an enhancement of the existing hardware architecture and leverages the + * same submission model. It enables support for efficient, parallel + * execution of multiple queues within a single shared context. The multi + * queue group functionality is only supported with GuC submission backend. + * All the queues of a group must use the same address space (VM). + * + * The DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE execution queue property + * supports creating a multi queue group and adding queues to a queue group. + * + * The XE_EXEC_QUEUE_CREATE ioctl call with above property with value field + * set to DRM_XE_MULTI_GROUP_CREATE, will create a new multi queue group with + * the queue being created as the primary queue (aka q0) of the group. To add + * secondary queues to the group, they need to be created with the above + * property with id of the primary queue as the value. The properties of + * the primary queue (like priority, time slice) applies to the whole group. + * So, these properties can't be set for secondary queues of a group. + * + * The hardware does not support removing a queue from a multi-queue group. + * However, queues can be dynamically added to the group. A group can have + * up to 64 queues. To support this, XeKMD holds references to LRCs of the + * queues even after the queues are destroyed by the user until the whole + * group is destroyed. The secondary queues hold a reference to the primary + * queue thus preventing the group from being destroyed when user destroys + * the primary queue. Once the primary queue is destroyed, secondary queues + * can't be added to the queue group and new job submissions on existing + * secondary queues are not allowed. + * + * The queues of a multi queue group can set their priority within the group + * through the DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property. + * This multi queue priority can also be set dynamically through the + * XE_EXEC_QUEUE_SET_PROPERTY ioctl. This is the only other property + * supported by the secondary queues of a multi queue group, other than + * DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE. + * + * When GuC reports an error on any of the queues of a multi queue group, + * the queue cleanup mechanism is invoked for all the queues of the group + * as hardware cannot make progress on the multi queue context. + * + * Refer :ref:`multi-queue-group-guc-interface` for multi queue group GuC + * interface. + */ enum xe_exec_queue_sched_prop { XE_EXEC_QUEUE_JOB_TIMEOUT = 0, @@ -34,19 +108,95 @@ enum xe_exec_queue_sched_prop { }; static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number); + u64 extensions); + +static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_lrc *lrc; + unsigned long idx; + + if (xe_exec_queue_is_multi_queue_secondary(q)) { + /* + * Put pairs with get from xe_exec_queue_lookup() call + * in xe_exec_queue_group_validate(). + */ + xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q)); + return; + } + + if (!group) + return; + + /* Primary queue cleanup */ + xa_for_each(&group->xa, idx, lrc) + xe_lrc_put(lrc); + + xa_destroy(&group->xa); + mutex_destroy(&group->list_lock); + xe_bo_unpin_map_no_vm(group->cgp_bo); + kfree(group); +} static void __xe_exec_queue_free(struct xe_exec_queue *q) { - if (q->vm) + int i; + + for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) + if (q->tlb_inval[i].dep_scheduler) + xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler); + + if (xe_exec_queue_uses_pxp(q)) + xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); + + if (xe_exec_queue_is_multi_queue(q)) + xe_exec_queue_group_cleanup(q); + + if (q->vm) { + xe_vm_remove_exec_queue(q->vm, q); xe_vm_put(q->vm); + } if (q->xef) xe_file_put(q->xef); + kvfree(q->replay_state); kfree(q); } +static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_tile *tile = gt_to_tile(q->gt); + int i; + + for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) { + struct xe_dep_scheduler *dep_scheduler; + struct xe_gt *gt; + struct workqueue_struct *wq; + + if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT) + gt = tile->primary_gt; + else + gt = tile->media_gt; + + if (!gt) + continue; + + wq = gt->tlb_inval.job_wq; + +#define MAX_TLB_INVAL_JOBS 16 /* Picking a reasonable value */ + dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name, + MAX_TLB_INVAL_JOBS); + if (IS_ERR(dep_scheduler)) + return PTR_ERR(dep_scheduler); + + q->tlb_inval[i].dep_scheduler = dep_scheduler; + } +#undef MAX_TLB_INVAL_JOBS + + return 0; +} + static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, @@ -60,7 +210,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, /* only kernel queues can be permanent */ XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL)); - q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL); + q = kzalloc_flex(*q, lrc, width); if (!q) return ERR_PTR(-ENOMEM); @@ -76,8 +226,13 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->ring_ops = gt->ring_ops[hwe->class]; q->ops = gt->exec_queue_ops; INIT_LIST_HEAD(&q->lr.link); + INIT_LIST_HEAD(&q->vm_exec_queue_link); INIT_LIST_HEAD(&q->multi_gt_link); INIT_LIST_HEAD(&q->hw_engine_group_link); + INIT_LIST_HEAD(&q->pxp.link); + spin_lock_init(&q->multi_queue.lock); + spin_lock_init(&q->lrc_lookup_lock); + q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL; q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -90,6 +245,14 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, else q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; + if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) { + err = alloc_dep_schedulers(xe, q); + if (err) { + __xe_exec_queue_free(q); + return ERR_PTR(err); + } + } + if (vm) q->vm = xe_vm_get(vm); @@ -98,7 +261,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, * may set q->usm, must come before xe_lrc_create(), * may overwrite q->sched_props, must come before q->ops->init() */ - err = exec_queue_user_extensions(xe, q, extensions, 0); + err = exec_queue_user_extensions(xe, q, extensions); if (err) { __xe_exec_queue_free(q); return ERR_PTR(err); @@ -108,43 +271,151 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, return q; } -static int __xe_exec_queue_init(struct xe_exec_queue *q) +static void xe_exec_queue_set_lrc(struct xe_exec_queue *q, struct xe_lrc *lrc, u16 idx) { - struct xe_vm *vm = q->vm; - int i, err; + xe_assert(gt_to_xe(q->gt), idx < q->width); - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - return err; + scoped_guard(spinlock, &q->lrc_lookup_lock) + q->lrc[idx] = lrc; +} + +/** + * xe_exec_queue_get_lrc() - Get the LRC from exec queue. + * @q: The exec queue instance. + * @idx: Index within multi-LRC array. + * + * Retrieves LRC of given index for the exec queue under lock + * and takes reference. + * + * Return: Pointer to LRC on success, error on failure, NULL on + * lookup failure. + */ +struct xe_lrc *xe_exec_queue_get_lrc(struct xe_exec_queue *q, u16 idx) +{ + struct xe_lrc *lrc; + + xe_assert(gt_to_xe(q->gt), idx < q->width); + + scoped_guard(spinlock, &q->lrc_lookup_lock) { + lrc = q->lrc[idx]; + if (lrc) + xe_lrc_get(lrc); } - for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec); - if (IS_ERR(q->lrc[i])) { - err = PTR_ERR(q->lrc[i]); - goto err_unlock; - } + return lrc; +} + +/** + * xe_exec_queue_lrc() - Get the LRC from exec queue. + * @q: The exec queue instance. + * + * Retrieves the primary LRC for the exec queue. Note that this function + * returns only the first LRC instance, even when multiple parallel LRCs + * are configured. This function does not increment reference count, + * so the reference can be just forgotten after use. + * + * Return: Pointer to LRC on success, error on failure + */ +struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q) +{ + return q->lrc[0]; +} + +static void __xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + q->ops->fini(q); + + for (i = 0; i < q->width; ++i) + xe_lrc_put(q->lrc[i]); +} + +static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) +{ + int i, err; + u32 flags = 0; + + /* + * PXP workloads executing on RCS or CCS must run in isolation (i.e. no + * other workload can use the EUs at the same time). On MTL this is done + * by setting the RUNALONE bit in the LRC, while starting on Xe2 there + * is a dedicated bit for it. + */ + if (xe_exec_queue_uses_pxp(q) && + (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) { + if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20) + flags |= XE_LRC_CREATE_PXP; + else + flags |= XE_LRC_CREATE_RUNALONE; } - if (vm) - xe_vm_unlock(vm); + if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL)) + flags |= XE_LRC_CREATE_USER_CTX; + + if (q->flags & EXEC_QUEUE_FLAG_DISABLE_STATE_CACHE_PERF_FIX) + flags |= XE_LRC_DISABLE_STATE_CACHE_PERF_FIX; err = q->ops->init(q); if (err) - goto err_lrc; + return err; + + /* + * This must occur after q->ops->init to avoid race conditions during VF + * post-migration recovery, as the fixups for the LRC GGTT addresses + * depend on the queue being present in the backend tracking structure. + * + * In addition to above, we must wait on inflight GGTT changes to avoid + * writing out stale values here. Such wait provides a solid solution + * (without a race) only if the function can detect migration instantly + * from the moment vCPU resumes execution. + */ + for (i = 0; i < q->width; ++i) { + struct xe_lrc *__lrc = NULL; + int marker; + + do { + struct xe_lrc *lrc; + + marker = xe_gt_sriov_vf_wait_valid_ggtt(q->gt); + + lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state, + xe_lrc_ring_size(), q->msix_vec, flags); + if (IS_ERR(lrc)) { + err = PTR_ERR(lrc); + goto err_lrc; + } + + xe_exec_queue_set_lrc(q, lrc, i); + + if (__lrc) + xe_lrc_put(__lrc); + __lrc = lrc; + + } while (marker != xe_vf_migration_fixups_complete_count(q->gt)); + } return 0; -err_unlock: - if (vm) - xe_vm_unlock(vm); err_lrc: - for (i = i - 1; i >= 0; --i) - xe_lrc_put(q->lrc[i]); + __xe_exec_queue_fini(q); return err; } +/** + * xe_exec_queue_create() - Create an exec queue + * @xe: Xe device + * @vm: VM for the exec queue + * @logical_mask: Logical mask of HW engines + * @width: Width of the exec queue (number of LRCs) + * @hwe: Hardware engine + * @flags: Exec queue creation flags + * @extensions: Extensions for exec queue creation + * + * Create an exec queue (allocate and initialize) with the specified parameters + * + * Return: Pointer to the created exec queue on success, ERR_PTR on failure + */ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags, @@ -153,22 +424,54 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v struct xe_exec_queue *q; int err; + /* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */ + xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0))); + q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags, extensions); if (IS_ERR(q)) return q; - err = __xe_exec_queue_init(q); + err = __xe_exec_queue_init(q, flags); if (err) goto err_post_alloc; + /* + * We can only add the queue to the PXP list after the init is complete, + * because the PXP termination can call exec_queue_kill and that will + * go bad if the queue is only half-initialized. This means that we + * can't do it when we handle the PXP extension in __xe_exec_queue_alloc + * and we need to do it here instead. + */ + if (xe_exec_queue_uses_pxp(q)) { + err = xe_pxp_exec_queue_add(xe->pxp, q); + if (err) + goto err_post_init; + } + return q; +err_post_init: + __xe_exec_queue_fini(q); err_post_alloc: __xe_exec_queue_free(q); return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO); +/** + * xe_exec_queue_create_class() - Create an exec queue for a specific engine class + * @xe: Xe device + * @gt: GT for the exec queue + * @vm: VM for the exec queue + * @class: Engine class + * @flags: Exec queue creation flags + * @extensions: Extensions for exec queue creation + * + * Create an exec queue for the specified engine class. + * + * Return: Pointer to the created exec queue on success, ERR_PTR on failure + */ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, enum xe_engine_class class, @@ -200,6 +503,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe * @xe: Xe device. * @tile: tile which bind exec queue belongs to. * @flags: exec queue creation flags + * @user_vm: The user VM which this exec queue belongs to * @extensions: exec queue creation extensions * * Normalize bind exec queue creation. Bind exec queue is tied to migration VM @@ -213,6 +517,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe */ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *user_vm, u32 flags, u64 extensions) { struct xe_gt *gt = tile->primary_gt; @@ -241,29 +546,73 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } xe_vm_put(migrate_vm); + if (!IS_ERR(q)) { + int err = drm_syncobj_create(&q->ufence_syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (err) { + xe_exec_queue_put(q); + return ERR_PTR(err); + } + + if (user_vm) + q->user_vm = xe_vm_get(user_vm); + } + return q; } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); +/** + * xe_exec_queue_destroy() - Destroy an exec queue + * @ref: Reference count of the exec queue + * + * Called when the last reference to the exec queue is dropped. + * Cleans up all resources associated with the exec queue. + * This function should not be called directly; use xe_exec_queue_put() instead. + */ void xe_exec_queue_destroy(struct kref *ref) { struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); struct xe_exec_queue *eq, *next; + int i; + + xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0); + + if (q->ufence_syncobj) + drm_syncobj_put(q->ufence_syncobj); + + if (xe_exec_queue_uses_pxp(q)) + xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); xe_exec_queue_last_fence_put_unlocked(q); + for_each_tlb_inval(i) + xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i); + if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { list_for_each_entry_safe(eq, next, &q->multi_gt_list, multi_gt_link) xe_exec_queue_put(eq); } - q->ops->fini(q); + if (q->user_vm) { + xe_vm_put(q->user_vm); + q->user_vm = NULL; + } + + q->ops->destroy(q); } +/** + * xe_exec_queue_fini() - Finalize an exec queue + * @q: The exec queue + * + * Finalizes the exec queue by updating run ticks, releasing LRC references, + * and freeing the queue structure. This is called after the queue has been + * destroyed and all references have been dropped. + */ void xe_exec_queue_fini(struct xe_exec_queue *q) { - int i; - /* * Before releasing our ref to lrc and xef, accumulate our run ticks * and wakeup any waiters. @@ -272,12 +621,18 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) wake_up_var(&q->xef->exec_queue.pending_removal); - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); - + __xe_exec_queue_fini(q); __xe_exec_queue_free(q); } +/** + * xe_exec_queue_assign_name() - Assign a name to an exec queue + * @q: The exec queue + * @instance: Instance number for the engine + * + * Assigns a human-readable name to the exec queue based on its engine class + * and instance number (e.g., "rcs0", "vcs1", "bcs2"). + */ void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) { switch (q->class) { @@ -304,6 +659,15 @@ void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) } } +/** + * xe_exec_queue_lookup() - Look up an exec queue by ID + * @xef: Xe file private data + * @id: Exec queue ID + * + * Looks up an exec queue by its ID and increments its reference count. + * + * Return: Pointer to the exec queue if found, NULL otherwise + */ struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id) { struct xe_exec_queue *q; @@ -317,6 +681,14 @@ struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id) return q; } +/** + * xe_exec_queue_device_get_max_priority() - Get maximum priority for an exec queues + * @xe: Xe device + * + * Returns the maximum priority level that can be assigned to an exec queues. + * + * Return: Maximum priority level (HIGH if CAP_SYS_NICE, NORMAL otherwise) + */ enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe) { @@ -405,6 +777,221 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * return 0; } +static int +exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value) +{ + if (value == DRM_XE_PXP_TYPE_NONE) + return 0; + + /* we only support HWDRM sessions right now */ + if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM)) + return -EINVAL; + + if (!xe_pxp_is_enabled(xe->pxp)) + return -ENODEV; + + return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM); +} + +static int exec_queue_set_hang_replay_state(struct xe_device *xe, + struct xe_exec_queue *q, + u64 value) +{ + size_t size = xe_gt_lrc_hang_replay_size(q->gt, q->class); + u64 __user *address = u64_to_user_ptr(value); + void *ptr; + + ptr = vmemdup_user(address, size); + if (XE_IOCTL_DBG(xe, IS_ERR(ptr))) + return PTR_ERR(ptr); + + q->replay_state = ptr; + + return 0; +} + +static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_tile *tile = gt_to_tile(q->gt); + struct xe_exec_queue_group *group; + struct xe_bo *bo; + + group = kzalloc_obj(*group); + if (!group) + return -ENOMEM; + + bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED_LATE_RESTORE | + XE_BO_FLAG_FORCE_USER_VRAM | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_GGTT, false); + if (IS_ERR(bo)) { + drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n", + PTR_ERR(bo)); + kfree(group); + return PTR_ERR(bo); + } + + xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K); + + group->primary = q; + group->cgp_bo = bo; + INIT_LIST_HEAD(&group->list); + xa_init_flags(&group->xa, XA_FLAGS_ALLOC1); + mutex_init(&group->list_lock); + q->multi_queue.group = group; + + /* group->list_lock is used in submission backend */ + if (IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&group->list_lock); + fs_reclaim_release(GFP_KERNEL); + } + + return 0; +} + +static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q) +{ + return q->gt->info.multi_queue_engine_class_mask & BIT(q->class); +} + +static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q, + u32 primary_id) +{ + struct xe_exec_queue_group *group; + struct xe_exec_queue *primary; + int ret; + + /* + * Get from below xe_exec_queue_lookup() pairs with put + * in xe_exec_queue_group_cleanup(). + */ + primary = xe_exec_queue_lookup(q->vm->xef, primary_id); + if (XE_IOCTL_DBG(xe, !primary)) + return -ENOENT; + + if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) || + XE_IOCTL_DBG(xe, q->vm != primary->vm) || + XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) { + ret = -EINVAL; + goto put_primary; + } + + group = primary->multi_queue.group; + q->multi_queue.valid = true; + q->multi_queue.group = group; + + return 0; +put_primary: + xe_exec_queue_put(primary); + return ret; +} + +#define XE_MAX_GROUP_SIZE 64 +static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + u32 pos; + int err; + + xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q)); + + /* Primary queue holds a reference to LRCs of all secondary queues */ + err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]), + XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL); + if (XE_IOCTL_DBG(xe, err)) { + xe_lrc_put(q->lrc[0]); + + /* It is invalid if queue group limit is exceeded */ + if (err == -EBUSY) + err = -EINVAL; + + return err; + } + + q->multi_queue.pos = pos; + + return 0; +} + +static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_lrc *lrc; + + xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q)); + + lrc = xa_erase(&group->xa, q->multi_queue.pos); + xe_assert(xe, lrc); + xe_lrc_put(lrc); +} + +static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q, + u64 value) +{ + if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q))) + return -ENODEV; + + if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe))) + return -EOPNOTSUPP; + + if (XE_IOCTL_DBG(xe, !q->vm->xef)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q))) + return -EINVAL; + + if (value & DRM_XE_MULTI_GROUP_CREATE) { + if (XE_IOCTL_DBG(xe, value & ~DRM_XE_MULTI_GROUP_CREATE)) + return -EINVAL; + + q->multi_queue.valid = true; + q->multi_queue.is_primary = true; + q->multi_queue.pos = 0; + return 0; + } + + /* While adding secondary queues, the upper 32 bits must be 0 */ + if (XE_IOCTL_DBG(xe, value & (~0ull << 32))) + return -EINVAL; + + return xe_exec_queue_group_validate(xe, q, value); +} + +static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q, + u64 value) +{ + if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH)) + return -EINVAL; + + /* For queue creation time (!q->xef) setting, just store the priority value */ + if (!q->xef) { + q->multi_queue.priority = value; + return 0; + } + + if (!xe_exec_queue_is_multi_queue(q)) + return -EINVAL; + + return q->ops->set_multi_queue_priority(q, value); +} + +static int exec_queue_set_state_cache_perf_fix(struct xe_device *xe, struct xe_exec_queue *q, + u64 value) +{ + if (XE_IOCTL_DBG(xe, q->class != XE_ENGINE_CLASS_RENDER)) + return -EOPNOTSUPP; + + q->flags |= value != 0 ? EXEC_QUEUE_FLAG_DISABLE_STATE_CACHE_PERF_FIX : 0; + + return 0; +} + typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value); @@ -412,18 +999,97 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type, + [DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] = + exec_queue_set_multi_queue_priority, + [DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX] = + exec_queue_set_state_cache_perf_fix, }; +/** + * xe_exec_queue_set_property_ioctl() - Set a property on an exec queue + * @dev: DRM device + * @data: IOCTL data + * @file: DRM file + * + * Allows setting properties on an existing exec queue. Currently only + * supports setting multi-queue priority. + * + * Return: 0 on success, negative error code on failure + */ +int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_exec_queue_set_property *args = data; + struct xe_exec_queue *q; + int ret; + u32 idx; + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->property != + DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) + return -EINVAL; + + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) + return -ENOENT; + + idx = array_index_nospec(args->property, + ARRAY_SIZE(exec_queue_set_property_funcs)); + ret = exec_queue_set_property_funcs[idx](xe, q, args->value); + if (XE_IOCTL_DBG(xe, ret)) + goto err_post_lookup; + + xe_exec_queue_put(q); + return 0; + + err_post_lookup: + xe_exec_queue_put(q); + return ret; +} + +static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties) +{ + u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) | + BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY); + + /* + * Only MULTI_QUEUE_PRIORITY property is valid for secondary queues of a + * multi-queue group. + */ + if (xe_exec_queue_is_multi_queue_secondary(q) && + properties & ~secondary_queue_valid_props) + return -EINVAL; + + return 0; +} + +static int exec_queue_user_ext_check_final(struct xe_exec_queue *q, u64 properties) +{ + /* MULTI_QUEUE_PRIORITY only applies to multi-queue group queues */ + if ((properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) && + !(properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP))) + return -EINVAL; + + return 0; +} + static int exec_queue_user_ext_set_property(struct xe_device *xe, struct xe_exec_queue *q, - u64 extension) + u64 extension, u64 *properties) { u64 __user *address = u64_to_user_ptr(extension); struct drm_xe_ext_set_property ext; int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -431,27 +1097,37 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, ARRAY_SIZE(exec_queue_set_property_funcs)) || XE_IOCTL_DBG(xe, ext.pad) || XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE && + ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY && + ext.property != DRM_XE_EXEC_QUEUE_SET_DISABLE_STATE_CACHE_PERF_FIX)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); if (!exec_queue_set_property_funcs[idx]) return -EINVAL; + *properties |= BIT_ULL(idx); + err = exec_queue_user_ext_check(q, *properties); + if (XE_IOCTL_DBG(xe, err)) + return err; + return exec_queue_set_property_funcs[idx](xe, q, ext.value); } typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, struct xe_exec_queue *q, - u64 extension); + u64 extension, u64 *properties); static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = { [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, }; #define MAX_USER_EXTENSIONS 16 -static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number) +static int __exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, + u64 extensions, int ext_number, u64 *properties) { u64 __user *address = u64_to_user_ptr(extensions); struct drm_xe_user_extension ext; @@ -461,7 +1137,7 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -472,18 +1148,41 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue idx = array_index_nospec(ext.name, ARRAY_SIZE(exec_queue_user_extension_funcs)); - err = exec_queue_user_extension_funcs[idx](xe, q, extensions); + err = exec_queue_user_extension_funcs[idx](xe, q, extensions, properties); if (XE_IOCTL_DBG(xe, err)) return err; if (ext.next_extension) - return exec_queue_user_extensions(xe, q, ext.next_extension, - ++ext_number); + return __exec_queue_user_extensions(xe, q, ext.next_extension, + ++ext_number, properties); return 0; } -static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, +static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, + u64 extensions) +{ + u64 properties = 0; + int err; + + err = __exec_queue_user_extensions(xe, q, extensions, 0, &properties); + if (XE_IOCTL_DBG(xe, err)) + return err; + + err = exec_queue_user_ext_check_final(q, properties); + if (XE_IOCTL_DBG(xe, err)) + return err; + + if (xe_exec_queue_is_multi_queue_primary(q)) { + err = xe_exec_queue_group_init(xe, q); + if (XE_IOCTL_DBG(xe, err)) + return err; + } + + return 0; +} + +static u32 calc_validate_logical_mask(struct xe_device *xe, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) { @@ -534,6 +1233,29 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, return return_mask; } +static bool has_sched_groups(struct xe_gt *gt) +{ + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_sriov_pf_sched_groups_enabled(gt)) + return true; + + if (IS_SRIOV_VF(gt_to_xe(gt)) && xe_gt_sriov_vf_sched_groups_enabled(gt)) + return true; + + return false; +} + +/** + * xe_exec_queue_create_ioctl() - Create an exec queue via IOCTL + * @dev: DRM device + * @data: IOCTL data + * @file: DRM file + * + * Creates a new exec queue based on user-provided parameters. Supports + * creating VM bind queues, regular exec queues, multi-lrc exec queues + * and multi-queue groups. + * + * Return: 0 on success with exec_queue_id filled in, negative error code on failure + */ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { @@ -545,15 +1267,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, u64_to_user_ptr(args->instances); struct xe_hw_engine *hwe; struct xe_vm *vm; - struct xe_gt *gt; struct xe_tile *tile; struct xe_exec_queue *q = NULL; u32 logical_mask; + u32 flags = 0; u32 id; u32 len; int err; - if (XE_IOCTL_DBG(xe, args->flags) || + if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) || XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; @@ -561,31 +1283,51 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) return -EINVAL; - err = __copy_from_user(eci, user_eci, - sizeof(struct drm_xe_engine_class_instance) * - len); + err = copy_from_user(eci, user_eci, + sizeof(struct drm_xe_engine_class_instance) * len); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) + if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id))) return -EINVAL; + if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) + flags |= EXEC_QUEUE_FLAG_LOW_LATENCY; + if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { if (XE_IOCTL_DBG(xe, args->width != 1) || XE_IOCTL_DBG(xe, args->num_placements != 1) || XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) return -EINVAL; + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -ENOENT; + + err = down_read_interruptible(&vm->lock); + if (err) { + xe_vm_put(vm); + return err; + } + + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { + up_read(&vm->lock); + xe_vm_put(vm); + return -ENOENT; + } + for_each_tile(tile, xe, id) { struct xe_exec_queue *new; - u32 flags = EXEC_QUEUE_FLAG_VM; + flags |= EXEC_QUEUE_FLAG_VM; if (id) flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - new = xe_exec_queue_create_bind(xe, tile, flags, + new = xe_exec_queue_create_bind(xe, tile, vm, flags, args->extensions); if (IS_ERR(new)) { + up_read(&vm->lock); + xe_vm_put(vm); err = PTR_ERR(new); if (q) goto put_exec_queue; @@ -597,9 +1339,10 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, list_add_tail(&new->multi_gt_list, &q->multi_gt_link); } + up_read(&vm->lock); + xe_vm_put(vm); } else { - gt = xe_device_get_gt(xe, eci[0].gt_id); - logical_mask = calc_validate_logical_mask(xe, gt, eci, + logical_mask = calc_validate_logical_mask(xe, eci, args->width, args->num_placements); if (XE_IOCTL_DBG(xe, !logical_mask)) @@ -609,6 +1352,11 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; + /* multi-lrc is only supported on select engine classes */ + if (XE_IOCTL_DBG(xe, args->width > 1 && + !(xe->info.multi_lrc_mask & BIT(hwe->class)))) + return -EOPNOTSUPP; + vm = xe_vm_lookup(xef, args->vm_id); if (XE_IOCTL_DBG(xe, !vm)) return -ENOENT; @@ -625,47 +1373,79 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return -ENOENT; } + /* SRIOV sched groups are not compatible with multi-lrc */ + if (XE_IOCTL_DBG(xe, args->width > 1 && has_sched_groups(hwe->gt))) { + up_read(&vm->lock); + xe_vm_put(vm); + return -EINVAL; + } + q = xe_exec_queue_create(xe, vm, logical_mask, - args->width, hwe, 0, + args->width, hwe, flags, args->extensions); up_read(&vm->lock); xe_vm_put(vm); if (IS_ERR(q)) return PTR_ERR(q); + if (xe_exec_queue_is_multi_queue_secondary(q)) { + err = xe_exec_queue_group_add(xe, q); + if (XE_IOCTL_DBG(xe, err)) + goto put_exec_queue; + } + if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) - goto put_exec_queue; + goto delete_queue_group; } if (q->vm && q->hwe->hw_engine_group) { err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); if (err) - goto put_exec_queue; + goto kill_exec_queue; } } q->xef = xe_file_get(xef); + if (eci[0].engine_class != DRM_XE_ENGINE_CLASS_VM_BIND) + xe_vm_add_exec_queue(vm, q); /* user id alloc must always be last in ioctl to prevent UAF */ err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); if (err) - goto kill_exec_queue; + goto del_hw_engine_group; args->exec_queue_id = id; return 0; +del_hw_engine_group: + if (q->vm && q->hwe && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); kill_exec_queue: xe_exec_queue_kill(q); +delete_queue_group: + if (xe_exec_queue_is_multi_queue_secondary(q)) + xe_exec_queue_group_delete(xe, q); put_exec_queue: xe_exec_queue_put(q); return err; } +/** + * xe_exec_queue_get_property_ioctl() - Get a property from an exec queue + * @dev: DRM device + * @data: IOCTL data + * @file: DRM file + * + * Retrieves property values from an existing exec queue. Currently supports + * getting the ban/reset status. + * + * Return: 0 on success with value filled in, negative error code on failure + */ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { @@ -708,25 +1488,6 @@ bool xe_exec_queue_is_lr(struct xe_exec_queue *q) !(q->flags & EXEC_QUEUE_FLAG_VM); } -static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q) -{ - return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1; -} - -/** - * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full - * @q: The exec_queue - * - * Return: True if the exec_queue's ring is full, false otherwise. - */ -bool xe_exec_queue_ring_full(struct xe_exec_queue *q) -{ - struct xe_lrc *lrc = q->lrc[0]; - s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; - - return xe_exec_queue_num_job_inflight(q) >= max_job; -} - /** * xe_exec_queue_is_idle() - Whether an exec_queue is idle. * @q: The exec_queue @@ -771,7 +1532,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { struct xe_device *xe = gt_to_xe(q->gt); struct xe_lrc *lrc; - u32 old_ts, new_ts; + u64 old_ts, new_ts; int idx; /* @@ -822,6 +1583,16 @@ void xe_exec_queue_kill(struct xe_exec_queue *q) xe_vm_remove_compute_exec_queue(q->vm, q); } +/** + * xe_exec_queue_destroy_ioctl() - Destroy an exec queue via IOCTL + * @dev: DRM device + * @data: IOCTL data + * @file: DRM file + * + * Destroys an existing exec queue and releases its reference. + * + * Return: 0 on success, negative error code on failure + */ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { @@ -857,7 +1628,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - if (q->flags & EXEC_QUEUE_FLAG_VM) { + if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) { + xe_migrate_job_lock_assert(q); + } else if (q->flags & EXEC_QUEUE_FLAG_VM) { lockdep_assert_held(&vm->lock); } else { xe_vm_assert_held(vm); @@ -956,29 +1729,132 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, struct dma_fence *fence) { xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, !dma_fence_is_container(fence)); xe_exec_queue_last_fence_put(q, vm); q->last_fence = dma_fence_get(fence); } /** - * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue + * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence * @q: The exec queue - * @vm: The VM the engine does a bind or exec for + * @vm: The VM the engine does a bind for + * @type: Either primary or media GT + */ +void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + + xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type); +} + +/** + * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB + * invalidation fence unlocked + * @q: The exec queue + * @type: Either primary or media GT * - * Returns: - * -ETIME if there exists an unsignalled last fence dependency, zero otherwise. + * Only safe to be called from xe_exec_queue_destroy(). */ -int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm) +void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q, + unsigned int type) +{ + xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + + dma_fence_put(q->tlb_inval[type].last_fence); + q->tlb_inval[type].last_fence = NULL; +} + +/** + * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation + * @q: The exec queue + * @vm: The VM the engine does a bind for + * @type: Either primary or media GT + * + * Get last fence, takes a ref + * + * Returns: last fence if not signaled, dma fence stub if signaled + */ +struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type) { struct dma_fence *fence; + + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM | + EXEC_QUEUE_FLAG_MIGRATE)); + + if (q->tlb_inval[type].last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &q->tlb_inval[type].last_fence->flags)) + xe_exec_queue_tlb_inval_last_fence_put(q, vm, type); + + fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub(); + dma_fence_get(fence); + return fence; +} + +/** + * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation + * @q: The exec queue + * @vm: The VM the engine does a bind for + * @fence: The fence + * @type: Either primary or media GT + * + * Set the last fence for the tlb invalidation type on the queue. Increases + * reference count for fence, when closing queue + * xe_exec_queue_tlb_inval_last_fence_put should be called. + */ +void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q, + struct xe_vm *vm, + struct dma_fence *fence, + unsigned int type) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM | + EXEC_QUEUE_FLAG_MIGRATE)); + xe_assert(vm->xe, !dma_fence_is_container(fence)); + + xe_exec_queue_tlb_inval_last_fence_put(q, vm, type); + q->tlb_inval[type].last_fence = dma_fence_get(fence); +} + +/** + * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references + * within all LRCs of a queue. + * @q: the &xe_exec_queue struct instance containing target LRCs + * @scratch: scratch buffer to be used as temporary storage + * + * Returns: zero on success, negative error code on failure + */ +int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch) +{ + int i; int err = 0; - fence = xe_exec_queue_last_fence_get(q, vm); - if (fence) { - err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ? - 0 : -ETIME; - dma_fence_put(fence); + for (i = 0; i < q->width; ++i) { + struct xe_lrc *lrc; + + lrc = xe_exec_queue_get_lrc(q, i); + if (!lrc) + continue; + + xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch); + xe_lrc_update_hwctx_regs_with_address(lrc); + err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch); + xe_lrc_put(lrc); + if (err) + break; } return err; |
