From 3fe630c7715aaa1cbb6da8b8b5562882971241e9 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Fri, 28 Apr 2023 10:01:24 +0000 Subject: drm: fix code style for embedded structs in hdr_metadata_infoframe Only the stuff inside the brackets should be indented. Signed-off-by: Simon Ser Cc: Harry Wentland Cc: Daniel Vetter Cc: Sebastian Wick Cc: Joshua Ashton Cc: Pekka Paalanen Reviewed-by: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/msgid/20230428100115.9802-1-contact@emersion.fr --- include/uapi/drm/drm_mode.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 43691058d28f..92d96a2b6763 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -883,7 +883,7 @@ struct hdr_metadata_infoframe { */ struct { __u16 x, y; - } display_primaries[3]; + } display_primaries[3]; /** * @white_point: White Point of Colorspace Data. * These are coded as unsigned 16-bit values in units of @@ -894,7 +894,7 @@ struct hdr_metadata_infoframe { */ struct { __u16 x, y; - } white_point; + } white_point; /** * @max_display_mastering_luminance: Max Mastering Display Luminance. * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, -- cgit v1.2.3 From c7a472297169156252a50d76965eb36b081186e2 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Fri, 14 Jul 2023 11:13:03 +0000 Subject: drm/syncobj: add IOCTL to register an eventfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new DRM_IOCTL_SYNCOBJ_EVENTFD IOCTL which signals an eventfd from a syncobj. This is useful for Wayland compositors to handle wait-before-submit. Wayland clients can send a timeline point to the compositor before the point has materialized yet, then compositors can wait for the point to materialize via this new IOCTL. The existing DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT IOCTL is not suitable because it blocks. Compositors want to integrate the wait with their poll(2)-based event loop. Requirements for new uAPI: - User-space patch: https://gitlab.freedesktop.org/wlroots/wlroots/-/merge_requests/4262 - IGT: https://lists.freedesktop.org/archives/igt-dev/2023-July/057893.html v2: - Wait for fence when flags is zero - Improve documentation (Pekka) - Rename IOCTL (Christian) - Fix typo in drm_syncobj_add_eventfd() (Christian) v3: - Link user-space + IGT patches - Add reference from overview docs v4: fix IOCTL number conflict with GETFB2 (Nicholas Choi, Vitaly Prosyak) Signed-off-by: Simon Ser Reviewed-by: Christian König Acked-by: Pekka Paalanen Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Bas Nieuwenhuizen Cc: Daniel Stone Cc: James Jones Cc: Austin Shafer Cc: Vitaly Prosyak Link: https://patchwork.freedesktop.org/patch/msgid/20230714111257.11940-1-contact@emersion.fr --- drivers/gpu/drm/drm_internal.h | 2 + drivers/gpu/drm/drm_ioctl.c | 2 + drivers/gpu/drm/drm_syncobj.c | 148 +++++++++++++++++++++++++++++++++++++++-- include/drm/drm_syncobj.h | 6 +- include/uapi/drm/drm.h | 23 +++++++ 5 files changed, 174 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index d7e023bbb0d5..ba12acd55139 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -245,6 +245,8 @@ int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 8e9afe7af19c..f03ffbacfe9b 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -701,6 +701,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_EVENTFD, drm_syncobj_eventfd_ioctl, + DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 0c2be8360525..be3e8787d207 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -136,6 +136,10 @@ * requirement is inherited from the wait-before-signal behavior required by * the Vulkan timeline semaphore API. * + * Alternatively, &DRM_IOCTL_SYNCOBJ_EVENTFD can be used to wait without + * blocking: an eventfd will be signaled when the syncobj is. This is useful to + * integrate the wait in an event loop. + * * * Import/export of syncobjs * ------------------------- @@ -185,6 +189,7 @@ #include #include +#include #include #include #include @@ -212,6 +217,20 @@ struct syncobj_wait_entry { static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, struct syncobj_wait_entry *wait); +struct syncobj_eventfd_entry { + struct list_head node; + struct dma_fence *fence; + struct dma_fence_cb fence_cb; + struct drm_syncobj *syncobj; + struct eventfd_ctx *ev_fd_ctx; + u64 point; + u32 flags; +}; + +static void +syncobj_eventfd_entry_func(struct drm_syncobj *syncobj, + struct syncobj_eventfd_entry *entry); + /** * drm_syncobj_find - lookup and reference a sync object. * @file_private: drm file private pointer @@ -274,6 +293,28 @@ static void drm_syncobj_remove_wait(struct drm_syncobj *syncobj, spin_unlock(&syncobj->lock); } +static void +syncobj_eventfd_entry_free(struct syncobj_eventfd_entry *entry) +{ + eventfd_ctx_put(entry->ev_fd_ctx); + dma_fence_put(entry->fence); + /* This happens either inside the syncobj lock, or after the node has + * already been removed from the list. + */ + list_del(&entry->node); + kfree(entry); +} + +static void +drm_syncobj_add_eventfd(struct drm_syncobj *syncobj, + struct syncobj_eventfd_entry *entry) +{ + spin_lock(&syncobj->lock); + list_add_tail(&entry->node, &syncobj->ev_fd_list); + syncobj_eventfd_entry_func(syncobj, entry); + spin_unlock(&syncobj->lock); +} + /** * drm_syncobj_add_point - add new timeline point to the syncobj * @syncobj: sync object to add timeline point do @@ -288,7 +329,8 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj, struct dma_fence *fence, uint64_t point) { - struct syncobj_wait_entry *cur, *tmp; + struct syncobj_wait_entry *wait_cur, *wait_tmp; + struct syncobj_eventfd_entry *ev_fd_cur, *ev_fd_tmp; struct dma_fence *prev; dma_fence_get(fence); @@ -302,8 +344,10 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj, dma_fence_chain_init(chain, prev, fence, point); rcu_assign_pointer(syncobj->fence, &chain->base); - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) - syncobj_wait_syncobj_func(syncobj, cur); + list_for_each_entry_safe(wait_cur, wait_tmp, &syncobj->cb_list, node) + syncobj_wait_syncobj_func(syncobj, wait_cur); + list_for_each_entry_safe(ev_fd_cur, ev_fd_tmp, &syncobj->ev_fd_list, node) + syncobj_eventfd_entry_func(syncobj, ev_fd_cur); spin_unlock(&syncobj->lock); /* Walk the chain once to trigger garbage collection */ @@ -323,7 +367,8 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence) { struct dma_fence *old_fence; - struct syncobj_wait_entry *cur, *tmp; + struct syncobj_wait_entry *wait_cur, *wait_tmp; + struct syncobj_eventfd_entry *ev_fd_cur, *ev_fd_tmp; if (fence) dma_fence_get(fence); @@ -335,8 +380,10 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, rcu_assign_pointer(syncobj->fence, fence); if (fence != old_fence) { - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) - syncobj_wait_syncobj_func(syncobj, cur); + list_for_each_entry_safe(wait_cur, wait_tmp, &syncobj->cb_list, node) + syncobj_wait_syncobj_func(syncobj, wait_cur); + list_for_each_entry_safe(ev_fd_cur, ev_fd_tmp, &syncobj->ev_fd_list, node) + syncobj_eventfd_entry_func(syncobj, ev_fd_cur); } spin_unlock(&syncobj->lock); @@ -472,7 +519,13 @@ void drm_syncobj_free(struct kref *kref) struct drm_syncobj *syncobj = container_of(kref, struct drm_syncobj, refcount); + struct syncobj_eventfd_entry *ev_fd_cur, *ev_fd_tmp; + drm_syncobj_replace_fence(syncobj, NULL); + + list_for_each_entry_safe(ev_fd_cur, ev_fd_tmp, &syncobj->ev_fd_list, node) + syncobj_eventfd_entry_free(ev_fd_cur); + kfree(syncobj); } EXPORT_SYMBOL(drm_syncobj_free); @@ -501,6 +554,7 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, kref_init(&syncobj->refcount); INIT_LIST_HEAD(&syncobj->cb_list); + INIT_LIST_HEAD(&syncobj->ev_fd_list); spin_lock_init(&syncobj->lock); if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { @@ -1304,6 +1358,88 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, return ret; } +static void syncobj_eventfd_entry_fence_func(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct syncobj_eventfd_entry *entry = + container_of(cb, struct syncobj_eventfd_entry, fence_cb); + + eventfd_signal(entry->ev_fd_ctx, 1); + syncobj_eventfd_entry_free(entry); +} + +static void +syncobj_eventfd_entry_func(struct drm_syncobj *syncobj, + struct syncobj_eventfd_entry *entry) +{ + int ret; + struct dma_fence *fence; + + /* This happens inside the syncobj lock */ + fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1)); + ret = dma_fence_chain_find_seqno(&fence, entry->point); + if (ret != 0 || !fence) { + dma_fence_put(fence); + return; + } + + list_del_init(&entry->node); + entry->fence = fence; + + if (entry->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) { + eventfd_signal(entry->ev_fd_ctx, 1); + syncobj_eventfd_entry_free(entry); + } else { + ret = dma_fence_add_callback(fence, &entry->fence_cb, + syncobj_eventfd_entry_fence_func); + if (ret == -ENOENT) { + eventfd_signal(entry->ev_fd_ctx, 1); + syncobj_eventfd_entry_free(entry); + } + } +} + +int +drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_eventfd *args = data; + struct drm_syncobj *syncobj; + struct eventfd_ctx *ev_fd_ctx; + struct syncobj_eventfd_entry *entry; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) + return -EOPNOTSUPP; + + if (args->flags & ~DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) + return -EINVAL; + + if (args->pad) + return -EINVAL; + + syncobj = drm_syncobj_find(file_private, args->handle); + if (!syncobj) + return -ENOENT; + + ev_fd_ctx = eventfd_ctx_fdget(args->fd); + if (IS_ERR(ev_fd_ctx)) + return PTR_ERR(ev_fd_ctx); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + eventfd_ctx_put(ev_fd_ctx); + return -ENOMEM; + } + entry->syncobj = syncobj; + entry->ev_fd_ctx = ev_fd_ctx; + entry->point = args->point; + entry->flags = args->flags; + + drm_syncobj_add_eventfd(syncobj, entry); + drm_syncobj_put(syncobj); + + return 0; +} int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 6cf7243a1dc5..b40052132e52 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -54,7 +54,11 @@ struct drm_syncobj { */ struct list_head cb_list; /** - * @lock: Protects &cb_list and write-locks &fence. + * @ev_fd_list: List of registered eventfd. + */ + struct list_head ev_fd_list; + /** + * @lock: Protects &cb_list and &ev_fd_list, and write-locks &fence. */ spinlock_t lock; /** diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index a87bbbbca2d4..863e47200911 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -909,6 +909,27 @@ struct drm_syncobj_timeline_wait { __u32 pad; }; +/** + * struct drm_syncobj_eventfd + * @handle: syncobj handle. + * @flags: Zero to wait for the point to be signalled, or + * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be + * available for the point. + * @point: syncobj timeline point (set to zero for binary syncobjs). + * @fd: Existing eventfd to sent events to. + * @pad: Must be zero. + * + * Register an eventfd to be signalled by a syncobj. The eventfd counter will + * be incremented by one. + */ +struct drm_syncobj_eventfd { + __u32 handle; + __u32 flags; + __u64 point; + __s32 fd; + __u32 pad; +}; + struct drm_syncobj_array { __u64 handles; @@ -1169,6 +1190,8 @@ extern "C" { */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) +#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. -- cgit v1.2.3 From 7cb8d1ab8cbda554341dac8b54fd135dedff4245 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 24 Mar 2023 02:07:55 +0300 Subject: drm/virtio: Support sync objects Add sync object DRM UAPI support to VirtIO-GPU driver. Sync objects support is needed by native context VirtIO-GPU Mesa drivers, it also will be used by Venus and Virgl contexts. Reviewed-by; Emil Velikov Signed-off-by: Dmitry Osipenko Tested-by: Pierre-Eric Pelloux-Prayer # amdgpu nctx Tested-by: Rob Clark # freedreno nctx Reviewed-by: Rob Clark Acked-by: Gurchetan Singh Acked-by: Gerd Hoffmann Link: https://patchwork.freedesktop.org/patch/msgid/20230416115237.798604-4-dmitry.osipenko@collabora.com --- drivers/gpu/drm/virtio/virtgpu_drv.c | 3 +- drivers/gpu/drm/virtio/virtgpu_submit.c | 224 ++++++++++++++++++++++++++++++++ include/uapi/drm/virtgpu_drm.h | 16 ++- 3 files changed, 241 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index a7ec5a3770da..644b8ee51009 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -176,7 +176,8 @@ static const struct drm_driver driver = { * If KMS is disabled DRIVER_MODESET and DRIVER_ATOMIC are masked * out via drm_device::driver_features: */ - .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC, + .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC | + DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE, .open = virtio_gpu_driver_open, .postclose = virtio_gpu_driver_postclose, diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c index 1d010c66910d..3c00135ead45 100644 --- a/drivers/gpu/drm/virtio/virtgpu_submit.c +++ b/drivers/gpu/drm/virtio/virtgpu_submit.c @@ -14,11 +14,24 @@ #include #include +#include #include #include "virtgpu_drv.h" +struct virtio_gpu_submit_post_dep { + struct drm_syncobj *syncobj; + struct dma_fence_chain *chain; + u64 point; +}; + struct virtio_gpu_submit { + struct virtio_gpu_submit_post_dep *post_deps; + unsigned int num_out_syncobjs; + + struct drm_syncobj **in_syncobjs; + unsigned int num_in_syncobjs; + struct virtio_gpu_object_array *buflist; struct drm_virtgpu_execbuffer *exbuf; struct virtio_gpu_fence *out_fence; @@ -59,6 +72,203 @@ static int virtio_gpu_dma_fence_wait(struct virtio_gpu_submit *submit, return 0; } +static void virtio_gpu_free_syncobjs(struct drm_syncobj **syncobjs, + u32 nr_syncobjs) +{ + u32 i = nr_syncobjs; + + while (i--) { + if (syncobjs[i]) + drm_syncobj_put(syncobjs[i]); + } + + kvfree(syncobjs); +} + +static int +virtio_gpu_parse_deps(struct virtio_gpu_submit *submit) +{ + struct drm_virtgpu_execbuffer *exbuf = submit->exbuf; + struct drm_virtgpu_execbuffer_syncobj syncobj_desc; + size_t syncobj_stride = exbuf->syncobj_stride; + u32 num_in_syncobjs = exbuf->num_in_syncobjs; + struct drm_syncobj **syncobjs; + int ret = 0, i; + + if (!num_in_syncobjs) + return 0; + + /* + * kvalloc at first tries to allocate memory using kmalloc and + * falls back to vmalloc only on failure. It also uses __GFP_NOWARN + * internally for allocations larger than a page size, preventing + * storm of KMSG warnings. + */ + syncobjs = kvcalloc(num_in_syncobjs, sizeof(*syncobjs), GFP_KERNEL); + if (!syncobjs) + return -ENOMEM; + + for (i = 0; i < num_in_syncobjs; i++) { + u64 address = exbuf->in_syncobjs + i * syncobj_stride; + struct dma_fence *fence; + + memset(&syncobj_desc, 0, sizeof(syncobj_desc)); + + if (copy_from_user(&syncobj_desc, + u64_to_user_ptr(address), + min(syncobj_stride, sizeof(syncobj_desc)))) { + ret = -EFAULT; + break; + } + + if (syncobj_desc.flags & ~VIRTGPU_EXECBUF_SYNCOBJ_FLAGS) { + ret = -EINVAL; + break; + } + + ret = drm_syncobj_find_fence(submit->file, syncobj_desc.handle, + syncobj_desc.point, 0, &fence); + if (ret) + break; + + ret = virtio_gpu_dma_fence_wait(submit, fence); + + dma_fence_put(fence); + if (ret) + break; + + if (syncobj_desc.flags & VIRTGPU_EXECBUF_SYNCOBJ_RESET) { + syncobjs[i] = drm_syncobj_find(submit->file, + syncobj_desc.handle); + if (!syncobjs[i]) { + ret = -EINVAL; + break; + } + } + } + + if (ret) { + virtio_gpu_free_syncobjs(syncobjs, i); + return ret; + } + + submit->num_in_syncobjs = num_in_syncobjs; + submit->in_syncobjs = syncobjs; + + return ret; +} + +static void virtio_gpu_reset_syncobjs(struct drm_syncobj **syncobjs, + u32 nr_syncobjs) +{ + u32 i; + + for (i = 0; i < nr_syncobjs; i++) { + if (syncobjs[i]) + drm_syncobj_replace_fence(syncobjs[i], NULL); + } +} + +static void +virtio_gpu_free_post_deps(struct virtio_gpu_submit_post_dep *post_deps, + u32 nr_syncobjs) +{ + u32 i = nr_syncobjs; + + while (i--) { + kfree(post_deps[i].chain); + drm_syncobj_put(post_deps[i].syncobj); + } + + kvfree(post_deps); +} + +static int virtio_gpu_parse_post_deps(struct virtio_gpu_submit *submit) +{ + struct drm_virtgpu_execbuffer *exbuf = submit->exbuf; + struct drm_virtgpu_execbuffer_syncobj syncobj_desc; + struct virtio_gpu_submit_post_dep *post_deps; + u32 num_out_syncobjs = exbuf->num_out_syncobjs; + size_t syncobj_stride = exbuf->syncobj_stride; + int ret = 0, i; + + if (!num_out_syncobjs) + return 0; + + post_deps = kvcalloc(num_out_syncobjs, sizeof(*post_deps), GFP_KERNEL); + if (!post_deps) + return -ENOMEM; + + for (i = 0; i < num_out_syncobjs; i++) { + u64 address = exbuf->out_syncobjs + i * syncobj_stride; + + memset(&syncobj_desc, 0, sizeof(syncobj_desc)); + + if (copy_from_user(&syncobj_desc, + u64_to_user_ptr(address), + min(syncobj_stride, sizeof(syncobj_desc)))) { + ret = -EFAULT; + break; + } + + post_deps[i].point = syncobj_desc.point; + + if (syncobj_desc.flags) { + ret = -EINVAL; + break; + } + + if (syncobj_desc.point) { + post_deps[i].chain = dma_fence_chain_alloc(); + if (!post_deps[i].chain) { + ret = -ENOMEM; + break; + } + } + + post_deps[i].syncobj = drm_syncobj_find(submit->file, + syncobj_desc.handle); + if (!post_deps[i].syncobj) { + kfree(post_deps[i].chain); + ret = -EINVAL; + break; + } + } + + if (ret) { + virtio_gpu_free_post_deps(post_deps, i); + return ret; + } + + submit->num_out_syncobjs = num_out_syncobjs; + submit->post_deps = post_deps; + + return 0; +} + +static void +virtio_gpu_process_post_deps(struct virtio_gpu_submit *submit) +{ + struct virtio_gpu_submit_post_dep *post_deps = submit->post_deps; + + if (post_deps) { + struct dma_fence *fence = &submit->out_fence->f; + u32 i; + + for (i = 0; i < submit->num_out_syncobjs; i++) { + if (post_deps[i].chain) { + drm_syncobj_add_point(post_deps[i].syncobj, + post_deps[i].chain, + fence, post_deps[i].point); + post_deps[i].chain = NULL; + } else { + drm_syncobj_replace_fence(post_deps[i].syncobj, + fence); + } + } + } +} + static int virtio_gpu_fence_event_create(struct drm_device *dev, struct drm_file *file, struct virtio_gpu_fence *fence, @@ -118,6 +328,10 @@ static int virtio_gpu_init_submit_buflist(struct virtio_gpu_submit *submit) static void virtio_gpu_cleanup_submit(struct virtio_gpu_submit *submit) { + virtio_gpu_reset_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs); + virtio_gpu_free_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs); + virtio_gpu_free_post_deps(submit->post_deps, submit->num_out_syncobjs); + if (!IS_ERR(submit->buf)) kvfree(submit->buf); @@ -172,6 +386,7 @@ static int virtio_gpu_init_submit(struct virtio_gpu_submit *submit, drm_fence_event = false; if ((exbuf->flags & VIRTGPU_EXECBUF_FENCE_FD_OUT) || + exbuf->num_out_syncobjs || exbuf->num_bo_handles || drm_fence_event) out_fence = virtio_gpu_fence_alloc(vgdev, fence_ctx, ring_idx); @@ -291,6 +506,14 @@ int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, if (ret) goto cleanup; + ret = virtio_gpu_parse_post_deps(&submit); + if (ret) + goto cleanup; + + ret = virtio_gpu_parse_deps(&submit); + if (ret) + goto cleanup; + /* * Await in-fences in the end of the job submission path to * optimize the path by proceeding directly to the submission @@ -311,6 +534,7 @@ int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, * the job submission path. */ virtio_gpu_install_out_fence_fd(&submit); + virtio_gpu_process_post_deps(&submit); virtio_gpu_complete_submit(&submit); cleanup: virtio_gpu_cleanup_submit(&submit); diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index 7b158fcb02b4..b1d0e56565bc 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -64,6 +64,16 @@ struct drm_virtgpu_map { __u32 pad; }; +#define VIRTGPU_EXECBUF_SYNCOBJ_RESET 0x01 +#define VIRTGPU_EXECBUF_SYNCOBJ_FLAGS ( \ + VIRTGPU_EXECBUF_SYNCOBJ_RESET | \ + 0) +struct drm_virtgpu_execbuffer_syncobj { + __u32 handle; + __u32 flags; + __u64 point; +}; + /* fence_fd is modified on success if VIRTGPU_EXECBUF_FENCE_FD_OUT flag is set. */ struct drm_virtgpu_execbuffer { __u32 flags; @@ -73,7 +83,11 @@ struct drm_virtgpu_execbuffer { __u32 num_bo_handles; __s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) */ __u32 ring_idx; /* command ring index (see VIRTGPU_EXECBUF_RING_IDX) */ - __u32 pad; + __u32 syncobj_stride; /* size of @drm_virtgpu_execbuffer_syncobj */ + __u32 num_in_syncobjs; + __u32 num_out_syncobjs; + __u64 in_syncobjs; + __u64 out_syncobjs; }; #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ -- cgit v1.2.3 From 9a2eabf48ade4fbadb54b95dc1ece429b1cce400 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 3 Aug 2023 09:57:39 +0000 Subject: drm/doc: use proper cross-references for sections When I originally wrote these docs, I couldn't manage to insert a cross-reference to a section. Here's how it can be done. Signed-off-by: Simon Ser Reviewed-by: Jani Nikula Acked-by: Daniel Vetter Cc: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/msgid/20230803095734.386761-1-contact@emersion.fr --- Documentation/gpu/drm-mm.rst | 2 ++ include/uapi/drm/drm.h | 9 ++++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index 3d5dc9dc1bfe..513197359aba 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -517,6 +517,8 @@ DRM Cache Handling and Fast WC memcpy() .. kernel-doc:: drivers/gpu/drm/drm_cache.c :export: +.. _drm_sync_objects: + DRM Sync Objects =========================== diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 863e47200911..75ec985d95e5 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -673,8 +673,8 @@ struct drm_gem_open { * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT * and &DRM_PRIME_CAP_EXPORT. * - * PRIME buffers are exposed as dma-buf file descriptors. See - * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing". + * PRIME buffers are exposed as dma-buf file descriptors. + * See :ref:`prime_buffer_sharing`. */ #define DRM_CAP_PRIME 0x5 /** @@ -756,15 +756,14 @@ struct drm_gem_open { /** * DRM_CAP_SYNCOBJ * - * If set to 1, the driver supports sync objects. See - * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. */ #define DRM_CAP_SYNCOBJ 0x13 /** * DRM_CAP_SYNCOBJ_TIMELINE * * If set to 1, the driver supports timeline operations on sync objects. See - * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + * :ref:`drm_sync_objects`. */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 -- cgit v1.2.3 From f1bfcad68170497bc2132b52322d6314fe6b2120 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 17 Jul 2023 13:13:09 +0000 Subject: drm/doc: add warning about connector_type_id stability Mention that the connector_type_id is not stable: it depends on driver and device probe order. Signed-off-by: Simon Ser Acked-by: Pekka Paalanen Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230717131305.616855-1-contact@emersion.fr --- include/uapi/drm/drm_mode.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 92d96a2b6763..ea1b639bcb28 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -488,6 +488,9 @@ struct drm_mode_get_connector { * This is not an object ID. This is a per-type connector number. Each * (type, type_id) combination is unique across all connectors of a DRM * device. + * + * The (type, type_id) combination is not a stable identifier: the + * type_id can change depending on the driver probe order. */ __u32 connector_type_id; -- cgit v1.2.3 From 2ff4f6d410afa7625bf784dd54c4511c5b3b7a13 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 17 Jul 2023 09:30:39 +0000 Subject: drm/doc: document drm_event and its types Convert struct drm_event to a kernel doc comment. Link to the generic DRM event types. Add a basic description of each event type. Signed-off-by: Simon Ser Acked-by: Pekka Paalanen Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230717093032.600773-1-contact@emersion.fr --- include/uapi/drm/drm.h | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 75ec985d95e5..ccf6ddb9560e 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -1202,25 +1202,50 @@ extern "C" { #define DRM_COMMAND_BASE 0x40 #define DRM_COMMAND_END 0xA0 -/* - * Header for events written back to userspace on the drm fd. The - * type defines the type of event, the length specifies the total - * length of the event (including the header), and user_data is - * typically a 64 bit value passed with the ioctl that triggered the - * event. A read on the drm fd will always only return complete - * events, that is, if for example the read buffer is 100 bytes, and - * there are two 64 byte events pending, only one will be returned. +/** + * struct drm_event - Header for DRM events + * @type: event type. + * @length: total number of payload bytes (including header). * - * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and - * up are chipset specific. + * This struct is a header for events written back to user-space on the DRM FD. + * A read on the DRM FD will always only return complete events: e.g. if the + * read buffer is 100 bytes large and there are two 64 byte events pending, + * only one will be returned. + * + * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and + * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, + * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. */ struct drm_event { __u32 type; __u32 length; }; +/** + * DRM_EVENT_VBLANK - vertical blanking event + * + * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the + * &_DRM_VBLANK_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ #define DRM_EVENT_VBLANK 0x01 +/** + * DRM_EVENT_FLIP_COMPLETE - page-flip completion event + * + * This event is sent in response to an atomic commit or legacy page-flip with + * the &DRM_MODE_PAGE_FLIP_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ #define DRM_EVENT_FLIP_COMPLETE 0x02 +/** + * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event + * + * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. + * + * The event payload is a struct drm_event_crtc_sequence. + */ #define DRM_EVENT_CRTC_SEQUENCE 0x03 struct drm_event_vblank { -- cgit v1.2.3 From ad9ee11fdf113f966e338a3f796efd326fc6f502 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Wed, 12 Jul 2023 18:32:00 +0000 Subject: drm/doc: document that PRIME import/export is always supported Since commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for all drivers"), import/export is always supported. Document this so that user-space knows what to expect. Signed-off-by: Simon Ser Reviewed-by: Jeffrey Hugo Cc: Thomas Zimmermann Cc: Alex Deucher Cc: Jeffrey Hugo Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230712183156.191445-1-contact@emersion.fr --- include/uapi/drm/drm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index ccf6ddb9560e..794c1d857677 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -673,6 +673,9 @@ struct drm_gem_open { * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT * and &DRM_PRIME_CAP_EXPORT. * + * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and + * &DRM_PRIME_CAP_EXPORT are always advertised. + * * PRIME buffers are exposed as dma-buf file descriptors. * See :ref:`prime_buffer_sharing`. */ @@ -682,6 +685,8 @@ struct drm_gem_open { * * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. */ #define DRM_PRIME_CAP_IMPORT 0x1 /** @@ -689,6 +694,8 @@ struct drm_gem_open { * * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. */ #define DRM_PRIME_CAP_EXPORT 0x2 /** -- cgit v1.2.3 From 7a5d5f9c05879e98c8009bea6d0740182725de06 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 4 Aug 2023 20:23:42 +0200 Subject: drm/nouveau: fixup the uapi header file. nouveau > 10 years ago had a plan for new multiplexer inside a multiplexer API using nvif. It never fully reached fruition, fast forward 10 years, and the new vulkan driver is avoiding libdrm and calling ioctls, and these 3 ioctls, getparam, channel alloc + free don't seem to be things we'd want to use nvif for. Undeprecate and put them into the uapi header so we can just copy it into mesa later. v2: use uapi types. Reviewed-by: Faith Ekstrand Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20230804182406.5222-3-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_abi16.h | 41 ---------------------------- include/uapi/drm/nouveau_drm.h | 48 ++++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 44 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 27eae85f33e6..d5d80d0d9011 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -43,28 +43,6 @@ int nouveau_abi16_usif(struct drm_file *, void *data, u32 size); #define NOUVEAU_GEM_DOMAIN_VRAM (1 << 1) #define NOUVEAU_GEM_DOMAIN_GART (1 << 2) -struct drm_nouveau_channel_alloc { - uint32_t fb_ctxdma_handle; - uint32_t tt_ctxdma_handle; - - int channel; - uint32_t pushbuf_domains; - - /* Notifier memory */ - uint32_t notifier_handle; - - /* DRM-enforced subchannel assignments */ - struct { - uint32_t handle; - uint32_t grclass; - } subchan[8]; - uint32_t nr_subchan; -}; - -struct drm_nouveau_channel_free { - int channel; -}; - struct drm_nouveau_grobj_alloc { int channel; uint32_t handle; @@ -83,31 +61,12 @@ struct drm_nouveau_gpuobj_free { uint32_t handle; }; -#define NOUVEAU_GETPARAM_PCI_VENDOR 3 -#define NOUVEAU_GETPARAM_PCI_DEVICE 4 -#define NOUVEAU_GETPARAM_BUS_TYPE 5 -#define NOUVEAU_GETPARAM_FB_SIZE 8 -#define NOUVEAU_GETPARAM_AGP_SIZE 9 -#define NOUVEAU_GETPARAM_CHIPSET_ID 11 -#define NOUVEAU_GETPARAM_VM_VRAM_BASE 12 -#define NOUVEAU_GETPARAM_GRAPH_UNITS 13 -#define NOUVEAU_GETPARAM_PTIMER_TIME 14 -#define NOUVEAU_GETPARAM_HAS_BO_USAGE 15 -#define NOUVEAU_GETPARAM_HAS_PAGEFLIP 16 -struct drm_nouveau_getparam { - uint64_t param; - uint64_t value; -}; - struct drm_nouveau_setparam { uint64_t param; uint64_t value; }; -#define DRM_IOCTL_NOUVEAU_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GETPARAM, struct drm_nouveau_getparam) #define DRM_IOCTL_NOUVEAU_SETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SETPARAM, struct drm_nouveau_setparam) -#define DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_ALLOC, struct drm_nouveau_channel_alloc) -#define DRM_IOCTL_NOUVEAU_CHANNEL_FREE DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_FREE, struct drm_nouveau_channel_free) #define DRM_IOCTL_NOUVEAU_GROBJ_ALLOC DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GROBJ_ALLOC, struct drm_nouveau_grobj_alloc) #define DRM_IOCTL_NOUVEAU_NOTIFIEROBJ_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, struct drm_nouveau_notifierobj_alloc) #define DRM_IOCTL_NOUVEAU_GPUOBJ_FREE DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GPUOBJ_FREE, struct drm_nouveau_gpuobj_free) diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 853a327433d3..ca917e55b38f 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -33,6 +33,44 @@ extern "C" { #endif +#define NOUVEAU_GETPARAM_PCI_VENDOR 3 +#define NOUVEAU_GETPARAM_PCI_DEVICE 4 +#define NOUVEAU_GETPARAM_BUS_TYPE 5 +#define NOUVEAU_GETPARAM_FB_SIZE 8 +#define NOUVEAU_GETPARAM_AGP_SIZE 9 +#define NOUVEAU_GETPARAM_CHIPSET_ID 11 +#define NOUVEAU_GETPARAM_VM_VRAM_BASE 12 +#define NOUVEAU_GETPARAM_GRAPH_UNITS 13 +#define NOUVEAU_GETPARAM_PTIMER_TIME 14 +#define NOUVEAU_GETPARAM_HAS_BO_USAGE 15 +#define NOUVEAU_GETPARAM_HAS_PAGEFLIP 16 +struct drm_nouveau_getparam { + __u64 param; + __u64 value; +}; + +struct drm_nouveau_channel_alloc { + __u32 fb_ctxdma_handle; + __u32 tt_ctxdma_handle; + + __s32 channel; + __u32 pushbuf_domains; + + /* Notifier memory */ + __u32 notifier_handle; + + /* DRM-enforced subchannel assignments */ + struct { + __u32 handle; + __u32 grclass; + } subchan[8]; + __u32 nr_subchan; +}; + +struct drm_nouveau_channel_free { + __s32 channel; +}; + #define NOUVEAU_GEM_DOMAIN_CPU (1 << 0) #define NOUVEAU_GEM_DOMAIN_VRAM (1 << 1) #define NOUVEAU_GEM_DOMAIN_GART (1 << 2) @@ -126,10 +164,10 @@ struct drm_nouveau_gem_cpu_fini { __u32 handle; }; -#define DRM_NOUVEAU_GETPARAM 0x00 /* deprecated */ +#define DRM_NOUVEAU_GETPARAM 0x00 #define DRM_NOUVEAU_SETPARAM 0x01 /* deprecated */ -#define DRM_NOUVEAU_CHANNEL_ALLOC 0x02 /* deprecated */ -#define DRM_NOUVEAU_CHANNEL_FREE 0x03 /* deprecated */ +#define DRM_NOUVEAU_CHANNEL_ALLOC 0x02 +#define DRM_NOUVEAU_CHANNEL_FREE 0x03 #define DRM_NOUVEAU_GROBJ_ALLOC 0x04 /* deprecated */ #define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC 0x05 /* deprecated */ #define DRM_NOUVEAU_GPUOBJ_FREE 0x06 /* deprecated */ @@ -188,6 +226,10 @@ struct drm_nouveau_svm_bind { #define NOUVEAU_SVM_BIND_TARGET__GPU_VRAM (1UL << 31) +#define DRM_IOCTL_NOUVEAU_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GETPARAM, struct drm_nouveau_getparam) +#define DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_ALLOC, struct drm_nouveau_channel_alloc) +#define DRM_IOCTL_NOUVEAU_CHANNEL_FREE DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_FREE, struct drm_nouveau_channel_free) + #define DRM_IOCTL_NOUVEAU_SVM_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_INIT, struct drm_nouveau_svm_init) #define DRM_IOCTL_NOUVEAU_SVM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_BIND, struct drm_nouveau_svm_bind) -- cgit v1.2.3 From e02238990b1ab2dfc8abb4c28369f1da6e863f81 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 4 Aug 2023 20:23:43 +0200 Subject: drm/nouveau: new VM_BIND uAPI interfaces This commit provides the interfaces for the new UAPI motivated by the Vulkan API. It allows user mode drivers (UMDs) to: 1) Initialize a GPU virtual address (VA) space via the new DRM_IOCTL_NOUVEAU_VM_INIT ioctl. UMDs can provide a kernel reserved VA area. 2) Bind and unbind GPU VA space mappings via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl. 3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC support asynchronous processing with DRM syncobjs as synchronization mechanism. The default DRM_IOCTL_NOUVEAU_VM_BIND is synchronous processing, DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only. Reviewed-by: Faith Ekstrand Reviewed-by: Dave Airlie Co-developed-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20230804182406.5222-4-dakr@redhat.com --- Documentation/gpu/driver-uapi.rst | 8 ++ include/uapi/drm/nouveau_drm.h | 217 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst index 4411e6919a3d..9c7ca6e33a68 100644 --- a/Documentation/gpu/driver-uapi.rst +++ b/Documentation/gpu/driver-uapi.rst @@ -6,3 +6,11 @@ drm/i915 uAPI ============= .. kernel-doc:: include/uapi/drm/i915_drm.h + +drm/nouveau uAPI +================ + +VM_BIND / EXEC uAPI +------------------- + +.. kernel-doc:: include/uapi/drm/nouveau_drm.h diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index ca917e55b38f..b1ad9d5ffce8 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -76,6 +76,8 @@ struct drm_nouveau_channel_free { #define NOUVEAU_GEM_DOMAIN_GART (1 << 2) #define NOUVEAU_GEM_DOMAIN_MAPPABLE (1 << 3) #define NOUVEAU_GEM_DOMAIN_COHERENT (1 << 4) +/* The BO will never be shared via import or export. */ +#define NOUVEAU_GEM_DOMAIN_NO_SHARE (1 << 5) #define NOUVEAU_GEM_TILE_COMP 0x00030000 /* nv50-only */ #define NOUVEAU_GEM_TILE_LAYOUT_MASK 0x0000ff00 @@ -164,6 +166,215 @@ struct drm_nouveau_gem_cpu_fini { __u32 handle; }; +/** + * struct drm_nouveau_sync - sync object + * + * This structure serves as synchronization mechanism for (potentially) + * asynchronous operations such as EXEC or VM_BIND. + */ +struct drm_nouveau_sync { + /** + * @flags: the flags for a sync object + * + * The first 8 bits are used to determine the type of the sync object. + */ + __u32 flags; +#define DRM_NOUVEAU_SYNC_SYNCOBJ 0x0 +#define DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ 0x1 +#define DRM_NOUVEAU_SYNC_TYPE_MASK 0xf + /** + * @handle: the handle of the sync object + */ + __u32 handle; + /** + * @timeline_value: + * + * The timeline point of the sync object in case the syncobj is of + * type DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ. + */ + __u64 timeline_value; +}; + +/** + * struct drm_nouveau_vm_init - GPU VA space init structure + * + * Used to initialize the GPU's VA space for a user client, telling the kernel + * which portion of the VA space is managed by the UMD and kernel respectively. + * + * For the UMD to use the VM_BIND uAPI, this must be called before any BOs or + * channels are created; if called afterwards DRM_IOCTL_NOUVEAU_VM_INIT fails + * with -ENOSYS. + */ +struct drm_nouveau_vm_init { + /** + * @kernel_managed_addr: start address of the kernel managed VA space + * region + */ + __u64 kernel_managed_addr; + /** + * @kernel_managed_size: size of the kernel managed VA space region in + * bytes + */ + __u64 kernel_managed_size; +}; + +/** + * struct drm_nouveau_vm_bind_op - VM_BIND operation + * + * This structure represents a single VM_BIND operation. UMDs should pass + * an array of this structure via struct drm_nouveau_vm_bind's &op_ptr field. + */ +struct drm_nouveau_vm_bind_op { + /** + * @op: the operation type + */ + __u32 op; +/** + * @DRM_NOUVEAU_VM_BIND_OP_MAP: + * + * Map a GEM object to the GPU's VA space. Optionally, the + * &DRM_NOUVEAU_VM_BIND_SPARSE flag can be passed to instruct the kernel to + * create sparse mappings for the given range. + */ +#define DRM_NOUVEAU_VM_BIND_OP_MAP 0x0 +/** + * @DRM_NOUVEAU_VM_BIND_OP_UNMAP: + * + * Unmap an existing mapping in the GPU's VA space. If the region the mapping + * is located in is a sparse region, new sparse mappings are created where the + * unmapped (memory backed) mapping was mapped previously. To remove a sparse + * region the &DRM_NOUVEAU_VM_BIND_SPARSE must be set. + */ +#define DRM_NOUVEAU_VM_BIND_OP_UNMAP 0x1 + /** + * @flags: the flags for a &drm_nouveau_vm_bind_op + */ + __u32 flags; +/** + * @DRM_NOUVEAU_VM_BIND_SPARSE: + * + * Indicates that an allocated VA space region should be sparse. + */ +#define DRM_NOUVEAU_VM_BIND_SPARSE (1 << 8) + /** + * @handle: the handle of the DRM GEM object to map + */ + __u32 handle; + /** + * @pad: 32 bit padding, should be 0 + */ + __u32 pad; + /** + * @addr: + * + * the address the VA space region or (memory backed) mapping should be mapped to + */ + __u64 addr; + /** + * @bo_offset: the offset within the BO backing the mapping + */ + __u64 bo_offset; + /** + * @range: the size of the requested mapping in bytes + */ + __u64 range; +}; + +/** + * struct drm_nouveau_vm_bind - structure for DRM_IOCTL_NOUVEAU_VM_BIND + */ +struct drm_nouveau_vm_bind { + /** + * @op_count: the number of &drm_nouveau_vm_bind_op + */ + __u32 op_count; + /** + * @flags: the flags for a &drm_nouveau_vm_bind ioctl + */ + __u32 flags; +/** + * @DRM_NOUVEAU_VM_BIND_RUN_ASYNC: + * + * Indicates that the given VM_BIND operation should be executed asynchronously + * by the kernel. + * + * If this flag is not supplied the kernel executes the associated operations + * synchronously and doesn't accept any &drm_nouveau_sync objects. + */ +#define DRM_NOUVEAU_VM_BIND_RUN_ASYNC 0x1 + /** + * @wait_count: the number of wait &drm_nouveau_syncs + */ + __u32 wait_count; + /** + * @sig_count: the number of &drm_nouveau_syncs to signal when finished + */ + __u32 sig_count; + /** + * @wait_ptr: pointer to &drm_nouveau_syncs to wait for + */ + __u64 wait_ptr; + /** + * @sig_ptr: pointer to &drm_nouveau_syncs to signal when finished + */ + __u64 sig_ptr; + /** + * @op_ptr: pointer to the &drm_nouveau_vm_bind_ops to execute + */ + __u64 op_ptr; +}; + +/** + * struct drm_nouveau_exec_push - EXEC push operation + * + * This structure represents a single EXEC push operation. UMDs should pass an + * array of this structure via struct drm_nouveau_exec's &push_ptr field. + */ +struct drm_nouveau_exec_push { + /** + * @va: the virtual address of the push buffer mapping + */ + __u64 va; + /** + * @va_len: the length of the push buffer mapping + */ + __u64 va_len; +}; + +/** + * struct drm_nouveau_exec - structure for DRM_IOCTL_NOUVEAU_EXEC + */ +struct drm_nouveau_exec { + /** + * @channel: the channel to execute the push buffer in + */ + __u32 channel; + /** + * @push_count: the number of &drm_nouveau_exec_push ops + */ + __u32 push_count; + /** + * @wait_count: the number of wait &drm_nouveau_syncs + */ + __u32 wait_count; + /** + * @sig_count: the number of &drm_nouveau_syncs to signal when finished + */ + __u32 sig_count; + /** + * @wait_ptr: pointer to &drm_nouveau_syncs to wait for + */ + __u64 wait_ptr; + /** + * @sig_ptr: pointer to &drm_nouveau_syncs to signal when finished + */ + __u64 sig_ptr; + /** + * @push_ptr: pointer to &drm_nouveau_exec_push ops + */ + __u64 push_ptr; +}; + #define DRM_NOUVEAU_GETPARAM 0x00 #define DRM_NOUVEAU_SETPARAM 0x01 /* deprecated */ #define DRM_NOUVEAU_CHANNEL_ALLOC 0x02 @@ -174,6 +385,9 @@ struct drm_nouveau_gem_cpu_fini { #define DRM_NOUVEAU_NVIF 0x07 #define DRM_NOUVEAU_SVM_INIT 0x08 #define DRM_NOUVEAU_SVM_BIND 0x09 +#define DRM_NOUVEAU_VM_INIT 0x10 +#define DRM_NOUVEAU_VM_BIND 0x11 +#define DRM_NOUVEAU_EXEC 0x12 #define DRM_NOUVEAU_GEM_NEW 0x40 #define DRM_NOUVEAU_GEM_PUSHBUF 0x41 #define DRM_NOUVEAU_GEM_CPU_PREP 0x42 @@ -239,6 +453,9 @@ struct drm_nouveau_svm_bind { #define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini) #define DRM_IOCTL_NOUVEAU_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info) +#define DRM_IOCTL_NOUVEAU_VM_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_INIT, struct drm_nouveau_vm_init) +#define DRM_IOCTL_NOUVEAU_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_BIND, struct drm_nouveau_vm_bind) +#define DRM_IOCTL_NOUVEAU_EXEC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_EXEC, struct drm_nouveau_exec) #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 899272354dbc1f8ca2d94ea0ddc0174a14fc8324 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Sep 2022 13:03:36 -0400 Subject: drm/amdgpu: add UAPI for allocating doorbell memory This patch adds flags for a new gem domain AMDGPU_GEM_DOMAIN_DOORBELL in the UAPI layer. V2: Drop 'memory' from description (Christian) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- include/uapi/drm/amdgpu_drm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 79b14828d542..f477eda6a2b8 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -94,6 +94,9 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. */ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -101,12 +104,14 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GDS 0x8 #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ - AMDGPU_GEM_DOMAIN_OA) + AMDGPU_GEM_DOMAIN_OA | \ + AMDGPU_GEM_DOMAIN_DOORBELL) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) -- cgit v1.2.3 From aa5f04d2e5a842351449034e619913e1c721a37c Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 31 Jul 2023 18:12:56 +0200 Subject: accel/ivpu: Extend get_param ioctl to identify capabilities Add DRM_IVPU_PARAM_CAPABILITIES parameters to get_param ioctl to query driver capabilities. For now use it for identify metric streamer and new dma memory range features. Currently upstream version of intel_vpu does not have those, they will be added it the future. Reviewed-by: Jacek Lawrynowicz Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230731161258.2987564-5-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 19 +++++++++++++++++++ include/uapi/drm/ivpu_accel.h | 4 ++++ 2 files changed, 23 insertions(+) (limited to 'include/uapi') diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index fad607dbb2c6..d33eb17007bf 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -115,6 +115,22 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link) kref_put(&file_priv->ref, file_priv_release); } +static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param *args) +{ + switch (args->index) { + case DRM_IVPU_CAP_METRIC_STREAMER: + args->value = 0; + break; + case DRM_IVPU_CAP_DMA_MEMORY_RANGE: + args->value = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; @@ -174,6 +190,9 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f case DRM_IVPU_PARAM_SKU: args->value = vdev->hw->sku; break; + case DRM_IVPU_PARAM_CAPABILITIES: + ret = ivpu_get_capabilities(vdev, args); + break; default: ret = -EINVAL; break; diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 839820aed87e..3e99b74eef04 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -60,6 +60,7 @@ extern "C" { #define DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID 10 #define DRM_IVPU_PARAM_TILE_CONFIG 11 #define DRM_IVPU_PARAM_SKU 12 +#define DRM_IVPU_PARAM_CAPABILITIES 13 #define DRM_IVPU_PLATFORM_TYPE_SILICON 0 @@ -68,6 +69,9 @@ extern "C" { #define DRM_IVPU_CONTEXT_PRIORITY_FOCUS 2 #define DRM_IVPU_CONTEXT_PRIORITY_REALTIME 3 +#define DRM_IVPU_CAP_METRIC_STREAMER 1 +#define DRM_IVPU_CAP_DMA_MEMORY_RANGE 2 + /** * struct drm_ivpu_param - Get/Set VPU parameters */ -- cgit v1.2.3 From 162f17b2d97a848586340694e64eff14e60a85a8 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Mon, 31 Jul 2023 18:12:57 +0200 Subject: accel/ivpu: Refactor memory ranges logic Add new dma range and change naming convention for virtual address memory ranges managed by KMD. New available ranges are named as follows: * global range - global context accessible by FW * aliased range - user context accessible by FW * dma range - user context accessible by DMA * shave range - user context accessible by shaves * global shave range - global context accessible by shave nn Signed-off-by: Karol Wachowski Reviewed-by: Stanislaw Gruszka Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230731161258.2987564-6-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 4 ++-- drivers/accel/ivpu/ivpu_fw.c | 18 ++++++++---------- drivers/accel/ivpu/ivpu_gem.c | 10 ++++++---- drivers/accel/ivpu/ivpu_hw.h | 9 ++++----- drivers/accel/ivpu/ivpu_hw_37xx.c | 9 ++++----- drivers/accel/ivpu/ivpu_mmu_context.c | 8 ++++---- include/uapi/drm/ivpu_accel.h | 5 ++++- 7 files changed, 32 insertions(+), 31 deletions(-) (limited to 'include/uapi') diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index d33eb17007bf..3da15890bc59 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -122,7 +122,7 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param args->value = 0; break; case DRM_IVPU_CAP_DMA_MEMORY_RANGE: - args->value = 0; + args->value = 1; break; default: return -EINVAL; @@ -160,7 +160,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f args->value = ivpu_get_context_count(vdev); break; case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: - args->value = vdev->hw->ranges.user_low.start; + args->value = vdev->hw->ranges.user.start; break; case DRM_IVPU_PARAM_CONTEXT_PRIORITY: args->value = file_priv->priority; diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 7caf90a169a3..7e75439dec60 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -204,7 +204,7 @@ static int ivpu_fw_update_global_range(struct ivpu_device *vdev) return -EINVAL; } - ivpu_hw_init_range(&vdev->hw->ranges.global_low, start, size); + ivpu_hw_init_range(&vdev->hw->ranges.global, start, size); return 0; } @@ -245,7 +245,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev) } if (fw->shave_nn_size) { - fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.global_high.start, + fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start, fw->shave_nn_size, DRM_IVPU_BO_UNCACHED); if (!fw->mem_shave_nn) { ivpu_err(vdev, "Failed to allocate shavenn buffer\n"); @@ -443,9 +443,9 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params * Uncached region of VPU address space, covers IPC buffers, job queues * and log buffers, programmable to L2$ Uncached by VPU MTRR */ - boot_params->shared_region_base = vdev->hw->ranges.global_low.start; - boot_params->shared_region_size = vdev->hw->ranges.global_low.end - - vdev->hw->ranges.global_low.start; + boot_params->shared_region_base = vdev->hw->ranges.global.start; + boot_params->shared_region_size = vdev->hw->ranges.global.end - + vdev->hw->ranges.global.start; boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr; boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2; @@ -453,10 +453,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2; boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2; - boot_params->global_aliased_pio_base = - vdev->hw->ranges.global_aliased_pio.start; - boot_params->global_aliased_pio_size = - ivpu_hw_range_size(&vdev->hw->ranges.global_aliased_pio); + boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start; + boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user); /* Allow configuration for L2C_PAGE_TABLE with boot param value */ boot_params->autoconfig = 1; @@ -464,7 +462,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params /* Enable L2 cache for first 2GB of high memory */ boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 1; boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = - ADDR_TO_L2_CACHE_CFG(vdev->hw->ranges.global_high.start); + ADDR_TO_L2_CACHE_CFG(vdev->hw->ranges.shave.start); if (vdev->fw->mem_shave_nn) boot_params->shave_nn_fw_base = vdev->fw->mem_shave_nn->vpu_addr; diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 52b339aefadc..2981bb32c755 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -279,10 +279,12 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, int ret; if (!range) { - if (bo->flags & DRM_IVPU_BO_HIGH_MEM) - range = &vdev->hw->ranges.user_high; + if (bo->flags & DRM_IVPU_BO_SHAVE_MEM) + range = &vdev->hw->ranges.shave; + else if (bo->flags & DRM_IVPU_BO_DMA_MEM) + range = &vdev->hw->ranges.dma; else - range = &vdev->hw->ranges.user_low; + range = &vdev->hw->ranges.user; } mutex_lock(&ctx->lock); @@ -570,7 +572,7 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla fixed_range.end = vpu_addr + size; range = &fixed_range; } else { - range = &vdev->hw->ranges.global_low; + range = &vdev->hw->ranges.global; } bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0); diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index 335b7f707752..69e52d21cd10 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -38,11 +38,10 @@ struct ivpu_addr_range { struct ivpu_hw_info { const struct ivpu_hw_ops *ops; struct { - struct ivpu_addr_range global_low; - struct ivpu_addr_range global_high; - struct ivpu_addr_range user_low; - struct ivpu_addr_range user_high; - struct ivpu_addr_range global_aliased_pio; + struct ivpu_addr_range global; + struct ivpu_addr_range user; + struct ivpu_addr_range shave; + struct ivpu_addr_range dma; } ranges; struct { u8 min_ratio; diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 2cda8e4c4a3d..9eae1c241bc0 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -620,11 +620,10 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev) ivpu_pll_init_frequency_ratios(vdev); - ivpu_hw_init_range(&hw->ranges.global_low, 0x80000000, SZ_512M); - ivpu_hw_init_range(&hw->ranges.global_high, 0x180000000, SZ_2M); - ivpu_hw_init_range(&hw->ranges.user_low, 0xc0000000, 255 * SZ_1M); - ivpu_hw_init_range(&hw->ranges.user_high, 0x180000000, SZ_2G); - hw->ranges.global_aliased_pio = hw->ranges.user_low; + ivpu_hw_init_range(&hw->ranges.global, 0x80000000, SZ_512M); + ivpu_hw_init_range(&hw->ranges.user, 0xc0000000, 255 * SZ_1M); + ivpu_hw_init_range(&hw->ranges.shave, 0x180000000, SZ_2G); + ivpu_hw_init_range(&hw->ranges.dma, 0x200000000, SZ_8G); return 0; } diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index 465a82298476..1d2e554e2c4a 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -431,11 +431,11 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3 return ret; if (!context_id) { - start = vdev->hw->ranges.global_low.start; - end = vdev->hw->ranges.global_high.end; + start = vdev->hw->ranges.global.start; + end = vdev->hw->ranges.shave.end; } else { - start = vdev->hw->ranges.user_low.start; - end = vdev->hw->ranges.user_high.end; + start = vdev->hw->ranges.user.start; + end = vdev->hw->ranges.dma.end; } drm_mm_init(&ctx->mm, start, end - start); diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 3e99b74eef04..a58a14c9f222 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -133,8 +133,10 @@ struct drm_ivpu_param { __u64 value; }; -#define DRM_IVPU_BO_HIGH_MEM 0x00000001 +#define DRM_IVPU_BO_SHAVE_MEM 0x00000001 +#define DRM_IVPU_BO_HIGH_MEM DRM_IVPU_BO_SHAVE_MEM #define DRM_IVPU_BO_MAPPABLE 0x00000002 +#define DRM_IVPU_BO_DMA_MEM 0x00000004 #define DRM_IVPU_BO_CACHED 0x00000000 #define DRM_IVPU_BO_UNCACHED 0x00010000 @@ -144,6 +146,7 @@ struct drm_ivpu_param { #define DRM_IVPU_BO_FLAGS \ (DRM_IVPU_BO_HIGH_MEM | \ DRM_IVPU_BO_MAPPABLE | \ + DRM_IVPU_BO_DMA_MEM | \ DRM_IVPU_BO_CACHE_MASK) /** -- cgit v1.2.3 From 443f9e0b1ab5e3b95abf8606097d13e30e2f2413 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 23 Aug 2023 20:15:34 +0200 Subject: drm/nouveau: uapi: don't pass NO_PREFETCH flag implicitly Currently, NO_PREFETCH is passed implicitly through drm_nouveau_gem_pushbuf_push::length and drm_nouveau_exec_push::va_len. Since this is a direct representation of how the HW is programmed it isn't really future proof for a uAPI. Hence, fix this up for the new uAPI and split up the va_len field of struct drm_nouveau_exec_push, such that we keep 32bit for va_len and 32bit for flags. For drm_nouveau_gem_pushbuf_push::length at least provide NOUVEAU_GEM_PUSHBUF_NO_PREFETCH to indicate the bit shift. While at it, fix up nv50_dma_push() as well, such that the caller doesn't need to encode the NO_PREFETCH flag into the length parameter. Signed-off-by: Danilo Krummrich Reviewed-by: Faith Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20230823181746.3446-1-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_dma.c | 7 +++++-- drivers/gpu/drm/nouveau/nouveau_dma.h | 8 ++++++-- drivers/gpu/drm/nouveau/nouveau_exec.c | 19 ++++++++++++++++--- drivers/gpu/drm/nouveau/nouveau_gem.c | 6 ++++-- include/uapi/drm/nouveau_drm.h | 8 +++++++- 5 files changed, 38 insertions(+), 10 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c index b90cac6d5772..b01c029f3a90 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.c +++ b/drivers/gpu/drm/nouveau/nouveau_dma.c @@ -69,16 +69,19 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout) } void -nv50_dma_push(struct nouveau_channel *chan, u64 offset, int length) +nv50_dma_push(struct nouveau_channel *chan, u64 offset, u32 length, + bool no_prefetch) { struct nvif_user *user = &chan->drm->client.device.user; struct nouveau_bo *pb = chan->push.buffer; int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; BUG_ON(chan->dma.ib_free < 1); + WARN_ON(length > NV50_DMA_PUSH_MAX_LENGTH); nouveau_bo_wr32(pb, ip++, lower_32_bits(offset)); - nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8); + nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8 | + (no_prefetch ? (1 << 31) : 0)); chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max; diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index 035a709c7be1..1744d95b233e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -31,7 +31,8 @@ #include "nouveau_chan.h" int nouveau_dma_wait(struct nouveau_channel *, int slots, int size); -void nv50_dma_push(struct nouveau_channel *, u64 addr, int length); +void nv50_dma_push(struct nouveau_channel *, u64 addr, u32 length, + bool no_prefetch); /* * There's a hw race condition where you can't jump to your PUT offset, @@ -45,6 +46,9 @@ void nv50_dma_push(struct nouveau_channel *, u64 addr, int length); */ #define NOUVEAU_DMA_SKIPS (128 / 4) +/* Maximum push buffer size. */ +#define NV50_DMA_PUSH_MAX_LENGTH 0x7fffff + /* Object handles - for stuff that's doesn't use handle == oclass. */ enum { NvDmaFB = 0x80000002, @@ -89,7 +93,7 @@ FIRE_RING(struct nouveau_channel *chan) if (chan->dma.ib_max) { nv50_dma_push(chan, chan->push.addr + (chan->dma.put << 2), - (chan->dma.cur - chan->dma.put) << 2); + (chan->dma.cur - chan->dma.put) << 2, false); } else { WRITE_PUT(chan->dma.cur); } diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 0f927adda4ed..a90c4cd8cbb2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -164,8 +164,10 @@ nouveau_exec_job_run(struct nouveau_job *job) } for (i = 0; i < exec_job->push.count; i++) { - nv50_dma_push(chan, exec_job->push.s[i].va, - exec_job->push.s[i].va_len); + struct drm_nouveau_exec_push *p = &exec_job->push.s[i]; + bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH; + + nv50_dma_push(chan, p->va, p->va_len, no_prefetch); } ret = nouveau_fence_emit(fence, chan); @@ -223,7 +225,18 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob, { struct nouveau_exec_job *job; struct nouveau_job_args args = {}; - int ret; + int i, ret; + + for (i = 0; i < __args->push.count; i++) { + struct drm_nouveau_exec_push *p = &__args->push.s[i]; + + if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) { + NV_PRINTK(err, nouveau_cli(__args->file_priv), + "pushbuf size exceeds limit: 0x%x max 0x%x\n", + p->va_len, NV50_DMA_PUSH_MAX_LENGTH); + return -EINVAL; + } + } job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL); if (!job) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index f39360870c70..c0b10d8d3d03 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -856,9 +856,11 @@ revalidate: for (i = 0; i < req->nr_push; i++) { struct nouveau_vma *vma = (void *)(unsigned long) bo[push[i].bo_index].user_priv; + u64 addr = vma->addr + push[i].offset; + u32 length = push[i].length & ~NOUVEAU_GEM_PUSHBUF_NO_PREFETCH; + bool no_prefetch = push[i].length & NOUVEAU_GEM_PUSHBUF_NO_PREFETCH; - nv50_dma_push(chan, vma->addr + push[i].offset, - push[i].length); + nv50_dma_push(chan, addr, length, no_prefetch); } } else if (drm->client.device.info.chipset >= 0x25) { diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index b1ad9d5ffce8..8d7402c13e56 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -138,6 +138,7 @@ struct drm_nouveau_gem_pushbuf_push { __u32 pad; __u64 offset; __u64 length; +#define NOUVEAU_GEM_PUSHBUF_NO_PREFETCH (1 << 23) }; struct drm_nouveau_gem_pushbuf { @@ -338,7 +339,12 @@ struct drm_nouveau_exec_push { /** * @va_len: the length of the push buffer mapping */ - __u64 va_len; + __u32 va_len; + /** + * @flags: the flags for this push buffer mapping + */ + __u32 flags; +#define DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH 0x1 }; /** -- cgit v1.2.3