From 3fe630c7715aaa1cbb6da8b8b5562882971241e9 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Fri, 28 Apr 2023 10:01:24 +0000
Subject: drm: fix code style for embedded structs in hdr_metadata_infoframe

Only the stuff inside the brackets should be indented.

Signed-off-by: Simon Ser <contact@emersion.fr>
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Sebastian Wick <sebastian.wick@redhat.com>
Cc: Joshua Ashton <joshua@froggi.es>
Cc: Pekka Paalanen <pekka.paalanen@collabora.com>
Reviewed-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230428100115.9802-1-contact@emersion.fr
---
 include/uapi/drm/drm_mode.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 43691058d28f..92d96a2b6763 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -883,7 +883,7 @@ struct hdr_metadata_infoframe {
 	 */
 	struct {
 		__u16 x, y;
-		} display_primaries[3];
+	} display_primaries[3];
 	/**
 	 * @white_point: White Point of Colorspace Data.
 	 * These are coded as unsigned 16-bit values in units of
@@ -894,7 +894,7 @@ struct hdr_metadata_infoframe {
 	 */
 	struct {
 		__u16 x, y;
-		} white_point;
+	} white_point;
 	/**
 	 * @max_display_mastering_luminance: Max Mastering Display Luminance.
 	 * This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
-- 
cgit v1.2.3


From c7a472297169156252a50d76965eb36b081186e2 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Fri, 14 Jul 2023 11:13:03 +0000
Subject: drm/syncobj: add IOCTL to register an eventfd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a new DRM_IOCTL_SYNCOBJ_EVENTFD IOCTL which signals an
eventfd from a syncobj.

This is useful for Wayland compositors to handle wait-before-submit.
Wayland clients can send a timeline point to the compositor
before the point has materialized yet, then compositors can wait
for the point to materialize via this new IOCTL.

The existing DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT IOCTL is not suitable
because it blocks. Compositors want to integrate the wait with
their poll(2)-based event loop.

Requirements for new uAPI:

- User-space patch: https://gitlab.freedesktop.org/wlroots/wlroots/-/merge_requests/4262
- IGT: https://lists.freedesktop.org/archives/igt-dev/2023-July/057893.html

v2:
- Wait for fence when flags is zero
- Improve documentation (Pekka)
- Rename IOCTL (Christian)
- Fix typo in drm_syncobj_add_eventfd() (Christian)

v3:
- Link user-space + IGT patches
- Add reference from overview docs

v4: fix IOCTL number conflict with GETFB2 (Nicholas Choi, Vitaly Prosyak)

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Cc: Daniel Stone <daniel@fooishbar.org>
Cc: James Jones <jajones@nvidia.com>
Cc: Austin Shafer <ashafer@nvidia.com>
Cc: Vitaly Prosyak <vprosyak@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230714111257.11940-1-contact@emersion.fr
---
 drivers/gpu/drm/drm_internal.h |   2 +
 drivers/gpu/drm/drm_ioctl.c    |   2 +
 drivers/gpu/drm/drm_syncobj.c  | 148 +++++++++++++++++++++++++++++++++++++++--
 include/drm/drm_syncobj.h      |   6 +-
 include/uapi/drm/drm.h         |  23 +++++++
 5 files changed, 174 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index d7e023bbb0d5..ba12acd55139 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -245,6 +245,8 @@ int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_private);
 int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file_private);
+int drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_private);
 int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 8e9afe7af19c..f03ffbacfe9b 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -701,6 +701,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
 		      DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_EVENTFD, drm_syncobj_eventfd_ioctl,
+		      DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
 		      DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 0c2be8360525..be3e8787d207 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -136,6 +136,10 @@
  * requirement is inherited from the wait-before-signal behavior required by
  * the Vulkan timeline semaphore API.
  *
+ * Alternatively, &DRM_IOCTL_SYNCOBJ_EVENTFD can be used to wait without
+ * blocking: an eventfd will be signaled when the syncobj is. This is useful to
+ * integrate the wait in an event loop.
+ *
  *
  * Import/export of syncobjs
  * -------------------------
@@ -185,6 +189,7 @@
 
 #include <linux/anon_inodes.h>
 #include <linux/dma-fence-unwrap.h>
+#include <linux/eventfd.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/sched/signal.h>
@@ -212,6 +217,20 @@ struct syncobj_wait_entry {
 static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
 				      struct syncobj_wait_entry *wait);
 
+struct syncobj_eventfd_entry {
+	struct list_head node;
+	struct dma_fence *fence;
+	struct dma_fence_cb fence_cb;
+	struct drm_syncobj *syncobj;
+	struct eventfd_ctx *ev_fd_ctx;
+	u64 point;
+	u32 flags;
+};
+
+static void
+syncobj_eventfd_entry_func(struct drm_syncobj *syncobj,
+			   struct syncobj_eventfd_entry *entry);
+
 /**
  * drm_syncobj_find - lookup and reference a sync object.
  * @file_private: drm file private pointer
@@ -274,6 +293,28 @@ static void drm_syncobj_remove_wait(struct drm_syncobj *syncobj,
 	spin_unlock(&syncobj->lock);
 }
 
+static void
+syncobj_eventfd_entry_free(struct syncobj_eventfd_entry *entry)
+{
+	eventfd_ctx_put(entry->ev_fd_ctx);
+	dma_fence_put(entry->fence);
+	/* This happens either inside the syncobj lock, or after the node has
+	 * already been removed from the list.
+	 */
+	list_del(&entry->node);
+	kfree(entry);
+}
+
+static void
+drm_syncobj_add_eventfd(struct drm_syncobj *syncobj,
+			struct syncobj_eventfd_entry *entry)
+{
+	spin_lock(&syncobj->lock);
+	list_add_tail(&entry->node, &syncobj->ev_fd_list);
+	syncobj_eventfd_entry_func(syncobj, entry);
+	spin_unlock(&syncobj->lock);
+}
+
 /**
  * drm_syncobj_add_point - add new timeline point to the syncobj
  * @syncobj: sync object to add timeline point do
@@ -288,7 +329,8 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj,
 			   struct dma_fence *fence,
 			   uint64_t point)
 {
-	struct syncobj_wait_entry *cur, *tmp;
+	struct syncobj_wait_entry *wait_cur, *wait_tmp;
+	struct syncobj_eventfd_entry *ev_fd_cur, *ev_fd_tmp;
 	struct dma_fence *prev;
 
 	dma_fence_get(fence);
@@ -302,8 +344,10 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj,
 	dma_fence_chain_init(chain, prev, fence, point);
 	rcu_assign_pointer(syncobj->fence, &chain->base);
 
-	list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
-		syncobj_wait_syncobj_func(syncobj, cur);
+	list_for_each_entry_safe(wait_cur, wait_tmp, &syncobj->cb_list, node)
+		syncobj_wait_syncobj_func(syncobj, wait_cur);
+	list_for_each_entry_safe(ev_fd_cur, ev_fd_tmp, &syncobj->ev_fd_list, node)
+		syncobj_eventfd_entry_func(syncobj, ev_fd_cur);
 	spin_unlock(&syncobj->lock);
 
 	/* Walk the chain once to trigger garbage collection */
@@ -323,7 +367,8 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 			       struct dma_fence *fence)
 {
 	struct dma_fence *old_fence;
-	struct syncobj_wait_entry *cur, *tmp;
+	struct syncobj_wait_entry *wait_cur, *wait_tmp;
+	struct syncobj_eventfd_entry *ev_fd_cur, *ev_fd_tmp;
 
 	if (fence)
 		dma_fence_get(fence);
@@ -335,8 +380,10 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 	rcu_assign_pointer(syncobj->fence, fence);
 
 	if (fence != old_fence) {
-		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
-			syncobj_wait_syncobj_func(syncobj, cur);
+		list_for_each_entry_safe(wait_cur, wait_tmp, &syncobj->cb_list, node)
+			syncobj_wait_syncobj_func(syncobj, wait_cur);
+		list_for_each_entry_safe(ev_fd_cur, ev_fd_tmp, &syncobj->ev_fd_list, node)
+			syncobj_eventfd_entry_func(syncobj, ev_fd_cur);
 	}
 
 	spin_unlock(&syncobj->lock);
@@ -472,7 +519,13 @@ void drm_syncobj_free(struct kref *kref)
 	struct drm_syncobj *syncobj = container_of(kref,
 						   struct drm_syncobj,
 						   refcount);
+	struct syncobj_eventfd_entry *ev_fd_cur, *ev_fd_tmp;
+
 	drm_syncobj_replace_fence(syncobj, NULL);
+
+	list_for_each_entry_safe(ev_fd_cur, ev_fd_tmp, &syncobj->ev_fd_list, node)
+		syncobj_eventfd_entry_free(ev_fd_cur);
+
 	kfree(syncobj);
 }
 EXPORT_SYMBOL(drm_syncobj_free);
@@ -501,6 +554,7 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
 
 	kref_init(&syncobj->refcount);
 	INIT_LIST_HEAD(&syncobj->cb_list);
+	INIT_LIST_HEAD(&syncobj->ev_fd_list);
 	spin_lock_init(&syncobj->lock);
 
 	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
@@ -1304,6 +1358,88 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static void syncobj_eventfd_entry_fence_func(struct dma_fence *fence,
+					     struct dma_fence_cb *cb)
+{
+	struct syncobj_eventfd_entry *entry =
+		container_of(cb, struct syncobj_eventfd_entry, fence_cb);
+
+	eventfd_signal(entry->ev_fd_ctx, 1);
+	syncobj_eventfd_entry_free(entry);
+}
+
+static void
+syncobj_eventfd_entry_func(struct drm_syncobj *syncobj,
+			   struct syncobj_eventfd_entry *entry)
+{
+	int ret;
+	struct dma_fence *fence;
+
+	/* This happens inside the syncobj lock */
+	fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1));
+	ret = dma_fence_chain_find_seqno(&fence, entry->point);
+	if (ret != 0 || !fence) {
+		dma_fence_put(fence);
+		return;
+	}
+
+	list_del_init(&entry->node);
+	entry->fence = fence;
+
+	if (entry->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) {
+		eventfd_signal(entry->ev_fd_ctx, 1);
+		syncobj_eventfd_entry_free(entry);
+	} else {
+		ret = dma_fence_add_callback(fence, &entry->fence_cb,
+					     syncobj_eventfd_entry_fence_func);
+		if (ret == -ENOENT) {
+			eventfd_signal(entry->ev_fd_ctx, 1);
+			syncobj_eventfd_entry_free(entry);
+		}
+	}
+}
+
+int
+drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_private)
+{
+	struct drm_syncobj_eventfd *args = data;
+	struct drm_syncobj *syncobj;
+	struct eventfd_ctx *ev_fd_ctx;
+	struct syncobj_eventfd_entry *entry;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
+		return -EOPNOTSUPP;
+
+	if (args->flags & ~DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)
+		return -EINVAL;
+
+	if (args->pad)
+		return -EINVAL;
+
+	syncobj = drm_syncobj_find(file_private, args->handle);
+	if (!syncobj)
+		return -ENOENT;
+
+	ev_fd_ctx = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(ev_fd_ctx))
+		return PTR_ERR(ev_fd_ctx);
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		eventfd_ctx_put(ev_fd_ctx);
+		return -ENOMEM;
+	}
+	entry->syncobj = syncobj;
+	entry->ev_fd_ctx = ev_fd_ctx;
+	entry->point = args->point;
+	entry->flags = args->flags;
+
+	drm_syncobj_add_eventfd(syncobj, entry);
+	drm_syncobj_put(syncobj);
+
+	return 0;
+}
 
 int
 drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 6cf7243a1dc5..b40052132e52 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -54,7 +54,11 @@ struct drm_syncobj {
 	 */
 	struct list_head cb_list;
 	/**
-	 * @lock: Protects &cb_list and write-locks &fence.
+	 * @ev_fd_list: List of registered eventfd.
+	 */
+	struct list_head ev_fd_list;
+	/**
+	 * @lock: Protects &cb_list and &ev_fd_list, and write-locks &fence.
 	 */
 	spinlock_t lock;
 	/**
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index a87bbbbca2d4..863e47200911 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -909,6 +909,27 @@ struct drm_syncobj_timeline_wait {
 	__u32 pad;
 };
 
+/**
+ * struct drm_syncobj_eventfd
+ * @handle: syncobj handle.
+ * @flags: Zero to wait for the point to be signalled, or
+ *         &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be
+ *         available for the point.
+ * @point: syncobj timeline point (set to zero for binary syncobjs).
+ * @fd: Existing eventfd to sent events to.
+ * @pad: Must be zero.
+ *
+ * Register an eventfd to be signalled by a syncobj. The eventfd counter will
+ * be incremented by one.
+ */
+struct drm_syncobj_eventfd {
+	__u32 handle;
+	__u32 flags;
+	__u64 point;
+	__s32 fd;
+	__u32 pad;
+};
+
 
 struct drm_syncobj_array {
 	__u64 handles;
@@ -1169,6 +1190,8 @@ extern "C" {
  */
 #define DRM_IOCTL_MODE_GETFB2		DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
 
+#define DRM_IOCTL_SYNCOBJ_EVENTFD	DRM_IOWR(0xCF, struct drm_syncobj_eventfd)
+
 /*
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
-- 
cgit v1.2.3


From 7cb8d1ab8cbda554341dac8b54fd135dedff4245 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Date: Fri, 24 Mar 2023 02:07:55 +0300
Subject: drm/virtio: Support sync objects

Add sync object DRM UAPI support to VirtIO-GPU driver. Sync objects
support is needed by native context VirtIO-GPU Mesa drivers, it also will
be used by Venus and Virgl contexts.

Reviewed-by; Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> # amdgpu nctx
Tested-by: Rob Clark <robdclark@gmail.com> # freedreno nctx
Reviewed-by: Rob Clark <robdclark@gmail.com>
Acked-by: Gurchetan Singh <gurchetansingh@chromium.org>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230416115237.798604-4-dmitry.osipenko@collabora.com
---
 drivers/gpu/drm/virtio/virtgpu_drv.c    |   3 +-
 drivers/gpu/drm/virtio/virtgpu_submit.c | 224 ++++++++++++++++++++++++++++++++
 include/uapi/drm/virtgpu_drm.h          |  16 ++-
 3 files changed, 241 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index a7ec5a3770da..644b8ee51009 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -176,7 +176,8 @@ static const struct drm_driver driver = {
 	 * If KMS is disabled DRIVER_MODESET and DRIVER_ATOMIC are masked
 	 * out via drm_device::driver_features:
 	 */
-	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC,
+	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC |
+			   DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE,
 	.open = virtio_gpu_driver_open,
 	.postclose = virtio_gpu_driver_postclose,
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c
index 1d010c66910d..3c00135ead45 100644
--- a/drivers/gpu/drm/virtio/virtgpu_submit.c
+++ b/drivers/gpu/drm/virtio/virtgpu_submit.c
@@ -14,11 +14,24 @@
 #include <linux/uaccess.h>
 
 #include <drm/drm_file.h>
+#include <drm/drm_syncobj.h>
 #include <drm/virtgpu_drm.h>
 
 #include "virtgpu_drv.h"
 
+struct virtio_gpu_submit_post_dep {
+	struct drm_syncobj *syncobj;
+	struct dma_fence_chain *chain;
+	u64 point;
+};
+
 struct virtio_gpu_submit {
+	struct virtio_gpu_submit_post_dep *post_deps;
+	unsigned int num_out_syncobjs;
+
+	struct drm_syncobj **in_syncobjs;
+	unsigned int num_in_syncobjs;
+
 	struct virtio_gpu_object_array *buflist;
 	struct drm_virtgpu_execbuffer *exbuf;
 	struct virtio_gpu_fence *out_fence;
@@ -59,6 +72,203 @@ static int virtio_gpu_dma_fence_wait(struct virtio_gpu_submit *submit,
 	return 0;
 }
 
+static void virtio_gpu_free_syncobjs(struct drm_syncobj **syncobjs,
+				     u32 nr_syncobjs)
+{
+	u32 i = nr_syncobjs;
+
+	while (i--) {
+		if (syncobjs[i])
+			drm_syncobj_put(syncobjs[i]);
+	}
+
+	kvfree(syncobjs);
+}
+
+static int
+virtio_gpu_parse_deps(struct virtio_gpu_submit *submit)
+{
+	struct drm_virtgpu_execbuffer *exbuf = submit->exbuf;
+	struct drm_virtgpu_execbuffer_syncobj syncobj_desc;
+	size_t syncobj_stride = exbuf->syncobj_stride;
+	u32 num_in_syncobjs = exbuf->num_in_syncobjs;
+	struct drm_syncobj **syncobjs;
+	int ret = 0, i;
+
+	if (!num_in_syncobjs)
+		return 0;
+
+	/*
+	 * kvalloc at first tries to allocate memory using kmalloc and
+	 * falls back to vmalloc only on failure. It also uses __GFP_NOWARN
+	 * internally for allocations larger than a page size, preventing
+	 * storm of KMSG warnings.
+	 */
+	syncobjs = kvcalloc(num_in_syncobjs, sizeof(*syncobjs), GFP_KERNEL);
+	if (!syncobjs)
+		return -ENOMEM;
+
+	for (i = 0; i < num_in_syncobjs; i++) {
+		u64 address = exbuf->in_syncobjs + i * syncobj_stride;
+		struct dma_fence *fence;
+
+		memset(&syncobj_desc, 0, sizeof(syncobj_desc));
+
+		if (copy_from_user(&syncobj_desc,
+				   u64_to_user_ptr(address),
+				   min(syncobj_stride, sizeof(syncobj_desc)))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		if (syncobj_desc.flags & ~VIRTGPU_EXECBUF_SYNCOBJ_FLAGS) {
+			ret = -EINVAL;
+			break;
+		}
+
+		ret = drm_syncobj_find_fence(submit->file, syncobj_desc.handle,
+					     syncobj_desc.point, 0, &fence);
+		if (ret)
+			break;
+
+		ret = virtio_gpu_dma_fence_wait(submit, fence);
+
+		dma_fence_put(fence);
+		if (ret)
+			break;
+
+		if (syncobj_desc.flags & VIRTGPU_EXECBUF_SYNCOBJ_RESET) {
+			syncobjs[i] = drm_syncobj_find(submit->file,
+						       syncobj_desc.handle);
+			if (!syncobjs[i]) {
+				ret = -EINVAL;
+				break;
+			}
+		}
+	}
+
+	if (ret) {
+		virtio_gpu_free_syncobjs(syncobjs, i);
+		return ret;
+	}
+
+	submit->num_in_syncobjs = num_in_syncobjs;
+	submit->in_syncobjs = syncobjs;
+
+	return ret;
+}
+
+static void virtio_gpu_reset_syncobjs(struct drm_syncobj **syncobjs,
+				      u32 nr_syncobjs)
+{
+	u32 i;
+
+	for (i = 0; i < nr_syncobjs; i++) {
+		if (syncobjs[i])
+			drm_syncobj_replace_fence(syncobjs[i], NULL);
+	}
+}
+
+static void
+virtio_gpu_free_post_deps(struct virtio_gpu_submit_post_dep *post_deps,
+			  u32 nr_syncobjs)
+{
+	u32 i = nr_syncobjs;
+
+	while (i--) {
+		kfree(post_deps[i].chain);
+		drm_syncobj_put(post_deps[i].syncobj);
+	}
+
+	kvfree(post_deps);
+}
+
+static int virtio_gpu_parse_post_deps(struct virtio_gpu_submit *submit)
+{
+	struct drm_virtgpu_execbuffer *exbuf = submit->exbuf;
+	struct drm_virtgpu_execbuffer_syncobj syncobj_desc;
+	struct virtio_gpu_submit_post_dep *post_deps;
+	u32 num_out_syncobjs = exbuf->num_out_syncobjs;
+	size_t syncobj_stride = exbuf->syncobj_stride;
+	int ret = 0, i;
+
+	if (!num_out_syncobjs)
+		return 0;
+
+	post_deps = kvcalloc(num_out_syncobjs, sizeof(*post_deps), GFP_KERNEL);
+	if (!post_deps)
+		return -ENOMEM;
+
+	for (i = 0; i < num_out_syncobjs; i++) {
+		u64 address = exbuf->out_syncobjs + i * syncobj_stride;
+
+		memset(&syncobj_desc, 0, sizeof(syncobj_desc));
+
+		if (copy_from_user(&syncobj_desc,
+				   u64_to_user_ptr(address),
+				   min(syncobj_stride, sizeof(syncobj_desc)))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		post_deps[i].point = syncobj_desc.point;
+
+		if (syncobj_desc.flags) {
+			ret = -EINVAL;
+			break;
+		}
+
+		if (syncobj_desc.point) {
+			post_deps[i].chain = dma_fence_chain_alloc();
+			if (!post_deps[i].chain) {
+				ret = -ENOMEM;
+				break;
+			}
+		}
+
+		post_deps[i].syncobj = drm_syncobj_find(submit->file,
+							syncobj_desc.handle);
+		if (!post_deps[i].syncobj) {
+			kfree(post_deps[i].chain);
+			ret = -EINVAL;
+			break;
+		}
+	}
+
+	if (ret) {
+		virtio_gpu_free_post_deps(post_deps, i);
+		return ret;
+	}
+
+	submit->num_out_syncobjs = num_out_syncobjs;
+	submit->post_deps = post_deps;
+
+	return 0;
+}
+
+static void
+virtio_gpu_process_post_deps(struct virtio_gpu_submit *submit)
+{
+	struct virtio_gpu_submit_post_dep *post_deps = submit->post_deps;
+
+	if (post_deps) {
+		struct dma_fence *fence = &submit->out_fence->f;
+		u32 i;
+
+		for (i = 0; i < submit->num_out_syncobjs; i++) {
+			if (post_deps[i].chain) {
+				drm_syncobj_add_point(post_deps[i].syncobj,
+						      post_deps[i].chain,
+						      fence, post_deps[i].point);
+				post_deps[i].chain = NULL;
+			} else {
+				drm_syncobj_replace_fence(post_deps[i].syncobj,
+							  fence);
+			}
+		}
+	}
+}
+
 static int virtio_gpu_fence_event_create(struct drm_device *dev,
 					 struct drm_file *file,
 					 struct virtio_gpu_fence *fence,
@@ -118,6 +328,10 @@ static int virtio_gpu_init_submit_buflist(struct virtio_gpu_submit *submit)
 
 static void virtio_gpu_cleanup_submit(struct virtio_gpu_submit *submit)
 {
+	virtio_gpu_reset_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs);
+	virtio_gpu_free_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs);
+	virtio_gpu_free_post_deps(submit->post_deps, submit->num_out_syncobjs);
+
 	if (!IS_ERR(submit->buf))
 		kvfree(submit->buf);
 
@@ -172,6 +386,7 @@ static int virtio_gpu_init_submit(struct virtio_gpu_submit *submit,
 		drm_fence_event = false;
 
 	if ((exbuf->flags & VIRTGPU_EXECBUF_FENCE_FD_OUT) ||
+	    exbuf->num_out_syncobjs ||
 	    exbuf->num_bo_handles ||
 	    drm_fence_event)
 		out_fence = virtio_gpu_fence_alloc(vgdev, fence_ctx, ring_idx);
@@ -291,6 +506,14 @@ int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto cleanup;
 
+	ret = virtio_gpu_parse_post_deps(&submit);
+	if (ret)
+		goto cleanup;
+
+	ret = virtio_gpu_parse_deps(&submit);
+	if (ret)
+		goto cleanup;
+
 	/*
 	 * Await in-fences in the end of the job submission path to
 	 * optimize the path by proceeding directly to the submission
@@ -311,6 +534,7 @@ int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	 * the job submission path.
 	 */
 	virtio_gpu_install_out_fence_fd(&submit);
+	virtio_gpu_process_post_deps(&submit);
 	virtio_gpu_complete_submit(&submit);
 cleanup:
 	virtio_gpu_cleanup_submit(&submit);
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index 7b158fcb02b4..b1d0e56565bc 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -64,6 +64,16 @@ struct drm_virtgpu_map {
 	__u32 pad;
 };
 
+#define VIRTGPU_EXECBUF_SYNCOBJ_RESET		0x01
+#define VIRTGPU_EXECBUF_SYNCOBJ_FLAGS ( \
+		VIRTGPU_EXECBUF_SYNCOBJ_RESET | \
+		0)
+struct drm_virtgpu_execbuffer_syncobj {
+	__u32 handle;
+	__u32 flags;
+	__u64 point;
+};
+
 /* fence_fd is modified on success if VIRTGPU_EXECBUF_FENCE_FD_OUT flag is set. */
 struct drm_virtgpu_execbuffer {
 	__u32 flags;
@@ -73,7 +83,11 @@ struct drm_virtgpu_execbuffer {
 	__u32 num_bo_handles;
 	__s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) */
 	__u32 ring_idx; /* command ring index (see VIRTGPU_EXECBUF_RING_IDX) */
-	__u32 pad;
+	__u32 syncobj_stride; /* size of @drm_virtgpu_execbuffer_syncobj */
+	__u32 num_in_syncobjs;
+	__u32 num_out_syncobjs;
+	__u64 in_syncobjs;
+	__u64 out_syncobjs;
 };
 
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
-- 
cgit v1.2.3


From 9a2eabf48ade4fbadb54b95dc1ece429b1cce400 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Thu, 3 Aug 2023 09:57:39 +0000
Subject: drm/doc: use proper cross-references for sections

When I originally wrote these docs, I couldn't manage to insert a
cross-reference to a section. Here's how it can be done.

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Pekka Paalanen <pekka.paalanen@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230803095734.386761-1-contact@emersion.fr
---
 Documentation/gpu/drm-mm.rst | 2 ++
 include/uapi/drm/drm.h       | 9 ++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index 3d5dc9dc1bfe..513197359aba 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -517,6 +517,8 @@ DRM Cache Handling and Fast WC memcpy()
 .. kernel-doc:: drivers/gpu/drm/drm_cache.c
    :export:
 
+.. _drm_sync_objects:
+
 DRM Sync Objects
 ===========================
 
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 863e47200911..75ec985d95e5 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -673,8 +673,8 @@ struct drm_gem_open {
  * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
  * and &DRM_PRIME_CAP_EXPORT.
  *
- * PRIME buffers are exposed as dma-buf file descriptors. See
- * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing".
+ * PRIME buffers are exposed as dma-buf file descriptors.
+ * See :ref:`prime_buffer_sharing`.
  */
 #define DRM_CAP_PRIME			0x5
 /**
@@ -756,15 +756,14 @@ struct drm_gem_open {
 /**
  * DRM_CAP_SYNCOBJ
  *
- * If set to 1, the driver supports sync objects. See
- * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`.
  */
 #define DRM_CAP_SYNCOBJ		0x13
 /**
  * DRM_CAP_SYNCOBJ_TIMELINE
  *
  * If set to 1, the driver supports timeline operations on sync objects. See
- * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ * :ref:`drm_sync_objects`.
  */
 #define DRM_CAP_SYNCOBJ_TIMELINE	0x14
 
-- 
cgit v1.2.3


From f1bfcad68170497bc2132b52322d6314fe6b2120 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Mon, 17 Jul 2023 13:13:09 +0000
Subject: drm/doc: add warning about connector_type_id stability

Mention that the connector_type_id is not stable: it depends on
driver and device probe order.

Signed-off-by: Simon Ser <contact@emersion.fr>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20230717131305.616855-1-contact@emersion.fr
---
 include/uapi/drm/drm_mode.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 92d96a2b6763..ea1b639bcb28 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -488,6 +488,9 @@ struct drm_mode_get_connector {
 	 * This is not an object ID. This is a per-type connector number. Each
 	 * (type, type_id) combination is unique across all connectors of a DRM
 	 * device.
+	 *
+	 * The (type, type_id) combination is not a stable identifier: the
+	 * type_id can change depending on the driver probe order.
 	 */
 	__u32 connector_type_id;
 
-- 
cgit v1.2.3


From 2ff4f6d410afa7625bf784dd54c4511c5b3b7a13 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Mon, 17 Jul 2023 09:30:39 +0000
Subject: drm/doc: document drm_event and its types

Convert struct drm_event to a kernel doc comment. Link to the
generic DRM event types. Add a basic description of each event
type.

Signed-off-by: Simon Ser <contact@emersion.fr>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20230717093032.600773-1-contact@emersion.fr
---
 include/uapi/drm/drm.h | 45 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 75ec985d95e5..ccf6ddb9560e 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -1202,25 +1202,50 @@ extern "C" {
 #define DRM_COMMAND_BASE                0x40
 #define DRM_COMMAND_END			0xA0
 
-/*
- * Header for events written back to userspace on the drm fd.  The
- * type defines the type of event, the length specifies the total
- * length of the event (including the header), and user_data is
- * typically a 64 bit value passed with the ioctl that triggered the
- * event.  A read on the drm fd will always only return complete
- * events, that is, if for example the read buffer is 100 bytes, and
- * there are two 64 byte events pending, only one will be returned.
+/**
+ * struct drm_event - Header for DRM events
+ * @type: event type.
+ * @length: total number of payload bytes (including header).
  *
- * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and
- * up are chipset specific.
+ * This struct is a header for events written back to user-space on the DRM FD.
+ * A read on the DRM FD will always only return complete events: e.g. if the
+ * read buffer is 100 bytes large and there are two 64 byte events pending,
+ * only one will be returned.
+ *
+ * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and
+ * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK,
+ * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE.
  */
 struct drm_event {
 	__u32 type;
 	__u32 length;
 };
 
+/**
+ * DRM_EVENT_VBLANK - vertical blanking event
+ *
+ * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the
+ * &_DRM_VBLANK_EVENT flag set.
+ *
+ * The event payload is a struct drm_event_vblank.
+ */
 #define DRM_EVENT_VBLANK 0x01
+/**
+ * DRM_EVENT_FLIP_COMPLETE - page-flip completion event
+ *
+ * This event is sent in response to an atomic commit or legacy page-flip with
+ * the &DRM_MODE_PAGE_FLIP_EVENT flag set.
+ *
+ * The event payload is a struct drm_event_vblank.
+ */
 #define DRM_EVENT_FLIP_COMPLETE 0x02
+/**
+ * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event
+ *
+ * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE.
+ *
+ * The event payload is a struct drm_event_crtc_sequence.
+ */
 #define DRM_EVENT_CRTC_SEQUENCE	0x03
 
 struct drm_event_vblank {
-- 
cgit v1.2.3


From ad9ee11fdf113f966e338a3f796efd326fc6f502 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Wed, 12 Jul 2023 18:32:00 +0000
Subject: drm/doc: document that PRIME import/export is always supported

Since commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for all
drivers"), import/export is always supported. Document this so that
user-space knows what to expect.

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Jeffrey Hugo <quic_jhugo@quicinc.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20230712183156.191445-1-contact@emersion.fr
---
 include/uapi/drm/drm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index ccf6ddb9560e..794c1d857677 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -673,6 +673,9 @@ struct drm_gem_open {
  * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
  * and &DRM_PRIME_CAP_EXPORT.
  *
+ * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and
+ * &DRM_PRIME_CAP_EXPORT are always advertised.
+ *
  * PRIME buffers are exposed as dma-buf file descriptors.
  * See :ref:`prime_buffer_sharing`.
  */
@@ -682,6 +685,8 @@ struct drm_gem_open {
  *
  * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME
  * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.
+ *
+ * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
  */
 #define  DRM_PRIME_CAP_IMPORT		0x1
 /**
@@ -689,6 +694,8 @@ struct drm_gem_open {
  *
  * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME
  * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.
+ *
+ * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
  */
 #define  DRM_PRIME_CAP_EXPORT		0x2
 /**
-- 
cgit v1.2.3


From 7a5d5f9c05879e98c8009bea6d0740182725de06 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 4 Aug 2023 20:23:42 +0200
Subject: drm/nouveau: fixup the uapi header file.

nouveau > 10 years ago had a plan for new multiplexer inside a multiplexer
API using nvif. It never fully reached fruition, fast forward 10 years,
and the new vulkan driver is avoiding libdrm and calling ioctls, and
these 3 ioctls, getparam, channel alloc + free don't seem to be things
we'd want to use nvif for.

Undeprecate and put them into the uapi header so we can just copy it
into mesa later.

v2: use uapi types.

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230804182406.5222-3-dakr@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_abi16.h | 41 ----------------------------
 include/uapi/drm/nouveau_drm.h          | 48 ++++++++++++++++++++++++++++++---
 2 files changed, 45 insertions(+), 44 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
index 27eae85f33e6..d5d80d0d9011 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -43,28 +43,6 @@ int  nouveau_abi16_usif(struct drm_file *, void *data, u32 size);
 #define NOUVEAU_GEM_DOMAIN_VRAM      (1 << 1)
 #define NOUVEAU_GEM_DOMAIN_GART      (1 << 2)
 
-struct drm_nouveau_channel_alloc {
-	uint32_t     fb_ctxdma_handle;
-	uint32_t     tt_ctxdma_handle;
-
-	int          channel;
-	uint32_t     pushbuf_domains;
-
-	/* Notifier memory */
-	uint32_t     notifier_handle;
-
-	/* DRM-enforced subchannel assignments */
-	struct {
-		uint32_t handle;
-		uint32_t grclass;
-	} subchan[8];
-	uint32_t nr_subchan;
-};
-
-struct drm_nouveau_channel_free {
-	int channel;
-};
-
 struct drm_nouveau_grobj_alloc {
 	int      channel;
 	uint32_t handle;
@@ -83,31 +61,12 @@ struct drm_nouveau_gpuobj_free {
 	uint32_t handle;
 };
 
-#define NOUVEAU_GETPARAM_PCI_VENDOR      3
-#define NOUVEAU_GETPARAM_PCI_DEVICE      4
-#define NOUVEAU_GETPARAM_BUS_TYPE        5
-#define NOUVEAU_GETPARAM_FB_SIZE         8
-#define NOUVEAU_GETPARAM_AGP_SIZE        9
-#define NOUVEAU_GETPARAM_CHIPSET_ID      11
-#define NOUVEAU_GETPARAM_VM_VRAM_BASE    12
-#define NOUVEAU_GETPARAM_GRAPH_UNITS     13
-#define NOUVEAU_GETPARAM_PTIMER_TIME     14
-#define NOUVEAU_GETPARAM_HAS_BO_USAGE    15
-#define NOUVEAU_GETPARAM_HAS_PAGEFLIP    16
-struct drm_nouveau_getparam {
-	uint64_t param;
-	uint64_t value;
-};
-
 struct drm_nouveau_setparam {
 	uint64_t param;
 	uint64_t value;
 };
 
-#define DRM_IOCTL_NOUVEAU_GETPARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GETPARAM, struct drm_nouveau_getparam)
 #define DRM_IOCTL_NOUVEAU_SETPARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SETPARAM, struct drm_nouveau_setparam)
-#define DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC      DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_ALLOC, struct drm_nouveau_channel_alloc)
-#define DRM_IOCTL_NOUVEAU_CHANNEL_FREE       DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_FREE, struct drm_nouveau_channel_free)
 #define DRM_IOCTL_NOUVEAU_GROBJ_ALLOC        DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GROBJ_ALLOC, struct drm_nouveau_grobj_alloc)
 #define DRM_IOCTL_NOUVEAU_NOTIFIEROBJ_ALLOC  DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, struct drm_nouveau_notifierobj_alloc)
 #define DRM_IOCTL_NOUVEAU_GPUOBJ_FREE        DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GPUOBJ_FREE, struct drm_nouveau_gpuobj_free)
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index 853a327433d3..ca917e55b38f 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -33,6 +33,44 @@
 extern "C" {
 #endif
 
+#define NOUVEAU_GETPARAM_PCI_VENDOR      3
+#define NOUVEAU_GETPARAM_PCI_DEVICE      4
+#define NOUVEAU_GETPARAM_BUS_TYPE        5
+#define NOUVEAU_GETPARAM_FB_SIZE         8
+#define NOUVEAU_GETPARAM_AGP_SIZE        9
+#define NOUVEAU_GETPARAM_CHIPSET_ID      11
+#define NOUVEAU_GETPARAM_VM_VRAM_BASE    12
+#define NOUVEAU_GETPARAM_GRAPH_UNITS     13
+#define NOUVEAU_GETPARAM_PTIMER_TIME     14
+#define NOUVEAU_GETPARAM_HAS_BO_USAGE    15
+#define NOUVEAU_GETPARAM_HAS_PAGEFLIP    16
+struct drm_nouveau_getparam {
+	__u64 param;
+	__u64 value;
+};
+
+struct drm_nouveau_channel_alloc {
+	__u32     fb_ctxdma_handle;
+	__u32     tt_ctxdma_handle;
+
+	__s32     channel;
+	__u32     pushbuf_domains;
+
+	/* Notifier memory */
+	__u32     notifier_handle;
+
+	/* DRM-enforced subchannel assignments */
+	struct {
+		__u32 handle;
+		__u32 grclass;
+	} subchan[8];
+	__u32 nr_subchan;
+};
+
+struct drm_nouveau_channel_free {
+	__s32 channel;
+};
+
 #define NOUVEAU_GEM_DOMAIN_CPU       (1 << 0)
 #define NOUVEAU_GEM_DOMAIN_VRAM      (1 << 1)
 #define NOUVEAU_GEM_DOMAIN_GART      (1 << 2)
@@ -126,10 +164,10 @@ struct drm_nouveau_gem_cpu_fini {
 	__u32 handle;
 };
 
-#define DRM_NOUVEAU_GETPARAM           0x00 /* deprecated */
+#define DRM_NOUVEAU_GETPARAM           0x00
 #define DRM_NOUVEAU_SETPARAM           0x01 /* deprecated */
-#define DRM_NOUVEAU_CHANNEL_ALLOC      0x02 /* deprecated */
-#define DRM_NOUVEAU_CHANNEL_FREE       0x03 /* deprecated */
+#define DRM_NOUVEAU_CHANNEL_ALLOC      0x02
+#define DRM_NOUVEAU_CHANNEL_FREE       0x03
 #define DRM_NOUVEAU_GROBJ_ALLOC        0x04 /* deprecated */
 #define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC  0x05 /* deprecated */
 #define DRM_NOUVEAU_GPUOBJ_FREE        0x06 /* deprecated */
@@ -188,6 +226,10 @@ struct drm_nouveau_svm_bind {
 #define NOUVEAU_SVM_BIND_TARGET__GPU_VRAM               (1UL << 31)
 
 
+#define DRM_IOCTL_NOUVEAU_GETPARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GETPARAM, struct drm_nouveau_getparam)
+#define DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC      DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_ALLOC, struct drm_nouveau_channel_alloc)
+#define DRM_IOCTL_NOUVEAU_CHANNEL_FREE       DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_FREE, struct drm_nouveau_channel_free)
+
 #define DRM_IOCTL_NOUVEAU_SVM_INIT           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_INIT, struct drm_nouveau_svm_init)
 #define DRM_IOCTL_NOUVEAU_SVM_BIND           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_BIND, struct drm_nouveau_svm_bind)
 
-- 
cgit v1.2.3


From e02238990b1ab2dfc8abb4c28369f1da6e863f81 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Fri, 4 Aug 2023 20:23:43 +0200
Subject: drm/nouveau: new VM_BIND uAPI interfaces

This commit provides the interfaces for the new UAPI motivated by the
Vulkan API. It allows user mode drivers (UMDs) to:

1) Initialize a GPU virtual address (VA) space via the new
   DRM_IOCTL_NOUVEAU_VM_INIT ioctl. UMDs can provide a kernel reserved
   VA area.

2) Bind and unbind GPU VA space mappings via the new
   DRM_IOCTL_NOUVEAU_VM_BIND ioctl.

3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl.

Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC support
asynchronous processing with DRM syncobjs as synchronization mechanism.

The default DRM_IOCTL_NOUVEAU_VM_BIND is synchronous processing,
DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only.

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Co-developed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230804182406.5222-4-dakr@redhat.com
---
 Documentation/gpu/driver-uapi.rst |   8 ++
 include/uapi/drm/nouveau_drm.h    | 217 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 225 insertions(+)

(limited to 'include/uapi')

diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst
index 4411e6919a3d..9c7ca6e33a68 100644
--- a/Documentation/gpu/driver-uapi.rst
+++ b/Documentation/gpu/driver-uapi.rst
@@ -6,3 +6,11 @@ drm/i915 uAPI
 =============
 
 .. kernel-doc:: include/uapi/drm/i915_drm.h
+
+drm/nouveau uAPI
+================
+
+VM_BIND / EXEC uAPI
+-------------------
+
+.. kernel-doc:: include/uapi/drm/nouveau_drm.h
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index ca917e55b38f..b1ad9d5ffce8 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -76,6 +76,8 @@ struct drm_nouveau_channel_free {
 #define NOUVEAU_GEM_DOMAIN_GART      (1 << 2)
 #define NOUVEAU_GEM_DOMAIN_MAPPABLE  (1 << 3)
 #define NOUVEAU_GEM_DOMAIN_COHERENT  (1 << 4)
+/* The BO will never be shared via import or export. */
+#define NOUVEAU_GEM_DOMAIN_NO_SHARE  (1 << 5)
 
 #define NOUVEAU_GEM_TILE_COMP        0x00030000 /* nv50-only */
 #define NOUVEAU_GEM_TILE_LAYOUT_MASK 0x0000ff00
@@ -164,6 +166,215 @@ struct drm_nouveau_gem_cpu_fini {
 	__u32 handle;
 };
 
+/**
+ * struct drm_nouveau_sync - sync object
+ *
+ * This structure serves as synchronization mechanism for (potentially)
+ * asynchronous operations such as EXEC or VM_BIND.
+ */
+struct drm_nouveau_sync {
+	/**
+	 * @flags: the flags for a sync object
+	 *
+	 * The first 8 bits are used to determine the type of the sync object.
+	 */
+	__u32 flags;
+#define DRM_NOUVEAU_SYNC_SYNCOBJ 0x0
+#define DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ 0x1
+#define DRM_NOUVEAU_SYNC_TYPE_MASK 0xf
+	/**
+	 * @handle: the handle of the sync object
+	 */
+	__u32 handle;
+	/**
+	 * @timeline_value:
+	 *
+	 * The timeline point of the sync object in case the syncobj is of
+	 * type DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ.
+	 */
+	__u64 timeline_value;
+};
+
+/**
+ * struct drm_nouveau_vm_init - GPU VA space init structure
+ *
+ * Used to initialize the GPU's VA space for a user client, telling the kernel
+ * which portion of the VA space is managed by the UMD and kernel respectively.
+ *
+ * For the UMD to use the VM_BIND uAPI, this must be called before any BOs or
+ * channels are created; if called afterwards DRM_IOCTL_NOUVEAU_VM_INIT fails
+ * with -ENOSYS.
+ */
+struct drm_nouveau_vm_init {
+	/**
+	 * @kernel_managed_addr: start address of the kernel managed VA space
+	 * region
+	 */
+	__u64 kernel_managed_addr;
+	/**
+	 * @kernel_managed_size: size of the kernel managed VA space region in
+	 * bytes
+	 */
+	__u64 kernel_managed_size;
+};
+
+/**
+ * struct drm_nouveau_vm_bind_op - VM_BIND operation
+ *
+ * This structure represents a single VM_BIND operation. UMDs should pass
+ * an array of this structure via struct drm_nouveau_vm_bind's &op_ptr field.
+ */
+struct drm_nouveau_vm_bind_op {
+	/**
+	 * @op: the operation type
+	 */
+	__u32 op;
+/**
+ * @DRM_NOUVEAU_VM_BIND_OP_MAP:
+ *
+ * Map a GEM object to the GPU's VA space. Optionally, the
+ * &DRM_NOUVEAU_VM_BIND_SPARSE flag can be passed to instruct the kernel to
+ * create sparse mappings for the given range.
+ */
+#define DRM_NOUVEAU_VM_BIND_OP_MAP 0x0
+/**
+ * @DRM_NOUVEAU_VM_BIND_OP_UNMAP:
+ *
+ * Unmap an existing mapping in the GPU's VA space. If the region the mapping
+ * is located in is a sparse region, new sparse mappings are created where the
+ * unmapped (memory backed) mapping was mapped previously. To remove a sparse
+ * region the &DRM_NOUVEAU_VM_BIND_SPARSE must be set.
+ */
+#define DRM_NOUVEAU_VM_BIND_OP_UNMAP 0x1
+	/**
+	 * @flags: the flags for a &drm_nouveau_vm_bind_op
+	 */
+	__u32 flags;
+/**
+ * @DRM_NOUVEAU_VM_BIND_SPARSE:
+ *
+ * Indicates that an allocated VA space region should be sparse.
+ */
+#define DRM_NOUVEAU_VM_BIND_SPARSE (1 << 8)
+	/**
+	 * @handle: the handle of the DRM GEM object to map
+	 */
+	__u32 handle;
+	/**
+	 * @pad: 32 bit padding, should be 0
+	 */
+	__u32 pad;
+	/**
+	 * @addr:
+	 *
+	 * the address the VA space region or (memory backed) mapping should be mapped to
+	 */
+	__u64 addr;
+	/**
+	 * @bo_offset: the offset within the BO backing the mapping
+	 */
+	__u64 bo_offset;
+	/**
+	 * @range: the size of the requested mapping in bytes
+	 */
+	__u64 range;
+};
+
+/**
+ * struct drm_nouveau_vm_bind - structure for DRM_IOCTL_NOUVEAU_VM_BIND
+ */
+struct drm_nouveau_vm_bind {
+	/**
+	 * @op_count: the number of &drm_nouveau_vm_bind_op
+	 */
+	__u32 op_count;
+	/**
+	 * @flags: the flags for a &drm_nouveau_vm_bind ioctl
+	 */
+	__u32 flags;
+/**
+ * @DRM_NOUVEAU_VM_BIND_RUN_ASYNC:
+ *
+ * Indicates that the given VM_BIND operation should be executed asynchronously
+ * by the kernel.
+ *
+ * If this flag is not supplied the kernel executes the associated operations
+ * synchronously and doesn't accept any &drm_nouveau_sync objects.
+ */
+#define DRM_NOUVEAU_VM_BIND_RUN_ASYNC 0x1
+	/**
+	 * @wait_count: the number of wait &drm_nouveau_syncs
+	 */
+	__u32 wait_count;
+	/**
+	 * @sig_count: the number of &drm_nouveau_syncs to signal when finished
+	 */
+	__u32 sig_count;
+	/**
+	 * @wait_ptr: pointer to &drm_nouveau_syncs to wait for
+	 */
+	__u64 wait_ptr;
+	/**
+	 * @sig_ptr: pointer to &drm_nouveau_syncs to signal when finished
+	 */
+	__u64 sig_ptr;
+	/**
+	 * @op_ptr: pointer to the &drm_nouveau_vm_bind_ops to execute
+	 */
+	__u64 op_ptr;
+};
+
+/**
+ * struct drm_nouveau_exec_push - EXEC push operation
+ *
+ * This structure represents a single EXEC push operation. UMDs should pass an
+ * array of this structure via struct drm_nouveau_exec's &push_ptr field.
+ */
+struct drm_nouveau_exec_push {
+	/**
+	 * @va: the virtual address of the push buffer mapping
+	 */
+	__u64 va;
+	/**
+	 * @va_len: the length of the push buffer mapping
+	 */
+	__u64 va_len;
+};
+
+/**
+ * struct drm_nouveau_exec - structure for DRM_IOCTL_NOUVEAU_EXEC
+ */
+struct drm_nouveau_exec {
+	/**
+	 * @channel: the channel to execute the push buffer in
+	 */
+	__u32 channel;
+	/**
+	 * @push_count: the number of &drm_nouveau_exec_push ops
+	 */
+	__u32 push_count;
+	/**
+	 * @wait_count: the number of wait &drm_nouveau_syncs
+	 */
+	__u32 wait_count;
+	/**
+	 * @sig_count: the number of &drm_nouveau_syncs to signal when finished
+	 */
+	__u32 sig_count;
+	/**
+	 * @wait_ptr: pointer to &drm_nouveau_syncs to wait for
+	 */
+	__u64 wait_ptr;
+	/**
+	 * @sig_ptr: pointer to &drm_nouveau_syncs to signal when finished
+	 */
+	__u64 sig_ptr;
+	/**
+	 * @push_ptr: pointer to &drm_nouveau_exec_push ops
+	 */
+	__u64 push_ptr;
+};
+
 #define DRM_NOUVEAU_GETPARAM           0x00
 #define DRM_NOUVEAU_SETPARAM           0x01 /* deprecated */
 #define DRM_NOUVEAU_CHANNEL_ALLOC      0x02
@@ -174,6 +385,9 @@ struct drm_nouveau_gem_cpu_fini {
 #define DRM_NOUVEAU_NVIF               0x07
 #define DRM_NOUVEAU_SVM_INIT           0x08
 #define DRM_NOUVEAU_SVM_BIND           0x09
+#define DRM_NOUVEAU_VM_INIT            0x10
+#define DRM_NOUVEAU_VM_BIND            0x11
+#define DRM_NOUVEAU_EXEC               0x12
 #define DRM_NOUVEAU_GEM_NEW            0x40
 #define DRM_NOUVEAU_GEM_PUSHBUF        0x41
 #define DRM_NOUVEAU_GEM_CPU_PREP       0x42
@@ -239,6 +453,9 @@ struct drm_nouveau_svm_bind {
 #define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI       DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini)
 #define DRM_IOCTL_NOUVEAU_GEM_INFO           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info)
 
+#define DRM_IOCTL_NOUVEAU_VM_INIT            DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_INIT, struct drm_nouveau_vm_init)
+#define DRM_IOCTL_NOUVEAU_VM_BIND            DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_BIND, struct drm_nouveau_vm_bind)
+#define DRM_IOCTL_NOUVEAU_EXEC               DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_EXEC, struct drm_nouveau_exec)
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 899272354dbc1f8ca2d94ea0ddc0174a14fc8324 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 20 Sep 2022 13:03:36 -0400
Subject: drm/amdgpu: add UAPI for allocating doorbell memory

This patch adds flags for a new gem domain AMDGPU_GEM_DOMAIN_DOORBELL
in the UAPI layer.

V2: Drop 'memory' from description (Christian)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Reviewed-by: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 79b14828d542..f477eda6a2b8 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -94,6 +94,9 @@ extern "C" {
  *
  * %AMDGPU_GEM_DOMAIN_OA	Ordered append, used by 3D or Compute engines
  * for appending data.
+ *
+ * %AMDGPU_GEM_DOMAIN_DOORBELL	Doorbell. It is an MMIO region for
+ * signalling user mode queues.
  */
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
@@ -101,12 +104,14 @@ extern "C" {
 #define AMDGPU_GEM_DOMAIN_GDS		0x8
 #define AMDGPU_GEM_DOMAIN_GWS		0x10
 #define AMDGPU_GEM_DOMAIN_OA		0x20
+#define AMDGPU_GEM_DOMAIN_DOORBELL	0x40
 #define AMDGPU_GEM_DOMAIN_MASK		(AMDGPU_GEM_DOMAIN_CPU | \
 					 AMDGPU_GEM_DOMAIN_GTT | \
 					 AMDGPU_GEM_DOMAIN_VRAM | \
 					 AMDGPU_GEM_DOMAIN_GDS | \
 					 AMDGPU_GEM_DOMAIN_GWS | \
-					 AMDGPU_GEM_DOMAIN_OA)
+					 AMDGPU_GEM_DOMAIN_OA | \
+					 AMDGPU_GEM_DOMAIN_DOORBELL)
 
 /* Flag that CPU access will be required for the case of VRAM domain */
 #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED	(1 << 0)
-- 
cgit v1.2.3


From aa5f04d2e5a842351449034e619913e1c721a37c Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Mon, 31 Jul 2023 18:12:56 +0200
Subject: accel/ivpu: Extend get_param ioctl to identify capabilities

Add DRM_IVPU_PARAM_CAPABILITIES parameters to get_param ioctl to query
driver capabilities. For now use it for identify metric streamer and
new dma memory range features. Currently upstream version of intel_vpu
does not have those, they will be added it the future.

Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230731161258.2987564-5-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c | 19 +++++++++++++++++++
 include/uapi/drm/ivpu_accel.h |  4 ++++
 2 files changed, 23 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index fad607dbb2c6..d33eb17007bf 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -115,6 +115,22 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link)
 	kref_put(&file_priv->ref, file_priv_release);
 }
 
+static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param *args)
+{
+	switch (args->index) {
+	case DRM_IVPU_CAP_METRIC_STREAMER:
+		args->value = 0;
+		break;
+	case DRM_IVPU_CAP_DMA_MEMORY_RANGE:
+		args->value = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct ivpu_file_priv *file_priv = file->driver_priv;
@@ -174,6 +190,9 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
 	case DRM_IVPU_PARAM_SKU:
 		args->value = vdev->hw->sku;
 		break;
+	case DRM_IVPU_PARAM_CAPABILITIES:
+		ret = ivpu_get_capabilities(vdev, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
index 839820aed87e..3e99b74eef04 100644
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -60,6 +60,7 @@ extern "C" {
 #define DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID  10
 #define DRM_IVPU_PARAM_TILE_CONFIG	    11
 #define DRM_IVPU_PARAM_SKU		    12
+#define DRM_IVPU_PARAM_CAPABILITIES	    13
 
 #define DRM_IVPU_PLATFORM_TYPE_SILICON	    0
 
@@ -68,6 +69,9 @@ extern "C" {
 #define DRM_IVPU_CONTEXT_PRIORITY_FOCUS	    2
 #define DRM_IVPU_CONTEXT_PRIORITY_REALTIME  3
 
+#define DRM_IVPU_CAP_METRIC_STREAMER	    1
+#define DRM_IVPU_CAP_DMA_MEMORY_RANGE       2
+
 /**
  * struct drm_ivpu_param - Get/Set VPU parameters
  */
-- 
cgit v1.2.3


From 162f17b2d97a848586340694e64eff14e60a85a8 Mon Sep 17 00:00:00 2001
From: Karol Wachowski <karol.wachowski@linux.intel.com>
Date: Mon, 31 Jul 2023 18:12:57 +0200
Subject: accel/ivpu: Refactor memory ranges logic

Add new dma range and change naming convention for virtual address
memory ranges managed by KMD.

New available ranges are named as follows:
 * global range - global context accessible by FW
 * aliased range - user context accessible by FW
 * dma range - user context accessible by DMA
 * shave range - user context accessible by shaves
 * global shave range - global context accessible by shave nn

Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230731161258.2987564-6-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_drv.c         |  4 ++--
 drivers/accel/ivpu/ivpu_fw.c          | 18 ++++++++----------
 drivers/accel/ivpu/ivpu_gem.c         | 10 ++++++----
 drivers/accel/ivpu/ivpu_hw.h          |  9 ++++-----
 drivers/accel/ivpu/ivpu_hw_37xx.c     |  9 ++++-----
 drivers/accel/ivpu/ivpu_mmu_context.c |  8 ++++----
 include/uapi/drm/ivpu_accel.h         |  5 ++++-
 7 files changed, 32 insertions(+), 31 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index d33eb17007bf..3da15890bc59 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -122,7 +122,7 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
 		args->value = 0;
 		break;
 	case DRM_IVPU_CAP_DMA_MEMORY_RANGE:
-		args->value = 0;
+		args->value = 1;
 		break;
 	default:
 		return -EINVAL;
@@ -160,7 +160,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
 		args->value = ivpu_get_context_count(vdev);
 		break;
 	case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS:
-		args->value = vdev->hw->ranges.user_low.start;
+		args->value = vdev->hw->ranges.user.start;
 		break;
 	case DRM_IVPU_PARAM_CONTEXT_PRIORITY:
 		args->value = file_priv->priority;
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 7caf90a169a3..7e75439dec60 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -204,7 +204,7 @@ static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
 		return -EINVAL;
 	}
 
-	ivpu_hw_init_range(&vdev->hw->ranges.global_low, start, size);
+	ivpu_hw_init_range(&vdev->hw->ranges.global, start, size);
 	return 0;
 }
 
@@ -245,7 +245,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
 	}
 
 	if (fw->shave_nn_size) {
-		fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.global_high.start,
+		fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start,
 							  fw->shave_nn_size, DRM_IVPU_BO_UNCACHED);
 		if (!fw->mem_shave_nn) {
 			ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
@@ -443,9 +443,9 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	 * Uncached region of VPU address space, covers IPC buffers, job queues
 	 * and log buffers, programmable to L2$ Uncached by VPU MTRR
 	 */
-	boot_params->shared_region_base = vdev->hw->ranges.global_low.start;
-	boot_params->shared_region_size = vdev->hw->ranges.global_low.end -
-					  vdev->hw->ranges.global_low.start;
+	boot_params->shared_region_base = vdev->hw->ranges.global.start;
+	boot_params->shared_region_size = vdev->hw->ranges.global.end -
+					  vdev->hw->ranges.global.start;
 
 	boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr;
 	boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2;
@@ -453,10 +453,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2;
 	boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2;
 
-	boot_params->global_aliased_pio_base =
-		vdev->hw->ranges.global_aliased_pio.start;
-	boot_params->global_aliased_pio_size =
-		ivpu_hw_range_size(&vdev->hw->ranges.global_aliased_pio);
+	boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
+	boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
 
 	/* Allow configuration for L2C_PAGE_TABLE with boot param value */
 	boot_params->autoconfig = 1;
@@ -464,7 +462,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	/* Enable L2 cache for first 2GB of high memory */
 	boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 1;
 	boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg =
-		ADDR_TO_L2_CACHE_CFG(vdev->hw->ranges.global_high.start);
+		ADDR_TO_L2_CACHE_CFG(vdev->hw->ranges.shave.start);
 
 	if (vdev->fw->mem_shave_nn)
 		boot_params->shave_nn_fw_base = vdev->fw->mem_shave_nn->vpu_addr;
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 52b339aefadc..2981bb32c755 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -279,10 +279,12 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
 	int ret;
 
 	if (!range) {
-		if (bo->flags & DRM_IVPU_BO_HIGH_MEM)
-			range = &vdev->hw->ranges.user_high;
+		if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
+			range = &vdev->hw->ranges.shave;
+		else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
+			range = &vdev->hw->ranges.dma;
 		else
-			range = &vdev->hw->ranges.user_low;
+			range = &vdev->hw->ranges.user;
 	}
 
 	mutex_lock(&ctx->lock);
@@ -570,7 +572,7 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
 		fixed_range.end = vpu_addr + size;
 		range = &fixed_range;
 	} else {
-		range = &vdev->hw->ranges.global_low;
+		range = &vdev->hw->ranges.global;
 	}
 
 	bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0);
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index 335b7f707752..69e52d21cd10 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -38,11 +38,10 @@ struct ivpu_addr_range {
 struct ivpu_hw_info {
 	const struct ivpu_hw_ops *ops;
 	struct {
-		struct ivpu_addr_range global_low;
-		struct ivpu_addr_range global_high;
-		struct ivpu_addr_range user_low;
-		struct ivpu_addr_range user_high;
-		struct ivpu_addr_range global_aliased_pio;
+		struct ivpu_addr_range global;
+		struct ivpu_addr_range user;
+		struct ivpu_addr_range shave;
+		struct ivpu_addr_range dma;
 	} ranges;
 	struct {
 		u8 min_ratio;
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 2cda8e4c4a3d..9eae1c241bc0 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -620,11 +620,10 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
 
 	ivpu_pll_init_frequency_ratios(vdev);
 
-	ivpu_hw_init_range(&hw->ranges.global_low, 0x80000000, SZ_512M);
-	ivpu_hw_init_range(&hw->ranges.global_high, 0x180000000, SZ_2M);
-	ivpu_hw_init_range(&hw->ranges.user_low, 0xc0000000, 255 * SZ_1M);
-	ivpu_hw_init_range(&hw->ranges.user_high, 0x180000000, SZ_2G);
-	hw->ranges.global_aliased_pio = hw->ranges.user_low;
+	ivpu_hw_init_range(&hw->ranges.global, 0x80000000, SZ_512M);
+	ivpu_hw_init_range(&hw->ranges.user,   0xc0000000, 255 * SZ_1M);
+	ivpu_hw_init_range(&hw->ranges.shave, 0x180000000, SZ_2G);
+	ivpu_hw_init_range(&hw->ranges.dma,   0x200000000, SZ_8G);
 
 	return 0;
 }
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index 465a82298476..1d2e554e2c4a 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -431,11 +431,11 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
 		return ret;
 
 	if (!context_id) {
-		start = vdev->hw->ranges.global_low.start;
-		end = vdev->hw->ranges.global_high.end;
+		start = vdev->hw->ranges.global.start;
+		end = vdev->hw->ranges.shave.end;
 	} else {
-		start = vdev->hw->ranges.user_low.start;
-		end = vdev->hw->ranges.user_high.end;
+		start = vdev->hw->ranges.user.start;
+		end = vdev->hw->ranges.dma.end;
 	}
 
 	drm_mm_init(&ctx->mm, start, end - start);
diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
index 3e99b74eef04..a58a14c9f222 100644
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -133,8 +133,10 @@ struct drm_ivpu_param {
 	__u64 value;
 };
 
-#define DRM_IVPU_BO_HIGH_MEM   0x00000001
+#define DRM_IVPU_BO_SHAVE_MEM  0x00000001
+#define DRM_IVPU_BO_HIGH_MEM   DRM_IVPU_BO_SHAVE_MEM
 #define DRM_IVPU_BO_MAPPABLE   0x00000002
+#define DRM_IVPU_BO_DMA_MEM    0x00000004
 
 #define DRM_IVPU_BO_CACHED     0x00000000
 #define DRM_IVPU_BO_UNCACHED   0x00010000
@@ -144,6 +146,7 @@ struct drm_ivpu_param {
 #define DRM_IVPU_BO_FLAGS \
 	(DRM_IVPU_BO_HIGH_MEM | \
 	 DRM_IVPU_BO_MAPPABLE | \
+	 DRM_IVPU_BO_DMA_MEM | \
 	 DRM_IVPU_BO_CACHE_MASK)
 
 /**
-- 
cgit v1.2.3


From 443f9e0b1ab5e3b95abf8606097d13e30e2f2413 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@redhat.com>
Date: Wed, 23 Aug 2023 20:15:34 +0200
Subject: drm/nouveau: uapi: don't pass NO_PREFETCH flag implicitly

Currently, NO_PREFETCH is passed implicitly through
drm_nouveau_gem_pushbuf_push::length and drm_nouveau_exec_push::va_len.

Since this is a direct representation of how the HW is programmed it
isn't really future proof for a uAPI. Hence, fix this up for the new
uAPI and split up the va_len field of struct drm_nouveau_exec_push,
such that we keep 32bit for va_len and 32bit for flags.

For drm_nouveau_gem_pushbuf_push::length at least provide
NOUVEAU_GEM_PUSHBUF_NO_PREFETCH to indicate the bit shift.

While at it, fix up nv50_dma_push() as well, such that the caller
doesn't need to encode the NO_PREFETCH flag into the length parameter.

Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230823181746.3446-1-dakr@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_dma.c  |  7 +++++--
 drivers/gpu/drm/nouveau/nouveau_dma.h  |  8 ++++++--
 drivers/gpu/drm/nouveau/nouveau_exec.c | 19 ++++++++++++++++---
 drivers/gpu/drm/nouveau/nouveau_gem.c  |  6 ++++--
 include/uapi/drm/nouveau_drm.h         |  8 +++++++-
 5 files changed, 38 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index b90cac6d5772..b01c029f3a90 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -69,16 +69,19 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout)
 }
 
 void
-nv50_dma_push(struct nouveau_channel *chan, u64 offset, int length)
+nv50_dma_push(struct nouveau_channel *chan, u64 offset, u32 length,
+	      bool no_prefetch)
 {
 	struct nvif_user *user = &chan->drm->client.device.user;
 	struct nouveau_bo *pb = chan->push.buffer;
 	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;
 
 	BUG_ON(chan->dma.ib_free < 1);
+	WARN_ON(length > NV50_DMA_PUSH_MAX_LENGTH);
 
 	nouveau_bo_wr32(pb, ip++, lower_32_bits(offset));
-	nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8);
+	nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8 |
+			(no_prefetch ? (1 << 31) : 0));
 
 	chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h
index 035a709c7be1..1744d95b233e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -31,7 +31,8 @@
 #include "nouveau_chan.h"
 
 int nouveau_dma_wait(struct nouveau_channel *, int slots, int size);
-void nv50_dma_push(struct nouveau_channel *, u64 addr, int length);
+void nv50_dma_push(struct nouveau_channel *, u64 addr, u32 length,
+		   bool no_prefetch);
 
 /*
  * There's a hw race condition where you can't jump to your PUT offset,
@@ -45,6 +46,9 @@ void nv50_dma_push(struct nouveau_channel *, u64 addr, int length);
  */
 #define NOUVEAU_DMA_SKIPS (128 / 4)
 
+/* Maximum push buffer size. */
+#define NV50_DMA_PUSH_MAX_LENGTH 0x7fffff
+
 /* Object handles - for stuff that's doesn't use handle == oclass. */
 enum {
 	NvDmaFB		= 0x80000002,
@@ -89,7 +93,7 @@ FIRE_RING(struct nouveau_channel *chan)
 
 	if (chan->dma.ib_max) {
 		nv50_dma_push(chan, chan->push.addr + (chan->dma.put << 2),
-			      (chan->dma.cur - chan->dma.put) << 2);
+			      (chan->dma.cur - chan->dma.put) << 2, false);
 	} else {
 		WRITE_PUT(chan->dma.cur);
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
index 0f927adda4ed..a90c4cd8cbb2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -164,8 +164,10 @@ nouveau_exec_job_run(struct nouveau_job *job)
 	}
 
 	for (i = 0; i < exec_job->push.count; i++) {
-		nv50_dma_push(chan, exec_job->push.s[i].va,
-			      exec_job->push.s[i].va_len);
+		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
+		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;
+
+		nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
 	}
 
 	ret = nouveau_fence_emit(fence, chan);
@@ -223,7 +225,18 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob,
 {
 	struct nouveau_exec_job *job;
 	struct nouveau_job_args args = {};
-	int ret;
+	int i, ret;
+
+	for (i = 0; i < __args->push.count; i++) {
+		struct drm_nouveau_exec_push *p = &__args->push.s[i];
+
+		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
+			NV_PRINTK(err, nouveau_cli(__args->file_priv),
+				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
+				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
+			return -EINVAL;
+		}
+	}
 
 	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
 	if (!job)
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index f39360870c70..c0b10d8d3d03 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -856,9 +856,11 @@ revalidate:
 		for (i = 0; i < req->nr_push; i++) {
 			struct nouveau_vma *vma = (void *)(unsigned long)
 				bo[push[i].bo_index].user_priv;
+			u64 addr = vma->addr + push[i].offset;
+			u32 length = push[i].length & ~NOUVEAU_GEM_PUSHBUF_NO_PREFETCH;
+			bool no_prefetch = push[i].length & NOUVEAU_GEM_PUSHBUF_NO_PREFETCH;
 
-			nv50_dma_push(chan, vma->addr + push[i].offset,
-				      push[i].length);
+			nv50_dma_push(chan, addr, length, no_prefetch);
 		}
 	} else
 	if (drm->client.device.info.chipset >= 0x25) {
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index b1ad9d5ffce8..8d7402c13e56 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -138,6 +138,7 @@ struct drm_nouveau_gem_pushbuf_push {
 	__u32 pad;
 	__u64 offset;
 	__u64 length;
+#define NOUVEAU_GEM_PUSHBUF_NO_PREFETCH (1 << 23)
 };
 
 struct drm_nouveau_gem_pushbuf {
@@ -338,7 +339,12 @@ struct drm_nouveau_exec_push {
 	/**
 	 * @va_len: the length of the push buffer mapping
 	 */
-	__u64 va_len;
+	__u32 va_len;
+	/**
+	 * @flags: the flags for this push buffer mapping
+	 */
+	__u32 flags;
+#define DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH 0x1
 };
 
 /**
-- 
cgit v1.2.3