1 files changed, 241 insertions, 0 deletions
diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
new file mode 100644
index 000000000000..5fa917ba5450
--- /dev/null
+++ b/include/linux/gpu_buddy.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __GPU_BUDDY_H__
+#define __GPU_BUDDY_H__
+
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
+
+/**
+ * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range
+ *
+ * When set, allocation is restricted to the range [start, end) specified
+ * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored
+ * and allocation can use any free space.
+ */
+#define GPU_BUDDY_RANGE_ALLOCATION		BIT(0)
+
+/**
+ * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space
+ *
+ * Allocate starting from high addresses and work downwards. Useful for
+ * separating different allocation types (e.g., kernel vs userspace)
+ * to reduce fragmentation.
+ */
+#define GPU_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
+
+/**
+ * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks
+ *
+ * The allocation must be satisfied with a single contiguous block.
+ * If the requested size cannot be allocated contiguously, the
+ * allocation fails with -ENOSPC.
+ */
+#define GPU_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
+
+/**
+ * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory
+ *
+ * Attempt to allocate from the clear tree first and fall back to dirty
+ * memory if not enough clear memory is available, so returned blocks may
+ * still be dirty. Useful when the caller wants to avoid GPU clear operations.
+ */
+#define GPU_BUDDY_CLEAR_ALLOCATION		BIT(3)
+
+/**
+ * GPU_BUDDY_CLEARED - Mark freed blocks as cleared
+ *
+ * Used with gpu_buddy_free_list() to indicate that the memory being
+ * freed has been cleared (zeroed). The blocks will be placed in the
+ * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests.
+ */
+#define GPU_BUDDY_CLEARED			BIT(4)
+
+/**
+ * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming
+ *
+ * By default, if the requested size is smaller than the allocated block,
+ * excess memory is trimmed and returned to the free pool. This flag
+ * disables trimming, keeping the full power-of-two block size.
+ */
+#define GPU_BUDDY_TRIM_DISABLE			BIT(5)
+
+enum gpu_buddy_free_tree {		/* first index into gpu_buddy.free_trees */
+	GPU_BUDDY_CLEAR_TREE = 0,	/* free blocks with zeroed content */
+	GPU_BUDDY_DIRTY_TREE,		/* free blocks with unknown content */
+	GPU_BUDDY_MAX_FREE_TREES,	/* number of trees, not a tree itself */
+};
+/* Step @tree through every enum gpu_buddy_free_tree index, clear tree first. */
+#define for_each_free_tree(tree) \
+	for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++)
+
+/**
+ * struct gpu_buddy_block - Block within a buddy allocator
+ *
+ * Every block managed by the allocator is described by one of these. Blocks
+ * form a binary tree: a parent can be split into two children (its left and
+ * right buddies). Block sizes are powers of two, from chunk_size (order 0)
+ * up to the largest contiguous region.
+ *
+ * @private: Private data owned by the allocator user (e.g., driver-specific data)
+ * @link: List node the user owns while the block is allocated. It shares
+ *        storage with the allocator-private rb node, so it is only valid then.
+ */
+struct gpu_buddy_block {
+/* private: */
+	/*
+	 * Header bit layout:
+	 * - Bits 63:12: block offset within the address space, stored unshifted
+	 * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT), values pre-shifted
+	 * - Bit 9: clear bit (1 if memory is zeroed)
+	 * - Bits 8:6: reserved
+	 * - Bits 5:0: order (log2 of size relative to chunk_size)
+	 */
+#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
+#define GPU_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
+#define   GPU_BUDDY_ALLOCATED	   (1 << 10)
+#define   GPU_BUDDY_FREE	   (2 << 10)
+#define   GPU_BUDDY_SPLIT	   (3 << 10)
+#define GPU_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
+/* Reserved bits, free to be used if needed in the future */
+#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
+#define GPU_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
+	u64 header;
+
+	struct gpu_buddy_block *left;	/* left buddy (child) after a split */
+	struct gpu_buddy_block *right;	/* right buddy (child) after a split */
+	struct gpu_buddy_block *parent;	/* block this one was split from */
+/* public: */
+	void *private; /* owned by creator */
+
+	/*
+	 * While the block is allocated by the user through gpu_buddy_alloc*,
+	 * the user has ownership of the link, for example to keep the block
+	 * on a list of its own. As soon as the block is freed with
+	 * gpu_buddy_free*, ownership of this storage is given back to the mm.
+	 */
+	union {
+/* private: */
+		struct rb_node rb;	/* allocator-owned tree node; overlays link */
+/* public: */
+		struct list_head link;
+	};
+/* private: */
+	struct list_head tmp_link;
+	unsigned int subtree_max_alignment;
+};
+
+/* Order-zero (one chunk) must be at least SZ_4K, hence the 12 below */
+#define GPU_BUDDY_MAX_ORDER (63 - 12)
+
+/**
+ * struct gpu_buddy - GPU binary buddy allocator
+ *
+ * The buddy allocator provides efficient power-of-two memory allocation
+ * with fast allocation and free operations. It is commonly used for GPU
+ * memory management where allocations can be split into power-of-two
+ * block sizes.
+ *
+ * The allocator does no locking of its own; the user must serialize calls,
+ * and a simple mutex around gpu_buddy_alloc_blocks() and
+ * gpu_buddy_free_block()/gpu_buddy_free_list() should suffice.
+ *
+ * @n_roots: Number of root blocks in the roots array.
+ * @max_order: Maximum block order (log2 of largest block size / chunk_size).
+ * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K.
+ * @size: Total size of the address space managed by this allocator in bytes.
+ * @avail: Total free space currently available for allocation in bytes.
+ * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes.
+ *               This is a subset of @avail.
+ */
+struct gpu_buddy {
+/* private: */
+	/*
+	 * Array of red-black trees for free block management.
+	 * Indexed as free_trees[enum gpu_buddy_free_tree][order] where:
+	 * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content
+	 * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content
+	 * Each tree holds free blocks of the corresponding order.
+	 */
+	struct rb_root **free_trees;
+	/*
+	 * Array of @n_roots root blocks, the top-level blocks of the
+	 * binary tree(s). Multiple roots exist when the total size is not
+	 * a power of two, with each root being the largest power-of-two
+	 * that fits in the remaining space.
+	 */
+	struct gpu_buddy_block **roots;
+/* public: */
+	unsigned int n_roots;
+	unsigned int max_order;
+	u64 chunk_size;
+	u64 size;
+	u64 avail;
+	u64 clear_avail;
+};
+
+/* Offset of @block in the address space: its header with bits 11:0 masked off. */
+static inline u64 gpu_buddy_block_offset(const struct gpu_buddy_block *block)
+{
+	return block->header & GPU_BUDDY_HEADER_OFFSET;
+}
+
+/* Order of @block: log2 of its size relative to chunk_size (header bits 5:0). */
+static inline unsigned int gpu_buddy_block_order(const struct gpu_buddy_block *block)
+{
+	return block->header & GPU_BUDDY_HEADER_ORDER;
+}
+
+/* True when the state field of @block's header reads GPU_BUDDY_FREE. */
+static inline bool gpu_buddy_block_is_free(const struct gpu_buddy_block *block)
+{
+	return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE;
+}
+
+/* True when the clear bit of @block's header is set (memory is zeroed). */
+static inline bool gpu_buddy_block_is_clear(const struct gpu_buddy_block *block)
+{
+	return block->header & GPU_BUDDY_HEADER_CLEAR;
+}
+
+/* Size of @block in bytes: @mm's chunk_size shifted left by the block order. */
+static inline u64 gpu_buddy_block_size(const struct gpu_buddy *mm,
+				       const struct gpu_buddy_block *block)
+{
+	return mm->chunk_size << gpu_buddy_block_order(block);
+}
+
+/* @chunk_size must be at least SZ_4K (see struct gpu_buddy). */
+int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size);
+void gpu_buddy_fini(struct gpu_buddy *mm);
+
+/*
+ * @start and @end are only honoured when GPU_BUDDY_RANGE_ALLOCATION is set in
+ * @flags. A GPU_BUDDY_CONTIGUOUS_ALLOCATION request that cannot be satisfied
+ * contiguously fails with -ENOSPC.
+ */
+int gpu_buddy_alloc_blocks(struct gpu_buddy *mm, u64 start, u64 end, u64 size,
+			   u64 min_page_size, struct list_head *blocks,
+			   unsigned long flags);
+int gpu_buddy_block_trim(struct gpu_buddy *mm, u64 *start, u64 new_size,
+			 struct list_head *blocks);
+void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear);
+
+/*
+ * Pass GPU_BUDDY_CLEARED in @flags of gpu_buddy_free_list() when the memory
+ * being freed has been cleared (zeroed).
+ */
+void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block);
+void gpu_buddy_free_list(struct gpu_buddy *mm, struct list_head *objects,
+			 unsigned int flags);
+
+void gpu_buddy_print(struct gpu_buddy *mm);
+void gpu_buddy_block_print(struct gpu_buddy *mm, struct gpu_buddy_block *block);
+#endif