diff options
Diffstat (limited to 'include/linux/gpu_buddy.h')
| -rw-r--r-- | include/linux/gpu_buddy.h | 241 |
1 files changed, 241 insertions, 0 deletions
diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h new file mode 100644 index 000000000000..5fa917ba5450 --- /dev/null +++ b/include/linux/gpu_buddy.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __GPU_BUDDY_H__ +#define __GPU_BUDDY_H__ + +#include <linux/bitops.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/rbtree.h> +#include <linux/rbtree_augmented.h> + +/** + * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range + * + * When set, allocation is restricted to the range [start, end) specified + * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored + * and allocation can use any free space. + */ +#define GPU_BUDDY_RANGE_ALLOCATION BIT(0) + +/** + * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space + * + * Allocate starting from high addresses and working down. Useful for + * separating different allocation types (e.g., kernel vs userspace) + * to reduce fragmentation. + */ +#define GPU_BUDDY_TOPDOWN_ALLOCATION BIT(1) + +/** + * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks + * + * The allocation must be satisfied with a single contiguous block. + * If the requested size cannot be allocated contiguously, the + * allocation fails with -ENOSPC. + */ +#define GPU_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) + +/** + * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory + * + * Attempt to allocate from the clear tree first. If insufficient clear + * memory is available, falls back to dirty memory. Useful when the + * caller needs zeroed memory and wants to avoid GPU clear operations. + */ +#define GPU_BUDDY_CLEAR_ALLOCATION BIT(3) + +/** + * GPU_BUDDY_CLEARED - Mark returned blocks as cleared + * + * Used with gpu_buddy_free_list() to indicate that the memory being + * freed has been cleared (zeroed). The blocks will be placed in the + * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests. + */ +#define GPU_BUDDY_CLEARED BIT(4) + +/** + * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming + * + * By default, if an allocation is smaller than the allocated block, + * excess memory is trimmed and returned to the free pool. This flag + * disables trimming, keeping the full power-of-two block size. + */ +#define GPU_BUDDY_TRIM_DISABLE BIT(5) + +enum gpu_buddy_free_tree { + GPU_BUDDY_CLEAR_TREE = 0, + GPU_BUDDY_DIRTY_TREE, + GPU_BUDDY_MAX_FREE_TREES, +}; + +#define for_each_free_tree(tree) \ + for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++) + +/** + * struct gpu_buddy_block - Block within a buddy allocator + * + * Each block in the buddy allocator is represented by this structure. + * Blocks are organized in a binary tree where each parent block can be + * split into two children (left and right buddies). The allocator manages + * blocks at various orders (power-of-2 sizes) from chunk_size up to the + * largest contiguous region. + * + * @private: Private data owned by the allocator user (e.g., driver-specific data) + * @link: List node for user ownership while block is allocated + */ +struct gpu_buddy_block { +/* private: */ + /* + * Header bit layout: + * - Bits 63:12: block offset within the address space + * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT) + * - Bit 9: clear bit (1 if memory is zeroed) + * - Bits 8:6: reserved + * - Bits 5:0: order (log2 of size relative to chunk_size) + */ +#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) +#define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) +#define GPU_BUDDY_ALLOCATED (1 << 10) +#define GPU_BUDDY_FREE (2 << 10) +#define GPU_BUDDY_SPLIT (3 << 10) +#define GPU_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9) +/* Free to be used, if needed in the future */ +#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6) +#define GPU_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) + u64 header; + + struct gpu_buddy_block *left; + struct gpu_buddy_block *right; + struct gpu_buddy_block *parent; +/* public: */ + void *private; /* owned by creator */ + + /* + * While the block is allocated by the user through gpu_buddy_alloc*, + * the user has ownership of the link, for example to maintain within + * a list, if so desired. As soon as the block is freed with + * gpu_buddy_free* ownership is given back to the mm. + */ + union { +/* private: */ + struct rb_node rb; +/* public: */ + struct list_head link; + }; +/* private: */ + struct list_head tmp_link; + unsigned int subtree_max_alignment; +}; + +/* Order-zero must be at least SZ_4K */ +#define GPU_BUDDY_MAX_ORDER (63 - 12) + +/** + * struct gpu_buddy - GPU binary buddy allocator + * + * The buddy allocator provides efficient power-of-two memory allocation + * with fast allocation and free operations. It is commonly used for GPU + * memory management where allocations can be split into power-of-two + * block sizes. + * + * Locking should be handled by the user; a simple mutex around + * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list() + * should suffice. + * + * @n_roots: Number of root blocks in the roots array. + * @max_order: Maximum block order (log2 of largest block size / chunk_size). + * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K. + * @size: Total size of the address space managed by this allocator in bytes. + * @avail: Total free space currently available for allocation in bytes. + * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes. + * This is a subset of @avail. + */ +struct gpu_buddy { +/* private: */ + /* + * Array of red-black trees for free block management. + * Indexed as free_trees[clear/dirty][order] where: + * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content + * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content + * Each tree holds free blocks of the corresponding order. + */ + struct rb_root **free_trees; + /* + * Array of root blocks representing the top-level blocks of the + * binary tree(s). Multiple roots exist when the total size is not + * a power of two, with each root being the largest power-of-two + * that fits in the remaining space. + */ + struct gpu_buddy_block **roots; +/* public: */ + unsigned int n_roots; + unsigned int max_order; + u64 chunk_size; + u64 size; + u64 avail; + u64 clear_avail; +}; + +static inline u64 +gpu_buddy_block_offset(const struct gpu_buddy_block *block) +{ + return block->header & GPU_BUDDY_HEADER_OFFSET; +} + +static inline unsigned int +gpu_buddy_block_order(struct gpu_buddy_block *block) +{ + return block->header & GPU_BUDDY_HEADER_ORDER; +} + +static inline bool +gpu_buddy_block_is_free(struct gpu_buddy_block *block) +{ + return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE; +} + +static inline bool +gpu_buddy_block_is_clear(struct gpu_buddy_block *block) +{ + return block->header & GPU_BUDDY_HEADER_CLEAR; +} + +static inline u64 +gpu_buddy_block_size(struct gpu_buddy *mm, + struct gpu_buddy_block *block) +{ + return mm->chunk_size << gpu_buddy_block_order(block); +} + +int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size); + +void gpu_buddy_fini(struct gpu_buddy *mm); + +int gpu_buddy_alloc_blocks(struct gpu_buddy *mm, + u64 start, u64 end, u64 size, + u64 min_page_size, + struct list_head *blocks, + unsigned long flags); + +int gpu_buddy_block_trim(struct gpu_buddy *mm, + u64 *start, + u64 new_size, + struct list_head *blocks); + +void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear); + +void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block); + +void gpu_buddy_free_list(struct gpu_buddy *mm, + struct list_head *objects, + unsigned int flags); + +void gpu_buddy_print(struct gpu_buddy *mm); +void gpu_buddy_block_print(struct gpu_buddy *mm, + struct gpu_buddy_block *block); +#endif |
