Diffstat (limited to 'drivers/misc/habanalabs/common/memory.c')
-rw-r--r--  drivers/misc/habanalabs/common/memory.c | 289
1 file changed, 74 insertions(+), 215 deletions(-)
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index a13506dd8119..663dd7e589d4 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -41,7 +41,7 @@ static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u
return -EINVAL;
}
} else {
- psize = hdev->asic_prop.dram_page_size;
+ psize = prop->device_mem_alloc_default_page_size;
}
*page_size = psize;
@@ -117,7 +117,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
paddr = gen_pool_alloc(vm->dram_pg_pool, total_size);
if (!paddr) {
dev_err(hdev->dev,
- "failed to allocate %llu contiguous pages with total size of %llu\n",
+ "Cannot allocate %llu contiguous pages with total size of %llu\n",
num_pgs, total_size);
return -ENOMEM;
}
@@ -156,9 +156,10 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
else
phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool,
page_size);
+
if (!phys_pg_pack->pages[i]) {
dev_err(hdev->dev,
- "Failed to allocate device memory (out of memory)\n");
+ "Cannot allocate device memory (out of memory)\n");
rc = -ENOMEM;
goto page_err;
}
@@ -237,19 +238,18 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
goto pin_err;
}
- rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
- userptr->sgt->nents, DMA_BIDIRECTIONAL);
- if (rc) {
- dev_err(hdev->dev, "failed to map sgt with DMA region\n");
- goto dma_map_err;
- }
-
userptr->dma_mapped = true;
userptr->dir = DMA_BIDIRECTIONAL;
userptr->vm_type = VM_TYPE_USERPTR;
*p_userptr = userptr;
+ rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
+ if (rc) {
+ dev_err(hdev->dev, "failed to map sgt with DMA region\n");
+ goto dma_map_err;
+ }
+
return 0;
dma_map_err:
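This hunk replaces the asic_dma_map_sg() call, which carried an explicit nents count, with the sgtable-based callback. A minimal sketch of what such a callback is expected to wrap, assuming it delegates to the generic dma_map_sgtable() helper (the wrapper name here is hypothetical):

static int example_dma_map_sgtable(struct device *dev, struct sg_table *sgt,
				   enum dma_data_direction dir)
{
	/*
	 * dma_map_sgtable() maps every entry of the table and records the
	 * number of DMA-mapped entries inside the sg_table itself, so the
	 * caller no longer has to carry nents around separately.
	 */
	return dma_map_sgtable(dev, sgt, dir, 0);
}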
@@ -900,7 +900,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
* consecutive block.
*/
total_npages = 0;
- for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+ for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
npages = hl_get_sg_info(sg, &dma_addr);
total_npages += npages;
@@ -929,7 +929,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
phys_pg_pack->total_size = total_npages * page_size;
j = 0;
- for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+ for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
npages = hl_get_sg_info(sg, &dma_addr);
/* align down to physical page size and save the offset */
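Both loops above move from for_each_sg(), which takes an explicit entry count, to for_each_sgtable_dma_sg(), which walks the DMA-mapped entries recorded in the table by the mapping call. A minimal sketch of the pattern (function and variable names hypothetical):

static u64 example_sum_dma_lengths(struct sg_table *sgt)
{
	struct scatterlist *sg;
	u64 total = 0;
	int i;

	/* Iterates only the DMA-mapped entries; must run after mapping. */
	for_each_sgtable_dma_sg(sgt, sg, i)
		total += sg_dma_len(sg);

	return total;
}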
@@ -1102,21 +1102,24 @@ static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
* map a device virtual block to this pages and return the start address of
* this block.
*/
-static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
- u64 *device_addr)
+static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr)
{
- struct hl_device *hdev = ctx->hdev;
- struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack;
+ enum hl_va_range_type va_range_type = 0;
+ struct hl_device *hdev = ctx->hdev;
struct hl_userptr *userptr = NULL;
+ u32 handle = 0, va_block_align;
struct hl_vm_hash_node *hnode;
+ struct hl_vm *vm = &hdev->vm;
struct hl_va_range *va_range;
- enum vm_type *vm_type;
+ bool is_userptr, do_prefetch;
u64 ret_vaddr, hint_addr;
- u32 handle = 0, va_block_align;
+ enum vm_type *vm_type;
int rc;
- bool is_userptr = args->flags & HL_MEM_USERPTR;
- enum hl_va_range_type va_range_type = 0;
+
+ /* set map flags */
+ is_userptr = args->flags & HL_MEM_USERPTR;
+ do_prefetch = hdev->supports_mmu_prefetch && (args->flags & HL_MEM_PREFETCH);
/* Assume failure */
*device_addr = 0;
@@ -1241,19 +1244,27 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
if (rc) {
- mutex_unlock(&ctx->mmu_lock);
- dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
- handle);
+ dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
goto map_err;
}
rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
ctx->asid, ret_vaddr, phys_pg_pack->total_size);
+ if (rc)
+ goto map_err;
mutex_unlock(&ctx->mmu_lock);
- if (rc)
- goto map_err;
+ /*
+ * Prefetch is done upon the user's request. It is performed in a WQ and so
+ * can run outside the MMU lock; the operation itself is already protected
+ * by the MMU lock.
+ */
+ if (do_prefetch) {
+ rc = hl_mmu_prefetch_cache_range(ctx, *vm_type, ctx->asid, ret_vaddr,
+ phys_pg_pack->total_size);
+ if (rc)
+ goto map_err;
+ }
ret_vaddr += phys_pg_pack->offset;
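The comment above captures the locking rationale: the heavy prefetch work is deferred so the ioctl path does not sit on ctx->mmu_lock while it runs. A sketch of that workqueue-deferral pattern under stated assumptions (the struct layout and work function are illustrative only, not the actual internals of hl_mmu_prefetch_cache_range()):

struct example_prefetch_work {
	struct work_struct work;
	u64 va;
	u64 size;
};

static void example_prefetch_fn(struct work_struct *w)
{
	struct example_prefetch_work *pw =
		container_of(w, struct example_prefetch_work, work);

	/* The MMU op takes its own lock here, as the comment above notes. */
	pr_debug("prefetching va 0x%llx size 0x%llx\n", pw->va, pw->size);
	kfree(pw);
}

static int example_queue_prefetch(struct workqueue_struct *wq, u64 va, u64 size)
{
	struct example_prefetch_work *pw = kzalloc(sizeof(*pw), GFP_KERNEL);

	if (!pw)
		return -ENOMEM;

	pw->va = va;
	pw->size = size;
	INIT_WORK(&pw->work, example_prefetch_fn);
	queue_work(wq, &pw->work);

	return 0;
}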
@@ -1272,6 +1283,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
return rc;
map_err:
+ mutex_unlock(&ctx->mmu_lock);
+
if (add_va_block(hdev, va_range, ret_vaddr,
ret_vaddr + phys_pg_pack->total_size - 1))
dev_warn(hdev->dev,
@@ -1509,7 +1522,7 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
vma->vm_ops = &hw_block_vm_ops;
vma->vm_private_data = lnode;
- hl_ctx_get(hdev, ctx);
+ hl_ctx_get(ctx);
rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
if (rc) {
@@ -1819,7 +1832,7 @@ static int export_dmabuf_common(struct hl_ctx *ctx,
}
hl_dmabuf->ctx = ctx;
- hl_ctx_get(hdev, hl_dmabuf->ctx);
+ hl_ctx_get(hl_dmabuf->ctx);
*dmabuf_fd = fd;
@@ -2076,164 +2089,34 @@ out:
return rc;
}
-static void ts_buff_release(struct kref *ref)
-{
- struct hl_ts_buff *buff;
-
- buff = container_of(ref, struct hl_ts_buff, refcount);
-
- vfree(buff->kernel_buff_address);
- vfree(buff->user_buff_address);
- kfree(buff);
-}
-
-struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr,
- u32 handle)
-{
- struct hl_ts_buff *buff;
-
- spin_lock(&mgr->ts_lock);
- buff = idr_find(&mgr->ts_handles, handle);
- if (!buff) {
- spin_unlock(&mgr->ts_lock);
- dev_warn(hdev->dev,
- "TS buff get failed, no match to handle 0x%x\n", handle);
- return NULL;
- }
- kref_get(&buff->refcount);
- spin_unlock(&mgr->ts_lock);
-
- return buff;
-}
-
-void hl_ts_put(struct hl_ts_buff *buff)
+static void ts_buff_release(struct hl_mmap_mem_buf *buf)
{
- kref_put(&buff->refcount, ts_buff_release);
-}
-
-static void buff_vm_close(struct vm_area_struct *vma)
-{
- struct hl_ts_buff *buff = (struct hl_ts_buff *) vma->vm_private_data;
- long new_mmap_size;
-
- new_mmap_size = buff->mmap_size - (vma->vm_end - vma->vm_start);
+ struct hl_ts_buff *ts_buff = buf->private;
- if (new_mmap_size > 0) {
- buff->mmap_size = new_mmap_size;
- return;
- }
-
- atomic_set(&buff->mmap, 0);
- hl_ts_put(buff);
- vma->vm_private_data = NULL;
+ vfree(ts_buff->kernel_buff_address);
+ vfree(ts_buff->user_buff_address);
+ kfree(ts_buff);
}
-static const struct vm_operations_struct ts_buff_vm_ops = {
- .close = buff_vm_close
-};
-
-int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args)
{
- struct hl_device *hdev = hpriv->hdev;
- struct hl_ts_buff *buff;
- u32 handle, user_buff_size;
- int rc;
-
- /* We use the page offset to hold the idr and thus we need to clear
- * it before doing the mmap itself
- */
- handle = vma->vm_pgoff;
- vma->vm_pgoff = 0;
-
- buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle);
- if (!buff) {
- dev_err(hdev->dev,
- "TS buff mmap failed, no match to handle 0x%x\n", handle);
- return -EINVAL;
- }
-
- /* Validation check */
- user_buff_size = vma->vm_end - vma->vm_start;
- if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) {
- dev_err(hdev->dev,
- "TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n",
- user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE));
- rc = -EINVAL;
- goto put_buff;
- }
-
-#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
- if (!access_ok(VERIFY_WRITE,
- (void __user *) (uintptr_t) vma->vm_start, user_buff_size)) {
-#else
- if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
- user_buff_size)) {
-#endif
- dev_err(hdev->dev,
- "user pointer is invalid - 0x%lx\n",
- vma->vm_start);
-
- rc = -EINVAL;
- goto put_buff;
- }
+ struct hl_ts_buff *ts_buff = buf->private;
- if (atomic_cmpxchg(&buff->mmap, 0, 1)) {
- dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n");
- rc = -EINVAL;
- goto put_buff;
- }
-
- vma->vm_ops = &ts_buff_vm_ops;
- vma->vm_private_data = buff;
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
- rc = remap_vmalloc_range(vma, buff->user_buff_address, 0);
- if (rc) {
- atomic_set(&buff->mmap, 0);
- goto put_buff;
- }
-
- buff->mmap_size = buff->user_buff_size;
- vma->vm_pgoff = handle;
-
- return 0;
-
-put_buff:
- hl_ts_put(buff);
- return rc;
-}
-
-void hl_ts_mgr_init(struct hl_ts_mgr *mgr)
-{
- spin_lock_init(&mgr->ts_lock);
- idr_init(&mgr->ts_handles);
+ return remap_vmalloc_range(vma, ts_buff->user_buff_address, 0);
}
-void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr)
-{
- struct hl_ts_buff *buff;
- struct idr *idp;
- u32 id;
-
- idp = &mgr->ts_handles;
-
- idr_for_each_entry(idp, buff, id) {
- if (kref_put(&buff->refcount, ts_buff_release) != 1)
- dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n",
- id);
- }
-
- idr_destroy(&mgr->ts_handles);
-}
-
-static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements)
+static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
struct hl_ts_buff *ts_buff = NULL;
- u32 size;
+ u32 size, num_elements;
void *p;
+ num_elements = *(u32 *)args;
+
ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL);
if (!ts_buff)
- return NULL;
+ return -ENOMEM;
/* Allocate the user buffer */
size = num_elements * sizeof(u64);
@@ -2242,7 +2125,7 @@ static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_eleme
goto free_mem;
ts_buff->user_buff_address = p;
- ts_buff->user_buff_size = size;
+ buf->mappable_size = size;
/* Allocate the internal kernel buffer */
size = num_elements * sizeof(struct hl_user_pending_interrupt);
@@ -2253,15 +2136,25 @@ static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_eleme
ts_buff->kernel_buff_address = p;
ts_buff->kernel_buff_size = size;
- return ts_buff;
+ buf->private = ts_buff;
+
+ return 0;
free_user_buff:
vfree(ts_buff->user_buff_address);
free_mem:
kfree(ts_buff);
- return NULL;
+ return -ENOMEM;
}
+static struct hl_mmap_mem_buf_behavior hl_ts_behavior = {
+ .topic = "TS",
+ .mem_id = HL_MMAP_TYPE_TS_BUFF,
+ .mmap = hl_ts_mmap,
+ .alloc = hl_ts_alloc_buf,
+ .release = ts_buff_release,
+};
+
/**
* allocate_timestamps_buffers() - allocate timestamps buffers
* This function will allocate ts buffer that will later on be mapped to the user
@@ -2278,54 +2171,22 @@ free_mem:
*/
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
{
- struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr;
- struct hl_device *hdev = hpriv->hdev;
- struct hl_ts_buff *ts_buff;
- int rc = 0;
+ struct hl_mem_mgr *mmg = &hpriv->mem_mgr;
+ struct hl_mmap_mem_buf *buf;
if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
- dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
+ dev_err(mmg->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
args->num_of_elements, TS_MAX_ELEMENTS_NUM);
return -EINVAL;
}
- /* Allocate ts buffer object
- * This object will contain two buffers one that will be mapped to the user
- * and another internal buffer for the driver use only, which won't be mapped
- * to the user.
- */
- ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements);
- if (!ts_buff) {
- rc = -ENOMEM;
- goto out_err;
- }
-
- spin_lock(&ts_mgr->ts_lock);
- rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC);
- spin_unlock(&ts_mgr->ts_lock);
- if (rc < 0) {
- dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n");
- goto release_ts_buff;
- }
-
- ts_buff->id = rc;
- ts_buff->hdev = hdev;
-
- kref_init(&ts_buff->refcount);
-
- /* idr is 32-bit so we can safely OR it with a mask that is above 32 bit */
- *handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF;
- *handle <<= PAGE_SHIFT;
+ buf = hl_mmap_mem_buf_alloc(mmg, &hl_ts_behavior, GFP_KERNEL, &args->num_of_elements);
+ if (!buf)
+ return -ENOMEM;
- dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id);
+ *handle = buf->handle;
return 0;
-
-release_ts_buff:
- kref_put(&ts_buff->refcount, ts_buff_release);
-out_err:
- *handle = 0;
- return rc;
}
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
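The removed code above encoded the handle by hand: the 32-bit idr id was OR'ed with a type mask above bit 31 and the result shifted by PAGE_SHIFT so it can travel through mmap's vm_pgoff. The unified memory manager now hands back buf->handle instead. A worked sketch of that encoding, assuming the same layout as the removed code (helper names hypothetical, HL_MMAP_TYPE_TS_BUFF from this driver):

static u64 example_encode_handle(u32 id)
{
	/* idr is 32-bit, so OR-ing a mask above bit 31 cannot collide. */
	return ((u64)id | HL_MMAP_TYPE_TS_BUFF) << PAGE_SHIFT;
}

static u32 example_decode_id(u64 handle)
{
	/* Undo the shift; the u32 cast drops the type bits above the id. */
	return (u32)(handle >> PAGE_SHIFT);
}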
@@ -2587,9 +2448,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
hl_debugfs_remove_userptr(hdev, userptr);
if (userptr->dma_mapped)
- hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
- userptr->sgt->nents,
- userptr->dir);
+ hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
kvfree(userptr->pages);