Diffstat (limited to 'drivers/infiniband/core/umem.c')
-rw-r--r-- drivers/infiniband/core/umem.c | 179
1 file changed, 138 insertions(+), 41 deletions(-)
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index fe5551562dbc..0a23048db523 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -37,27 +37,23 @@
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/export.h>
-#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/pagemap.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
-
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
- struct scatterlist *sg;
+ struct sg_page_iter sg_iter;
struct page *page;
- int i;
if (umem->nmap > 0)
- ib_dma_unmap_sg(dev, umem->sg_head.sgl,
- umem->npages,
+ ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
DMA_BIDIRECTIONAL);
- for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
-
- page = sg_page(sg);
+ for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
+ page = sg_page_iter_page(&sg_iter);
if (!PageDirty(page) && umem->writable && dirty)
set_page_dirty_lock(page);
put_page(page);
@@ -66,6 +62,124 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
sg_free_table(&umem->sg_head);
}
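
The rework above hinges on for_each_sg_page() visiting every PAGE_SIZE chunk backing the list rather than every scatterlist entry, so the release path still dirties and releases each page even after entries are coalesced. A minimal standalone sketch of that pattern (the helper name is ours, not part of the patch):

static void release_all_pages(struct scatterlist *sgl, int nents)
{
	struct sg_page_iter sg_iter;

	/* Visits each backing page, even when one SGE spans many pages */
	for_each_sg_page(sgl, &sg_iter, nents, 0)
		put_page(sg_page_iter_page(&sg_iter));
}
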
+/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
+ *
+ * sg: current scatterlist entry
+ * page_list: array of npages struct page pointers
+ * npages: number of pages in page_list
+ * max_seg_sz: maximum segment size in bytes
+ * nents: [out] number of entries in the scatterlist
+ *
+ * Return new end of scatterlist
+ */
+static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
+ struct page **page_list,
+ unsigned long npages,
+ unsigned int max_seg_sz,
+ int *nents)
+{
+ unsigned long first_pfn;
+ unsigned long i = 0;
+ bool update_cur_sg = false;
+ bool first = !sg_page(sg);
+
+ /* Check if new page_list is contiguous with end of previous page_list.
+ * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
+ */
+ if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
+ page_to_pfn(page_list[0])))
+ update_cur_sg = true;
+
+ while (i != npages) {
+ unsigned long len;
+ struct page *first_page = page_list[i];
+
+ first_pfn = page_to_pfn(first_page);
+
+ /* Compute the number of contiguous pages we have starting
+ * at i
+ */
+ for (len = 0; i != npages &&
+ first_pfn + len == page_to_pfn(page_list[i]) &&
+ len < (max_seg_sz >> PAGE_SHIFT);
+ len++)
+ i++;
+
+ /* Squash N contiguous pages from page_list into current sge */
+ if (update_cur_sg) {
+ if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
+ sg_set_page(sg, sg_page(sg),
+ sg->length + (len << PAGE_SHIFT),
+ 0);
+ update_cur_sg = false;
+ continue;
+ }
+ update_cur_sg = false;
+ }
+
+ /* Squash N contiguous pages into next sge or first sge */
+ if (!first)
+ sg = sg_next(sg);
+
+ (*nents)++;
+ sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
+ first = false;
+ }
+
+ return sg;
+}
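
To see the coalescing in action, consider a hypothetical two-batch pin; the page arrays batch1/batch2 and the 64 KB segment limit are illustrative, not from the patch. PFNs {100, 101, 102} followed by {103, 200} collapse into two SGEs: the first grows to 4 * PAGE_SIZE because PFN 103 extends the previous batch's tail, and PFN 200 opens a new entry:

	int nents = 0;
	struct scatterlist *sg = umem->sg_head.sgl;

	/* batch 1: PFNs 100,101,102 -> one SGE of 3 * PAGE_SIZE */
	sg = ib_umem_add_sg_table(sg, batch1, 3, SZ_64K, &nents);
	/* batch 2: PFN 103 extends that SGE to 4 * PAGE_SIZE;
	 * PFN 200 starts a second SGE of PAGE_SIZE, so nents == 2
	 */
	sg = ib_umem_add_sg_table(sg, batch2, 2, SZ_64K, &nents);
	sg_mark_end(sg);
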
+
+/**
+ * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
+ *
+ * @umem: umem struct
+ * @pgsz_bitmap: bitmap of HW supported page sizes
+ * @virt: IOVA
+ *
+ * This helper is intended for HW that supports multiple page
+ * sizes but can do only a single page size in an MR.
+ *
+ * Returns 0 if the umem requires page sizes not supported by
+ * the driver to be mapped. Drivers that always support PAGE_SIZE
+ * or smaller will never see a 0 result.
+ */
+unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ unsigned long virt)
+{
+ struct scatterlist *sg;
+ unsigned int best_pg_bit;
+ unsigned long va, pgoff;
+ dma_addr_t mask;
+ int i;
+
+ /* At minimum, drivers must support PAGE_SIZE or smaller */
+ if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
+ return 0;
+
+ va = virt;
+ /* max page size not to exceed MR length */
+ mask = roundup_pow_of_two(umem->length);
+ /* offset into first SGL */
+ pgoff = umem->address & ~PAGE_MASK;
+
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
+ /* Walk SGL and reduce max page size if VA/PA bits differ
+ * for any address.
+ */
+ mask |= (sg_dma_address(sg) + pgoff) ^ va;
+ if (i && i != (umem->nmap - 1))
+ /* restrict by length as well for interior SGEs */
+ mask |= sg_dma_len(sg);
+ va += sg_dma_len(sg) - pgoff;
+ pgoff = 0;
+ }
+ best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
+
+ return BIT_ULL(best_pg_bit);
+}
+EXPORT_SYMBOL(ib_umem_find_best_pgsz);
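
The mask accumulates, via XOR, every bit position where an IOVA differs from the DMA address backing it, plus the lengths of interior SGEs; the lowest set bit of the result caps the largest usable page size, which rdma_find_pg_bit() then matches against the supported bitmap. A hypothetical driver call, assuming the device advertises 4K, 2M, and 1G translations (the bitmap is illustrative):

	unsigned long pg_sz;

	pg_sz = ib_umem_find_best_pgsz(umem, SZ_4K | SZ_2M | SZ_1G, virt);
	if (!pg_sz)
		return -EINVAL;	/* umem not expressible with supported sizes */
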
+
/**
* ib_umem_get - Pin and DMA map userspace memory.
*
@@ -84,16 +198,14 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
struct ib_ucontext *context;
struct ib_umem *umem;
struct page **page_list;
- struct vm_area_struct **vma_list;
unsigned long lock_limit;
unsigned long new_pinned;
unsigned long cur_base;
struct mm_struct *mm;
unsigned long npages;
int ret;
- int i;
unsigned long dma_attrs = 0;
- struct scatterlist *sg, *sg_list_start;
+ struct scatterlist *sg;
unsigned int gup_flags = FOLL_WRITE;
if (!udata)
@@ -138,29 +250,23 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
mmgrab(mm);
if (access & IB_ACCESS_ON_DEMAND) {
+ if (WARN_ON_ONCE(!context->invalidate_range)) {
+ ret = -EINVAL;
+ goto umem_kfree;
+ }
+
ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
if (ret)
goto umem_kfree;
return umem;
}
- /* We assume the memory is from hugetlb until proved otherwise */
- umem->hugetlb = 1;
-
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
ret = -ENOMEM;
goto umem_kfree;
}
- /*
- * if we can't alloc the vma_list, it's not so bad;
- * just assume the memory is not hugetlb memory
- */
- vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
- if (!vma_list)
- umem->hugetlb = 0;
-
npages = ib_umem_num_pages(umem);
if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
@@ -185,41 +291,34 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
if (!umem->writable)
gup_flags |= FOLL_FORCE;
- sg_list_start = umem->sg_head.sgl;
+ sg = umem->sg_head.sgl;
while (npages) {
down_read(&mm->mmap_sem);
ret = get_user_pages_longterm(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
- gup_flags, page_list, vma_list);
+ gup_flags, page_list, NULL);
if (ret < 0) {
up_read(&mm->mmap_sem);
goto umem_release;
}
- umem->npages += ret;
cur_base += ret * PAGE_SIZE;
npages -= ret;
- /* Continue to hold the mmap_sem as vma_list access
- * needs to be protected.
- */
- for_each_sg(sg_list_start, sg, ret, i) {
- if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
- umem->hugetlb = 0;
+ sg = ib_umem_add_sg_table(sg, page_list, ret,
+ dma_get_max_seg_size(context->device->dma_device),
+ &umem->sg_nents);
- sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
- }
up_read(&mm->mmap_sem);
-
- /* preparing for next loop */
- sg_list_start = sg;
}
+ sg_mark_end(sg);
+
umem->nmap = ib_dma_map_sg_attrs(context->device,
umem->sg_head.sgl,
- umem->npages,
+ umem->sg_nents,
DMA_BIDIRECTIONAL,
dma_attrs);
@@ -236,8 +335,6 @@ umem_release:
vma:
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
- if (vma_list)
- free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);
umem_kfree:
if (ret) {
@@ -315,7 +412,7 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
return -EINVAL;
}
- ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length,
+ ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->sg_nents, dst, length,
offset + ib_umem_offset(umem));
if (ret < 0)
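
For callers the switch from npages to sg_nents is invisible; ib_umem_copy_from() keeps its existing contract. A hypothetical caller sketch (buffer size and offset are illustrative):

	char buf[64];
	int err;

	/* Copy the first 64 bytes of the pinned region into buf */
	err = ib_umem_copy_from(buf, umem, 0, sizeof(buf));
	if (err)
		return err;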