author	Linus Torvalds <torvalds@linux-foundation.org>	2026-04-17 11:12:42 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2026-04-17 11:12:42 -0700
commit	87768582a440e7049a04e8af7383b86738d15b38 (patch)
tree	c6490ceb75f0af1ff214b64067fcd29d596f3a53
parent	d662a710c668a86a39ebaad334d9960a0cc776c2 (diff)
parent	15818b2cd42df3cc886f4cc46acfab4d072dcacc (diff)
Merge tag 'dma-mapping-7.1-2026-04-16' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux
Pull dma-mapping updates from Marek Szyprowski:

 - add support for batched cache sync, which improves the performance of
   dma_map/unmap_sg() operations on the ARM64 architecture (Barry Song)

 - introduce the DMA_ATTR_CC_SHARED attribute for explicitly shared
   memory used in confidential computing (Jiri Pirko)

 - refactor the spaghetti-like code in drivers/of/of_reserved_mem.c and
   its clients (Marek Szyprowski, shared branch with device-tree updates
   to avoid merge conflicts)

 - prepare the Contiguous Memory Allocator related code for making
   dma-buf drivers modularized (Maxime Ripard)

 - add support for benchmarking dma_map_sg() calls in the tools/dma
   utility (Qinxin Xia)

* tag 'dma-mapping-7.1-2026-04-16' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux: (24 commits)
  dma-buf: heaps: system: document system_cc_shared heap
  dma-buf: heaps: system: add system_cc_shared heap for explicitly shared memory
  dma-mapping: introduce DMA_ATTR_CC_SHARED for shared memory
  mm: cma: Export cma_alloc(), cma_release() and cma_get_name()
  dma: contiguous: Export dev_get_cma_area()
  dma: contiguous: Make dma_contiguous_default_area static
  dma: contiguous: Make dev_get_cma_area() a proper function
  dma: contiguous: Turn heap registration logic around
  of: reserved_mem: rework fdt_init_reserved_mem_node()
  of: reserved_mem: clarify fdt_scan_reserved_mem*() functions
  of: reserved_mem: rearrange code a bit
  of: reserved_mem: replace CMA quirks by generic methods
  of: reserved_mem: switch to ops based OF_DECLARE()
  of: reserved_mem: use -ENODEV instead of -ENOENT
  of: reserved_mem: remove fdt node from the structure
  dma-mapping: fix false kernel-doc comment marker
  dma-mapping: Support batch mode for dma_direct_{map,unmap}_sg
  dma-mapping: Separate DMA sync issuing and completion waiting
  arm64: Provide dcache_inval_poc_nosync helper
  arm64: Provide dcache_clean_poc_nosync helper
  ...
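As a minimal sketch of the batching pattern this series introduces (the helper name is illustrative; arch_sync_dma_for_device() and arch_sync_dma_flush() are the interfaces changed and added below), the idea is to issue cache maintenance per scatterlist entry without a barrier and then wait for completion once:

/*
 * Illustrative sketch, not part of the patch: batched sync issues the
 * cache maintenance instructions per scatterlist entry without a
 * barrier, then waits once at the end.  arch_sync_dma_flush() is a
 * dsb(sy) on arm64 and a no-op on architectures that do not select
 * ARCH_HAS_BATCHED_DMA_SYNC.
 */
static void sync_sg_batched(struct scatterlist *sgl, int nents,
			    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i)
		arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);

	arch_sync_dma_flush();		/* one barrier instead of nents */
}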
-rw-r--r--	Documentation/userspace-api/dma-buf-heaps.rst	|   7
-rw-r--r--	arch/arm64/Kconfig	|   1
-rw-r--r--	arch/arm64/include/asm/assembler.h	|  25
-rw-r--r--	arch/arm64/include/asm/cache.h	|   5
-rw-r--r--	arch/arm64/include/asm/cacheflush.h	|   2
-rw-r--r--	arch/arm64/kernel/relocate_kernel.S	|   3
-rw-r--r--	arch/arm64/mm/cache.S	|  57
-rw-r--r--	arch/arm64/mm/dma-mapping.c	|   4
-rw-r--r--	drivers/dma-buf/heaps/cma_heap.c	|  19
-rw-r--r--	drivers/dma-buf/heaps/system_heap.c	| 103
-rw-r--r--	drivers/iommu/dma-iommu.c	|  35
-rw-r--r--	drivers/memory/tegra/tegra210-emc-table.c	|  19
-rw-r--r--	drivers/of/fdt.c	|   2
-rw-r--r--	drivers/of/of_private.h	|   2
-rw-r--r--	drivers/of/of_reserved_mem.c	| 320
-rw-r--r--	drivers/xen/swiotlb-xen.c	|  24
-rw-r--r--	include/linux/cma.h	|  10
-rw-r--r--	include/linux/dma-buf/heaps/cma.h	|  16
-rw-r--r--	include/linux/dma-map-ops.h	|  23
-rw-r--r--	include/linux/dma-mapping.h	|  12
-rw-r--r--	include/linux/of_reserved_mem.h	|  16
-rw-r--r--	include/trace/events/dma.h	|   3
-rw-r--r--	include/uapi/linux/map_benchmark.h	|  13
-rw-r--r--	kernel/dma/Kconfig	|   3
-rw-r--r--	kernel/dma/coherent.c	|  19
-rw-r--r--	kernel/dma/contiguous.c	| 144
-rw-r--r--	kernel/dma/direct.c	|  23
-rw-r--r--	kernel/dma/direct.h	|  35
-rw-r--r--	kernel/dma/map_benchmark.c	| 246
-rw-r--r--	kernel/dma/mapping.c	|  19
-rw-r--r--	kernel/dma/swiotlb.c	|  26
-rw-r--r--	mm/cma.c	|   3
-rw-r--r--	tools/dma/dma_map_benchmark.c	|  23
33 files changed, 917 insertions, 345 deletions
diff --git a/Documentation/userspace-api/dma-buf-heaps.rst b/Documentation/userspace-api/dma-buf-heaps.rst
index 05445c83b79a..f56b743cdb36 100644
--- a/Documentation/userspace-api/dma-buf-heaps.rst
+++ b/Documentation/userspace-api/dma-buf-heaps.rst
@@ -16,6 +16,13 @@ following heaps:
- The ``system`` heap allocates virtually contiguous, cacheable, buffers.
+ - The ``system_cc_shared`` heap allocates virtually contiguous, cacheable,
+ buffers using shared (decrypted) memory. It is only present on
+ confidential computing (CoCo) VMs where memory encryption is active
+ (e.g., AMD SEV, Intel TDX). The allocated pages have the encryption
+ bit cleared, making them accessible for device DMA without TDISP
+ support. On non-CoCo VM configurations, this heap is not registered.
+
- The ``default_cma_region`` heap allocates physically contiguous,
cacheable, buffers. Only present if a CMA region is present. Such a
region is usually created either through the kernel commandline
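As a hedged usage sketch, userspace reaches this heap through the standard dma-heap character device and allocation ioctl; only the heap name below comes from this patch, the rest is the preexisting dma-heap UAPI:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/dma-heap.h>

/* Illustrative only: allocate 1 MiB of shared (decrypted) memory. */
static int alloc_cc_shared(void)
{
	struct dma_heap_allocation_data data = {
		.len = 1024 * 1024,
		.fd_flags = O_RDWR | O_CLOEXEC,
	};
	int heap = open("/dev/dma_heap/system_cc_shared", O_RDWR | O_CLOEXEC);

	if (heap < 0)
		return -1;	/* e.g. not a CoCo VM: heap not registered */
	if (ioctl(heap, DMA_HEAP_IOCTL_ALLOC, &data) < 0)
		return -1;
	return data.fd;		/* dma-buf fd backed by decrypted pages */
}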
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 94bf7cc43063..a0af483c0488 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -54,6 +54,7 @@ config ARM64
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select ARCH_HAS_BATCHED_DMA_SYNC
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_ZONE_DMA_SET if EXPERT
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 9d7c9ae5ac96..effae53e9739 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -371,14 +371,13 @@ alternative_endif
* [start, end) with dcache line size explicitly provided.
*
* op: operation passed to dc instruction
- * domain: domain used in dsb instruction
* start: starting virtual address of the region
* end: end virtual address of the region
* linesz: dcache line size
* fixup: optional label to branch to on user fault
* Corrupts: start, end, tmp
*/
- .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
+ .macro dcache_by_myline_op_nosync op, start, end, linesz, tmp, fixup
sub \tmp, \linesz, #1
bic \start, \start, \tmp
alternative_if ARM64_WORKAROUND_4311569
@@ -412,14 +411,28 @@ alternative_if ARM64_WORKAROUND_4311569
cbnz \start, .Ldcache_op\@
.endif
alternative_else_nop_endif
- dsb \domain
_cond_uaccess_extable .Ldcache_op\@, \fixup
.endm
/*
* Macro to perform a data cache maintenance for the interval
- * [start, end)
+ * [start, end) without waiting for completion
+ *
+ * op: operation passed to dc instruction
+ * start: starting virtual address of the region
+ * end: end virtual address of the region
+ * fixup: optional label to branch to on user fault
+ * Corrupts: start, end, tmp1, tmp2
+ */
+ .macro dcache_by_line_op_nosync op, start, end, tmp1, tmp2, fixup
+ dcache_line_size \tmp1, \tmp2
+ dcache_by_myline_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup
+ .endm
+
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [start, end) and wait for completion
*
* op: operation passed to dc instruction
* domain: domain used in dsb instruction
@@ -429,8 +442,8 @@ alternative_else_nop_endif
* Corrupts: start, end, tmp1, tmp2
*/
.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
- dcache_line_size \tmp1, \tmp2
- dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup
+ dcache_by_line_op_nosync \op, \start, \end, \tmp1, \tmp2, \fixup
+ dsb \domain
.endm
/*
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index dd2c8586a725..10a7ffadee3d 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -87,6 +87,11 @@ int cache_line_size(void);
#define dma_get_cache_alignment cache_line_size
+static inline void arch_sync_dma_flush(void)
+{
+ dsb(sy);
+}
+
/* Compress a u64 MPIDR value into 32 bits. */
static inline u64 arch_compact_of_hwid(u64 id)
{
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 28ab96e808ef..382b4ac3734d 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -74,6 +74,8 @@ extern void icache_inval_pou(unsigned long start, unsigned long end);
extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
extern void dcache_inval_poc(unsigned long start, unsigned long end);
extern void dcache_clean_poc(unsigned long start, unsigned long end);
+extern void dcache_inval_poc_nosync(unsigned long start, unsigned long end);
+extern void dcache_clean_poc_nosync(unsigned long start, unsigned long end);
extern void dcache_clean_pop(unsigned long start, unsigned long end);
extern void dcache_clean_pou(unsigned long start, unsigned long end);
extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index 413f899e4ac6..6cb4209f5dab 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -64,7 +64,8 @@ SYM_CODE_START(arm64_relocate_new_kernel)
mov x19, x13
copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
add x1, x19, #PAGE_SIZE
- dcache_by_myline_op civac, sy, x19, x1, x15, x20
+ dcache_by_myline_op_nosync civac, x19, x1, x15, x20
+ dsb sy
b .Lnext
.Ltest_indirection:
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 503567c864fd..ab75c050f559 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -132,17 +132,7 @@ alternative_else_nop_endif
ret
SYM_FUNC_END(dcache_clean_pou)
-/*
- * dcache_inval_poc(start, end)
- *
- * Ensure that any D-cache lines for the interval [start, end)
- * are invalidated. Any partial lines at the ends of the interval are
- * also cleaned to PoC to prevent data loss.
- *
- * - start - kernel start address of region
- * - end - kernel end address of region
- */
-SYM_FUNC_START(__pi_dcache_inval_poc)
+.macro __dcache_inval_poc_nosync
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
@@ -158,12 +148,42 @@ SYM_FUNC_START(__pi_dcache_inval_poc)
3: add x0, x0, x2
cmp x0, x1
b.lo 2b
+.endm
+
+/*
+ * dcache_inval_poc(start, end)
+ *
+ * Ensure that any D-cache lines for the interval [start, end)
+ * are invalidated. Any partial lines at the ends of the interval are
+ * also cleaned to PoC to prevent data loss.
+ *
+ * - start - kernel start address of region
+ * - end - kernel end address of region
+ */
+SYM_FUNC_START(__pi_dcache_inval_poc)
+ __dcache_inval_poc_nosync
dsb sy
ret
SYM_FUNC_END(__pi_dcache_inval_poc)
SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc)
/*
+ * dcache_inval_poc_nosync(start, end)
+ *
+ * Issue the instructions of D-cache lines for the interval [start, end)
+ * for invalidation. Not necessarily cleaned to PoC till an explicit dsb
+ * sy is issued later
+ *
+ * - start - kernel start address of region
+ * - end - kernel end address of region
+ */
+SYM_FUNC_START(__pi_dcache_inval_poc_nosync)
+ __dcache_inval_poc_nosync
+ ret
+SYM_FUNC_END(__pi_dcache_inval_poc_nosync)
+SYM_FUNC_ALIAS(dcache_inval_poc_nosync, __pi_dcache_inval_poc_nosync)
+
+/*
* dcache_clean_poc(start, end)
*
* Ensure that any D-cache lines for the interval [start, end)
@@ -179,6 +199,21 @@ SYM_FUNC_END(__pi_dcache_clean_poc)
SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc)
/*
+ * dcache_clean_poc_nosync(start, end)
+ *
+ * Issue the instructions of D-cache lines for the interval [start, end).
+ * not necessarily cleaned to the PoC till an explicit dsb sy afterward.
+ *
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+SYM_FUNC_START(__pi_dcache_clean_poc_nosync)
+ dcache_by_line_op_nosync cvac, x0, x1, x2, x3
+ ret
+SYM_FUNC_END(__pi_dcache_clean_poc_nosync)
+SYM_FUNC_ALIAS(dcache_clean_poc_nosync, __pi_dcache_clean_poc_nosync)
+
+/*
* dcache_clean_pop(start, end)
*
* Ensure that any D-cache lines for the interval [start, end)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index b2b5792b2caa..ae1ae0280eef 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -17,7 +17,7 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
{
unsigned long start = (unsigned long)phys_to_virt(paddr);
- dcache_clean_poc(start, start + size);
+ dcache_clean_poc_nosync(start, start + size);
}
void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
@@ -28,7 +28,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
if (dir == DMA_TO_DEVICE)
return;
- dcache_inval_poc(start, start + size);
+ dcache_inval_poc_nosync(start, start + size);
}
void arch_dma_prep_coherent(struct page *page, size_t size)
diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
index 92865786cfc9..a359aac45579 100644
--- a/drivers/dma-buf/heaps/cma_heap.c
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -14,7 +14,6 @@
#include <linux/cma.h>
#include <linux/dma-buf.h>
-#include <linux/dma-buf/heaps/cma.h>
#include <linux/dma-heap.h>
#include <linux/dma-map-ops.h>
#include <linux/err.h>
@@ -30,19 +29,6 @@
#define DEFAULT_CMA_NAME "default_cma_region"
-static struct cma *dma_areas[MAX_CMA_AREAS] __initdata;
-static unsigned int dma_areas_num __initdata;
-
-int __init dma_heap_cma_register_heap(struct cma *cma)
-{
- if (dma_areas_num >= ARRAY_SIZE(dma_areas))
- return -EINVAL;
-
- dma_areas[dma_areas_num++] = cma;
-
- return 0;
-}
-
struct cma_heap {
struct dma_heap *heap;
struct cma *cma;
@@ -411,6 +397,7 @@ static int __init __add_cma_heap(struct cma *cma, const char *name)
static int __init add_cma_heaps(void)
{
struct cma *default_cma = dev_get_cma_area(NULL);
+ struct cma *cma;
unsigned int i;
int ret;
@@ -420,9 +407,7 @@ static int __init add_cma_heaps(void)
return ret;
}
- for (i = 0; i < dma_areas_num; i++) {
- struct cma *cma = dma_areas[i];
-
+ for (i = 0; (cma = dma_contiguous_get_area_by_idx(i)) != NULL; i++) {
ret = __add_cma_heap(cma, cma_get_name(cma));
if (ret) {
pr_warn("Failed to add CMA heap %s", cma_get_name(cma));
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
index b3650d8fd651..03c2b87cb111 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -10,17 +10,25 @@
* Andrew F. Davis <afd@ti.com>
*/
+#include <linux/cc_platform.h>
#include <linux/dma-buf.h>
#include <linux/dma-mapping.h>
#include <linux/dma-heap.h>
#include <linux/err.h>
#include <linux/highmem.h>
+#include <linux/mem_encrypt.h>
#include <linux/mm.h>
+#include <linux/set_memory.h>
#include <linux/module.h>
+#include <linux/pgtable.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+struct system_heap_priv {
+ bool cc_shared;
+};
+
struct system_heap_buffer {
struct dma_heap *heap;
struct list_head attachments;
@@ -29,6 +37,7 @@ struct system_heap_buffer {
struct sg_table sg_table;
int vmap_cnt;
void *vaddr;
+ bool cc_shared;
};
struct dma_heap_attachment {
@@ -36,6 +45,7 @@ struct dma_heap_attachment {
struct sg_table table;
struct list_head list;
bool mapped;
+ bool cc_shared;
};
#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO)
@@ -52,6 +62,34 @@ static gfp_t order_flags[] = {HIGH_ORDER_GFP, HIGH_ORDER_GFP, LOW_ORDER_GFP};
static const unsigned int orders[] = {8, 4, 0};
#define NUM_ORDERS ARRAY_SIZE(orders)
+static int system_heap_set_page_decrypted(struct page *page)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+ unsigned int nr_pages = 1 << compound_order(page);
+ int ret;
+
+ ret = set_memory_decrypted(addr, nr_pages);
+ if (ret)
+ pr_warn_ratelimited("dma-buf system heap: failed to decrypt page at %p\n",
+ page_address(page));
+
+ return ret;
+}
+
+static int system_heap_set_page_encrypted(struct page *page)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+ unsigned int nr_pages = 1 << compound_order(page);
+ int ret;
+
+ ret = set_memory_encrypted(addr, nr_pages);
+ if (ret)
+ pr_warn_ratelimited("dma-buf system heap: failed to re-encrypt page at %p, leaking memory\n",
+ page_address(page));
+
+ return ret;
+}
+
static int dup_sg_table(struct sg_table *from, struct sg_table *to)
{
struct scatterlist *sg, *new_sg;
@@ -90,6 +128,7 @@ static int system_heap_attach(struct dma_buf *dmabuf,
a->dev = attachment->dev;
INIT_LIST_HEAD(&a->list);
a->mapped = false;
+ a->cc_shared = buffer->cc_shared;
attachment->priv = a;
@@ -119,9 +158,11 @@ static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attac
{
struct dma_heap_attachment *a = attachment->priv;
struct sg_table *table = &a->table;
+ unsigned long attrs;
int ret;
- ret = dma_map_sgtable(attachment->dev, table, direction, 0);
+ attrs = a->cc_shared ? DMA_ATTR_CC_SHARED : 0;
+ ret = dma_map_sgtable(attachment->dev, table, direction, attrs);
if (ret)
return ERR_PTR(ret);
@@ -188,8 +229,13 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
unsigned long addr = vma->vm_start;
unsigned long pgoff = vma->vm_pgoff;
struct scatterlist *sg;
+ pgprot_t prot;
int i, ret;
+ prot = vma->vm_page_prot;
+ if (buffer->cc_shared)
+ prot = pgprot_decrypted(prot);
+
for_each_sgtable_sg(table, sg, i) {
unsigned long n = sg->length >> PAGE_SHIFT;
@@ -206,8 +252,7 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
if (addr + size > vma->vm_end)
size = vma->vm_end - addr;
- ret = remap_pfn_range(vma, addr, page_to_pfn(page),
- size, vma->vm_page_prot);
+ ret = remap_pfn_range(vma, addr, page_to_pfn(page), size, prot);
if (ret)
return ret;
@@ -225,6 +270,7 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer)
struct page **pages = vmalloc(sizeof(struct page *) * npages);
struct page **tmp = pages;
struct sg_page_iter piter;
+ pgprot_t prot;
void *vaddr;
if (!pages)
@@ -235,7 +281,10 @@ static void *system_heap_do_vmap(struct system_heap_buffer *buffer)
*tmp++ = sg_page_iter_page(&piter);
}
- vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL);
+ prot = PAGE_KERNEL;
+ if (buffer->cc_shared)
+ prot = pgprot_decrypted(prot);
+ vaddr = vmap(pages, npages, VM_MAP, prot);
vfree(pages);
if (!vaddr)
@@ -296,6 +345,14 @@ static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
for_each_sgtable_sg(table, sg, i) {
struct page *page = sg_page(sg);
+ /*
+ * Intentionally leak pages that cannot be re-encrypted
+ * to prevent shared memory from being reused.
+ */
+ if (buffer->cc_shared &&
+ system_heap_set_page_encrypted(page))
+ continue;
+
__free_pages(page, compound_order(page));
}
sg_free_table(table);
@@ -347,6 +404,8 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
unsigned long size_remaining = len;
unsigned int max_order = orders[0];
+ struct system_heap_priv *priv = dma_heap_get_drvdata(heap);
+ bool cc_shared = priv->cc_shared;
struct dma_buf *dmabuf;
struct sg_table *table;
struct scatterlist *sg;
@@ -362,6 +421,7 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
mutex_init(&buffer->lock);
buffer->heap = heap;
buffer->len = len;
+ buffer->cc_shared = cc_shared;
INIT_LIST_HEAD(&pages);
i = 0;
@@ -396,6 +456,14 @@ static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
list_del(&page->lru);
}
+ if (cc_shared) {
+ for_each_sgtable_sg(table, sg, i) {
+ ret = system_heap_set_page_decrypted(sg_page(sg));
+ if (ret)
+ goto free_pages;
+ }
+ }
+
/* create the dmabuf */
exp_info.exp_name = dma_heap_get_name(heap);
exp_info.ops = &system_heap_buf_ops;
@@ -413,6 +481,13 @@ free_pages:
for_each_sgtable_sg(table, sg, i) {
struct page *p = sg_page(sg);
+ /*
+ * Intentionally leak pages that cannot be re-encrypted
+ * to prevent shared memory from being reused.
+ */
+ if (buffer->cc_shared &&
+ system_heap_set_page_encrypted(p))
+ continue;
__free_pages(p, compound_order(p));
}
sg_free_table(table);
@@ -428,6 +503,14 @@ static const struct dma_heap_ops system_heap_ops = {
.allocate = system_heap_allocate,
};
+static struct system_heap_priv system_heap_priv = {
+ .cc_shared = false,
+};
+
+static struct system_heap_priv system_heap_cc_shared_priv = {
+ .cc_shared = true,
+};
+
static int __init system_heap_create(void)
{
struct dma_heap_export_info exp_info;
@@ -435,8 +518,18 @@ static int __init system_heap_create(void)
exp_info.name = "system";
exp_info.ops = &system_heap_ops;
- exp_info.priv = NULL;
+ exp_info.priv = &system_heap_priv;
+
+ sys_heap = dma_heap_add(&exp_info);
+ if (IS_ERR(sys_heap))
+ return PTR_ERR(sys_heap);
+
+ if (IS_ENABLED(CONFIG_HIGHMEM) ||
+ !cc_platform_has(CC_ATTR_MEM_ENCRYPT))
+ return 0;
+ exp_info.name = "system_cc_shared";
+ exp_info.priv = &system_heap_cc_shared_priv;
sys_heap = dma_heap_add(&exp_info);
if (IS_ERR(sys_heap))
return PTR_ERR(sys_heap);
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 095235334eaf..54d96e847f16 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1106,8 +1106,10 @@ void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(phys, size, dir);
+ arch_sync_dma_flush();
+ }
swiotlb_sync_single_for_cpu(dev, phys, size, dir);
}
@@ -1123,8 +1125,10 @@ void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
swiotlb_sync_single_for_device(dev, phys, size, dir);
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_device(phys, size, dir);
+ arch_sync_dma_flush();
+ }
}
void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
@@ -1133,13 +1137,15 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
struct scatterlist *sg;
int i;
- if (sg_dma_is_swiotlb(sgl))
+ if (sg_dma_is_swiotlb(sgl)) {
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
sg->length, dir);
- else if (!dev_is_dma_coherent(dev))
+ } else if (!dev_is_dma_coherent(dev)) {
for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
+ arch_sync_dma_flush();
+ }
}
void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
@@ -1148,14 +1154,16 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
struct scatterlist *sg;
int i;
- if (sg_dma_is_swiotlb(sgl))
+ if (sg_dma_is_swiotlb(sgl)) {
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
sg_dma_address(sg),
sg->length, dir);
- else if (!dev_is_dma_coherent(dev))
+ } else if (!dev_is_dma_coherent(dev)) {
for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
+ arch_sync_dma_flush();
+ }
}
static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
@@ -1230,8 +1238,10 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
return DMA_MAPPING_ERROR;
}
- if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
+ if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
arch_sync_dma_for_device(phys, size, dir);
+ arch_sync_dma_flush();
+ }
iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR &&
@@ -1254,8 +1264,10 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
if (WARN_ON(!phys))
return;
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(phys, size, dir);
+ arch_sync_dma_flush();
+ }
__iommu_dma_unmap(dev, dma_handle, size);
@@ -2004,6 +2016,8 @@ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
dma_addr_t addr = state->addr + offset;
size_t iova_start_pad = iova_offset(iovad, addr);
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
return iommu_sync_map(domain, addr - iova_start_pad,
iova_align(iovad, size + iova_start_pad));
}
@@ -2017,6 +2031,8 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
size_t iova_start_pad = iova_offset(iovad, addr);
+ bool need_sync_dma = !dev_is_dma_coherent(dev) &&
+ !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO));
dma_addr_t end = addr + size;
do {
@@ -2040,6 +2056,9 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
addr += len;
iova_start_pad = 0;
} while (addr < end);
+
+ if (need_sync_dma)
+ arch_sync_dma_flush();
}
static void __iommu_dma_iova_unlink(struct device *dev,
diff --git a/drivers/memory/tegra/tegra210-emc-table.c b/drivers/memory/tegra/tegra210-emc-table.c
index 34a8785d2861..4b3c478b2743 100644
--- a/drivers/memory/tegra/tegra210-emc-table.c
+++ b/drivers/memory/tegra/tegra210-emc-table.c
@@ -70,19 +70,20 @@ static void tegra210_emc_table_device_release(struct reserved_mem *rmem,
memunmap(timings);
}
-static const struct reserved_mem_ops tegra210_emc_table_ops = {
- .device_init = tegra210_emc_table_device_init,
- .device_release = tegra210_emc_table_device_release,
-};
-
-static int tegra210_emc_table_init(struct reserved_mem *rmem)
+static int tegra210_emc_table_init(unsigned long node,
+ struct reserved_mem *rmem)
{
pr_debug("Tegra210 EMC table at %pa, size %lu bytes\n", &rmem->base,
(unsigned long)rmem->size);
- rmem->ops = &tegra210_emc_table_ops;
-
return 0;
}
+
+static const struct reserved_mem_ops tegra210_emc_table_ops = {
+ .node_init = tegra210_emc_table_init,
+ .device_init = tegra210_emc_table_device_init,
+ .device_release = tegra210_emc_table_device_release,
+};
+
RESERVEDMEM_OF_DECLARE(tegra210_emc_table, "nvidia,tegra210-emc-table",
- tegra210_emc_table_init);
+ &tegra210_emc_table_ops);
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 2967e4aff807..104e697bee7b 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1295,7 +1295,7 @@ void __init unflatten_device_tree(void)
void *fdt = initial_boot_params;
/* Save the statically-placed regions in the reserved_mem array */
- fdt_scan_reserved_mem_reg_nodes();
+ fdt_scan_reserved_mem_late();
/* Populate an empty root node when bootloader doesn't provide one */
if (!fdt) {
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index df0bb00349e0..0ae16da066e2 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -186,7 +186,7 @@ static inline struct device_node *__of_get_dma_parent(const struct device_node *
#endif
int fdt_scan_reserved_mem(void);
-void __init fdt_scan_reserved_mem_reg_nodes(void);
+void __init fdt_scan_reserved_mem_late(void);
bool of_fdt_device_is_available(const void *blob, unsigned long node);
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 1fd28f805610..8d5777cb5d1b 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -24,8 +24,6 @@
#include <linux/slab.h>
#include <linux/memblock.h>
#include <linux/kmemleak.h>
-#include <linux/cma.h>
-#include <linux/dma-map-ops.h>
#include "of_private.h"
@@ -104,30 +102,12 @@ static void __init alloc_reserved_mem_array(void)
reserved_mem = new_array;
}
-static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem);
-/*
- * fdt_reserved_mem_save_node() - save fdt node for second pass initialization
- */
-static void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname,
- phys_addr_t base, phys_addr_t size)
-{
- struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
-
- if (reserved_mem_count == total_reserved_mem_cnt) {
- pr_err("not enough space for all defined regions.\n");
- return;
- }
-
- rmem->fdt_node = node;
- rmem->name = uname;
- rmem->base = base;
- rmem->size = size;
-
- /* Call the region specific initialization function */
- fdt_init_reserved_mem_node(rmem);
-
- reserved_mem_count++;
-}
+static void fdt_init_reserved_mem_node(unsigned long node, const char *uname,
+ phys_addr_t base, phys_addr_t size);
+static int fdt_validate_reserved_mem_node(unsigned long node,
+ phys_addr_t *align);
+static int fdt_fixup_reserved_mem_node(unsigned long node,
+ phys_addr_t base, phys_addr_t size);
static int __init early_init_dt_reserve_memory(phys_addr_t base,
phys_addr_t size, bool nomap)
@@ -154,21 +134,19 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
const char *uname)
{
phys_addr_t base, size;
- int i, len;
+ int i, len, err;
const __be32 *prop;
- bool nomap, default_cma;
+ bool nomap;
prop = of_flat_dt_get_addr_size_prop(node, "reg", &len);
if (!prop)
return -ENOENT;
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
- default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
- if (default_cma && cma_skip_dt_default_reserved_mem()) {
- pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n");
- return -EINVAL;
- }
+ err = fdt_validate_reserved_mem_node(node, NULL);
+ if (err && err != -ENODEV)
+ return err;
for (i = 0; i < len; i++) {
u64 b, s;
@@ -179,10 +157,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
size = s;
if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) {
- /* Architecture specific contiguous memory fixup. */
- if (of_flat_dt_is_compatible(node, "shared-dma-pool") &&
- of_get_flat_dt_prop(node, "reusable", NULL))
- dma_contiguous_early_fixup(base, size);
+ fdt_fixup_reserved_mem_node(node, base, size);
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
} else {
@@ -216,19 +191,66 @@ static int __init __reserved_mem_check_root(unsigned long node)
return 0;
}
-static void __init __rmem_check_for_overlap(void);
+static int __init __rmem_cmp(const void *a, const void *b)
+{
+ const struct reserved_mem *ra = a, *rb = b;
+
+ if (ra->base < rb->base)
+ return -1;
+
+ if (ra->base > rb->base)
+ return 1;
+
+ /*
+ * Put the dynamic allocations (address == 0, size == 0) before static
+ * allocations at address 0x0 so that overlap detection works
+ * correctly.
+ */
+ if (ra->size < rb->size)
+ return -1;
+ if (ra->size > rb->size)
+ return 1;
+
+ return 0;
+}
+
+static void __init __rmem_check_for_overlap(void)
+{
+ int i;
+
+ if (reserved_mem_count < 2)
+ return;
+
+ sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]),
+ __rmem_cmp, NULL);
+ for (i = 0; i < reserved_mem_count - 1; i++) {
+ struct reserved_mem *this, *next;
+
+ this = &reserved_mem[i];
+ next = &reserved_mem[i + 1];
+
+ if (this->base + this->size > next->base) {
+ phys_addr_t this_end, next_end;
+
+ this_end = this->base + this->size;
+ next_end = next->base + next->size;
+ pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n",
+ this->name, &this->base, &this_end,
+ next->name, &next->base, &next_end);
+ }
+ }
+}
/**
- * fdt_scan_reserved_mem_reg_nodes() - Store info for the "reg" defined
- * reserved memory regions.
+ * fdt_scan_reserved_mem_late() - Scan FDT and initialize remaining reserved
+ * memory regions.
*
- * This function is used to scan through the DT and store the
- * information for the reserved memory regions that are defined using
- * the "reg" property. The region node number, name, base address, and
- * size are all stored in the reserved_mem array by calling the
- * fdt_reserved_mem_save_node() function.
+ * This function is used to scan again through the DT and initialize the
+ * "static" reserved memory regions, that are defined using the "reg"
+ * property. Each such region is then initialized with its specific init
+ * function and stored in the global reserved_mem array.
*/
-void __init fdt_scan_reserved_mem_reg_nodes(void)
+void __init fdt_scan_reserved_mem_late(void)
{
const void *fdt = initial_boot_params;
phys_addr_t base, size;
@@ -253,23 +275,25 @@ void __init fdt_scan_reserved_mem_reg_nodes(void)
fdt_for_each_subnode(child, fdt, node) {
const char *uname;
- bool default_cma = of_get_flat_dt_prop(child, "linux,cma-default", NULL);
u64 b, s;
+ int ret;
if (!of_fdt_device_is_available(fdt, child))
continue;
- if (default_cma && cma_skip_dt_default_reserved_mem())
- continue;
if (!of_flat_dt_get_addr_size(child, "reg", &b, &s))
continue;
+ ret = fdt_validate_reserved_mem_node(child, NULL);
+ if (ret && ret != -ENODEV)
+ continue;
+
base = b;
size = s;
if (size) {
uname = fdt_get_name(fdt, child, NULL);
- fdt_reserved_mem_save_node(child, uname, base, size);
+ fdt_init_reserved_mem_node(child, uname, base, size);
}
}
@@ -280,7 +304,14 @@ void __init fdt_scan_reserved_mem_reg_nodes(void)
static int __init __reserved_mem_alloc_size(unsigned long node, const char *uname);
/*
- * fdt_scan_reserved_mem() - scan a single FDT node for reserved memory
+ * fdt_scan_reserved_mem() - reserve and allocate memory occupied by
+ * reserved memory regions.
+ *
+ * This function is used to scan through the FDT and mark memory occupied
+ * by all static (defined by the "reg" property) reserved memory regions.
+ * Then memory for all dynamic regions (defined by size & alignment) is
+ * allocated, a region specific init function is called and region information
+ * is stored in the reserved_mem array.
*/
int __init fdt_scan_reserved_mem(void)
{
@@ -397,7 +428,7 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
phys_addr_t base = 0, align = 0, size;
int i, len;
const __be32 *prop;
- bool nomap, default_cma;
+ bool nomap;
int ret;
prop = of_get_flat_dt_prop(node, "size", &len);
@@ -421,19 +452,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
}
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
- default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
- if (default_cma && cma_skip_dt_default_reserved_mem()) {
- pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n");
- return -EINVAL;
- }
-
- /* Need adjust the alignment to satisfy the CMA requirement */
- if (IS_ENABLED(CONFIG_CMA)
- && of_flat_dt_is_compatible(node, "shared-dma-pool")
- && of_get_flat_dt_prop(node, "reusable", NULL)
- && !nomap)
- align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES);
+ ret = fdt_validate_reserved_mem_node(node, &align);
+ if (ret && ret != -ENODEV)
+ return ret;
prop = of_flat_dt_get_addr_size_prop(node, "alloc-ranges", &len);
if (prop) {
@@ -468,121 +490,151 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam
uname, (unsigned long)(size / SZ_1M));
return -ENOMEM;
}
- /* Architecture specific contiguous memory fixup. */
- if (of_flat_dt_is_compatible(node, "shared-dma-pool") &&
- of_get_flat_dt_prop(node, "reusable", NULL))
- dma_contiguous_early_fixup(base, size);
- /* Save region in the reserved_mem array */
- fdt_reserved_mem_save_node(node, uname, base, size);
+
+ fdt_fixup_reserved_mem_node(node, base, size);
+ fdt_init_reserved_mem_node(node, uname, base, size);
+
return 0;
}
+extern const struct of_device_id __reservedmem_of_table[];
static const struct of_device_id __rmem_of_table_sentinel
__used __section("__reservedmem_of_table_end");
-/*
- * __reserved_mem_init_node() - call region specific reserved memory init code
+/**
+ * fdt_fixup_reserved_mem_node() - call fixup function for a reserved memory node
+ * @node: FDT node to fixup
+ * @base: base address of the reserved memory region
+ * @size: size of the reserved memory region
+ *
+ * This function iterates through the reserved memory drivers and calls
+ * the node_fixup callback for the compatible entry matching the node.
+ *
+ * Return: 0 on success, -ENODEV if no compatible match found
*/
-static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
+static int __init fdt_fixup_reserved_mem_node(unsigned long node,
+ phys_addr_t base, phys_addr_t size)
{
- extern const struct of_device_id __reservedmem_of_table[];
const struct of_device_id *i;
- int ret = -ENOENT;
+ int ret = -ENODEV;
- for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) {
- reservedmem_of_init_fn initfn = i->data;
- const char *compat = i->compatible;
+ for (i = __reservedmem_of_table; ret == -ENODEV &&
+ i < &__rmem_of_table_sentinel; i++) {
+ const struct reserved_mem_ops *ops = i->data;
- if (!of_flat_dt_is_compatible(rmem->fdt_node, compat))
+ if (!of_flat_dt_is_compatible(node, i->compatible))
continue;
- ret = initfn(rmem);
- if (ret == 0) {
- pr_info("initialized node %s, compatible id %s\n",
- rmem->name, compat);
- break;
- }
+ if (ops->node_fixup)
+ ret = ops->node_fixup(node, base, size);
}
return ret;
}
-static int __init __rmem_cmp(const void *a, const void *b)
+/**
+ * fdt_validate_reserved_mem_node() - validate a reserved memory node
+ * @node: FDT node to validate
+ * @align: pointer to store the validated alignment (may be modified by callback)
+ *
+ * This function iterates through the reserved memory drivers and calls
+ * the node_validate callback for the compatible entry matching the node.
+ *
+ * Return: 0 on success, -ENODEV if no compatible match found
+ */
+static int __init fdt_validate_reserved_mem_node(unsigned long node, phys_addr_t *align)
{
- const struct reserved_mem *ra = a, *rb = b;
-
- if (ra->base < rb->base)
- return -1;
-
- if (ra->base > rb->base)
- return 1;
+ const struct of_device_id *i;
+ int ret = -ENODEV;
- /*
- * Put the dynamic allocations (address == 0, size == 0) before static
- * allocations at address 0x0 so that overlap detection works
- * correctly.
- */
- if (ra->size < rb->size)
- return -1;
- if (ra->size > rb->size)
- return 1;
+ for (i = __reservedmem_of_table; ret == -ENODEV &&
+ i < &__rmem_of_table_sentinel; i++) {
+ const struct reserved_mem_ops *ops = i->data;
- if (ra->fdt_node < rb->fdt_node)
- return -1;
- if (ra->fdt_node > rb->fdt_node)
- return 1;
+ if (!of_flat_dt_is_compatible(node, i->compatible))
+ continue;
- return 0;
+ if (ops->node_validate)
+ ret = ops->node_validate(node, align);
+ }
+ return ret;
}
-static void __init __rmem_check_for_overlap(void)
+/**
+ * __reserved_mem_init_node() - initialize a reserved memory region
+ * @rmem: reserved_mem structure to initialize
+ * @node: FDT node describing the reserved memory region
+ *
+ * This function iterates through the reserved memory drivers and calls the
+ * node_init callback for the compatible entry matching the node. On success,
+ * the operations pointer is stored in the reserved_mem structure.
+ *
+ * Return: 0 on success, -ENODEV if no compatible match found
+ */
+static int __init __reserved_mem_init_node(struct reserved_mem *rmem,
+ unsigned long node)
{
- int i;
-
- if (reserved_mem_count < 2)
- return;
-
- sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]),
- __rmem_cmp, NULL);
- for (i = 0; i < reserved_mem_count - 1; i++) {
- struct reserved_mem *this, *next;
+ const struct of_device_id *i;
+ int ret = -ENODEV;
- this = &reserved_mem[i];
- next = &reserved_mem[i + 1];
+ for (i = __reservedmem_of_table; ret == -ENODEV &&
+ i < &__rmem_of_table_sentinel; i++) {
+ const struct reserved_mem_ops *ops = i->data;
+ const char *compat = i->compatible;
- if (this->base + this->size > next->base) {
- phys_addr_t this_end, next_end;
+ if (!of_flat_dt_is_compatible(node, compat))
+ continue;
- this_end = this->base + this->size;
- next_end = next->base + next->size;
- pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n",
- this->name, &this->base, &this_end,
- next->name, &next->base, &next_end);
+ ret = ops->node_init(node, rmem);
+ if (ret == 0) {
+ rmem->ops = ops;
+ pr_info("initialized node %s, compatible id %s\n",
+ rmem->name, compat);
+ return ret;
}
}
+ return ret;
}
/**
* fdt_init_reserved_mem_node() - Initialize a reserved memory region
- * @rmem: reserved_mem struct of the memory region to be initialized.
+ * @node: fdt node of the initialized region
+ * @uname: name of the reserved memory node
+ * @base: base address of the reserved memory region
+ * @size: size of the reserved memory region
*
- * This function is used to call the region specific initialization
- * function for a reserved memory region.
+ * This function calls the region-specific initialization function for a
+ * reserved memory region and saves all region-specific data to the
+ * reserved_mem array to allow of_reserved_mem_lookup() to find it.
*/
-static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem)
+static void __init fdt_init_reserved_mem_node(unsigned long node, const char *uname,
+ phys_addr_t base, phys_addr_t size)
{
- unsigned long node = rmem->fdt_node;
int err = 0;
bool nomap;
+ struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
+
+ if (reserved_mem_count == total_reserved_mem_cnt) {
+ pr_err("not enough space for all defined regions.\n");
+ return;
+ }
+
+ rmem->name = uname;
+ rmem->base = base;
+ rmem->size = size;
+
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
- err = __reserved_mem_init_node(rmem);
- if (err != 0 && err != -ENOENT) {
+ err = __reserved_mem_init_node(rmem, node);
+ if (err != 0 && err != -ENODEV) {
pr_info("node %s compatible matching fail\n", rmem->name);
+ rmem->name = NULL;
+
if (nomap)
memblock_clear_nomap(rmem->base, rmem->size);
else
memblock_phys_free(rmem->base, rmem->size);
+ return;
} else {
phys_addr_t end = rmem->base + rmem->size - 1;
bool reusable =
@@ -594,6 +646,8 @@ static void __init fdt_init_reserved_mem_node(struct reserved_mem *rmem)
reusable ? "reusable" : "non-reusable",
rmem->name ? rmem->name : "unknown");
}
+
+ reserved_mem_count++;
}
struct rmem_assigned_device {
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4a734ee38994..2cbf2b588f5b 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -262,10 +262,12 @@ static dma_addr_t xen_swiotlb_map_phys(struct device *dev, phys_addr_t phys,
done:
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
- if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) {
arch_sync_dma_for_device(phys, size, dir);
- else
+ arch_sync_dma_flush();
+ } else {
xen_dma_sync_for_device(dev, dev_addr, size, dir);
+ }
}
return dev_addr;
}
@@ -287,10 +289,12 @@ static void xen_swiotlb_unmap_phys(struct device *hwdev, dma_addr_t dev_addr,
BUG_ON(dir == DMA_NONE);
if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
- if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
+ if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) {
arch_sync_dma_for_cpu(paddr, size, dir);
- else
+ arch_sync_dma_flush();
+ } else {
xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
+ }
}
/* NOTE: We use dev_addr here, not paddr! */
@@ -308,10 +312,12 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
struct io_tlb_pool *pool;
if (!dev_is_dma_coherent(dev)) {
- if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
arch_sync_dma_for_cpu(paddr, size, dir);
- else
+ arch_sync_dma_flush();
+ } else {
xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
+ }
}
pool = xen_swiotlb_find_pool(dev, dma_addr);
@@ -331,10 +337,12 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
__swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
if (!dev_is_dma_coherent(dev)) {
- if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
arch_sync_dma_for_device(paddr, size, dir);
- else
+ arch_sync_dma_flush();
+ } else {
xen_dma_sync_for_device(dev, dma_addr, size, dir);
+ }
}
}
diff --git a/include/linux/cma.h b/include/linux/cma.h
index d0793eaaadaa..8555d38a97b1 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -61,14 +61,4 @@ extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end);
extern void cma_reserve_pages_on_error(struct cma *cma);
-
-#ifdef CONFIG_DMA_CMA
-extern bool cma_skip_dt_default_reserved_mem(void);
-#else
-static inline bool cma_skip_dt_default_reserved_mem(void)
-{
- return false;
-}
-#endif
-
#endif
diff --git a/include/linux/dma-buf/heaps/cma.h b/include/linux/dma-buf/heaps/cma.h
deleted file mode 100644
index e751479e21e7..000000000000
--- a/include/linux/dma-buf/heaps/cma.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef DMA_BUF_HEAP_CMA_H_
-#define DMA_BUF_HEAP_CMA_H_
-
-struct cma;
-
-#ifdef CONFIG_DMABUF_HEAPS_CMA
-int dma_heap_cma_register_heap(struct cma *cma);
-#else
-static inline int dma_heap_cma_register_heap(struct cma *cma)
-{
- return 0;
-}
-#endif // CONFIG_DMABUF_HEAPS_CMA
-
-#endif // DMA_BUF_HEAP_CMA_H_
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 60b63756df82..6a1832a73cad 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -91,14 +91,8 @@ static inline void set_dma_ops(struct device *dev,
#endif /* CONFIG_ARCH_HAS_DMA_OPS */
#ifdef CONFIG_DMA_CMA
-extern struct cma *dma_contiguous_default_area;
-
-static inline struct cma *dev_get_cma_area(struct device *dev)
-{
- if (dev && dev->cma_area)
- return dev->cma_area;
- return dma_contiguous_default_area;
-}
+struct cma *dev_get_cma_area(struct device *dev);
+struct cma *dma_contiguous_get_area_by_idx(unsigned int idx);
void dma_contiguous_reserve(phys_addr_t addr_limit);
int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
@@ -117,6 +111,10 @@ static inline struct cma *dev_get_cma_area(struct device *dev)
{
return NULL;
}
+static inline struct cma *dma_contiguous_get_area_by_idx(unsigned int idx)
+{
+ return NULL;
+}
static inline void dma_contiguous_reserve(phys_addr_t limit)
{
}
@@ -147,9 +145,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page,
{
__free_pages(page, get_order(size));
}
-static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
-{
-}
#endif /* CONFIG_DMA_CMA*/
#ifdef CONFIG_DMA_DECLARE_COHERENT
@@ -361,6 +356,12 @@ static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
}
#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */
+#ifndef CONFIG_ARCH_HAS_BATCHED_DMA_SYNC
+static inline void arch_sync_dma_flush(void)
+{
+}
+#endif
+
#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
void arch_sync_dma_for_cpu_all(void);
#else
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 99ef042ecdb4..db8ab24a54f4 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -9,7 +9,7 @@
#include <linux/bug.h>
#include <linux/cache.h>
-/**
+/*
* List of possible attributes associated with a DMA mapping. The semantics
* of each attribute should be defined in Documentation/core-api/dma-attributes.rst.
*/
@@ -92,6 +92,16 @@
* flushing.
*/
#define DMA_ATTR_REQUIRE_COHERENT (1UL << 12)
+/*
+ * DMA_ATTR_CC_SHARED: Indicates the DMA mapping is shared (decrypted) for
+ * confidential computing guests. For normal system memory the caller must have
+ * called set_memory_decrypted(), and pgprot_decrypted must be used when
+ * creating CPU PTEs for the mapping. The same shared semantic may be passed
+ * to the vIOMMU when it sets up the IOPTE. For MMIO use together with
+ * DMA_ATTR_MMIO to indicate shared MMIO. Unless DMA_ATTR_MMIO is provided
+ * a struct page is required.
+ */
+#define DMA_ATTR_CC_SHARED (1UL << 13)
/*
* A dma_addr_t can hold any valid DMA or bus address for the platform. It can
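A minimal sketch of the caller contract described above for page-backed memory (the helper is hypothetical; it mirrors what the system heap changes in this series do):

#include <linux/dma-mapping.h>
#include <linux/set_memory.h>

/* Hypothetical helper: share a page-backed sg_table with a device. */
static int map_shared_buffer(struct device *dev, struct sg_table *sgt,
			     struct page *page, unsigned int nr_pages)
{
	int ret;

	/* Clear the encryption bit before the device may touch the pages. */
	ret = set_memory_decrypted((unsigned long)page_address(page), nr_pages);
	if (ret)
		return ret;

	/* Any CPU PTEs for this buffer must use pgprot_decrypted(). */
	return dma_map_sgtable(dev, sgt, DMA_BIDIRECTIONAL, DMA_ATTR_CC_SHARED);
}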
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index f573423359f4..e8b20b29fa68 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -11,7 +11,6 @@ struct resource;
struct reserved_mem {
const char *name;
- unsigned long fdt_node;
const struct reserved_mem_ops *ops;
phys_addr_t base;
phys_addr_t size;
@@ -19,18 +18,20 @@ struct reserved_mem {
};
struct reserved_mem_ops {
+ int (*node_validate)(unsigned long fdt_node, phys_addr_t *align);
+ int (*node_fixup)(unsigned long fdt_node, phys_addr_t base,
+ phys_addr_t size);
+ int (*node_init)(unsigned long fdt_node, struct reserved_mem *rmem);
int (*device_init)(struct reserved_mem *rmem,
struct device *dev);
void (*device_release)(struct reserved_mem *rmem,
struct device *dev);
};
-typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
-
#ifdef CONFIG_OF_RESERVED_MEM
-#define RESERVEDMEM_OF_DECLARE(name, compat, init) \
- _OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn)
+#define RESERVEDMEM_OF_DECLARE(name, compat, ops) \
+ _OF_DECLARE(reservedmem, name, compat, ops, struct reserved_mem_ops *)
int of_reserved_mem_device_init_by_idx(struct device *dev,
struct device_node *np, int idx);
@@ -48,8 +49,9 @@ int of_reserved_mem_region_count(const struct device_node *np);
#else
-#define RESERVEDMEM_OF_DECLARE(name, compat, init) \
- _OF_DECLARE_STUB(reservedmem, name, compat, init, reservedmem_of_init_fn)
+#define RESERVEDMEM_OF_DECLARE(name, compat, ops) \
+ _OF_DECLARE_STUB(reservedmem, name, compat, ops, \
+ struct reserved_mem_ops *)
static inline int of_reserved_mem_device_init_by_idx(struct device *dev,
struct device_node *np, int idx)
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 63597b004424..31c9ddf72c9d 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -34,7 +34,8 @@ TRACE_DEFINE_ENUM(DMA_NONE);
{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
{ DMA_ATTR_MMIO, "MMIO" }, \
{ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \
- { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" })
+ { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }, \
+ { DMA_ATTR_CC_SHARED, "CC_SHARED" })
DECLARE_EVENT_CLASS(dma_map,
TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
diff --git a/include/uapi/linux/map_benchmark.h b/include/uapi/linux/map_benchmark.h
index c2d91088a40d..4b17829a9f17 100644
--- a/include/uapi/linux/map_benchmark.h
+++ b/include/uapi/linux/map_benchmark.h
@@ -17,6 +17,12 @@
#define DMA_MAP_TO_DEVICE 1
#define DMA_MAP_FROM_DEVICE 2
+enum {
+ DMA_MAP_BENCH_SINGLE_MODE,
+ DMA_MAP_BENCH_SG_MODE,
+ DMA_MAP_BENCH_MODE_MAX
+};
+
struct map_benchmark {
__u64 avg_map_100ns; /* average map latency in 100ns */
__u64 map_stddev; /* standard deviation of map latency */
@@ -28,8 +34,11 @@ struct map_benchmark {
__u32 dma_bits; /* DMA addressing capability */
__u32 dma_dir; /* DMA data direction */
__u32 dma_trans_ns; /* time for DMA transmission in ns */
- __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */
- __u8 expansion[76]; /* For future use */
+ __u32 granule; /* - SINGLE_MODE: number of pages mapped/unmapped per operation
+ * - SG_MODE: number of scatterlist entries (each maps one page)
+ */
+ __u8 map_mode; /* the mode of dma map */
+ __u8 expansion[75]; /* For future use */
};
#endif /* _UAPI_DMA_BENCHMARK_H */
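A hedged sketch of exercising the new SG mode from userspace, in the style of tools/dma/dma_map_benchmark.c; the debugfs node and the DMA_MAP_BENCHMARK ioctl are the preexisting map-benchmark interface, and all values are illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/map_benchmark.h>

int main(void)
{
	struct map_benchmark bench = {
		.threads = 1,
		.seconds = 20,
		.dma_bits = 64,
		.dma_dir = DMA_MAP_TO_DEVICE,
		.granule = 8,			/* 8 sg entries, one page each */
		.map_mode = DMA_MAP_BENCH_SG_MODE,
	};
	int fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);

	if (fd < 0 || ioctl(fd, DMA_MAP_BENCHMARK, &bench) < 0) {
		perror("dma_map_benchmark");
		return 1;
	}
	printf("dma_map_sg avg latency: %llu (100ns units)\n",
	       (unsigned long long)bench.avg_map_100ns);
	return 0;
}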
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 159900736f25..bfef21b4a9ae 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -72,6 +72,9 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
+config ARCH_HAS_BATCHED_DMA_SYNC
+ bool
+
#
# Select this option if the architecture assumes DMA devices are coherent
# by default.
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 1147497bc512..bcdc0f76d2e8 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -362,17 +362,11 @@ static void rmem_dma_device_release(struct reserved_mem *rmem,
dev->dma_mem = NULL;
}
-static const struct reserved_mem_ops rmem_dma_ops = {
- .device_init = rmem_dma_device_init,
- .device_release = rmem_dma_device_release,
-};
-static int __init rmem_dma_setup(struct reserved_mem *rmem)
+static int __init rmem_dma_setup(unsigned long node, struct reserved_mem *rmem)
{
- unsigned long node = rmem->fdt_node;
-
if (of_get_flat_dt_prop(node, "reusable", NULL))
- return -EINVAL;
+ return -ENODEV;
#ifdef CONFIG_ARM
if (!of_get_flat_dt_prop(node, "no-map", NULL)) {
@@ -390,7 +384,6 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem)
}
#endif
- rmem->ops = &rmem_dma_ops;
pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M);
return 0;
@@ -407,5 +400,11 @@ static int __init dma_init_reserved_memory(void)
core_initcall(dma_init_reserved_memory);
#endif /* CONFIG_DMA_GLOBAL_POOL */
-RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup);
+static const struct reserved_mem_ops rmem_dma_ops = {
+ .node_init = rmem_dma_setup,
+ .device_init = rmem_dma_device_init,
+ .device_release = rmem_dma_device_release,
+};
+
+RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", &rmem_dma_ops);
#endif
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index c56004d314dc..03f52bd17120 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -42,7 +42,6 @@
#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/sizes.h>
-#include <linux/dma-buf/heaps/cma.h>
#include <linux/dma-map-ops.h>
#include <linux/cma.h>
#include <linux/nospec.h>
@@ -53,7 +52,38 @@
#define CMA_SIZE_MBYTES 0
#endif
-struct cma *dma_contiguous_default_area;
+static struct cma *dma_contiguous_areas[MAX_CMA_AREAS];
+static unsigned int dma_contiguous_areas_num;
+
+static int dma_contiguous_insert_area(struct cma *cma)
+{
+ if (dma_contiguous_areas_num >= ARRAY_SIZE(dma_contiguous_areas))
+ return -EINVAL;
+
+ dma_contiguous_areas[dma_contiguous_areas_num++] = cma;
+
+ return 0;
+}
+
+/**
+ * dma_contiguous_get_area_by_idx() - Get contiguous area at given index
+ * @idx: index of the area we query
+ *
+ * Queries for the contiguous area located at index @idx.
+ *
+ * Returns:
+ * A pointer to the requested contiguous area, or NULL otherwise.
+ */
+struct cma *dma_contiguous_get_area_by_idx(unsigned int idx)
+{
+ if (idx >= dma_contiguous_areas_num)
+ return NULL;
+
+ return dma_contiguous_areas[idx];
+}
+EXPORT_SYMBOL_GPL(dma_contiguous_get_area_by_idx);
+
+static struct cma *dma_contiguous_default_area;
/*
* Default global CMA area size can be defined in kernel's .config.
@@ -91,15 +121,14 @@ static int __init early_cma(char *p)
}
early_param("cma", early_cma);
-/*
- * cma_skip_dt_default_reserved_mem - This is called from the
- * reserved_mem framework to detect if the default cma region is being
- * set by the "cma=" kernel parameter.
- */
-bool __init cma_skip_dt_default_reserved_mem(void)
+struct cma *dev_get_cma_area(struct device *dev)
{
- return size_cmdline != -1;
+ if (dev && dev->cma_area)
+ return dev->cma_area;
+
+ return dma_contiguous_default_area;
}
+EXPORT_SYMBOL_GPL(dev_get_cma_area);
#ifdef CONFIG_DMA_NUMA_CMA
@@ -264,9 +293,24 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
if (ret)
return;
- ret = dma_heap_cma_register_heap(dma_contiguous_default_area);
+ /*
+ * We need to insert the new area in our list to avoid
+ * any inconsistencies between having the default area
+ * listed in the DT or not.
+ *
+ * The DT case is handled by rmem_cma_setup() and will
+ * always insert all its areas in our list. However, if
+ * it didn't run (because OF_RESERVED_MEM isn't set, or
+ * there's no DT region specified), then we don't have a
+ * default area yet, and no area in our list.
+ *
+ * This block creates the default area in such a case,
+ * but we also need to insert it in our list to avoid
+ * having a default area but an empty list.
+ */
+ ret = dma_contiguous_insert_area(dma_contiguous_default_area);
if (ret)
- pr_warn("Couldn't register default CMA heap.");
+ pr_warn("Couldn't queue default CMA region for heap creation.");
}
}
@@ -470,47 +514,89 @@ static void rmem_cma_device_release(struct reserved_mem *rmem,
dev->cma_area = NULL;
}
-static const struct reserved_mem_ops rmem_cma_ops = {
- .device_init = rmem_cma_device_init,
- .device_release = rmem_cma_device_release,
-};
+static int __init __rmem_cma_verify_node(unsigned long node)
+{
+ if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
+ of_get_flat_dt_prop(node, "no-map", NULL))
+ return -ENODEV;
+
+ if (size_cmdline != -1 &&
+ of_get_flat_dt_prop(node, "linux,cma-default", NULL)) {
+ pr_err("Skipping dt linux,cma-default node in favor for \"cma=\" kernel param.\n");
+ return -EBUSY;
+ }
+ return 0;
+}
-static int __init rmem_cma_setup(struct reserved_mem *rmem)
+static int __init rmem_cma_validate(unsigned long node, phys_addr_t *align)
+{
+ int ret = __rmem_cma_verify_node(node);
+
+ if (ret)
+ return ret;
+
+ if (align)
+ *align = max_t(phys_addr_t, *align, CMA_MIN_ALIGNMENT_BYTES);
+
+ return 0;
+}
+
+static int __init rmem_cma_fixup(unsigned long node, phys_addr_t base,
+ phys_addr_t size)
+{
+ int ret = __rmem_cma_verify_node(node);
+
+ if (ret)
+ return ret;
+
+ /* Architecture specific contiguous memory fixup. */
+ dma_contiguous_early_fixup(base, size);
+ return 0;
+}
+
+static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
{
- unsigned long node = rmem->fdt_node;
bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
struct cma *cma;
- int err;
+ int ret;
- if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
- of_get_flat_dt_prop(node, "no-map", NULL))
- return -EINVAL;
+ ret = __rmem_cma_verify_node(node);
+ if (ret)
+ return ret;
if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) {
pr_err("Reserved memory: incorrect alignment of CMA region\n");
return -EINVAL;
}
- err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
- if (err) {
+ ret = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
+ if (ret) {
pr_err("Reserved memory: unable to setup CMA region\n");
- return err;
+ return ret;
}
if (default_cma)
dma_contiguous_default_area = cma;
- rmem->ops = &rmem_cma_ops;
rmem->priv = cma;
pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M);
- err = dma_heap_cma_register_heap(cma);
- if (err)
- pr_warn("Couldn't register CMA heap.");
+ ret = dma_contiguous_insert_area(cma);
+ if (ret)
+		pr_warn("Couldn't store CMA reserved area.\n");
return 0;
}
-RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup);
+
+static const struct reserved_mem_ops rmem_cma_ops = {
+ .node_validate = rmem_cma_validate,
+ .node_fixup = rmem_cma_fixup,
+ .node_init = rmem_cma_setup,
+ .device_init = rmem_cma_device_init,
+ .device_release = rmem_cma_device_release,
+};
+
+RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", &rmem_cma_ops);
#endif
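The block above is the template for the new ops-based convention: RESERVEDMEM_OF_DECLARE() now takes a const ops table up front, and the reserved_mem core calls node_validate, node_fixup and node_init at the appropriate early-boot stages, instead of the old setup callback stashing rmem->ops at runtime. A minimal sketch of what another pool could look like; the "vendor,foo-pool" compatible and all foo_* names are hypothetical:

	static int __init foo_pool_init(unsigned long node,
					struct reserved_mem *rmem)
	{
		/* Reject nodes this pool cannot honour, claim the rest. */
		if (of_get_flat_dt_prop(node, "no-map", NULL))
			return -ENODEV;

		pr_info("foo: reserved pool at %pa, size %pa\n",
			&rmem->base, &rmem->size);
		return 0;
	}

	static const struct reserved_mem_ops foo_pool_ops = {
		.node_init = foo_pool_init,
	};

	RESERVEDMEM_OF_DECLARE(foo, "vendor,foo-pool", &foo_pool_ops);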
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 8f43a930716d..ec887f443741 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -406,6 +406,8 @@ void dma_direct_sync_sg_for_device(struct device *dev,
arch_sync_dma_for_device(paddr, sg->length,
dir);
}
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
}
#endif
@@ -427,8 +429,10 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
}
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
+ arch_sync_dma_flush();
arch_sync_dma_for_cpu_all();
+ }
}
/*
@@ -440,14 +444,19 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
{
struct scatterlist *sg;
int i;
+ bool need_sync = false;
for_each_sg(sgl, sg, nents, i) {
- if (sg_dma_is_bus_address(sg))
+ if (sg_dma_is_bus_address(sg)) {
sg_dma_unmark_bus_address(sg);
- else
+ } else {
+ need_sync = true;
dma_direct_unmap_phys(dev, sg->dma_address,
- sg_dma_len(sg), dir, attrs);
+ sg_dma_len(sg), dir, attrs, false);
+ }
}
+ if (need_sync && !dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
}
#endif
@@ -457,6 +466,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
struct pci_p2pdma_map_state p2pdma_state = {};
struct scatterlist *sg;
int i, ret;
+ bool need_sync = false;
for_each_sg(sgl, sg, nents, i) {
switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) {
@@ -468,8 +478,9 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
*/
break;
case PCI_P2PDMA_MAP_NONE:
+ need_sync = true;
sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg),
- sg->length, dir, attrs);
+ sg->length, dir, attrs, false);
if (sg->dma_address == DMA_MAPPING_ERROR) {
ret = -EIO;
goto out_unmap;
@@ -488,6 +499,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
sg_dma_len(sg) = sg->length;
}
+ if (need_sync && !dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
return nents;
out_unmap:
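dma_direct_map_sg() and dma_direct_unmap_sg() now share one batching shape: per-entry cache maintenance is issued with the completion wait suppressed (the new trailing false argument), and a single arch_sync_dma_flush() waits for all of it. Schematically, with sync_one_and_wait()/sync_one_nowait() as stand-ins for the real helpers:

	/* before: issue maintenance and wait, once per sg entry */
	for_each_sg(sgl, sg, nents, i)
		sync_one_and_wait(sg);

	/* after: issue everything first ... */
	for_each_sg(sgl, sg, nents, i)
		sync_one_nowait(sg);
	/* ... then pay the completion barrier exactly once */
	arch_sync_dma_flush();

On a non-coherent system the final flush is where the synchronization barrier cost lands, once per scatterlist instead of once per entry.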
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 6184ff303f08..7140c208c123 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -60,17 +60,22 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
swiotlb_sync_single_for_device(dev, paddr, size, dir);
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_device(paddr, size, dir);
+ arch_sync_dma_flush();
+ }
}
static inline void dma_direct_sync_single_for_cpu(struct device *dev,
- dma_addr_t addr, size_t size, enum dma_data_direction dir)
+ dma_addr_t addr, size_t size, enum dma_data_direction dir,
+ bool flush)
{
phys_addr_t paddr = dma_to_phys(dev, addr);
if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(paddr, size, dir);
+ if (flush)
+ arch_sync_dma_flush();
arch_sync_dma_for_cpu_all();
}
@@ -79,21 +84,29 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
static inline dma_addr_t dma_direct_map_phys(struct device *dev,
phys_addr_t phys, size_t size, enum dma_data_direction dir,
- unsigned long attrs)
+ unsigned long attrs, bool flush)
{
dma_addr_t dma_addr;
if (is_swiotlb_force_bounce(dev)) {
- if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
- return DMA_MAPPING_ERROR;
+ if (!(attrs & DMA_ATTR_CC_SHARED)) {
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
+ return DMA_MAPPING_ERROR;
- return swiotlb_map(dev, phys, size, dir, attrs);
+ return swiotlb_map(dev, phys, size, dir, attrs);
+ }
+ } else if (attrs & DMA_ATTR_CC_SHARED) {
+ return DMA_MAPPING_ERROR;
}
if (attrs & DMA_ATTR_MMIO) {
dma_addr = phys;
if (unlikely(!dma_capable(dev, dma_addr, size, false)))
goto err_overflow;
+ } else if (attrs & DMA_ATTR_CC_SHARED) {
+ dma_addr = phys_to_dma_unencrypted(dev, phys);
+ if (unlikely(!dma_capable(dev, dma_addr, size, false)))
+ goto err_overflow;
} else {
dma_addr = phys_to_dma(dev, phys);
if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
@@ -107,8 +120,11 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
}
if (!dev_is_dma_coherent(dev) &&
- !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
+ !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
arch_sync_dma_for_device(phys, size, dir);
+ if (flush)
+ arch_sync_dma_flush();
+ }
return dma_addr;
err_overflow:
@@ -120,7 +136,8 @@ err_overflow:
}
static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
+ size_t size, enum dma_data_direction dir, unsigned long attrs,
+ bool flush)
{
phys_addr_t phys;
@@ -130,7 +147,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
phys = dma_to_phys(dev, addr);
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir, flush);
swiotlb_tbl_unmap_single(dev, phys, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
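Read together, the DMA_ATTR_CC_SHARED branches give the attribute a narrow meaning: in a confidential guest, where swiotlb bouncing would otherwise be forced, a buffer in explicitly shared memory skips the bounce and is translated with phys_to_dma_unencrypted(); outside that setup the attribute is rejected with DMA_MAPPING_ERROR. A hedged caller-side sketch, assuming the buffer comes from something like the system_cc_shared heap; example_map_cc_shared() is hypothetical:

	static dma_addr_t example_map_cc_shared(struct device *dev, void *buf,
						size_t size)
	{
		/* buf must already live in memory shared with the device */
		return dma_map_single_attrs(dev, buf, size, DMA_TO_DEVICE,
					    DMA_ATTR_CC_SHARED);
	}

As usual, the result must be checked with dma_mapping_error() before use.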
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 0f33b3ea7daf..29eeb5fdf199 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -5,6 +5,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cleanup.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/device.h>
@@ -15,6 +16,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/timekeeping.h>
#include <uapi/linux/map_benchmark.h>
@@ -31,17 +33,219 @@ struct map_benchmark_data {
atomic64_t loops;
};
+struct map_benchmark_ops {
+ void *(*prepare)(struct map_benchmark_data *map);
+ void (*unprepare)(void *mparam);
+ void (*initialize_data)(void *mparam);
+ int (*do_map)(void *mparam);
+ void (*do_unmap)(void *mparam);
+};
+
+struct dma_single_map_param {
+ struct device *dev;
+ dma_addr_t addr;
+ void *xbuf;
+ u32 npages;
+ u32 dma_dir;
+};
+
+static void *dma_single_map_benchmark_prepare(struct map_benchmark_data *map)
+{
+ struct dma_single_map_param *params __free(kfree) = kzalloc(sizeof(*params),
+ GFP_KERNEL);
+ if (!params)
+ return NULL;
+
+ params->npages = map->bparam.granule;
+ params->dma_dir = map->bparam.dma_dir;
+ params->dev = map->dev;
+ params->xbuf = alloc_pages_exact(params->npages * PAGE_SIZE, GFP_KERNEL);
+ if (!params->xbuf)
+ return NULL;
+
+ return_ptr(params);
+}
+
+static void dma_single_map_benchmark_unprepare(void *mparam)
+{
+ struct dma_single_map_param *params = mparam;
+
+ free_pages_exact(params->xbuf, params->npages * PAGE_SIZE);
+ kfree(params);
+}
+
+static void dma_single_map_benchmark_initialize_data(void *mparam)
+{
+ struct dma_single_map_param *params = mparam;
+
+	/*
+	 * For a non-coherent device, if we don't dirty the buffer in
+	 * the cache, this will give an underestimate of the real-world
+	 * overhead of BIDIRECTIONAL or TO_DEVICE mappings;
+	 * 0x66 means everything goes well! 66 is lucky.
+	 */
+ if (params->dma_dir != DMA_FROM_DEVICE)
+ memset(params->xbuf, 0x66, params->npages * PAGE_SIZE);
+}
+
+static int dma_single_map_benchmark_do_map(void *mparam)
+{
+ struct dma_single_map_param *params = mparam;
+
+ params->addr = dma_map_single(params->dev, params->xbuf,
+ params->npages * PAGE_SIZE, params->dma_dir);
+ if (unlikely(dma_mapping_error(params->dev, params->addr))) {
+ pr_err("dma_map_single failed on %s\n", dev_name(params->dev));
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void dma_single_map_benchmark_do_unmap(void *mparam)
+{
+ struct dma_single_map_param *params = mparam;
+
+ dma_unmap_single(params->dev, params->addr,
+ params->npages * PAGE_SIZE, params->dma_dir);
+}
+
+static struct map_benchmark_ops dma_single_map_benchmark_ops = {
+ .prepare = dma_single_map_benchmark_prepare,
+ .unprepare = dma_single_map_benchmark_unprepare,
+ .initialize_data = dma_single_map_benchmark_initialize_data,
+ .do_map = dma_single_map_benchmark_do_map,
+ .do_unmap = dma_single_map_benchmark_do_unmap,
+};
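The prepare callback above leans on <linux/cleanup.h>: __free(kfree) arms an automatic kfree() on every early return, and return_ptr() disarms it when ownership moves to the caller. The idiom in isolation (struct foo and setup() are placeholders):

	struct foo *p __free(kfree) = kzalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return NULL;
	if (setup(p))
		return NULL;	/* p is kfree()d automatically here */

	return_ptr(p);		/* cleanup disarmed, caller owns p */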
+
+struct dma_sg_map_param {
+ struct sg_table sgt;
+ struct device *dev;
+ void **buf;
+ u32 npages;
+ u32 dma_dir;
+};
+
+static void *dma_sg_map_benchmark_prepare(struct map_benchmark_data *map)
+{
+ struct scatterlist *sg;
+ int i;
+
+ struct dma_sg_map_param *params = kzalloc(sizeof(*params), GFP_KERNEL);
+
+ if (!params)
+ return NULL;
+ /*
+ * Set the number of scatterlist entries based on the granule.
+ * In SG mode, 'granule' represents the number of scatterlist entries.
+ * Each scatterlist entry corresponds to a single page.
+ */
+ params->npages = map->bparam.granule;
+ params->dma_dir = map->bparam.dma_dir;
+ params->dev = map->dev;
+ params->buf = kmalloc_array(params->npages, sizeof(*params->buf),
+ GFP_KERNEL);
+ if (!params->buf)
+ goto out;
+
+ if (sg_alloc_table(&params->sgt, params->npages, GFP_KERNEL))
+ goto free_buf;
+
+ for_each_sgtable_sg(&params->sgt, sg, i) {
+ params->buf[i] = (void *)__get_free_page(GFP_KERNEL);
+ if (!params->buf[i])
+ goto free_page;
+
+ sg_set_buf(sg, params->buf[i], PAGE_SIZE);
+ }
+
+ return params;
+
+free_page:
+ while (i-- > 0)
+ free_page((unsigned long)params->buf[i]);
+
+ sg_free_table(&params->sgt);
+free_buf:
+ kfree(params->buf);
+out:
+ kfree(params);
+ return NULL;
+}
+
+static void dma_sg_map_benchmark_unprepare(void *mparam)
+{
+ struct dma_sg_map_param *params = mparam;
+ int i;
+
+ for (i = 0; i < params->npages; i++)
+ free_page((unsigned long)params->buf[i]);
+
+ sg_free_table(&params->sgt);
+
+ kfree(params->buf);
+ kfree(params);
+}
+
+static void dma_sg_map_benchmark_initialize_data(void *mparam)
+{
+ struct dma_sg_map_param *params = mparam;
+ struct scatterlist *sg;
+ int i = 0;
+
+ if (params->dma_dir == DMA_FROM_DEVICE)
+ return;
+
+ for_each_sgtable_sg(&params->sgt, sg, i)
+ memset(params->buf[i], 0x66, PAGE_SIZE);
+}
+
+static int dma_sg_map_benchmark_do_map(void *mparam)
+{
+ struct dma_sg_map_param *params = mparam;
+ int ret = 0;
+
+ int sg_mapped = dma_map_sg(params->dev, params->sgt.sgl,
+ params->npages, params->dma_dir);
+ if (!sg_mapped) {
+ pr_err("dma_map_sg failed on %s\n", dev_name(params->dev));
+ ret = -ENOMEM;
+ }
+
+ return ret;
+}
+
+static void dma_sg_map_benchmark_do_unmap(void *mparam)
+{
+ struct dma_sg_map_param *params = mparam;
+
+ dma_unmap_sg(params->dev, params->sgt.sgl, params->npages,
+ params->dma_dir);
+}
+
+static struct map_benchmark_ops dma_sg_map_benchmark_ops = {
+ .prepare = dma_sg_map_benchmark_prepare,
+ .unprepare = dma_sg_map_benchmark_unprepare,
+ .initialize_data = dma_sg_map_benchmark_initialize_data,
+ .do_map = dma_sg_map_benchmark_do_map,
+ .do_unmap = dma_sg_map_benchmark_do_unmap,
+};
+
+static struct map_benchmark_ops *dma_map_benchmark_ops[DMA_MAP_BENCH_MODE_MAX] = {
+ [DMA_MAP_BENCH_SINGLE_MODE] = &dma_single_map_benchmark_ops,
+ [DMA_MAP_BENCH_SG_MODE] = &dma_sg_map_benchmark_ops,
+};
+
static int map_benchmark_thread(void *data)
{
- void *buf;
- dma_addr_t dma_addr;
struct map_benchmark_data *map = data;
- int npages = map->bparam.granule;
- u64 size = npages * PAGE_SIZE;
+ __u8 map_mode = map->bparam.map_mode;
int ret = 0;
- buf = alloc_pages_exact(size, GFP_KERNEL);
- if (!buf)
+ struct map_benchmark_ops *mb_ops = dma_map_benchmark_ops[map_mode];
+ void *mparam = mb_ops->prepare(map);
+
+ if (!mparam)
return -ENOMEM;
while (!kthread_should_stop()) {
@@ -49,23 +253,12 @@ static int map_benchmark_thread(void *data)
ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
ktime_t map_delta, unmap_delta;
- /*
- * for a non-coherent device, if we don't stain them in the
- * cache, this will give an underestimate of the real-world
- * overhead of BIDIRECTIONAL or TO_DEVICE mappings;
- * 66 means evertything goes well! 66 is lucky.
- */
- if (map->dir != DMA_FROM_DEVICE)
- memset(buf, 0x66, size);
-
+ mb_ops->initialize_data(mparam);
map_stime = ktime_get();
- dma_addr = dma_map_single(map->dev, buf, size, map->dir);
- if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
- pr_err("dma_map_single failed on %s\n",
- dev_name(map->dev));
- ret = -ENOMEM;
+ ret = mb_ops->do_map(mparam);
+ if (ret)
goto out;
- }
+
map_etime = ktime_get();
map_delta = ktime_sub(map_etime, map_stime);
@@ -73,7 +266,8 @@ static int map_benchmark_thread(void *data)
ndelay(map->bparam.dma_trans_ns);
unmap_stime = ktime_get();
- dma_unmap_single(map->dev, dma_addr, size, map->dir);
+ mb_ops->do_unmap(mparam);
+
unmap_etime = ktime_get();
unmap_delta = ktime_sub(unmap_etime, unmap_stime);
@@ -108,7 +302,7 @@ static int map_benchmark_thread(void *data)
}
out:
- free_pages_exact(buf, size);
+ mb_ops->unprepare(mparam);
return ret;
}
@@ -209,6 +403,12 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case DMA_MAP_BENCHMARK:
+ if (map->bparam.map_mode < 0 ||
+ map->bparam.map_mode >= DMA_MAP_BENCH_MODE_MAX) {
+ pr_err("invalid map mode\n");
+ return -EINVAL;
+ }
+
if (map->bparam.threads == 0 ||
map->bparam.threads > DMA_MAP_MAX_THREADS) {
pr_err("invalid thread number\n");
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 6d3dd0bd3a88..23ed8eb9233e 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -157,6 +157,7 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
{
const struct dma_map_ops *ops = get_dma_ops(dev);
bool is_mmio = attrs & DMA_ATTR_MMIO;
+ bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
dma_addr_t addr = DMA_MAPPING_ERROR;
BUG_ON(!valid_dma_direction(dir));
@@ -168,8 +169,11 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
return DMA_MAPPING_ERROR;
if (dma_map_direct(dev, ops) ||
- (!is_mmio && arch_dma_map_phys_direct(dev, phys + size)))
- addr = dma_direct_map_phys(dev, phys, size, dir, attrs);
+ (!is_mmio && !is_cc_shared &&
+ arch_dma_map_phys_direct(dev, phys + size)))
+ addr = dma_direct_map_phys(dev, phys, size, dir, attrs, true);
+ else if (is_cc_shared)
+ return DMA_MAPPING_ERROR;
else if (use_dma_iommu(dev))
addr = iommu_dma_map_phys(dev, phys, size, dir, attrs);
else if (ops->map_phys)
@@ -206,11 +210,16 @@ void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size,
{
const struct dma_map_ops *ops = get_dma_ops(dev);
bool is_mmio = attrs & DMA_ATTR_MMIO;
+ bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
BUG_ON(!valid_dma_direction(dir));
+
if (dma_map_direct(dev, ops) ||
- (!is_mmio && arch_dma_unmap_phys_direct(dev, addr + size)))
- dma_direct_unmap_phys(dev, addr, size, dir, attrs);
+ (!is_mmio && !is_cc_shared &&
+ arch_dma_unmap_phys_direct(dev, addr + size)))
+ dma_direct_unmap_phys(dev, addr, size, dir, attrs, true);
+ else if (is_cc_shared)
+ return;
else if (use_dma_iommu(dev))
iommu_dma_unmap_phys(dev, addr, size, dir, attrs);
else if (ops->unmap_phys)
@@ -379,7 +388,7 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
BUG_ON(!valid_dma_direction(dir));
if (dma_map_direct(dev, ops))
- dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir, true);
else if (use_dma_iommu(dev))
iommu_dma_sync_single_for_cpu(dev, addr, size, dir);
else if (ops->sync_single_for_cpu)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 9fd73700ddcf..9a15e7231e39 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -868,6 +868,9 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
if (orig_addr == INVALID_PHYS_ADDR)
return;
+ if (dir == DMA_FROM_DEVICE && !dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
+
/*
* It's valid for tlb_offset to be negative. This can happen when the
* "offset" returned by swiotlb_align_offset() is non-zero, and the
@@ -1612,8 +1615,10 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
return DMA_MAPPING_ERROR;
}
- if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
arch_sync_dma_for_device(swiotlb_addr, size, dir);
+ arch_sync_dma_flush();
+ }
return dma_addr;
}
@@ -1872,26 +1877,25 @@ static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
dev->dma_io_tlb_mem = &io_tlb_default_mem;
}
-static const struct reserved_mem_ops rmem_swiotlb_ops = {
- .device_init = rmem_swiotlb_device_init,
- .device_release = rmem_swiotlb_device_release,
-};
-
-static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+static int __init rmem_swiotlb_setup(unsigned long node,
+ struct reserved_mem *rmem)
{
- unsigned long node = rmem->fdt_node;
-
if (of_get_flat_dt_prop(node, "reusable", NULL) ||
of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
of_get_flat_dt_prop(node, "no-map", NULL))
return -EINVAL;
- rmem->ops = &rmem_swiotlb_ops;
pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M);
return 0;
}
-RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+ .node_init = rmem_swiotlb_setup,
+ .device_init = rmem_swiotlb_device_init,
+ .device_release = rmem_swiotlb_device_release,
+};
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", &rmem_swiotlb_ops);
#endif /* CONFIG_DMA_RESTRICTED_POOL */
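Nothing changes for consumers of the restricted pool: the device_init/device_release hooks still fire when a device claims the region through its memory-region phandle. For reference, the usual probe-side call (a sketch with error handling trimmed; example_probe() is hypothetical):

	static int example_probe(struct platform_device *pdev)
	{
		/*
		 * Resolves the node's memory-region phandle; for a
		 * "restricted-dma-pool" region this ends up in
		 * rmem_swiotlb_device_init() above.
		 */
		return of_reserved_mem_device_init(&pdev->dev);
	}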
diff --git a/mm/cma.c b/mm/cma.c
index 15cc0ae76c8e..c7ca567f4c5c 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -52,6 +52,7 @@ const char *cma_get_name(const struct cma *cma)
{
return cma->name;
}
+EXPORT_SYMBOL_GPL(cma_get_name);
static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
unsigned int align_order)
@@ -951,6 +952,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
return page;
}
+EXPORT_SYMBOL_GPL(cma_alloc);
static struct cma_memrange *find_cma_memrange(struct cma *cma,
const struct page *pages, unsigned long count)
@@ -1030,6 +1032,7 @@ bool cma_release(struct cma *cma, const struct page *pages,
return true;
}
+EXPORT_SYMBOL_GPL(cma_release);
bool cma_release_frozen(struct cma *cma, const struct page *pages,
unsigned long count)
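With cma_alloc(), cma_release() and cma_get_name() exported, a modular CMA heap can drive the whole allocation lifecycle; the pairing rules are unchanged. A sketch, where cma and count are whatever the caller allocated with:

	struct page *pages = cma_alloc(cma, count, 0, false);

	if (pages) {
		/* ... export through a dma-buf, hand to a device ... */
		if (!cma_release(cma, pages, count))
			pr_warn("%s: bogus CMA release\n", cma_get_name(cma));
	}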
diff --git a/tools/dma/dma_map_benchmark.c b/tools/dma/dma_map_benchmark.c
index dd0ed528e6df..eab0ac611a23 100644
--- a/tools/dma/dma_map_benchmark.c
+++ b/tools/dma/dma_map_benchmark.c
@@ -20,12 +20,19 @@ static char *directions[] = {
"FROM_DEVICE",
};
+static char *mode[] = {
+ "SINGLE_MODE",
+ "SG_MODE",
+};
+
int main(int argc, char **argv)
{
struct map_benchmark map;
int fd, opt;
/* default single thread, run 20 seconds on NUMA_NO_NODE */
int threads = 1, seconds = 20, node = -1;
+ /* default single map mode */
+ int map_mode = DMA_MAP_BENCH_SINGLE_MODE;
/* default dma mask 32bit, bidirectional DMA */
int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
/* default granule 1 PAGESIZE */
@@ -33,7 +40,7 @@ int main(int argc, char **argv)
int cmd = DMA_MAP_BENCHMARK;
- while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) {
+ while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:m:")) != -1) {
switch (opt) {
case 't':
threads = atoi(optarg);
@@ -56,11 +63,20 @@ int main(int argc, char **argv)
case 'g':
granule = atoi(optarg);
break;
+ case 'm':
+ map_mode = atoi(optarg);
+ break;
default:
return -1;
}
}
+ if (map_mode < 0 || map_mode >= DMA_MAP_BENCH_MODE_MAX) {
+		fprintf(stderr, "invalid map mode, SINGLE_MODE: %d, SG_MODE: %d\n",
+			DMA_MAP_BENCH_SINGLE_MODE, DMA_MAP_BENCH_SG_MODE);
+ exit(1);
+ }
+
if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
DMA_MAP_MAX_THREADS);
@@ -110,14 +126,15 @@ int main(int argc, char **argv)
map.dma_dir = dir;
map.dma_trans_ns = xdelay;
map.granule = granule;
+ map.map_mode = map_mode;
if (ioctl(fd, cmd, &map)) {
perror("ioctl");
exit(1);
}
- printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
- threads, seconds, node, directions[dir], granule);
+ printf("dma mapping benchmark(%s): threads:%d seconds:%d node:%d dir:%s granule:%d\n",
+ mode[map_mode], threads, seconds, node, directions[dir], granule);
printf("average map latency(us):%.1f standard deviation:%.1f\n",
map.avg_map_100ns/10.0, map.map_stddev/10.0);
printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
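Putting the tool changes together, a scatter-gather run would look something like the following, assuming SG_MODE enumerates to 1 (matching the mode[] table above) and recalling that in SG mode -g counts scatterlist entries rather than pages:

	./dma_map_benchmark -t 8 -s 10 -g 4 -m 1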