summaryrefslogtreecommitdiff
path: root/kernel/dma
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/dma')
-rw-r--r--kernel/dma/Kconfig9
-rw-r--r--kernel/dma/coherent.c33
-rw-r--r--kernel/dma/contiguous.c165
-rw-r--r--kernel/dma/debug.c176
-rw-r--r--kernel/dma/debug.h57
-rw-r--r--kernel/dma/direct.c124
-rw-r--r--kernel/dma/direct.h86
-rw-r--r--kernel/dma/dummy.c13
-rw-r--r--kernel/dma/map_benchmark.c250
-rw-r--r--kernel/dma/mapping.c172
-rw-r--r--kernel/dma/ops_helpers.c12
-rw-r--r--kernel/dma/pool.c40
-rw-r--r--kernel/dma/remap.c4
-rw-r--r--kernel/dma/swiotlb.c64
14 files changed, 819 insertions, 386 deletions
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 31cfdb6b4bc3..bfef21b4a9ae 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -47,12 +47,6 @@ config ARCH_HAS_DMA_SET_MASK
config ARCH_HAS_DMA_WRITE_COMBINE
bool
-#
-# Select if the architectures provides the arch_dma_mark_clean hook
-#
-config ARCH_HAS_DMA_MARK_CLEAN
- bool
-
config DMA_DECLARE_COHERENT
bool
@@ -78,6 +72,9 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
+config ARCH_HAS_BATCHED_DMA_SYNC
+ bool
+
#
# Select this option if the architecture assumes DMA devices are coherent
# by default.
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 3b2bdca9f1d4..bcdc0f76d2e8 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -49,7 +49,7 @@ static struct dma_coherent_mem *dma_init_coherent_memory(phys_addr_t phys_addr,
if (!mem_base)
return ERR_PTR(-EINVAL);
- dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
+ dma_mem = kzalloc_obj(struct dma_coherent_mem);
if (!dma_mem)
goto out_unmap_membase;
dma_mem->bitmap = bitmap_zalloc(pages, GFP_KERNEL);
@@ -336,16 +336,22 @@ static phys_addr_t dma_reserved_default_memory_size __initdata;
static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
{
- if (!rmem->priv) {
- struct dma_coherent_mem *mem;
+ struct dma_coherent_mem *mem = rmem->priv;
+ if (!mem) {
mem = dma_init_coherent_memory(rmem->base, rmem->base,
rmem->size, true);
if (IS_ERR(mem))
return PTR_ERR(mem);
rmem->priv = mem;
}
- dma_assign_coherent_memory(dev, rmem->priv);
+
+ /* Warn if the device potentially can't use the reserved memory */
+ if (mem->device_base + rmem->size - 1 >
+ min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit))
+ dev_warn(dev, "reserved memory is beyond device's set DMA address range\n");
+
+ dma_assign_coherent_memory(dev, mem);
return 0;
}
@@ -356,17 +362,11 @@ static void rmem_dma_device_release(struct reserved_mem *rmem,
dev->dma_mem = NULL;
}
-static const struct reserved_mem_ops rmem_dma_ops = {
- .device_init = rmem_dma_device_init,
- .device_release = rmem_dma_device_release,
-};
-static int __init rmem_dma_setup(struct reserved_mem *rmem)
+static int __init rmem_dma_setup(unsigned long node, struct reserved_mem *rmem)
{
- unsigned long node = rmem->fdt_node;
-
if (of_get_flat_dt_prop(node, "reusable", NULL))
- return -EINVAL;
+ return -ENODEV;
#ifdef CONFIG_ARM
if (!of_get_flat_dt_prop(node, "no-map", NULL)) {
@@ -384,7 +384,6 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem)
}
#endif
- rmem->ops = &rmem_dma_ops;
pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M);
return 0;
@@ -401,5 +400,11 @@ static int __init dma_init_reserved_memory(void)
core_initcall(dma_init_reserved_memory);
#endif /* CONFIG_DMA_GLOBAL_POOL */
-RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup);
+static const struct reserved_mem_ops rmem_dma_ops = {
+ .node_init = rmem_dma_setup,
+ .device_init = rmem_dma_device_init,
+ .device_release = rmem_dma_device_release,
+};
+
+RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", &rmem_dma_ops);
#endif
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 055da410ac71..03f52bd17120 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -52,7 +52,38 @@
#define CMA_SIZE_MBYTES 0
#endif
-struct cma *dma_contiguous_default_area;
+static struct cma *dma_contiguous_areas[MAX_CMA_AREAS];
+static unsigned int dma_contiguous_areas_num;
+
+static int dma_contiguous_insert_area(struct cma *cma)
+{
+ if (dma_contiguous_areas_num >= ARRAY_SIZE(dma_contiguous_areas))
+ return -EINVAL;
+
+ dma_contiguous_areas[dma_contiguous_areas_num++] = cma;
+
+ return 0;
+}
+
+/**
+ * dma_contiguous_get_area_by_idx() - Get contiguous area at given index
+ * @idx: index of the area we query
+ *
+ * Queries for the contiguous area located at index @idx.
+ *
+ * Returns:
+ * A pointer to the requested contiguous area, or NULL otherwise.
+ */
+struct cma *dma_contiguous_get_area_by_idx(unsigned int idx)
+{
+ if (idx >= dma_contiguous_areas_num)
+ return NULL;
+
+ return dma_contiguous_areas[idx];
+}
+EXPORT_SYMBOL_GPL(dma_contiguous_get_area_by_idx);
+
+static struct cma *dma_contiguous_default_area;
/*
* Default global CMA area size can be defined in kernel's .config.
@@ -64,8 +95,7 @@ struct cma *dma_contiguous_default_area;
* Users, who want to set the size of global CMA area for their system
* should use cma= kernel parameter.
*/
-static const phys_addr_t size_bytes __initconst =
- (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M;
+#define size_bytes ((phys_addr_t)CMA_SIZE_MBYTES * SZ_1M)
static phys_addr_t size_cmdline __initdata = -1;
static phys_addr_t base_cmdline __initdata;
static phys_addr_t limit_cmdline __initdata;
@@ -91,6 +121,15 @@ static int __init early_cma(char *p)
}
early_param("cma", early_cma);
+struct cma *dev_get_cma_area(struct device *dev)
+{
+ if (dev && dev->cma_area)
+ return dev->cma_area;
+
+ return dma_contiguous_default_area;
+}
+EXPORT_SYMBOL_GPL(dev_get_cma_area);
+
#ifdef CONFIG_DMA_NUMA_CMA
static struct cma *dma_contiguous_numa_area[MAX_NUMNODES];
@@ -223,7 +262,10 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
if (size_cmdline != -1) {
selected_size = size_cmdline;
selected_base = base_cmdline;
- selected_limit = min_not_zero(limit_cmdline, limit);
+
+ /* Hornor the user setup dma address limit */
+ selected_limit = limit_cmdline ?: limit;
+
if (base_cmdline + size_cmdline == limit_cmdline)
fixed = true;
} else {
@@ -239,13 +281,36 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
}
if (selected_size && !dma_contiguous_default_area) {
+ int ret;
+
pr_debug("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M);
- dma_contiguous_reserve_area(selected_size, selected_base,
- selected_limit,
- &dma_contiguous_default_area,
- fixed);
+ ret = dma_contiguous_reserve_area(selected_size, selected_base,
+ selected_limit,
+ &dma_contiguous_default_area,
+ fixed);
+ if (ret)
+ return;
+
+ /*
+ * We need to insert the new area in our list to avoid
+ * any inconsistencies between having the default area
+ * listed in the DT or not.
+ *
+ * The DT case is handled by rmem_cma_setup() and will
+ * always insert all its areas in our list. However, if
+ * it didn't run (because OF_RESERVED_MEM isn't set, or
+ * there's no DT region specified), then we don't have a
+ * default area yet, and no area in our list.
+ *
+ * This block creates the default area in such a case,
+ * but we also need to insert it in our list to avoid
+ * having a default area but an empty list.
+ */
+ ret = dma_contiguous_insert_area(dma_contiguous_default_area);
+ if (ret)
+ pr_warn("Couldn't queue default CMA region for heap creation.");
}
}
@@ -449,51 +514,89 @@ static void rmem_cma_device_release(struct reserved_mem *rmem,
dev->cma_area = NULL;
}
-static const struct reserved_mem_ops rmem_cma_ops = {
- .device_init = rmem_cma_device_init,
- .device_release = rmem_cma_device_release,
-};
-
-static int __init rmem_cma_setup(struct reserved_mem *rmem)
+static int __init __rmem_cma_verify_node(unsigned long node)
{
- unsigned long node = rmem->fdt_node;
- bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
- struct cma *cma;
- int err;
+ if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
+ of_get_flat_dt_prop(node, "no-map", NULL))
+ return -ENODEV;
- if (size_cmdline != -1 && default_cma) {
- pr_info("Reserved memory: bypass %s node, using cmdline CMA params instead\n",
- rmem->name);
+ if (size_cmdline != -1 &&
+ of_get_flat_dt_prop(node, "linux,cma-default", NULL)) {
+ pr_err("Skipping dt linux,cma-default node in favor for \"cma=\" kernel param.\n");
return -EBUSY;
}
+ return 0;
+}
- if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
- of_get_flat_dt_prop(node, "no-map", NULL))
- return -EINVAL;
+static int __init rmem_cma_validate(unsigned long node, phys_addr_t *align)
+{
+ int ret = __rmem_cma_verify_node(node);
+
+ if (ret)
+ return ret;
+
+ if (align)
+ *align = max_t(phys_addr_t, *align, CMA_MIN_ALIGNMENT_BYTES);
+
+ return 0;
+}
+
+static int __init rmem_cma_fixup(unsigned long node, phys_addr_t base,
+ phys_addr_t size)
+{
+ int ret = __rmem_cma_verify_node(node);
+
+ if (ret)
+ return ret;
+
+ /* Architecture specific contiguous memory fixup. */
+ dma_contiguous_early_fixup(base, size);
+ return 0;
+}
+
+static int __init rmem_cma_setup(unsigned long node, struct reserved_mem *rmem)
+{
+ bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
+ struct cma *cma;
+ int ret;
+
+ ret = __rmem_cma_verify_node(node);
+ if (ret)
+ return ret;
if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) {
pr_err("Reserved memory: incorrect alignment of CMA region\n");
return -EINVAL;
}
- err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
- if (err) {
+ ret = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
+ if (ret) {
pr_err("Reserved memory: unable to setup CMA region\n");
- return err;
+ return ret;
}
- /* Architecture specific contiguous memory fixup. */
- dma_contiguous_early_fixup(rmem->base, rmem->size);
if (default_cma)
dma_contiguous_default_area = cma;
- rmem->ops = &rmem_cma_ops;
rmem->priv = cma;
pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M);
+ ret = dma_contiguous_insert_area(cma);
+ if (ret)
+ pr_warn("Couldn't store CMA reserved area.");
+
return 0;
}
-RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup);
+
+static const struct reserved_mem_ops rmem_cma_ops = {
+ .node_validate = rmem_cma_validate,
+ .node_fixup = rmem_cma_fixup,
+ .node_init = rmem_cma_setup,
+ .device_init = rmem_cma_device_init,
+ .device_release = rmem_cma_device_release,
+};
+
+RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", &rmem_cma_ops);
#endif
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index e43c6de2bce4..3248f8b4d096 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -23,6 +23,7 @@
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <linux/swiotlb.h>
#include <asm/sections.h>
#include "debug.h"
@@ -38,7 +39,8 @@ enum {
dma_debug_single,
dma_debug_sg,
dma_debug_coherent,
- dma_debug_resource,
+ dma_debug_noncoherent,
+ dma_debug_phy,
};
enum map_err_types {
@@ -61,6 +63,7 @@ enum map_err_types {
* @sg_mapped_ents: 'mapped_ents' from dma_map_sg
* @paddr: physical start address of the mapping
* @map_err_type: track whether dma_mapping_error() was checked
+ * @is_cache_clean: driver promises not to write to buffer while mapped
* @stack_len: number of backtrace entries in @stack_entries
* @stack_entries: stack of backtrace history
*/
@@ -74,7 +77,8 @@ struct dma_debug_entry {
int sg_call_ents;
int sg_mapped_ents;
phys_addr_t paddr;
- enum map_err_types map_err_type;
+ enum map_err_types map_err_type;
+ bool is_cache_clean;
#ifdef CONFIG_STACKTRACE
unsigned int stack_len;
unsigned long stack_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
@@ -140,7 +144,8 @@ static const char *type2name[] = {
[dma_debug_single] = "single",
[dma_debug_sg] = "scatter-gather",
[dma_debug_coherent] = "coherent",
- [dma_debug_resource] = "resource",
+ [dma_debug_noncoherent] = "noncoherent",
+ [dma_debug_phy] = "phy",
};
static const char *dir2name[] = {
@@ -448,7 +453,7 @@ static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
return overlap;
}
-static void active_cacheline_inc_overlap(phys_addr_t cln)
+static void active_cacheline_inc_overlap(phys_addr_t cln, bool is_cache_clean)
{
int overlap = active_cacheline_read_overlap(cln);
@@ -457,7 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln)
/* If we overflowed the overlap counter then we're potentially
* leaking dma-mappings.
*/
- WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
+ WARN_ONCE(!is_cache_clean && overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"),
ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
}
@@ -469,12 +474,15 @@ static int active_cacheline_dec_overlap(phys_addr_t cln)
return active_cacheline_set_overlap(cln, --overlap);
}
-static int active_cacheline_insert(struct dma_debug_entry *entry)
+static int active_cacheline_insert(struct dma_debug_entry *entry,
+ bool *overlap_cache_clean)
{
phys_addr_t cln = to_cacheline_number(entry);
unsigned long flags;
int rc;
+ *overlap_cache_clean = false;
+
/* If the device is not writing memory then we don't have any
* concerns about the cpu consuming stale data. This mitigates
* legitimate usages of overlapping mappings.
@@ -484,8 +492,16 @@ static int active_cacheline_insert(struct dma_debug_entry *entry)
spin_lock_irqsave(&radix_lock, flags);
rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
- if (rc == -EEXIST)
- active_cacheline_inc_overlap(cln);
+ if (rc == -EEXIST) {
+ struct dma_debug_entry *existing;
+
+ active_cacheline_inc_overlap(cln, entry->is_cache_clean);
+ existing = radix_tree_lookup(&dma_active_cacheline, cln);
+ /* A lookup failure here after we got -EEXIST is unexpected. */
+ WARN_ON(!existing);
+ if (existing)
+ *overlap_cache_clean = existing->is_cache_clean;
+ }
spin_unlock_irqrestore(&radix_lock, flags);
return rc;
@@ -580,19 +596,28 @@ DEFINE_SHOW_ATTRIBUTE(dump);
*/
static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
{
+ bool overlap_cache_clean;
struct hash_bucket *bucket;
unsigned long flags;
int rc;
+ entry->is_cache_clean = attrs & (DMA_ATTR_DEBUGGING_IGNORE_CACHELINES |
+ DMA_ATTR_REQUIRE_COHERENT);
+
bucket = get_hash_bucket(entry, &flags);
hash_bucket_add(bucket, entry);
put_hash_bucket(bucket, flags);
- rc = active_cacheline_insert(entry);
+ rc = active_cacheline_insert(entry, &overlap_cache_clean);
if (rc == -ENOMEM) {
pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n");
global_disable = true;
- } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+ } else if (rc == -EEXIST &&
+ !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ !(entry->is_cache_clean && overlap_cache_clean) &&
+ dma_get_cache_alignment() >= L1_CACHE_BYTES &&
+ !(IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) &&
+ is_swiotlb_active(entry->dev))) {
err_printk(entry->dev, entry,
"cacheline tracking EEXIST, overlapping mappings aren't supported\n");
}
@@ -877,7 +902,7 @@ void dma_debug_add_bus(const struct bus_type *bus)
if (dma_debug_disabled())
return;
- nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
+ nb = kzalloc_obj(struct notifier_block);
if (nb == NULL) {
pr_err("dma_debug_add_bus: out of memory\n");
return;
@@ -993,7 +1018,8 @@ static void check_unmap(struct dma_debug_entry *ref)
"[mapped as %s] [unmapped as %s]\n",
ref->dev_addr, ref->size,
type2name[entry->type], type2name[ref->type]);
- } else if (entry->type == dma_debug_coherent &&
+ } else if ((entry->type == dma_debug_coherent ||
+ entry->type == dma_debug_noncoherent) &&
ref->paddr != entry->paddr) {
err_printk(ref->dev, entry, "device driver frees "
"DMA memory with different CPU address "
@@ -1051,17 +1077,16 @@ static void check_unmap(struct dma_debug_entry *ref)
dma_entry_free(entry);
}
-static void check_for_stack(struct device *dev,
- struct page *page, size_t offset)
+static void check_for_stack(struct device *dev, phys_addr_t phys)
{
void *addr;
struct vm_struct *stack_vm_area = task_stack_vm_area(current);
if (!stack_vm_area) {
/* Stack is direct-mapped. */
- if (PageHighMem(page))
+ if (PhysHighMem(phys))
return;
- addr = page_address(page) + offset;
+ addr = phys_to_virt(phys);
if (object_is_on_stack(addr))
err_printk(dev, NULL, "device driver maps memory from stack [addr=%p]\n", addr);
} else {
@@ -1069,10 +1094,12 @@ static void check_for_stack(struct device *dev,
int i;
for (i = 0; i < stack_vm_area->nr_pages; i++) {
- if (page != stack_vm_area->pages[i])
+ if (__phys_to_pfn(phys) !=
+ page_to_pfn(stack_vm_area->pages[i]))
continue;
- addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
+ addr = (u8 *)current->stack + i * PAGE_SIZE +
+ (phys % PAGE_SIZE);
err_printk(dev, NULL, "device driver maps memory from stack [probable addr=%p]\n", addr);
break;
}
@@ -1201,9 +1228,8 @@ void debug_dma_map_single(struct device *dev, const void *addr,
}
EXPORT_SYMBOL(debug_dma_map_single);
-void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
- size_t size, int direction, dma_addr_t dma_addr,
- unsigned long attrs)
+void debug_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
+ int direction, dma_addr_t dma_addr, unsigned long attrs)
{
struct dma_debug_entry *entry;
@@ -1218,19 +1244,25 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
return;
entry->dev = dev;
- entry->type = dma_debug_single;
- entry->paddr = page_to_phys(page) + offset;
+ entry->type = dma_debug_phy;
+ entry->paddr = phys;
entry->dev_addr = dma_addr;
entry->size = size;
entry->direction = direction;
entry->map_err_type = MAP_ERR_NOT_CHECKED;
- check_for_stack(dev, page, offset);
+ if (attrs & DMA_ATTR_MMIO) {
+ unsigned long pfn = PHYS_PFN(phys);
- if (!PageHighMem(page)) {
- void *addr = page_address(page) + offset;
+ if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
+ err_printk(dev, entry,
+ "dma_map_resource called for RAM address %pa\n",
+ &phys);
+ } else {
+ check_for_stack(dev, phys);
- check_for_illegal_area(dev, addr, size);
+ if (!PhysHighMem(phys))
+ check_for_illegal_area(dev, phys_to_virt(phys), size);
}
add_dma_entry(entry, attrs);
@@ -1274,11 +1306,11 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
}
EXPORT_SYMBOL(debug_dma_mapping_error);
-void debug_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+void debug_dma_unmap_phys(struct device *dev, dma_addr_t dma_addr,
size_t size, int direction)
{
struct dma_debug_entry ref = {
- .type = dma_debug_single,
+ .type = dma_debug_phy,
.dev = dev,
.dev_addr = dma_addr,
.size = size,
@@ -1302,7 +1334,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
return;
for_each_sg(sg, s, nents, i) {
- check_for_stack(dev, sg_page(s), s->offset);
+ check_for_stack(dev, sg_phys(s));
if (!PageHighMem(sg_page(s)))
check_for_illegal_area(dev, sg_virt(s), s->length);
}
@@ -1442,47 +1474,6 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
check_unmap(&ref);
}
-void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
- int direction, dma_addr_t dma_addr,
- unsigned long attrs)
-{
- struct dma_debug_entry *entry;
-
- if (unlikely(dma_debug_disabled()))
- return;
-
- entry = dma_entry_alloc();
- if (!entry)
- return;
-
- entry->type = dma_debug_resource;
- entry->dev = dev;
- entry->paddr = addr;
- entry->size = size;
- entry->dev_addr = dma_addr;
- entry->direction = direction;
- entry->map_err_type = MAP_ERR_NOT_CHECKED;
-
- add_dma_entry(entry, attrs);
-}
-
-void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
- size_t size, int direction)
-{
- struct dma_debug_entry ref = {
- .type = dma_debug_resource,
- .dev = dev,
- .dev_addr = dma_addr,
- .size = size,
- .direction = direction,
- };
-
- if (unlikely(dma_debug_disabled()))
- return;
-
- check_unmap(&ref);
-}
-
void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
size_t size, int direction)
{
@@ -1581,6 +1572,49 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
}
}
+void debug_dma_alloc_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr,
+ unsigned long attrs)
+{
+ struct dma_debug_entry *entry;
+
+ if (unlikely(dma_debug_disabled()))
+ return;
+
+ entry = dma_entry_alloc();
+ if (!entry)
+ return;
+
+ entry->type = dma_debug_noncoherent;
+ entry->dev = dev;
+ entry->paddr = page_to_phys(page);
+ entry->size = size;
+ entry->dev_addr = dma_addr;
+ entry->direction = direction;
+
+ add_dma_entry(entry, attrs);
+}
+
+void debug_dma_free_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr)
+{
+ struct dma_debug_entry ref = {
+ .type = dma_debug_noncoherent,
+ .dev = dev,
+ .paddr = page_to_phys(page),
+ .dev_addr = dma_addr,
+ .size = size,
+ .direction = direction,
+ };
+
+ if (unlikely(dma_debug_disabled()))
+ return;
+
+ check_unmap(&ref);
+}
+
static int __init dma_debug_driver_setup(char *str)
{
int i;
diff --git a/kernel/dma/debug.h b/kernel/dma/debug.h
index f525197d3cae..da7be0bddcf6 100644
--- a/kernel/dma/debug.h
+++ b/kernel/dma/debug.h
@@ -9,12 +9,11 @@
#define _KERNEL_DMA_DEBUG_H
#ifdef CONFIG_DMA_API_DEBUG
-extern void debug_dma_map_page(struct device *dev, struct page *page,
- size_t offset, size_t size,
- int direction, dma_addr_t dma_addr,
+extern void debug_dma_map_phys(struct device *dev, phys_addr_t phys,
+ size_t size, int direction, dma_addr_t dma_addr,
unsigned long attrs);
-extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
+extern void debug_dma_unmap_phys(struct device *dev, dma_addr_t addr,
size_t size, int direction);
extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
@@ -31,14 +30,6 @@ extern void debug_dma_alloc_coherent(struct device *dev, size_t size,
extern void debug_dma_free_coherent(struct device *dev, size_t size,
void *virt, dma_addr_t addr);
-extern void debug_dma_map_resource(struct device *dev, phys_addr_t addr,
- size_t size, int direction,
- dma_addr_t dma_addr,
- unsigned long attrs);
-
-extern void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
- size_t size, int direction);
-
extern void debug_dma_sync_single_for_cpu(struct device *dev,
dma_addr_t dma_handle, size_t size,
int direction);
@@ -54,15 +45,21 @@ extern void debug_dma_sync_sg_for_cpu(struct device *dev,
extern void debug_dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg,
int nelems, int direction);
+extern void debug_dma_alloc_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr,
+ unsigned long attrs);
+extern void debug_dma_free_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr);
#else /* CONFIG_DMA_API_DEBUG */
-static inline void debug_dma_map_page(struct device *dev, struct page *page,
- size_t offset, size_t size,
- int direction, dma_addr_t dma_addr,
- unsigned long attrs)
+static inline void debug_dma_map_phys(struct device *dev, phys_addr_t phys,
+ size_t size, int direction,
+ dma_addr_t dma_addr, unsigned long attrs)
{
}
-static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
+static inline void debug_dma_unmap_phys(struct device *dev, dma_addr_t addr,
size_t size, int direction)
{
}
@@ -90,19 +87,6 @@ static inline void debug_dma_free_coherent(struct device *dev, size_t size,
{
}
-static inline void debug_dma_map_resource(struct device *dev, phys_addr_t addr,
- size_t size, int direction,
- dma_addr_t dma_addr,
- unsigned long attrs)
-{
-}
-
-static inline void debug_dma_unmap_resource(struct device *dev,
- dma_addr_t dma_addr, size_t size,
- int direction)
-{
-}
-
static inline void debug_dma_sync_single_for_cpu(struct device *dev,
dma_addr_t dma_handle,
size_t size, int direction)
@@ -126,5 +110,18 @@ static inline void debug_dma_sync_sg_for_device(struct device *dev,
int nelems, int direction)
{
}
+
+static inline void debug_dma_alloc_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr,
+ unsigned long attrs)
+{
+}
+
+static inline void debug_dma_free_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr)
+{
+}
#endif /* CONFIG_DMA_API_DEBUG */
#endif /* _KERNEL_DMA_DEBUG_H */
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index b8fe0b3d0ffb..583c5922bca2 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -13,6 +13,7 @@
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/slab.h>
+#include <linux/pci-p2pdma.h>
#include "direct.h"
/*
@@ -38,7 +39,7 @@ static inline struct page *dma_direct_to_page(struct device *dev,
u64 dma_direct_get_required_mask(struct device *dev)
{
- phys_addr_t phys = (phys_addr_t)(max_pfn - 1) << PAGE_SHIFT;
+ phys_addr_t phys = ((phys_addr_t)max_pfn << PAGE_SHIFT) - 1;
u64 max_dma = phys_to_dma_direct(dev, phys);
return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
@@ -119,7 +120,7 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp_t gfp, bool allow_highmem)
{
int node = dev_to_node(dev);
- struct page *page = NULL;
+ struct page *page;
u64 phys_limit;
WARN_ON_ONCE(!PAGE_ALIGNED(size));
@@ -130,30 +131,25 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit);
page = dma_alloc_contiguous(dev, size, gfp);
if (page) {
- if (!dma_coherent_ok(dev, page_to_phys(page), size) ||
- (!allow_highmem && PageHighMem(page))) {
- dma_free_contiguous(dev, page, size);
- page = NULL;
- }
+ if (dma_coherent_ok(dev, page_to_phys(page), size) &&
+ (allow_highmem || !PageHighMem(page)))
+ return page;
+
+ dma_free_contiguous(dev, page, size);
}
-again:
- if (!page)
- page = alloc_pages_node(node, gfp, get_order(size));
- if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
+
+ while ((page = alloc_pages_node(node, gfp, get_order(size)))
+ && !dma_coherent_ok(dev, page_to_phys(page), size)) {
__free_pages(page, get_order(size));
- page = NULL;
if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
phys_limit < DMA_BIT_MASK(64) &&
- !(gfp & (GFP_DMA32 | GFP_DMA))) {
+ !(gfp & (GFP_DMA32 | GFP_DMA)))
gfp |= GFP_DMA32;
- goto again;
- }
-
- if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) {
+ else if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA))
gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
- goto again;
- }
+ else
+ return NULL;
}
return page;
@@ -410,6 +406,8 @@ void dma_direct_sync_sg_for_device(struct device *dev,
arch_sync_dma_for_device(paddr, sg->length,
dir);
}
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
}
#endif
@@ -429,13 +427,12 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
arch_sync_dma_for_cpu(paddr, sg->length, dir);
swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
-
- if (dir == DMA_FROM_DEVICE)
- arch_dma_mark_clean(paddr, sg->length);
}
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
+ arch_sync_dma_flush();
arch_sync_dma_for_cpu_all();
+ }
}
/*
@@ -447,14 +444,19 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
{
struct scatterlist *sg;
int i;
+ bool need_sync = false;
for_each_sg(sgl, sg, nents, i) {
- if (sg_dma_is_bus_address(sg))
+ if (sg_dma_is_bus_address(sg)) {
sg_dma_unmark_bus_address(sg);
- else
- dma_direct_unmap_page(dev, sg->dma_address,
- sg_dma_len(sg), dir, attrs);
+ } else {
+ need_sync = true;
+ dma_direct_unmap_phys(dev, sg->dma_address,
+ sg_dma_len(sg), dir, attrs, false);
+ }
}
+ if (need_sync && !dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
}
#endif
@@ -462,39 +464,43 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
struct pci_p2pdma_map_state p2pdma_state = {};
- enum pci_p2pdma_map_type map;
struct scatterlist *sg;
int i, ret;
+ bool need_sync = false;
for_each_sg(sgl, sg, nents, i) {
- if (is_pci_p2pdma_page(sg_page(sg))) {
- map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg);
- switch (map) {
- case PCI_P2PDMA_MAP_BUS_ADDR:
- continue;
- case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
- /*
- * Any P2P mapping that traverses the PCI
- * host bridge must be mapped with CPU physical
- * address and not PCI bus addresses. This is
- * done with dma_direct_map_page() below.
- */
- break;
- default:
- ret = -EREMOTEIO;
+ switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) {
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ /*
+ * Any P2P mapping that traverses the PCI host bridge
+ * must be mapped with CPU physical address and not PCI
+ * bus addresses.
+ */
+ break;
+ case PCI_P2PDMA_MAP_NONE:
+ need_sync = true;
+ sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg),
+ sg->length, dir, attrs, false);
+ if (sg->dma_address == DMA_MAPPING_ERROR) {
+ ret = -EIO;
goto out_unmap;
}
- }
-
- sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
- sg->offset, sg->length, dir, attrs);
- if (sg->dma_address == DMA_MAPPING_ERROR) {
- ret = -EIO;
+ break;
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ sg->dma_address = pci_p2pdma_bus_addr_map(
+ p2pdma_state.mem, sg_phys(sg));
+ sg_dma_len(sg) = sg->length;
+ sg_dma_mark_bus_address(sg);
+ continue;
+ default:
+ ret = -EREMOTEIO;
goto out_unmap;
}
sg_dma_len(sg) = sg->length;
}
+ if (need_sync && !dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
return nents;
out_unmap:
@@ -502,22 +508,6 @@ out_unmap:
return ret;
}
-dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
- dma_addr_t dma_addr = paddr;
-
- if (unlikely(!dma_capable(dev, dma_addr, size, false))) {
- dev_err_once(dev,
- "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
- &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
- WARN_ON_ONCE(1);
- return DMA_MAPPING_ERROR;
- }
-
- return dma_addr;
-}
-
int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
@@ -563,7 +553,7 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
int dma_direct_supported(struct device *dev, u64 mask)
{
- u64 min_mask = (max_pfn - 1) << PAGE_SHIFT;
+ u64 min_mask = ((u64)max_pfn << PAGE_SHIFT) - 1;
/*
* Because 32-bit DMA masks are so common we expect every architecture
@@ -677,7 +667,7 @@ int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start,
if (!offset)
return 0;
- map = kcalloc(2, sizeof(*map), GFP_KERNEL);
+ map = kzalloc_objs(*map, 2);
if (!map)
return -ENOMEM;
map[0].cpu_start = cpu_start;
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index d2c0b7e632fc..7140c208c123 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -60,64 +60,94 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
swiotlb_sync_single_for_device(dev, paddr, size, dir);
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_device(paddr, size, dir);
+ arch_sync_dma_flush();
+ }
}
static inline void dma_direct_sync_single_for_cpu(struct device *dev,
- dma_addr_t addr, size_t size, enum dma_data_direction dir)
+ dma_addr_t addr, size_t size, enum dma_data_direction dir,
+ bool flush)
{
phys_addr_t paddr = dma_to_phys(dev, addr);
if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(paddr, size, dir);
+ if (flush)
+ arch_sync_dma_flush();
arch_sync_dma_for_cpu_all();
}
swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
-
- if (dir == DMA_FROM_DEVICE)
- arch_dma_mark_clean(paddr, size);
}
-static inline dma_addr_t dma_direct_map_page(struct device *dev,
- struct page *page, unsigned long offset, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
+static inline dma_addr_t dma_direct_map_phys(struct device *dev,
+ phys_addr_t phys, size_t size, enum dma_data_direction dir,
+ unsigned long attrs, bool flush)
{
- phys_addr_t phys = page_to_phys(page) + offset;
- dma_addr_t dma_addr = phys_to_dma(dev, phys);
+ dma_addr_t dma_addr;
if (is_swiotlb_force_bounce(dev)) {
- if (is_pci_p2pdma_page(page))
- return DMA_MAPPING_ERROR;
- return swiotlb_map(dev, phys, size, dir, attrs);
- }
+ if (!(attrs & DMA_ATTR_CC_SHARED)) {
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
+ return DMA_MAPPING_ERROR;
- if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
- dma_kmalloc_needs_bounce(dev, size, dir)) {
- if (is_pci_p2pdma_page(page))
- return DMA_MAPPING_ERROR;
- if (is_swiotlb_active(dev))
return swiotlb_map(dev, phys, size, dir, attrs);
-
- dev_WARN_ONCE(dev, 1,
- "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
- &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
+ }
+ } else if (attrs & DMA_ATTR_CC_SHARED) {
return DMA_MAPPING_ERROR;
}
- if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ if (attrs & DMA_ATTR_MMIO) {
+ dma_addr = phys;
+ if (unlikely(!dma_capable(dev, dma_addr, size, false)))
+ goto err_overflow;
+ } else if (attrs & DMA_ATTR_CC_SHARED) {
+ dma_addr = phys_to_dma_unencrypted(dev, phys);
+ if (unlikely(!dma_capable(dev, dma_addr, size, false)))
+ goto err_overflow;
+ } else {
+ dma_addr = phys_to_dma(dev, phys);
+ if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
+ dma_kmalloc_needs_bounce(dev, size, dir)) {
+ if (is_swiotlb_active(dev) &&
+ !(attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return swiotlb_map(dev, phys, size, dir, attrs);
+
+ goto err_overflow;
+ }
+ }
+
+ if (!dev_is_dma_coherent(dev) &&
+ !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
arch_sync_dma_for_device(phys, size, dir);
+ if (flush)
+ arch_sync_dma_flush();
+ }
return dma_addr;
+
+err_overflow:
+ dev_WARN_ONCE(
+ dev, 1,
+ "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
+ &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
+ return DMA_MAPPING_ERROR;
}
-static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
+static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs,
+ bool flush)
{
- phys_addr_t phys = dma_to_phys(dev, addr);
+ phys_addr_t phys;
+
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
+ /* nothing to do: uncached and no swiotlb */
+ return;
+ phys = dma_to_phys(dev, addr);
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir, flush);
swiotlb_tbl_unmap_single(dev, phys, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c
index 92de80e5b057..16a51736a2a3 100644
--- a/kernel/dma/dummy.c
+++ b/kernel/dma/dummy.c
@@ -11,17 +11,16 @@ static int dma_dummy_mmap(struct device *dev, struct vm_area_struct *vma,
return -ENXIO;
}
-static dma_addr_t dma_dummy_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size, enum dma_data_direction dir,
- unsigned long attrs)
+static dma_addr_t dma_dummy_map_phys(struct device *dev, phys_addr_t phys,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
{
return DMA_MAPPING_ERROR;
}
-static void dma_dummy_unmap_page(struct device *dev, dma_addr_t dma_handle,
+static void dma_dummy_unmap_phys(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
/*
- * Dummy ops doesn't support map_page, so unmap_page should never be
+ * Dummy ops doesn't support map_phys, so unmap_page should never be
* called.
*/
WARN_ON_ONCE(true);
@@ -51,8 +50,8 @@ static int dma_dummy_supported(struct device *hwdev, u64 mask)
const struct dma_map_ops dma_dummy_ops = {
.mmap = dma_dummy_mmap,
- .map_page = dma_dummy_map_page,
- .unmap_page = dma_dummy_unmap_page,
+ .map_phys = dma_dummy_map_phys,
+ .unmap_phys = dma_dummy_unmap_phys,
.map_sg = dma_dummy_map_sg,
.unmap_sg = dma_dummy_unmap_sg,
.dma_supported = dma_dummy_supported,
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index cc19a3efea89..29eeb5fdf199 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -5,19 +5,21 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cleanup.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
-#include <linux/map_benchmark.h>
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/timekeeping.h>
+#include <uapi/linux/map_benchmark.h>
struct map_benchmark_data {
struct map_benchmark bparam;
@@ -31,17 +33,219 @@ struct map_benchmark_data {
atomic64_t loops;
};
+struct map_benchmark_ops {
+ void *(*prepare)(struct map_benchmark_data *map);
+ void (*unprepare)(void *mparam);
+ void (*initialize_data)(void *mparam);
+ int (*do_map)(void *mparam);
+ void (*do_unmap)(void *mparam);
+};
+
+struct dma_single_map_param {
+ struct device *dev;
+ dma_addr_t addr;
+ void *xbuf;
+ u32 npages;
+ u32 dma_dir;
+};
+
+static void *dma_single_map_benchmark_prepare(struct map_benchmark_data *map)
+{
+ struct dma_single_map_param *params __free(kfree) = kzalloc(sizeof(*params),
+ GFP_KERNEL);
+ if (!params)
+ return NULL;
+
+ params->npages = map->bparam.granule;
+ params->dma_dir = map->bparam.dma_dir;
+ params->dev = map->dev;
+ params->xbuf = alloc_pages_exact(params->npages * PAGE_SIZE, GFP_KERNEL);
+ if (!params->xbuf)
+ return NULL;
+
+ return_ptr(params);
+}
+
+static void dma_single_map_benchmark_unprepare(void *mparam)
+{
+ struct dma_single_map_param *params = mparam;
+
+ free_pages_exact(params->xbuf, params->npages * PAGE_SIZE);
+ kfree(params);
+}
+
+static void dma_single_map_benchmark_initialize_data(void *mparam)
+{
+ struct dma_single_map_param *params = mparam;
+
+ /*
+ * for a non-coherent device, if we don't stain them in the
+ * cache, this will give an underestimate of the real-world
+ * overhead of BIDIRECTIONAL or TO_DEVICE mappings;
+ * 66 means everything goes well! 66 is lucky.
+ */
+ if (params->dma_dir != DMA_FROM_DEVICE)
+ memset(params->xbuf, 0x66, params->npages * PAGE_SIZE);
+}
+
+static int dma_single_map_benchmark_do_map(void *mparam)
+{
+ struct dma_single_map_param *params = mparam;
+
+ params->addr = dma_map_single(params->dev, params->xbuf,
+ params->npages * PAGE_SIZE, params->dma_dir);
+ if (unlikely(dma_mapping_error(params->dev, params->addr))) {
+ pr_err("dma_map_single failed on %s\n", dev_name(params->dev));
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void dma_single_map_benchmark_do_unmap(void *mparam)
+{
+ struct dma_single_map_param *params = mparam;
+
+ dma_unmap_single(params->dev, params->addr,
+ params->npages * PAGE_SIZE, params->dma_dir);
+}
+
+static struct map_benchmark_ops dma_single_map_benchmark_ops = {
+ .prepare = dma_single_map_benchmark_prepare,
+ .unprepare = dma_single_map_benchmark_unprepare,
+ .initialize_data = dma_single_map_benchmark_initialize_data,
+ .do_map = dma_single_map_benchmark_do_map,
+ .do_unmap = dma_single_map_benchmark_do_unmap,
+};
+
+struct dma_sg_map_param {
+ struct sg_table sgt;
+ struct device *dev;
+ void **buf;
+ u32 npages;
+ u32 dma_dir;
+};
+
+static void *dma_sg_map_benchmark_prepare(struct map_benchmark_data *map)
+{
+ struct scatterlist *sg;
+ int i;
+
+ struct dma_sg_map_param *params = kzalloc(sizeof(*params), GFP_KERNEL);
+
+ if (!params)
+ return NULL;
+ /*
+ * Set the number of scatterlist entries based on the granule.
+ * In SG mode, 'granule' represents the number of scatterlist entries.
+ * Each scatterlist entry corresponds to a single page.
+ */
+ params->npages = map->bparam.granule;
+ params->dma_dir = map->bparam.dma_dir;
+ params->dev = map->dev;
+ params->buf = kmalloc_array(params->npages, sizeof(*params->buf),
+ GFP_KERNEL);
+ if (!params->buf)
+ goto out;
+
+ if (sg_alloc_table(&params->sgt, params->npages, GFP_KERNEL))
+ goto free_buf;
+
+ for_each_sgtable_sg(&params->sgt, sg, i) {
+ params->buf[i] = (void *)__get_free_page(GFP_KERNEL);
+ if (!params->buf[i])
+ goto free_page;
+
+ sg_set_buf(sg, params->buf[i], PAGE_SIZE);
+ }
+
+ return params;
+
+free_page:
+ while (i-- > 0)
+ free_page((unsigned long)params->buf[i]);
+
+ sg_free_table(&params->sgt);
+free_buf:
+ kfree(params->buf);
+out:
+ kfree(params);
+ return NULL;
+}
+
+static void dma_sg_map_benchmark_unprepare(void *mparam)
+{
+ struct dma_sg_map_param *params = mparam;
+ int i;
+
+ for (i = 0; i < params->npages; i++)
+ free_page((unsigned long)params->buf[i]);
+
+ sg_free_table(&params->sgt);
+
+ kfree(params->buf);
+ kfree(params);
+}
+
+static void dma_sg_map_benchmark_initialize_data(void *mparam)
+{
+ struct dma_sg_map_param *params = mparam;
+ struct scatterlist *sg;
+ int i = 0;
+
+ if (params->dma_dir == DMA_FROM_DEVICE)
+ return;
+
+ for_each_sgtable_sg(&params->sgt, sg, i)
+ memset(params->buf[i], 0x66, PAGE_SIZE);
+}
+
+static int dma_sg_map_benchmark_do_map(void *mparam)
+{
+ struct dma_sg_map_param *params = mparam;
+ int ret = 0;
+
+ int sg_mapped = dma_map_sg(params->dev, params->sgt.sgl,
+ params->npages, params->dma_dir);
+ if (!sg_mapped) {
+ pr_err("dma_map_sg failed on %s\n", dev_name(params->dev));
+ ret = -ENOMEM;
+ }
+
+ return ret;
+}
+
+static void dma_sg_map_benchmark_do_unmap(void *mparam)
+{
+ struct dma_sg_map_param *params = mparam;
+
+ dma_unmap_sg(params->dev, params->sgt.sgl, params->npages,
+ params->dma_dir);
+}
+
+static struct map_benchmark_ops dma_sg_map_benchmark_ops = {
+ .prepare = dma_sg_map_benchmark_prepare,
+ .unprepare = dma_sg_map_benchmark_unprepare,
+ .initialize_data = dma_sg_map_benchmark_initialize_data,
+ .do_map = dma_sg_map_benchmark_do_map,
+ .do_unmap = dma_sg_map_benchmark_do_unmap,
+};
+
+static struct map_benchmark_ops *dma_map_benchmark_ops[DMA_MAP_BENCH_MODE_MAX] = {
+ [DMA_MAP_BENCH_SINGLE_MODE] = &dma_single_map_benchmark_ops,
+ [DMA_MAP_BENCH_SG_MODE] = &dma_sg_map_benchmark_ops,
+};
+
static int map_benchmark_thread(void *data)
{
- void *buf;
- dma_addr_t dma_addr;
struct map_benchmark_data *map = data;
- int npages = map->bparam.granule;
- u64 size = npages * PAGE_SIZE;
+ __u8 map_mode = map->bparam.map_mode;
int ret = 0;
- buf = alloc_pages_exact(size, GFP_KERNEL);
- if (!buf)
+ struct map_benchmark_ops *mb_ops = dma_map_benchmark_ops[map_mode];
+ void *mparam = mb_ops->prepare(map);
+
+ if (!mparam)
return -ENOMEM;
while (!kthread_should_stop()) {
@@ -49,23 +253,12 @@ static int map_benchmark_thread(void *data)
ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
ktime_t map_delta, unmap_delta;
- /*
- * for a non-coherent device, if we don't stain them in the
- * cache, this will give an underestimate of the real-world
- * overhead of BIDIRECTIONAL or TO_DEVICE mappings;
- * 66 means evertything goes well! 66 is lucky.
- */
- if (map->dir != DMA_FROM_DEVICE)
- memset(buf, 0x66, size);
-
+ mb_ops->initialize_data(mparam);
map_stime = ktime_get();
- dma_addr = dma_map_single(map->dev, buf, size, map->dir);
- if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
- pr_err("dma_map_single failed on %s\n",
- dev_name(map->dev));
- ret = -ENOMEM;
+ ret = mb_ops->do_map(mparam);
+ if (ret)
goto out;
- }
+
map_etime = ktime_get();
map_delta = ktime_sub(map_etime, map_stime);
@@ -73,7 +266,8 @@ static int map_benchmark_thread(void *data)
ndelay(map->bparam.dma_trans_ns);
unmap_stime = ktime_get();
- dma_unmap_single(map->dev, dma_addr, size, map->dir);
+ mb_ops->do_unmap(mparam);
+
unmap_etime = ktime_get();
unmap_delta = ktime_sub(unmap_etime, unmap_stime);
@@ -108,7 +302,7 @@ static int map_benchmark_thread(void *data)
}
out:
- free_pages_exact(buf, size);
+ mb_ops->unprepare(mparam);
return ret;
}
@@ -121,7 +315,7 @@ static int do_map_benchmark(struct map_benchmark_data *map)
int ret = 0;
int i;
- tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL);
+ tsk = kmalloc_objs(*tsk, threads);
if (!tsk)
return -ENOMEM;
@@ -209,6 +403,12 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case DMA_MAP_BENCHMARK:
+ if (map->bparam.map_mode < 0 ||
+ map->bparam.map_mode >= DMA_MAP_BENCH_MODE_MAX) {
+ pr_err("invalid map mode\n");
+ return -EINVAL;
+ }
+
if (map->bparam.threads == 0 ||
map->bparam.threads > DMA_MAP_MAX_THREADS) {
pr_err("invalid thread number\n");
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index cda127027e48..e6b07f160d20 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -152,49 +152,90 @@ static inline bool dma_map_direct(struct device *dev,
return dma_go_direct(dev, *dev->dma_mask, ops);
}
-dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
- size_t offset, size_t size, enum dma_data_direction dir,
- unsigned long attrs)
+dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- dma_addr_t addr;
+ bool is_mmio = attrs & DMA_ATTR_MMIO;
+ bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
+ dma_addr_t addr = DMA_MAPPING_ERROR;
BUG_ON(!valid_dma_direction(dir));
if (WARN_ON_ONCE(!dev->dma_mask))
return DMA_MAPPING_ERROR;
+ if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return DMA_MAPPING_ERROR;
+
if (dma_map_direct(dev, ops) ||
- arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size))
- addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+ (!is_mmio && !is_cc_shared &&
+ arch_dma_map_phys_direct(dev, phys + size)))
+ addr = dma_direct_map_phys(dev, phys, size, dir, attrs, true);
+ else if (is_cc_shared)
+ return DMA_MAPPING_ERROR;
else if (use_dma_iommu(dev))
- addr = iommu_dma_map_page(dev, page, offset, size, dir, attrs);
- else
- addr = ops->map_page(dev, page, offset, size, dir, attrs);
- kmsan_handle_dma(page, offset, size, dir);
- trace_dma_map_page(dev, page_to_phys(page) + offset, addr, size, dir,
- attrs);
- debug_dma_map_page(dev, page, offset, size, dir, addr, attrs);
+ addr = iommu_dma_map_phys(dev, phys, size, dir, attrs);
+ else if (ops->map_phys)
+ addr = ops->map_phys(dev, phys, size, dir, attrs);
+
+ if (!is_mmio)
+ kmsan_handle_dma(phys, size, dir);
+ trace_dma_map_phys(dev, phys, addr, size, dir, attrs);
+ debug_dma_map_phys(dev, phys, size, dir, addr, attrs);
return addr;
}
+EXPORT_SYMBOL_GPL(dma_map_phys);
+
+dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ phys_addr_t phys = page_to_phys(page) + offset;
+
+ if (unlikely(attrs & DMA_ATTR_MMIO))
+ return DMA_MAPPING_ERROR;
+
+ if (IS_ENABLED(CONFIG_DMA_API_DEBUG) &&
+ WARN_ON_ONCE(is_zone_device_page(page)))
+ return DMA_MAPPING_ERROR;
+
+ return dma_map_phys(dev, phys, size, dir, attrs);
+}
EXPORT_SYMBOL(dma_map_page_attrs);
-void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
+void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
+ bool is_mmio = attrs & DMA_ATTR_MMIO;
+ bool is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
BUG_ON(!valid_dma_direction(dir));
+
if (dma_map_direct(dev, ops) ||
- arch_dma_unmap_page_direct(dev, addr + size))
- dma_direct_unmap_page(dev, addr, size, dir, attrs);
+ (!is_mmio && !is_cc_shared &&
+ arch_dma_unmap_phys_direct(dev, addr + size)))
+ dma_direct_unmap_phys(dev, addr, size, dir, attrs, true);
+ else if (is_cc_shared)
+ return;
else if (use_dma_iommu(dev))
- iommu_dma_unmap_page(dev, addr, size, dir, attrs);
- else
- ops->unmap_page(dev, addr, size, dir, attrs);
- trace_dma_unmap_page(dev, addr, size, dir, attrs);
- debug_dma_unmap_page(dev, addr, size, dir);
+ iommu_dma_unmap_phys(dev, addr, size, dir, attrs);
+ else if (ops->unmap_phys)
+ ops->unmap_phys(dev, addr, size, dir, attrs);
+ trace_dma_unmap_phys(dev, addr, size, dir, attrs);
+ debug_dma_unmap_phys(dev, addr, size, dir);
+}
+EXPORT_SYMBOL_GPL(dma_unmap_phys);
+
+void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ if (unlikely(attrs & DMA_ATTR_MMIO))
+ return;
+
+ dma_unmap_phys(dev, addr, size, dir, attrs);
}
EXPORT_SYMBOL(dma_unmap_page_attrs);
@@ -206,6 +247,9 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
BUG_ON(!valid_dma_direction(dir));
+ if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return -EOPNOTSUPP;
+
if (WARN_ON_ONCE(!dev->dma_mask))
return 0;
@@ -321,41 +365,14 @@ EXPORT_SYMBOL(dma_unmap_sg_attrs);
dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- const struct dma_map_ops *ops = get_dma_ops(dev);
- dma_addr_t addr = DMA_MAPPING_ERROR;
-
- BUG_ON(!valid_dma_direction(dir));
-
- if (WARN_ON_ONCE(!dev->dma_mask))
- return DMA_MAPPING_ERROR;
-
- if (dma_map_direct(dev, ops))
- addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
- else if (use_dma_iommu(dev))
- addr = iommu_dma_map_resource(dev, phys_addr, size, dir, attrs);
- else if (ops->map_resource)
- addr = ops->map_resource(dev, phys_addr, size, dir, attrs);
-
- trace_dma_map_resource(dev, phys_addr, addr, size, dir, attrs);
- debug_dma_map_resource(dev, phys_addr, size, dir, addr, attrs);
- return addr;
+ return dma_map_phys(dev, phys_addr, size, dir, attrs | DMA_ATTR_MMIO);
}
EXPORT_SYMBOL(dma_map_resource);
void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
- const struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (dma_map_direct(dev, ops))
- ; /* nothing to do: uncached and no swiotlb */
- else if (use_dma_iommu(dev))
- iommu_dma_unmap_resource(dev, addr, size, dir, attrs);
- else if (ops->unmap_resource)
- ops->unmap_resource(dev, addr, size, dir, attrs);
- trace_dma_unmap_resource(dev, addr, size, dir, attrs);
- debug_dma_unmap_resource(dev, addr, size, dir);
+ dma_unmap_phys(dev, addr, size, dir, attrs | DMA_ATTR_MMIO);
}
EXPORT_SYMBOL(dma_unmap_resource);
@@ -367,7 +384,7 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
BUG_ON(!valid_dma_direction(dir));
if (dma_map_direct(dev, ops))
- dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir, true);
else if (use_dma_iommu(dev))
iommu_dma_sync_single_for_cpu(dev, addr, size, dir);
else if (ops->sync_single_for_cpu)
@@ -443,6 +460,24 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
}
EXPORT_SYMBOL_GPL(__dma_need_sync);
+/**
+ * dma_need_unmap - does this device need dma_unmap_* operations
+ * @dev: device to check
+ *
+ * If this function returns %false, drivers can skip calling dma_unmap_* after
+ * finishing an I/O. This function must be called after all mappings that might
+ * need to be unmapped have been performed.
+ */
+bool dma_need_unmap(struct device *dev)
+{
+ if (!dma_map_direct(dev, get_dma_ops(dev)))
+ return true;
+ if (!dev->dma_skip_sync)
+ return true;
+ return IS_ENABLED(CONFIG_DMA_API_DEBUG);
+}
+EXPORT_SYMBOL_GPL(dma_need_unmap);
+
static void dma_setup_need_sync(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -614,7 +649,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* let the implementation decide on the zone to allocate from: */
flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
- if (dma_alloc_direct(dev, ops)) {
+ if (dma_alloc_direct(dev, ops) || arch_dma_alloc_direct(dev)) {
cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
} else if (use_dma_iommu(dev)) {
cpu_addr = iommu_dma_alloc(dev, size, dma_handle, flag, attrs);
@@ -655,7 +690,7 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
return;
debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- if (dma_alloc_direct(dev, ops))
+ if (dma_alloc_direct(dev, ops) || arch_dma_free_direct(dev, dma_handle))
dma_direct_free(dev, size, cpu_addr, dma_handle, attrs);
else if (use_dma_iommu(dev))
iommu_dma_free(dev, size, cpu_addr, dma_handle, attrs);
@@ -694,7 +729,7 @@ struct page *dma_alloc_pages(struct device *dev, size_t size,
if (page) {
trace_dma_alloc_pages(dev, page_to_virt(page), *dma_handle,
size, dir, gfp, 0);
- debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0);
+ debug_dma_alloc_pages(dev, page, size, dir, *dma_handle, 0);
} else {
trace_dma_alloc_pages(dev, NULL, 0, size, dir, gfp, 0);
}
@@ -720,7 +755,7 @@ void dma_free_pages(struct device *dev, size_t size, struct page *page,
dma_addr_t dma_handle, enum dma_data_direction dir)
{
trace_dma_free_pages(dev, page_to_virt(page), dma_handle, size, dir, 0);
- debug_dma_unmap_page(dev, dma_handle, size, dir);
+ debug_dma_free_pages(dev, page, size, dir, dma_handle);
__dma_free_pages(dev, size, page, dma_handle, dir);
}
EXPORT_SYMBOL_GPL(dma_free_pages);
@@ -744,7 +779,7 @@ static struct sg_table *alloc_single_sgt(struct device *dev, size_t size,
struct sg_table *sgt;
struct page *page;
- sgt = kmalloc(sizeof(*sgt), gfp);
+ sgt = kmalloc_obj(*sgt, gfp);
if (!sgt)
return NULL;
if (sg_alloc_table(sgt, 1, gfp))
@@ -910,6 +945,19 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_set_coherent_mask);
+static bool __dma_addressing_limited(struct device *dev)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) <
+ dma_get_required_mask(dev))
+ return true;
+
+ if (unlikely(ops) || use_dma_iommu(dev))
+ return false;
+ return !dma_direct_all_ram_mapped(dev);
+}
+
/**
* dma_addressing_limited - return if the device is addressing limited
* @dev: device to check
@@ -920,15 +968,11 @@ EXPORT_SYMBOL(dma_set_coherent_mask);
*/
bool dma_addressing_limited(struct device *dev)
{
- const struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) <
- dma_get_required_mask(dev))
- return true;
-
- if (unlikely(ops) || use_dma_iommu(dev))
+ if (!__dma_addressing_limited(dev))
return false;
- return !dma_direct_all_ram_mapped(dev);
+
+ dev_dbg(dev, "device is DMA addressing limited\n");
+ return true;
}
EXPORT_SYMBOL_GPL(dma_addressing_limited);
diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c
index 9afd569eadb9..20caf9cabf69 100644
--- a/kernel/dma/ops_helpers.c
+++ b/kernel/dma/ops_helpers.c
@@ -64,6 +64,7 @@ struct page *dma_common_alloc_pages(struct device *dev, size_t size,
{
const struct dma_map_ops *ops = get_dma_ops(dev);
struct page *page;
+ phys_addr_t phys;
page = dma_alloc_contiguous(dev, size, gfp);
if (!page)
@@ -71,11 +72,12 @@ struct page *dma_common_alloc_pages(struct device *dev, size_t size,
if (!page)
return NULL;
+ phys = page_to_phys(page);
if (use_dma_iommu(dev))
- *dma_handle = iommu_dma_map_page(dev, page, 0, size, dir,
+ *dma_handle = iommu_dma_map_phys(dev, phys, size, dir,
DMA_ATTR_SKIP_CPU_SYNC);
else
- *dma_handle = ops->map_page(dev, page, 0, size, dir,
+ *dma_handle = ops->map_phys(dev, phys, size, dir,
DMA_ATTR_SKIP_CPU_SYNC);
if (*dma_handle == DMA_MAPPING_ERROR) {
dma_free_contiguous(dev, page, size);
@@ -92,10 +94,10 @@ void dma_common_free_pages(struct device *dev, size_t size, struct page *page,
const struct dma_map_ops *ops = get_dma_ops(dev);
if (use_dma_iommu(dev))
- iommu_dma_unmap_page(dev, dma_handle, size, dir,
+ iommu_dma_unmap_phys(dev, dma_handle, size, dir,
DMA_ATTR_SKIP_CPU_SYNC);
- else if (ops->unmap_page)
- ops->unmap_page(dev, dma_handle, size, dir,
+ else if (ops->unmap_phys)
+ ops->unmap_phys(dev, dma_handle, size, dir,
DMA_ATTR_SKIP_CPU_SYNC);
dma_free_contiguous(dev, page, size);
}
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 7b04f7575796..2b2fbb709242 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -93,7 +93,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
page = dma_alloc_from_contiguous(NULL, 1 << order,
order, false);
if (!page)
- page = alloc_pages(gfp, order);
+ page = alloc_pages(gfp | __GFP_NOWARN, order);
} while (!page && order-- > 0);
if (!page)
goto out;
@@ -102,8 +102,8 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
#ifdef CONFIG_DMA_DIRECT_REMAP
addr = dma_common_contiguous_remap(page, pool_size,
- pgprot_dmacoherent(PAGE_KERNEL),
- __builtin_return_address(0));
+ pgprot_decrypted(pgprot_dmacoherent(PAGE_KERNEL)),
+ __builtin_return_address(0));
if (!addr)
goto free_page;
#else
@@ -184,6 +184,12 @@ static __init struct gen_pool *__dma_atomic_pool_init(size_t pool_size,
return pool;
}
+#ifdef CONFIG_ZONE_DMA32
+#define has_managed_dma32 has_managed_zone(ZONE_DMA32)
+#else
+#define has_managed_dma32 false
+#endif
+
static int __init dma_atomic_pool_init(void)
{
int ret = 0;
@@ -199,17 +205,20 @@ static int __init dma_atomic_pool_init(void)
}
INIT_WORK(&atomic_pool_work, atomic_pool_work_fn);
- atomic_pool_kernel = __dma_atomic_pool_init(atomic_pool_size,
+ /* All memory might be in the DMA zone(s) to begin with */
+ if (has_managed_zone(ZONE_NORMAL)) {
+ atomic_pool_kernel = __dma_atomic_pool_init(atomic_pool_size,
GFP_KERNEL);
- if (!atomic_pool_kernel)
- ret = -ENOMEM;
+ if (!atomic_pool_kernel)
+ ret = -ENOMEM;
+ }
if (has_managed_dma()) {
atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
GFP_KERNEL | GFP_DMA);
if (!atomic_pool_dma)
ret = -ENOMEM;
}
- if (IS_ENABLED(CONFIG_ZONE_DMA32)) {
+ if (has_managed_dma32) {
atomic_pool_dma32 = __dma_atomic_pool_init(atomic_pool_size,
GFP_KERNEL | GFP_DMA32);
if (!atomic_pool_dma32)
@@ -224,11 +233,11 @@ postcore_initcall(dma_atomic_pool_init);
static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp)
{
if (prev == NULL) {
- if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
- return atomic_pool_dma32;
- if (atomic_pool_dma && (gfp & GFP_DMA))
- return atomic_pool_dma;
- return atomic_pool_kernel;
+ if (gfp & GFP_DMA)
+ return atomic_pool_dma ?: atomic_pool_dma32 ?: atomic_pool_kernel;
+ if (gfp & GFP_DMA32)
+ return atomic_pool_dma32 ?: atomic_pool_dma ?: atomic_pool_kernel;
+ return atomic_pool_kernel ?: atomic_pool_dma32 ?: atomic_pool_dma;
}
if (prev == atomic_pool_kernel)
return atomic_pool_dma32 ? atomic_pool_dma32 : atomic_pool_dma;
@@ -268,15 +277,20 @@ struct page *dma_alloc_from_pool(struct device *dev, size_t size,
{
struct gen_pool *pool = NULL;
struct page *page;
+ bool pool_found = false;
while ((pool = dma_guess_pool(pool, gfp))) {
+ pool_found = true;
page = __dma_alloc_from_pool(dev, size, pool, cpu_addr,
phys_addr_ok);
if (page)
return page;
}
- WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev));
+ if (pool_found)
+ WARN(!(gfp & __GFP_NOWARN), "DMA pool exhausted for %s\n", dev_name(dev));
+ else
+ WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev));
return NULL;
}
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index 9e2afad1c615..205c0c0ba2fe 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -45,11 +45,11 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
void *vaddr;
int i;
- pages = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
+ pages = kvmalloc_objs(struct page *, count);
if (!pages)
return NULL;
for (i = 0; i < count; i++)
- pages[i] = nth_page(page, i);
+ pages[i] = page++;
vaddr = vmap(pages, count, VM_DMA_COHERENT, prot);
kvfree(pages);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index abcf3fa63a56..1abd3e6146f4 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -30,6 +30,7 @@
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/io.h>
+#include <linux/kmsan-checks.h>
#include <linux/iommu-helper.h>
#include <linux/init.h>
#include <linux/memblock.h>
@@ -61,8 +62,6 @@
*/
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-
/**
* struct io_tlb_slot - IO TLB slot descriptor
* @orig_addr: The original address corresponding to a mapped entry.
@@ -548,10 +547,10 @@ void __init swiotlb_exit(void)
free_pages(tbl_vaddr, get_order(tbl_size));
free_pages((unsigned long)mem->slots, get_order(slots_size));
} else {
- memblock_free_late(__pa(mem->areas),
+ memblock_free(mem->areas,
array_size(sizeof(*mem->areas), mem->nareas));
- memblock_free_late(mem->start, tbl_size);
- memblock_free_late(__pa(mem->slots), slots_size);
+ memblock_phys_free(mem->start, tbl_size);
+ memblock_free(mem->slots, slots_size);
}
memset(mem, 0, sizeof(*mem));
@@ -869,6 +868,9 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
if (orig_addr == INVALID_PHYS_ADDR)
return;
+ if (dir == DMA_FROM_DEVICE && !dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
+
/*
* It's valid for tlb_offset to be negative. This can happen when the
* "offset" returned by swiotlb_align_offset() is non-zero, and the
@@ -903,10 +905,19 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
local_irq_save(flags);
page = pfn_to_page(pfn);
- if (dir == DMA_TO_DEVICE)
+ if (dir == DMA_TO_DEVICE) {
+ /*
+ * Ideally, kmsan_check_highmem_page()
+ * could be used here to detect infoleaks,
+ * but callers may map uninitialized buffers
+ * that will be written by the device,
+ * causing false positives.
+ */
memcpy_from_page(vaddr, page, offset, sz);
- else
+ } else {
+ kmsan_unpoison_memory(vaddr, sz);
memcpy_to_page(page, offset, vaddr, sz);
+ }
local_irq_restore(flags);
size -= sz;
@@ -915,8 +926,15 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
offset = 0;
}
} else if (dir == DMA_TO_DEVICE) {
+ /*
+ * Ideally, kmsan_check_memory() could be used here to detect
+ * infoleaks (uninitialized data being sent to device), but
+ * callers may map uninitialized buffers that will be written
+ * by the device, causing false positives.
+ */
memcpy(vaddr, phys_to_virt(orig_addr), size);
} else {
+ kmsan_unpoison_memory(vaddr, size);
memcpy(phys_to_virt(orig_addr), vaddr, size);
}
}
@@ -1209,7 +1227,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
nslabs = nr_slots(alloc_size);
phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit);
pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit,
- GFP_NOWAIT | __GFP_NOWARN);
+ GFP_NOWAIT);
if (!pool)
return -1;
@@ -1597,8 +1615,10 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
return DMA_MAPPING_ERROR;
}
- if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
arch_sync_dma_for_device(swiotlb_addr, size, dir);
+ arch_sync_dma_flush();
+ }
return dma_addr;
}
@@ -1811,19 +1831,18 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
if (!mem) {
struct io_tlb_pool *pool;
- mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ mem = kzalloc_obj(*mem);
if (!mem)
return -ENOMEM;
pool = &mem->defpool;
- pool->slots = kcalloc(nslabs, sizeof(*pool->slots), GFP_KERNEL);
+ pool->slots = kzalloc_objs(*pool->slots, nslabs);
if (!pool->slots) {
kfree(mem);
return -ENOMEM;
}
- pool->areas = kcalloc(nareas, sizeof(*pool->areas),
- GFP_KERNEL);
+ pool->areas = kzalloc_objs(*pool->areas, nareas);
if (!pool->areas) {
kfree(pool->slots);
kfree(mem);
@@ -1858,26 +1877,25 @@ static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
dev->dma_io_tlb_mem = &io_tlb_default_mem;
}
-static const struct reserved_mem_ops rmem_swiotlb_ops = {
- .device_init = rmem_swiotlb_device_init,
- .device_release = rmem_swiotlb_device_release,
-};
-
-static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+static int __init rmem_swiotlb_setup(unsigned long node,
+ struct reserved_mem *rmem)
{
- unsigned long node = rmem->fdt_node;
-
if (of_get_flat_dt_prop(node, "reusable", NULL) ||
of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
of_get_flat_dt_prop(node, "no-map", NULL))
return -EINVAL;
- rmem->ops = &rmem_swiotlb_ops;
pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M);
return 0;
}
-RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+ .node_init = rmem_swiotlb_setup,
+ .device_init = rmem_swiotlb_device_init,
+ .device_release = rmem_swiotlb_device_release,
+};
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", &rmem_swiotlb_ops);
#endif /* CONFIG_DMA_RESTRICTED_POOL */