diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-19 13:27:23 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-19 13:27:23 -0700 |
commit | 671df189537883f36cf9c7d4f9495bfac0f86627 (patch) | |
tree | 22e5f598ed1f5d9b2218d85d4426140f804d61e6 | |
parent | c9fe5630dae1df2328d82042602e2c4d1add8d57 (diff) | |
parent | c7d9eccb3c1e802c5cbb2a764eb0eb9807d9f12e (diff) | |
download | lwn-671df189537883f36cf9c7d4f9495bfac0f86627.tar.gz lwn-671df189537883f36cf9c7d4f9495bfac0f86627.zip |
Merge tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping
Pull dma-mapping updates from Christoph Hellwig:
- add dma-mapping and block layer helpers to take care of IOMMU merging
for mmc plus subsequent fixups (Yoshihiro Shimoda)
- rework handling of the pgprot bits for remapping (me)
- take care of the dma direct infrastructure for swiotlb-xen (me)
- improve the dma noncoherent remapping infrastructure (me)
- better defaults for ->mmap, ->get_sgtable and ->get_required_mask
(me)
- cleanup mmaping of coherent DMA allocations (me)
- various misc cleanups (Andy Shevchenko, me)
* tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping: (41 commits)
mmc: renesas_sdhi_internal_dmac: Add MMC_CAP2_MERGE_CAPABLE
mmc: queue: Fix bigger segments usage
arm64: use asm-generic/dma-mapping.h
swiotlb-xen: merge xen_unmap_single into xen_swiotlb_unmap_page
swiotlb-xen: simplify cache maintainance
swiotlb-xen: use the same foreign page check everywhere
swiotlb-xen: remove xen_swiotlb_dma_mmap and xen_swiotlb_dma_get_sgtable
xen: remove the exports for xen_{create,destroy}_contiguous_region
xen/arm: remove xen_dma_ops
xen/arm: simplify dma_cache_maint
xen/arm: use dev_is_dma_coherent
xen/arm: consolidate page-coherent.h
xen/arm: use dma-noncoherent.h calls for xen-swiotlb cache maintainance
arm: remove wrappers for the generic dma remap helpers
dma-mapping: introduce a dma_common_find_pages helper
dma-mapping: always use VM_DMA_COHERENT for generic DMA remap
vmalloc: lift the arm flag for coherent mappings to common code
dma-mapping: provide a better default ->get_required_mask
dma-mapping: remove the dma_declare_coherent_memory export
remoteproc: don't allow modular build
...
73 files changed, 397 insertions, 674 deletions
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index e47c63bd4887..2d8d2fed7317 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -204,6 +204,14 @@ Returns the maximum size of a mapping for the device. The size parameter of the mapping functions like dma_map_single(), dma_map_page() and others should not be larger than the returned value. +:: + + unsigned long + dma_get_merge_boundary(struct device *dev); + +Returns the DMA merge boundary. If the device cannot merge any the DMA address +segments, the function returns 0. + Part Id - Streaming DMA mappings -------------------------------- @@ -595,17 +603,6 @@ For reasons of efficiency, most platforms choose to track the declared region only at the granularity of a page. For smaller allocations, you should use the dma_pool() API. -:: - - void - dma_release_declared_memory(struct device *dev) - -Remove the memory region previously declared from the system. This -API performs *no* in-use checking for this region and will return -unconditionally having removed all the required structures. It is the -driver's job to ensure that no parts of this memory region are -currently in use. - Part III - Debug drivers use of the DMA-API ------------------------------------------- diff --git a/Documentation/x86/x86_64/boot-options.rst b/Documentation/x86/x86_64/boot-options.rst index 6a4285a3c7a4..2b98efb5ba7f 100644 --- a/Documentation/x86/x86_64/boot-options.rst +++ b/Documentation/x86/x86_64/boot-options.rst @@ -230,7 +230,7 @@ IOMMU (input/output memory management unit) =========================================== Multiple x86-64 PCI-DMA mapping implementations exist, for example: - 1. <lib/dma-direct.c>: use no hardware/software IOMMU at all + 1. <kernel/dma/direct.c>: use no hardware/software IOMMU at all (e.g. because you have < 3 GB memory). Kernel boot message: "PCI-DMA: Disabling IOMMU" diff --git a/arch/Kconfig b/arch/Kconfig index 6baedab10dca..c4b2afa138ca 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -793,9 +793,6 @@ config COMPAT_32BIT_TIME This is relevant on all 32-bit architectures, and 64-bit architectures as part of compat syscall handling. -config ARCH_NO_COHERENT_DMA_MMAP - bool - config ARCH_NO_PREEMPT bool diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 242108439f42..7f1925a32c99 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -955,5 +955,7 @@ const struct dma_map_ops alpha_pci_ops = { .map_sg = alpha_pci_map_sg, .unmap_sg = alpha_pci_unmap_sg, .dma_supported = alpha_pci_supported, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; EXPORT_SYMBOL(alpha_pci_ops); diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 70a3fbe79fba..73a7e88a1e92 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -104,9 +104,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev_info(dev, "use %scoherent DMA ops\n", dev->dma_coherent ? "" : "non"); } - -static int __init atomic_pool_init(void) -{ - return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); -} -postcore_initcall(atomic_pool_init); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2ae7f8adcac4..aa1d3b25e89f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -8,7 +8,7 @@ config ARM select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB - select ARCH_HAS_DMA_MMAP_PGPROT if SWIOTLB + select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KEEPINITRD diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index f6955b55c544..c675bc0d5aa8 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -15,9 +15,6 @@ struct dev_archdata { #ifdef CONFIG_ARM_DMA_USE_IOMMU struct dma_iommu_mapping *mapping; #endif -#ifdef CONFIG_XEN - const struct dma_map_ops *dev_dma_ops; -#endif unsigned int dma_coherent:1; unsigned int dma_ops_setup:1; }; diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index dba9355e2484..bdd80ddbca34 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -91,12 +91,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) } #endif -/* do not use this function in a driver */ -static inline bool is_device_dma_coherent(struct device *dev) -{ - return dev->archdata.dma_coherent; -} - /** * arm_dma_alloc - allocate consistent memory for DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index 0b1f6799a32e..d0de24f06724 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -62,7 +62,6 @@ typedef pte_t *pte_addr_t; */ #define pgprot_noncached(prot) (prot) #define pgprot_writecombine(prot) (prot) -#define pgprot_dmacoherent(prot) (prot) #define pgprot_device(prot) (prot) diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h index 2c403e7c782d..27e984977402 100644 --- a/arch/arm/include/asm/xen/page-coherent.h +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -1,95 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H -#define _ASM_ARM_XEN_PAGE_COHERENT_H - -#include <linux/dma-mapping.h> -#include <asm/page.h> #include <xen/arm/page-coherent.h> - -static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev) -{ - if (dev && dev->archdata.dev_dma_ops) - return dev->archdata.dev_dma_ops; - return get_arch_dma_ops(NULL); -} - -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) -{ - return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); -} - -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) -{ - xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); -} - -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long page_pfn = page_to_xen_pfn(page); - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); - unsigned long compound_pages = - (1<<compound_order(page)) * XEN_PFN_PER_PAGE; - bool local = (page_pfn <= dev_pfn) && - (dev_pfn - page_pfn < compound_pages); - - /* - * Dom0 is mapped 1:1, while the Linux page can span across - * multiple Xen pages, it's not possible for it to contain a - * mix of local and foreign Xen pages. So if the first xen_pfn - * == mfn the page is local otherwise it's a foreign page - * grant-mapped in dom0. If the page is local we can safely - * call the native dma_ops function, otherwise we call the xen - * specific function. - */ - if (local) - xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); - else - __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); -} - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long pfn = PFN_DOWN(handle); - /* - * Dom0 is mapped 1:1, while the Linux page can be spanned accross - * multiple Xen page, it's not possible to have a mix of local and - * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a - * foreign mfn will always return false. If the page is local we can - * safely call the native dma_ops function, otherwise we call the xen - * specific function. - */ - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->unmap_page) - xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); - } else - __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); -} - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->sync_single_for_cpu) - xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); - } else - __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); -} - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->sync_single_for_device) - xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); - } else - __xen_dma_sync_single_for_device(hwdev, handle, size, dir); -} - -#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */ diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 52b82559d99b..db9247898300 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -68,8 +68,9 @@ static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) return ret; - - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + return -ENXIO; } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index d42557ee69c2..7d042d5c43e3 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -14,6 +14,7 @@ #include <linux/list.h> #include <linux/init.h> #include <linux/device.h> +#include <linux/dma-direct.h> #include <linux/dma-mapping.h> #include <linux/dma-noncoherent.h> #include <linux/dma-contiguous.h> @@ -35,6 +36,7 @@ #include <asm/mach/map.h> #include <asm/system_info.h> #include <asm/dma-contiguous.h> +#include <xen/swiotlb-xen.h> #include "dma.h" #include "mm.h" @@ -192,6 +194,7 @@ const struct dma_map_ops arm_dma_ops = { .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, .sync_sg_for_device = arm_dma_sync_sg_for_device, .dma_supported = arm_dma_supported, + .get_required_mask = dma_direct_get_required_mask, }; EXPORT_SYMBOL(arm_dma_ops); @@ -212,6 +215,7 @@ const struct dma_map_ops arm_coherent_dma_ops = { .map_sg = arm_dma_map_sg, .map_resource = dma_direct_map_resource, .dma_supported = arm_dma_supported, + .get_required_mask = dma_direct_get_required_mask, }; EXPORT_SYMBOL(arm_coherent_dma_ops); @@ -336,25 +340,6 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, pgprot_t prot, struct page **ret_page, const void *caller, bool want_vaddr); -static void * -__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, - const void *caller) -{ - /* - * DMA allocation can be mapped to user space, so lets - * set VM_USERMAP flags too. - */ - return dma_common_contiguous_remap(page, size, - VM_ARM_DMA_CONSISTENT | VM_USERMAP, - prot, caller); -} - -static void __dma_free_remap(void *cpu_addr, size_t size) -{ - dma_common_free_remap(cpu_addr, size, - VM_ARM_DMA_CONSISTENT | VM_USERMAP); -} - #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K static struct gen_pool *atomic_pool __ro_after_init; @@ -510,7 +495,7 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, if (!want_vaddr) goto out; - ptr = __dma_alloc_remap(page, size, gfp, prot, caller); + ptr = dma_common_contiguous_remap(page, size, prot, caller); if (!ptr) { __dma_free_buffer(page, size); return NULL; @@ -577,7 +562,7 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size, goto out; if (PageHighMem(page)) { - ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller); + ptr = dma_common_contiguous_remap(page, size, prot, caller); if (!ptr) { dma_release_from_contiguous(dev, page, count); return NULL; @@ -597,7 +582,7 @@ static void __free_from_contiguous(struct device *dev, struct page *page, { if (want_vaddr) { if (PageHighMem(page)) - __dma_free_remap(cpu_addr, size); + dma_common_free_remap(cpu_addr, size); else __dma_remap(page, size, PAGE_KERNEL); } @@ -689,7 +674,7 @@ static void *remap_allocator_alloc(struct arm_dma_alloc_args *args, static void remap_allocator_free(struct arm_dma_free_args *args) { if (args->want_vaddr) - __dma_free_remap(args->cpu_addr, args->size); + dma_common_free_remap(args->cpu_addr, args->size); __dma_free_buffer(args->page, args->size); } @@ -877,17 +862,6 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); } -/* - * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems - * that the intention is to allow exporting memory allocated via the - * coherent DMA APIs through the dma_buf API, which only accepts a - * scattertable. This presents a couple of problems: - * 1. Not all memory allocated via the coherent DMA APIs is backed by - * a struct page - * 2. Passing coherent DMA memory into the streaming APIs is not allowed - * as we will try to flush the memory through a different alias to that - * actually being used (and the flushes are redundant.) - */ int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, unsigned long attrs) @@ -1132,10 +1106,6 @@ static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent) * 32-bit DMA. * Use the generic dma-direct / swiotlb ops code in that case, as that * handles bounce buffering for us. - * - * Note: this checks CONFIG_ARM_LPAE instead of CONFIG_SWIOTLB as the - * latter is also selected by the Xen code, but that code for now relies - * on non-NULL dev_dma_ops. To be cleaned up later. */ if (IS_ENABLED(CONFIG_ARM_LPAE)) return NULL; @@ -1373,17 +1343,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, } /* - * Create a CPU mapping for a specified pages - */ -static void * -__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, - const void *caller) -{ - return dma_common_pages_remap(pages, size, - VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller); -} - -/* * Create a mapping in device IO address space for specified pages */ static dma_addr_t @@ -1455,18 +1414,13 @@ static struct page **__atomic_get_pages(void *addr) static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs) { - struct vm_struct *area; - if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) return __atomic_get_pages(cpu_addr); if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) return cpu_addr; - area = find_vm_area(cpu_addr); - if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) - return area->pages; - return NULL; + return dma_common_find_pages(cpu_addr); } static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp, @@ -1539,7 +1493,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size, if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) return pages; - addr = __iommu_alloc_remap(pages, size, gfp, prot, + addr = dma_common_pages_remap(pages, size, prot, __builtin_return_address(0)); if (!addr) goto err_mapping; @@ -1622,10 +1576,8 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, return; } - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) { - dma_common_free_remap(cpu_addr, size, - VM_ARM_DMA_CONSISTENT | VM_USERMAP); - } + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) + dma_common_free_remap(cpu_addr, size); __iommu_remove_mapping(dev, handle, size); __iommu_free_buffer(dev, pages, size, attrs); @@ -2363,10 +2315,8 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, set_dma_ops(dev, dma_ops); #ifdef CONFIG_XEN - if (xen_initial_domain()) { - dev->archdata.dev_dma_ops = dev->dma_ops; - dev->dma_ops = xen_dma_ops; - } + if (xen_initial_domain()) + dev->dma_ops = &xen_swiotlb_dma_ops; #endif dev->archdata.dma_ops_setup = true; } @@ -2402,12 +2352,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, return dma_to_pfn(dev, dma_addr); } -pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, - unsigned long attrs) -{ - return __get_dma_pgprot(attrs, prot); -} - void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 941356d95a67..88c121ac14b3 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -70,9 +70,6 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page #define VM_ARM_MTYPE(mt) ((mt) << 20) #define VM_ARM_MTYPE_MASK (0x1f << 20) -/* consistent regions used by dma_alloc_attrs() */ -#define VM_ARM_DMA_CONSISTENT 0x20000000 - struct static_vm { struct vm_struct vm; diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index d33b77e9add3..2b2c208408bb 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/cpu.h> -#include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <linux/gfp.h> #include <linux/highmem.h> #include <linux/export.h> @@ -35,105 +35,56 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order) return __get_free_pages(flags, order); } -enum dma_cache_op { - DMA_UNMAP, - DMA_MAP, -}; static bool hypercall_cflush = false; -/* functions called by SWIOTLB */ - -static void dma_cache_maint(dma_addr_t handle, unsigned long offset, - size_t size, enum dma_data_direction dir, enum dma_cache_op op) +/* buffers in highmem or foreign pages cannot cross page boundaries */ +static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op) { struct gnttab_cache_flush cflush; - unsigned long xen_pfn; - size_t left = size; - xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE; - offset %= XEN_PAGE_SIZE; + cflush.a.dev_bus_addr = handle & XEN_PAGE_MASK; + cflush.offset = xen_offset_in_page(handle); + cflush.op = op; do { - size_t len = left; - - /* buffers in highmem or foreign pages cannot cross page - * boundaries */ - if (len + offset > XEN_PAGE_SIZE) - len = XEN_PAGE_SIZE - offset; - - cflush.op = 0; - cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT; - cflush.offset = offset; - cflush.length = len; - - if (op == DMA_UNMAP && dir != DMA_TO_DEVICE) - cflush.op = GNTTAB_CACHE_INVAL; - if (op == DMA_MAP) { - if (dir == DMA_FROM_DEVICE) - cflush.op = GNTTAB_CACHE_INVAL; - else - cflush.op = GNTTAB_CACHE_CLEAN; - } - if (cflush.op) - HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1); + if (size + cflush.offset > XEN_PAGE_SIZE) + cflush.length = XEN_PAGE_SIZE - cflush.offset; + else + cflush.length = size; - offset = 0; - xen_pfn++; - left -= len; - } while (left); -} + HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1); -static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP); + cflush.offset = 0; + cflush.a.dev_bus_addr += cflush.length; + size -= cflush.length; + } while (size); } -static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) +/* + * Dom0 is mapped 1:1, and while the Linux page can span across multiple Xen + * pages, it is not possible for it to contain a mix of local and foreign Xen + * pages. Calling pfn_valid on a foreign mfn will always return false, so if + * pfn_valid returns true the pages is local and we can use the native + * dma-direct functions, otherwise we call the Xen specific version. + */ +void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, + phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP); + if (pfn_valid(PFN_DOWN(handle))) + arch_sync_dma_for_cpu(dev, paddr, size, dir); + else if (dir != DMA_TO_DEVICE) + dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL); } -void __xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) +void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, + phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - if (is_device_dma_coherent(hwdev)) - return; - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - return; - - __xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir); -} - -void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) - -{ - if (is_device_dma_coherent(hwdev)) - return; - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - return; - - __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir); -} - -void __xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - if (is_device_dma_coherent(hwdev)) - return; - __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir); -} - -void __xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - if (is_device_dma_coherent(hwdev)) - return; - __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir); + if (pfn_valid(PFN_DOWN(handle))) + arch_sync_dma_for_device(dev, paddr, size, dir); + else if (dir == DMA_FROM_DEVICE) + dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL); + else + dma_cache_maint(handle, size, GNTTAB_CACHE_CLEAN); } bool xen_arch_need_swiotlb(struct device *dev, @@ -159,7 +110,7 @@ bool xen_arch_need_swiotlb(struct device *dev, * memory and we are not able to flush the cache. */ return (!hypercall_cflush && (xen_pfn != bfn) && - !is_device_dma_coherent(dev)); + !dev_is_dma_coherent(dev)); } int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, @@ -173,16 +124,11 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, *dma_handle = pstart; return 0; } -EXPORT_SYMBOL_GPL(xen_create_contiguous_region); void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { return; } -EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); - -const struct dma_map_ops *xen_dma_ops; -EXPORT_SYMBOL(xen_dma_ops); int __init xen_mm_init(void) { @@ -190,7 +136,6 @@ int __init xen_mm_init(void) if (!xen_initial_domain()) return 0; xen_swiotlb_init(1, false); - xen_dma_ops = &xen_swiotlb_dma_ops; cflush.op = 0; cflush.a.dev_bus_addr = 0; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6ae6ad8a4db0..835a1509882b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -13,7 +13,6 @@ config ARM64 select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_COHERENT_TO_PFN - select ARCH_HAS_DMA_MMAP_PGPROT select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index c52e151afab0..98a5405c8558 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += delay.h generic-y += div64.h generic-y += dma.h generic-y += dma-contiguous.h +generic-y += dma-mapping.h generic-y += early_ioremap.h generic-y += emergency-restart.h generic-y += hw_irq.h diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h deleted file mode 100644 index fb3e5044f473..000000000000 --- a/arch/arm64/include/asm/dma-mapping.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. - */ -#ifndef __ASM_DMA_MAPPING_H -#define __ASM_DMA_MAPPING_H - -#include <linux/types.h> -#include <linux/vmalloc.h> - -#include <xen/xen.h> -#include <asm/xen/hypervisor.h> - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return NULL; -} - -/* - * Do not use this function in a driver, it is only provided for - * arch/arm/mm/xen.c, which is used by arm64 as well. - */ -static inline bool is_device_dma_coherent(struct device *dev) -{ - return dev->dma_coherent; -} - -#endif /* __ASM_DMA_MAPPING_H */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 470ba7ae8821..57427d17580e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -437,6 +437,18 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_device(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN) +/* + * DMA allocations for non-coherent devices use what the Arm architecture calls + * "Normal non-cacheable" memory, which permits speculation, unaligned accesses + * and merging of writes. This is different from "Device-nGnR[nE]" memory which + * is intended for MMIO and thus forbids speculation, preserves access size, + * requires strict alignment and can also force write responses to come from the + * endpoint. + */ +#define pgprot_dmacoherent(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, \ + PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) + #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h index d88e56b90b93..27e984977402 100644 --- a/arch/arm64/include/asm/xen/page-coherent.h +++ b/arch/arm64/include/asm/xen/page-coherent.h @@ -1,77 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H -#define _ASM_ARM64_XEN_PAGE_COHERENT_H - -#include <linux/dma-mapping.h> -#include <asm/page.h> #include <xen/arm/page-coherent.h> - -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) -{ - return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); -} - -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) -{ - dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); -} - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - - if (pfn_valid(pfn)) - dma_direct_sync_single_for_cpu(hwdev, handle, size, dir); - else - __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); -} - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) - dma_direct_sync_single_for_device(hwdev, handle, size, dir); - else - __xen_dma_sync_single_for_device(hwdev, handle, size, dir); -} - -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long page_pfn = page_to_xen_pfn(page); - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); - unsigned long compound_pages = - (1<<compound_order(page)) * XEN_PFN_PER_PAGE; - bool local = (page_pfn <= dev_pfn) && - (dev_pfn - page_pfn < compound_pages); - - if (local) - dma_direct_map_page(hwdev, page, offset, size, dir, attrs); - else - __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); -} - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long pfn = PFN_DOWN(handle); - /* - * Dom0 is mapped 1:1, while the Linux page can be spanned accross - * multiple Xen page, it's not possible to have a mix of local and - * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a - * foreign mfn will always return false. If the page is local we can - * safely call the native dma_ops function, otherwise we call the xen - * specific function. - */ - if (pfn_valid(pfn)) - dma_direct_unmap_page(hwdev, handle, size, dir, attrs); - else - __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); -} - -#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */ diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index bd2b039f43a6..9239416e93d4 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -8,15 +8,11 @@ #include <linux/cache.h> #include <linux/dma-noncoherent.h> #include <linux/dma-iommu.h> +#include <xen/xen.h> +#include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> -pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, - unsigned long attrs) -{ - return pgprot_writecombine(prot); -} - void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { @@ -34,12 +30,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size) __dma_flush_area(page_address(page), size); } -static int __init arm64_dma_init(void) -{ - return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); -} -arch_initcall(arm64_dma_init); - #ifdef CONFIG_IOMMU_DMA void arch_teardown_dma_ops(struct device *dev) { @@ -64,6 +54,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, #ifdef CONFIG_XEN if (xen_initial_domain()) - dev->dma_ops = xen_dma_ops; + dev->dma_ops = &xen_swiotlb_dma_ops; #endif } diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index b4fb61c83494..e65e8d82442a 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -20,7 +20,6 @@ config C6X select OF_EARLY_FLATTREE select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA - select ARCH_NO_COHERENT_DMA_MMAP select MMU_GATHER_NO_RANGE if MMU config MMU diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 80783bb71c5c..602a60d47a94 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -14,12 +14,6 @@ #include <linux/version.h> #include <asm/cache.h> -static int __init atomic_pool_init(void) -{ - return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); -} -postcore_initcall(atomic_pool_init); - void arch_dma_prep_coherent(struct page *page, size_t size) { if (PageHighMem(page)) { diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index a7eff5e6d260..a806227c1fad 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2069,6 +2069,8 @@ static const struct dma_map_ops sba_dma_ops = { .map_sg = sba_map_sg_attrs, .unmap_sg = sba_unmap_sg_attrs, .dma_supported = sba_dma_supported, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; static int __init diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 8eb276aac5ce..bb320c6d0cc9 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -259,7 +259,7 @@ __initcall(register_memory); * This function checks if the reserved crashkernel is allowed on the specific * IA64 machine flavour. Machines without an IO TLB use swiotlb and require * some memory below 4 GB (i.e. in 32 bit area), see the implementation of - * lib/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that + * kernel/dma/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that * in kdump case. See the comment in sba_init() in sba_iommu.c. * * So, the only machvec that really supports loading the kdump kernel diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index c518d695c376..935599893d3e 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -4,11 +4,9 @@ config M68K default y select ARCH_32BIT_OFF_T select ARCH_HAS_BINFMT_FLAT - select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA select ARCH_MIGHT_HAVE_PC_PARPORT if ISA - select ARCH_NO_COHERENT_DMA_MMAP if !MMU select ARCH_NO_PREEMPT if !COLDFIRE select BINFMT_FLAT_ARGVP_ENVP_ON_STACK select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index fe3ddd73a0cc..fde4534b974f 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -169,6 +169,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, ? (__pgprot((pgprot_val(prot) & _CACHEMASK040) | _PAGE_NOCACHE_S)) \ : (prot))) +pgprot_t pgprot_dmacoherent(pgprot_t prot); +#define pgprot_dmacoherent(prot) pgprot_dmacoherent(prot) + #endif /* CONFIG_COLDFIRE */ #include <asm-generic/pgtable.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index 447849d1d645..3fab684cc0db 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -23,8 +23,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) cache_push(page_to_phys(page), size); } -pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, - unsigned long attrs) +pgprot_t pgprot_dmacoherent(pgprot_t prot) { if (CPU_IS_040_OR_060) { pgprot_val(prot) &= ~_PAGE_CACHE040; diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index d411de05b628..632c9477a0f6 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -9,7 +9,6 @@ config MICROBLAZE select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_MIGHT_HAVE_PC_PARPORT - select ARCH_NO_COHERENT_DMA_MMAP if !MMU select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select TIMER_OF diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d50fafd7bf3a..aff1cadeea43 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1119,7 +1119,14 @@ config DMA_PERDEV_COHERENT config DMA_NONCOHERENT bool - select ARCH_HAS_DMA_MMAP_PGPROT + # + # MIPS allows mixing "slightly different" Cacheability and Coherency + # Attribute bits. It is believed that the uncached access through + # KSEG1 and the implementation specific "uncached accelerated" used + # by pgprot_writcombine can be mixed, and the latter sometimes provides + # significant advantages. + # + select ARCH_HAS_DMA_WRITE_COMBINE select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_UNCACHED_SEGMENT select NEED_DMA_MAP_STATE diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 1804dc9d8136..a01e14955187 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -682,5 +682,7 @@ const struct dma_map_ops jazz_dma_ops = { .sync_sg_for_device = jazz_dma_sync_sg_for_device, .dma_supported = dma_direct_supported, .cache_sync = arch_dma_cache_sync, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; EXPORT_SYMBOL(jazz_dma_ops); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index ed56c6fa7be2..1d4d57dd9acf 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -65,14 +65,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr))); } -pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, - unsigned long attrs) -{ - if (attrs & DMA_ATTR_WRITE_COMBINE) - return pgprot_writecombine(prot); - return pgprot_noncached(prot); -} - static inline void dma_sync_virt(void *addr, size_t size, enum dma_data_direction dir) { diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c index 490e3720d694..4206d4b6c8ce 100644 --- a/arch/nds32/kernel/dma.c +++ b/arch/nds32/kernel/dma.c @@ -80,9 +80,3 @@ void arch_dma_prep_coherent(struct page *page, size_t size) { cache_op(page_to_phys(page), size, cpu_dma_wbinval_range); } - -static int __init atomic_pool_init(void) -{ - return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); -} -postcore_initcall(atomic_pool_init); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 2e757c785239..b16237c95ea3 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -52,7 +52,6 @@ config PARISC select GENERIC_SCHED_CLOCK select HAVE_UNSTABLE_SCHED_CLOCK if SMP select GENERIC_CLOCKEVENTS - select ARCH_NO_COHERENT_DMA_MMAP select CPU_NO_EFFICIENT_FFS select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index a0879674a9c8..2f5a53874f6d 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -208,4 +208,6 @@ const struct dma_map_ops dma_iommu_ops = { .sync_single_for_device = dma_iommu_sync_for_device, .sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu, .sync_sg_for_device = dma_iommu_sync_sg_for_device, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 98410119c47b..3542b7bd6a46 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -686,20 +686,16 @@ static int ps3_dma_supported(struct device *_dev, u64 mask) return mask >= DMA_BIT_MASK(32); } -static u64 ps3_dma_get_required_mask(struct device *_dev) -{ - return DMA_BIT_MASK(32); -} - static const struct dma_map_ops ps3_sb_dma_ops = { .alloc = ps3_alloc_coherent, .free = ps3_free_coherent, .map_sg = ps3_sb_map_sg, .unmap_sg = ps3_sb_unmap_sg, .dma_supported = ps3_dma_supported, - .get_required_mask = ps3_dma_get_required_mask, .map_page = ps3_sb_map_page, .unmap_page = ps3_unmap_page, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; static const struct dma_map_ops ps3_ioc0_dma_ops = { @@ -708,9 +704,10 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = { .map_sg = ps3_ioc0_map_sg, .unmap_sg = ps3_ioc0_unmap_sg, .dma_supported = ps3_dma_supported, - .get_required_mask = ps3_dma_get_required_mask, .map_page = ps3_ioc0_map_page, .unmap_page = ps3_unmap_page, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; /** diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 6601b9d404dc..3473eef7628c 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -605,6 +605,8 @@ static const struct dma_map_ops vio_dma_mapping_ops = { .unmap_page = vio_dma_iommu_unmap_page, .dma_supported = dma_iommu_dma_supported, .get_required_mask = dma_iommu_get_required_mask, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; /** diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index fb2c7db0164e..64b1399a73f0 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -668,6 +668,8 @@ const struct dma_map_ops s390_pci_dma_ops = { .unmap_sg = s390_dma_unmap_sg, .map_page = s390_dma_map_pages, .unmap_page = s390_dma_unmap_pages, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, /* dma_supported is unconditionally true without a callback */ }; EXPORT_SYMBOL_GPL(s390_pci_dma_ops); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 6b1b5941b618..f356ee674d89 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -5,7 +5,6 @@ config SUPERH select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_MIGHT_HAVE_PC_PARPORT - select ARCH_NO_COHERENT_DMA_MMAP if !MMU select HAVE_PATA_PLATFORM select CLKDEV_LOOKUP select DMA_DECLARE_COHERENT diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index 9492aa304f03..126e961a8cb0 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -198,8 +198,6 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE) #define pgprot_writecombine(prot) \ __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE) -#define pgprot_dmacoherent(prot) \ - __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_present(pmd) (pmd_val(pmd) & PMD_PRESENT) diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h index 116777e7f387..63cd41b2e17a 100644 --- a/arch/x86/include/asm/xen/page-coherent.h +++ b/arch/x86/include/asm/xen/page-coherent.h @@ -21,18 +21,4 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, free_pages((unsigned long) cpu_addr, get_order(size)); } -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) { } - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) { } - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - #endif /* _ASM_X86_XEN_PAGE_COHERENT_H */ diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index a585ea6f686a..a6ac3712db8b 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -677,7 +677,10 @@ static const struct dma_map_ops gart_dma_ops = { .unmap_page = gart_unmap_page, .alloc = gart_alloc_coherent, .free = gart_free_coherent, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, .dma_supported = dma_direct_supported, + .get_required_mask = dma_direct_get_required_mask, }; static void gart_iommu_shutdown(void) diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 9d4343aa481b..23fdec030c37 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -468,6 +468,8 @@ static const struct dma_map_ops calgary_dma_ops = { .map_page = calgary_map_page, .unmap_page = calgary_unmap_page, .dma_supported = dma_direct_supported, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; static inline void __iomem * busno_to_bbar(unsigned char num) diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 5f5302028a9a..c2cfa5e7c152 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -/* Glue code to lib/swiotlb.c */ #include <linux/pci.h> #include <linux/cache.h> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bbe35bf879f5..77ea96b794bd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -486,7 +486,7 @@ static int __init reserve_crashkernel_low(void) ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); if (ret) { /* - * two parts from lib/swiotlb.c: + * two parts from kernel/dma/swiotlb.c: * -swiotlb size: user-specified with swiotlb= or default. * * -swiotlb overflow buffer: now hardcoded to 32k. We round it diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 97bbc12dd6b2..6269a175385d 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * arch/x86/pci/sta2x11-fixup.c - * glue code for lib/swiotlb.c and DMA translation between STA2x11 - * AMBA memory mapping and the X86 memory mapping + * DMA translation between STA2x11 AMBA memory mapping and the x86 memory mapping * * ST Microelectronics ConneXt (STA2X11/STA2X10) * diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 26e8b326966d..c8dbee62ec2a 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2625,7 +2625,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, *dma_handle = virt_to_machine(vstart).maddr; return success ? 0 : -ENOMEM; } -EXPORT_SYMBOL_GPL(xen_create_contiguous_region); void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { @@ -2660,7 +2659,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) spin_unlock_irqrestore(&xen_reservation_lock, flags); } -EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); static noinline void xen_flush_tlb_all(void) { diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index fb64469ca8f0..a8e7beb6b7b5 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -5,7 +5,6 @@ config XTENSA select ARCH_HAS_BINFMT_FLAT if !MMU select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select ARCH_NO_COHERENT_DMA_MMAP if !MMU select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_FRAME_POINTERS diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 65f05776d827..154979d62b73 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -167,7 +167,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (PageHighMem(page)) { void *p; - p = dma_common_contiguous_remap(page, size, VM_MAP, + p = dma_common_contiguous_remap(page, size, pgprot_noncached(PAGE_KERNEL), __builtin_return_address(0)); if (!p) { @@ -192,7 +192,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, page = virt_to_page(platform_vaddr_to_cached(vaddr)); } else { #ifdef CONFIG_MMU - dma_common_free_remap(vaddr, size, VM_MAP); + dma_common_free_remap(vaddr, size); #endif page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle))); } diff --git a/block/blk-settings.c b/block/blk-settings.c index 6bd1e3b082d8..5f6dcc7a47bd 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -12,6 +12,7 @@ #include <linux/lcm.h> #include <linux/jiffies.h> #include <linux/gfp.h> +#include <linux/dma-mapping.h> #include "blk.h" #include "blk-wbt.h" @@ -848,6 +849,28 @@ void blk_queue_required_elevator_features(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features); +/** + * blk_queue_can_use_dma_map_merging - configure queue for merging segments. + * @q: the request queue for the device + * @dev: the device pointer for dma + * + * Tell the block layer about merging the segments by dma map of @q. + */ +bool blk_queue_can_use_dma_map_merging(struct request_queue *q, + struct device *dev) +{ + unsigned long boundary = dma_get_merge_boundary(dev); + + if (!boundary) + return false; + + /* No need to update max_segment_size. see blk_queue_virt_boundary() */ + blk_queue_virt_boundary(q, boundary); + + return true; +} +EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c index 785c5546067a..ed0ccbeed70f 100644 --- a/drivers/gpu/drm/omapdrm/dss/dispc.c +++ b/drivers/gpu/drm/omapdrm/dss/dispc.c @@ -4609,11 +4609,10 @@ static int dispc_errata_i734_wa_init(struct dispc_device *dispc) i734_buf.size = i734.ovli.width * i734.ovli.height * color_mode_to_bpp(i734.ovli.fourcc) / 8; - i734_buf.vaddr = dma_alloc_writecombine(&dispc->pdev->dev, - i734_buf.size, &i734_buf.paddr, - GFP_KERNEL); + i734_buf.vaddr = dma_alloc_wc(&dispc->pdev->dev, i734_buf.size, + &i734_buf.paddr, GFP_KERNEL); if (!i734_buf.vaddr) { - dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed\n", + dev_err(&dispc->pdev->dev, "%s: dma_alloc_wc failed\n", __func__); return -ENOMEM; } @@ -4626,8 +4625,8 @@ static void dispc_errata_i734_wa_fini(struct dispc_device *dispc) if (!dispc->feat->has_gamma_i734_bug) return; - dma_free_writecombine(&dispc->pdev->dev, i734_buf.size, i734_buf.vaddr, - i734_buf.paddr); + dma_free_wc(&dispc->pdev->dev, i734_buf.size, i734_buf.vaddr, + i734_buf.paddr); } static void dispc_errata_i734_wa(struct dispc_device *dispc) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1ed3b98324ba..97975bb7f347 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2754,6 +2754,8 @@ static const struct dma_map_ops amd_iommu_dma_ops = { .map_sg = map_sg, .unmap_sg = unmap_sg, .dma_supported = amd_iommu_dma_supported, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; static int init_reserved_iova_ranges(void) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 8f412af84247..f321279baf9e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -548,15 +548,6 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, return pages; } -static struct page **__iommu_dma_get_pages(void *cpu_addr) -{ - struct vm_struct *area = find_vm_area(cpu_addr); - - if (!area || !area->pages) - return NULL; - return area->pages; -} - /** * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space * @dev: Device to allocate memory for. Must be a real device @@ -624,7 +615,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, < size) goto out_free_sg; - vaddr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, + vaddr = dma_common_pages_remap(pages, size, prot, __builtin_return_address(0)); if (!vaddr) goto out_unmap; @@ -945,10 +936,10 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr) * If it the address is remapped, then it's either non-coherent * or highmem CMA, or an iommu_dma_alloc_remap() construction. */ - pages = __iommu_dma_get_pages(cpu_addr); + pages = dma_common_find_pages(cpu_addr); if (!pages) page = vmalloc_to_page(cpu_addr); - dma_common_free_remap(cpu_addr, alloc_size, VM_USERMAP); + dma_common_free_remap(cpu_addr, alloc_size); } else { /* Lowmem means a coherent atomic or CMA allocation */ page = virt_to_page(cpu_addr); @@ -986,7 +977,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size, pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); cpu_addr = dma_common_contiguous_remap(page, alloc_size, - VM_USERMAP, prot, __builtin_return_address(0)); + prot, __builtin_return_address(0)); if (!cpu_addr) goto out_free_pages; @@ -1052,7 +1043,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, return -ENXIO; if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { - struct page **pages = __iommu_dma_get_pages(cpu_addr); + struct page **pages = dma_common_find_pages(cpu_addr); if (pages) return __iommu_dma_mmap(pages, size, vma); @@ -1074,7 +1065,7 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, int ret; if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { - struct page **pages = __iommu_dma_get_pages(cpu_addr); + struct page **pages = dma_common_find_pages(cpu_addr); if (pages) { return sg_alloc_table_from_pages(sgt, pages, @@ -1093,6 +1084,13 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, return ret; } +static unsigned long iommu_dma_get_merge_boundary(struct device *dev) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + + return (1UL << __ffs(domain->pgsize_bitmap)) - 1; +} + static const struct dma_map_ops iommu_dma_ops = { .alloc = iommu_dma_alloc, .free = iommu_dma_free, @@ -1108,6 +1106,7 @@ static const struct dma_map_ops iommu_dma_ops = { .sync_sg_for_device = iommu_dma_sync_sg_for_device, .map_resource = iommu_dma_map_resource, .unmap_resource = iommu_dma_unmap_resource, + .get_merge_boundary = iommu_dma_get_merge_boundary, }; /* diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 87de0b975672..3f974919d3bd 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3785,6 +3785,8 @@ static const struct dma_map_ops intel_dma_ops = { .map_resource = intel_map_resource, .unmap_resource = intel_unmap_resource, .dma_supported = dma_direct_supported, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; static void diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 7102e2ebc614..9edc08685e86 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -21,6 +21,8 @@ #include "card.h" #include "host.h" +#define MMC_DMA_MAP_MERGE_SEGMENTS 512 + static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq) { /* Allow only 1 DCMD at a time */ @@ -193,6 +195,12 @@ static void mmc_queue_setup_discard(struct request_queue *q, blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); } +static unsigned int mmc_get_max_segments(struct mmc_host *host) +{ + return host->can_dma_map_merge ? MMC_DMA_MAP_MERGE_SEGMENTS : + host->max_segs; +} + /** * mmc_init_request() - initialize the MMC-specific per-request data * @q: the request queue @@ -206,7 +214,7 @@ static int __mmc_init_request(struct mmc_queue *mq, struct request *req, struct mmc_card *card = mq->card; struct mmc_host *host = card->host; - mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp); + mq_rq->sg = mmc_alloc_sg(mmc_get_max_segments(host), gfp); if (!mq_rq->sg) return -ENOMEM; @@ -362,13 +370,23 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH); blk_queue_max_hw_sectors(mq->queue, min(host->max_blk_count, host->max_req_size / 512)); - blk_queue_max_segments(mq->queue, host->max_segs); + if (host->can_dma_map_merge) + WARN(!blk_queue_can_use_dma_map_merging(mq->queue, + mmc_dev(host)), + "merging was advertised but not possible"); + blk_queue_max_segments(mq->queue, mmc_get_max_segments(host)); if (mmc_card_mmc(card)) block_size = card->ext_csd.data_sector_size; blk_queue_logical_block_size(mq->queue, block_size); - blk_queue_max_segment_size(mq->queue, + /* + * After blk_queue_can_use_dma_map_merging() was called with succeed, + * since it calls blk_queue_virt_boundary(), the mmc should not call + * both blk_queue_max_segment_size(). + */ + if (!host->can_dma_map_merge) + blk_queue_max_segment_size(mq->queue, round_down(host->max_seg_size, block_size)); dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue)); @@ -381,6 +399,11 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) init_waitqueue_head(&mq->wait); } +static inline bool mmc_merge_capable(struct mmc_host *host) +{ + return host->caps2 & MMC_CAP2_MERGE_CAPABLE; +} + /* Set queue depth to get a reasonable value for q->nr_requests */ #define MMC_QUEUE_DEPTH 64 @@ -418,6 +441,18 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) mq->tag_set.cmd_size = sizeof(struct mmc_queue_req); mq->tag_set.driver_data = mq; + /* + * Since blk_mq_alloc_tag_set() calls .init_request() of mmc_mq_ops, + * the host->can_dma_map_merge should be set before to get max_segs + * from mmc_get_max_segments(). + */ + if (mmc_merge_capable(host) && + host->max_segs < MMC_DMA_MAP_MERGE_SEGMENTS && + dma_get_merge_boundary(mmc_dev(host))) + host->can_dma_map_merge = 1; + else + host->can_dma_map_merge = 0; + ret = blk_mq_alloc_tag_set(&mq->tag_set); if (ret) return ret; diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index 751fe91c7571..a66f8d6d61d1 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -106,7 +106,7 @@ static const struct renesas_sdhi_of_data of_rcar_gen3_compatible = { TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | MMC_CAP_CMD23, - .capabilities2 = MMC_CAP2_NO_WRITE_PROTECT, + .capabilities2 = MMC_CAP2_NO_WRITE_PROTECT | MMC_CAP2_MERGE_CAPABLE, .bus_shift = 2, .scc_offset = 0x1000, .taps = rcar_gen3_scc_taps, diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 217f15aafa4a..ad290f79983b 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1024,6 +1024,7 @@ static const struct dma_map_ops ccio_ops = { .unmap_page = ccio_unmap_page, .map_sg = ccio_map_sg, .unmap_sg = ccio_unmap_sg, + .get_sgtable = dma_common_get_sgtable, }; #ifdef CONFIG_PROC_FS diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 296668caf7e5..ed50502cc65a 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1084,6 +1084,7 @@ static const struct dma_map_ops sba_ops = { .unmap_page = sba_unmap_page, .map_sg = sba_map_sg, .unmap_sg = sba_unmap_sg, + .get_sgtable = dma_common_get_sgtable, }; diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 28ed306982f7..94afdde4bc9f 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -2,7 +2,7 @@ menu "Remoteproc drivers" config REMOTEPROC - tristate "Support for Remote Processor subsystem" + bool "Support for Remote Processor subsystem" depends on HAS_DMA select CRC32 select FW_LOADER diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index adcabd9473eb..58c9365fa217 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -28,6 +28,7 @@ #include <linux/memblock.h> #include <linux/dma-direct.h> +#include <linux/dma-noncoherent.h> #include <linux/export.h> #include <xen/swiotlb-xen.h> #include <xen/page.h> @@ -391,6 +392,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, if (map == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; + phys = map; dev_addr = xen_phys_to_bus(map); /* @@ -402,14 +404,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, return DMA_MAPPING_ERROR; } - page = pfn_to_page(map >> PAGE_SHIFT); - offset = map & ~PAGE_MASK; done: - /* - * we are not interested in the dma_addr returned by xen_dma_map_page, - * only in the potential cache flushes executed by the function. - */ - xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs); + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + xen_dma_sync_for_device(dev, dev_addr, phys, size, dir); return dev_addr; } @@ -421,35 +418,29 @@ done: * After this call, reads by the cpu to the buffer are guaranteed to see * whatever the device wrote there. */ -static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) +static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t paddr = xen_bus_to_phys(dev_addr); BUG_ON(dir == DMA_NONE); - xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs); + if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + xen_dma_sync_for_cpu(hwdev, dev_addr, paddr, size, dir); /* NOTE: We use dev_addr here, not paddr! */ if (is_xen_swiotlb_buffer(dev_addr)) swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs); } -static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - xen_unmap_single(hwdev, dev_addr, size, dir, attrs); -} - static void xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { phys_addr_t paddr = xen_bus_to_phys(dma_addr); - xen_dma_sync_single_for_cpu(dev, dma_addr, size, dir); + if (!dev_is_dma_coherent(dev)) + xen_dma_sync_for_cpu(dev, dma_addr, paddr, size, dir); if (is_xen_swiotlb_buffer(dma_addr)) swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); @@ -464,7 +455,8 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr, if (is_xen_swiotlb_buffer(dma_addr)) swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); - xen_dma_sync_single_for_device(dev, dma_addr, size, dir); + if (!dev_is_dma_coherent(dev)) + xen_dma_sync_for_device(dev, dma_addr, paddr, size, dir); } /* @@ -481,7 +473,8 @@ xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) - xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs); + xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), + dir, attrs); } @@ -547,51 +540,6 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask) return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask; } -/* - * Create userspace mapping for the DMA-coherent memory. - * This function should be called with the pages from the current domain only, - * passing pages mapped from other domains would lead to memory corruption. - */ -static int -xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_ARM - if (xen_get_dma_ops(dev)->mmap) - return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr, - dma_addr, size, attrs); -#endif - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); -} - -/* - * This function should be called with the pages from the current domain only, - * passing pages mapped from other domains would lead to memory corruption. - */ -static int -xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t handle, size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_ARM - if (xen_get_dma_ops(dev)->get_sgtable) { -#if 0 - /* - * This check verifies that the page belongs to the current domain and - * is not one mapped from another domain. - * This check is for debug only, and should not go to production build - */ - unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle)); - BUG_ON (!page_is_ram(bfn)); -#endif - return xen_get_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr, - handle, size, attrs); - } -#endif - return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs); -} - const struct dma_map_ops xen_swiotlb_dma_ops = { .alloc = xen_swiotlb_alloc_coherent, .free = xen_swiotlb_free_coherent, @@ -604,6 +552,6 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .map_page = xen_swiotlb_map_page, .unmap_page = xen_swiotlb_unmap_page, .dma_supported = xen_swiotlb_dma_supported, - .mmap = xen_swiotlb_dma_mmap, - .get_sgtable = xen_swiotlb_get_sgtable, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3094f2d513b2..d9db32fb75ee 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1110,6 +1110,8 @@ extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); extern void blk_queue_required_elevator_features(struct request_queue *q, unsigned int features); +extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, + struct device *dev); /* * Number of physical segments as sent to the device. diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 14702e2d6fa8..4a1c4fca475a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -131,6 +131,7 @@ struct dma_map_ops { int (*dma_supported)(struct device *dev, u64 mask); u64 (*get_required_mask)(struct device *dev); size_t (*max_mapping_size)(struct device *dev); + unsigned long (*get_merge_boundary)(struct device *dev); }; #define DMA_MAPPING_ERROR (~(dma_addr_t)0) @@ -457,11 +458,13 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); +bool dma_can_mmap(struct device *dev); int dma_supported(struct device *dev, u64 mask); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); +unsigned long dma_get_merge_boundary(struct device *dev); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, size_t offset, size_t size, @@ -547,6 +550,10 @@ static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, { return -ENXIO; } +static inline bool dma_can_mmap(struct device *dev) +{ + return false; +} static inline int dma_supported(struct device *dev, u64 mask) { return 0; @@ -567,6 +574,10 @@ static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } +static inline unsigned long dma_get_merge_boundary(struct device *dev) +{ + return 0; +} #endif /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, @@ -610,16 +621,14 @@ extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); +struct page **dma_common_find_pages(void *cpu_addr); void *dma_common_contiguous_remap(struct page *page, size_t size, - unsigned long vm_flags, pgprot_t prot, const void *caller); void *dma_common_pages_remap(struct page **pages, size_t size, - unsigned long vm_flags, pgprot_t prot, - const void *caller); -void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags); + pgprot_t prot, const void *caller); +void dma_common_free_remap(void *cpu_addr, size_t size); -int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot); bool dma_in_atomic_pool(void *start, size_t size); void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags); bool dma_free_from_pool(void *start, size_t size); @@ -749,7 +758,6 @@ static inline int dma_get_cache_alignment(void) #ifdef CONFIG_DMA_DECLARE_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size); -void dma_release_declared_memory(struct device *dev); #else static inline int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, @@ -757,11 +765,6 @@ dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, { return -ENOSYS; } - -static inline void -dma_release_declared_memory(struct device *dev) -{ -} #endif /* CONFIG_DMA_DECLARE_COHERENT */ static inline void *dmam_alloc_coherent(struct device *dev, size_t size, @@ -781,9 +784,6 @@ static inline void *dma_alloc_wc(struct device *dev, size_t size, return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs); } -#ifndef dma_alloc_writecombine -#define dma_alloc_writecombine dma_alloc_wc -#endif static inline void dma_free_wc(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr) @@ -791,9 +791,6 @@ static inline void dma_free_wc(struct device *dev, size_t size, return dma_free_attrs(dev, size, cpu_addr, dma_addr, DMA_ATTR_WRITE_COMBINE); } -#ifndef dma_free_writecombine -#define dma_free_writecombine dma_free_wc -#endif static inline int dma_mmap_wc(struct device *dev, struct vm_area_struct *vma, @@ -803,9 +800,6 @@ static inline int dma_mmap_wc(struct device *dev, return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, DMA_ATTR_WRITE_COMBINE); } -#ifndef dma_mmap_writecombine -#define dma_mmap_writecombine dma_mmap_wc -#endif #ifdef CONFIG_NEED_DMA_MAP_STATE #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 0bff3d7fac92..dd3de6d88fc0 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -3,6 +3,7 @@ #define _LINUX_DMA_NONCOHERENT_H 1 #include <linux/dma-mapping.h> +#include <asm/pgtable.h> #ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H #include <asm/dma-coherence.h> @@ -42,10 +43,18 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, dma_addr_t dma_addr); -pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, - unsigned long attrs); #ifdef CONFIG_MMU +/* + * Page protection so that devices that can't snoop CPU caches can use the + * memory coherently. We default to pgprot_noncached which is usually used + * for ioremap as a safe bet, but architectures can override this with less + * strict semantics if possible. + */ +#ifndef pgprot_dmacoherent +#define pgprot_dmacoherent(prot) pgprot_noncached(prot) +#endif + pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); #else static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 4704b77259ee..ba703384bea0 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -368,6 +368,7 @@ struct mmc_host { #define MMC_CAP2_CQE (1 << 23) /* Has eMMC command queue engine */ #define MMC_CAP2_CQE_DCMD (1 << 24) /* CQE can issue a direct command */ #define MMC_CAP2_AVOID_3_3V (1 << 25) /* Host must negotiate down from 3.3V */ +#define MMC_CAP2_MERGE_CAPABLE (1 << 26) /* Host can merge a segment over the segment size */ int fixed_drv_type; /* fixed driver type for non-removable media */ @@ -397,6 +398,7 @@ struct mmc_host { unsigned int retune_paused:1; /* re-tuning is temporarily disabled */ unsigned int use_blk_mq:1; /* use blk-mq */ unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */ + unsigned int can_dma_map_merge:1; /* merging can be used */ int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 9b21d0047710..dfa718ffdd4f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -18,6 +18,7 @@ struct notifier_block; /* in notifier.h */ #define VM_ALLOC 0x00000002 /* vmalloc() */ #define VM_MAP 0x00000004 /* vmap()ed pages */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ +#define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ @@ -26,6 +27,7 @@ struct notifier_block; /* in notifier.h */ * vfree_atomic(). */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */ + /* bits [20..32] reserved for arch specific ioremap internals */ /* diff --git a/include/xen/arm/hypervisor.h b/include/xen/arm/hypervisor.h index 2982571f7cc1..43ef24dd030e 100644 --- a/include/xen/arm/hypervisor.h +++ b/include/xen/arm/hypervisor.h @@ -19,8 +19,6 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return PARAVIRT_LAZY_NONE; } -extern const struct dma_map_ops *xen_dma_ops; - #ifdef CONFIG_XEN void __init xen_early_init(void); #else diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h index 2ca9164a79bf..b9cc11e887ed 100644 --- a/include/xen/arm/page-coherent.h +++ b/include/xen/arm/page-coherent.h @@ -2,15 +2,19 @@ #ifndef _XEN_ARM_PAGE_COHERENT_H #define _XEN_ARM_PAGE_COHERENT_H -void __xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs); -void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs); -void __xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir); -void __xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir); +#include <linux/dma-mapping.h> +#include <asm/page.h> + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) +{ + return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) +{ + dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); +} #endif /* _XEN_ARM_PAGE_COHERENT_H */ diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index 5e4b83f83dbc..d71380f6ed0b 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -4,6 +4,11 @@ #include <linux/swiotlb.h> +void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, + phys_addr_t paddr, size_t size, enum dma_data_direction dir); +void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, + phys_addr_t paddr, size_t size, enum dma_data_direction dir); + extern int xen_swiotlb_init(int verbose, bool early); extern const struct dma_map_ops xen_swiotlb_dma_ops; diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 9decbba255fc..73c5c2b8e824 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -20,6 +20,15 @@ config ARCH_HAS_DMA_COHERENCE_H config ARCH_HAS_DMA_SET_MASK bool +# +# Select this option if the architecture needs special handling for +# DMA_ATTR_WRITE_COMBINE. Normally the "uncached" mapping should be what +# people thing of when saying write combine, so very few platforms should +# need to enable this. +# +config ARCH_HAS_DMA_WRITE_COMBINE + bool + config DMA_DECLARE_COHERENT bool @@ -45,9 +54,6 @@ config ARCH_HAS_DMA_PREP_COHERENT config ARCH_HAS_DMA_COHERENT_TO_PFN bool -config ARCH_HAS_DMA_MMAP_PGPROT - bool - config ARCH_HAS_FORCE_DMA_UNENCRYPTED bool diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 29fd6590dc1e..545e3869b0e3 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -122,18 +122,6 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_release_coherent_memory(mem); return ret; } -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if (!mem) - return; - dma_release_coherent_memory(mem); - dev->dma_mem = NULL; -} -EXPORT_SYMBOL(dma_release_declared_memory); static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem, ssize_t size, dma_addr_t *dma_handle) @@ -288,7 +276,6 @@ int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret); } -EXPORT_SYMBOL(dma_mmap_from_dev_coherent); int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index b0038ca3aa92..64a3d294f4b4 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -136,17 +136,29 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, return ret; } +/* + * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems + * that the intention is to allow exporting memory allocated via the + * coherent DMA APIs through the dma_buf API, which only accepts a + * scattertable. This presents a couple of problems: + * 1. Not all memory allocated via the coherent DMA APIs is backed by + * a struct page + * 2. Passing coherent DMA memory into the streaming APIs is not allowed + * as we will try to flush the memory through a different alias to that + * actually being used (and the flushes are redundant.) + */ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); - if (!dma_is_direct(ops) && ops->get_sgtable) - return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, - attrs); - return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, - attrs); + if (dma_is_direct(ops)) + return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, + size, attrs); + if (!ops->get_sgtable) + return -ENXIO; + return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); } EXPORT_SYMBOL(dma_get_sgtable_attrs); @@ -161,9 +173,11 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && (attrs & DMA_ATTR_NON_CONSISTENT))) return prot; - if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_MMAP_PGPROT)) - return arch_dma_mmap_pgprot(dev, prot, attrs); - return pgprot_noncached(prot); +#ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE + if (attrs & DMA_ATTR_WRITE_COMBINE) + return pgprot_writecombine(prot); +#endif + return pgprot_dmacoherent(prot); } #endif /* CONFIG_MMU */ @@ -174,7 +188,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { -#ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP +#ifdef CONFIG_MMU unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long off = vma->vm_pgoff; @@ -205,8 +219,29 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, user_count << PAGE_SHIFT, vma->vm_page_prot); #else return -ENXIO; -#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ +#endif /* CONFIG_MMU */ +} + +/** + * dma_can_mmap - check if a given device supports dma_mmap_* + * @dev: device to check + * + * Returns %true if @dev supports dma_mmap_coherent() and dma_mmap_attrs() to + * map DMA allocations to userspace. + */ +bool dma_can_mmap(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_is_direct(ops)) { + return IS_ENABLED(CONFIG_MMU) && + (dev_is_dma_coherent(dev) || + IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)); + } + + return ops->mmap != NULL; } +EXPORT_SYMBOL_GPL(dma_can_mmap); /** * dma_mmap_attrs - map a coherent DMA allocation into user space @@ -227,31 +262,15 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, { const struct dma_map_ops *ops = get_dma_ops(dev); - if (!dma_is_direct(ops) && ops->mmap) - return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); + if (dma_is_direct(ops)) + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, + attrs); + if (!ops->mmap) + return -ENXIO; + return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } EXPORT_SYMBOL(dma_mmap_attrs); -static u64 dma_default_get_required_mask(struct device *dev) -{ - u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); - u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); - u64 mask; - - if (!high_totalram) { - /* convert to mask just covering totalram */ - low_totalram = (1 << (fls(low_totalram) - 1)); - low_totalram += low_totalram - 1; - mask = low_totalram; - } else { - high_totalram = (1 << (fls(high_totalram) - 1)); - high_totalram += high_totalram - 1; - mask = (((u64)high_totalram) << 32) + 0xffffffff; - } - return mask; -} - u64 dma_get_required_mask(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -260,7 +279,16 @@ u64 dma_get_required_mask(struct device *dev) return dma_direct_get_required_mask(dev); if (ops->get_required_mask) return ops->get_required_mask(dev); - return dma_default_get_required_mask(dev); + + /* + * We require every DMA ops implementation to at least support a 32-bit + * DMA mask (and use bounce buffering if that isn't supported in + * hardware). As the direct mapping code has its own routine to + * actually report an optimal mask we default to 32-bit here as that + * is the right thing for most IOMMUs, and at least not actively + * harmful in general. + */ + return DMA_BIT_MASK(32); } EXPORT_SYMBOL_GPL(dma_get_required_mask); @@ -405,3 +433,14 @@ size_t dma_max_mapping_size(struct device *dev) return size; } EXPORT_SYMBOL_GPL(dma_max_mapping_size); + +unsigned long dma_get_merge_boundary(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (!ops || !ops->get_merge_boundary) + return 0; /* can't merge */ + + return ops->get_merge_boundary(dev); +} +EXPORT_SYMBOL_GPL(dma_get_merge_boundary); diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index ffe78f0b2fe4..ca4e5d44b571 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -11,13 +11,21 @@ #include <linux/slab.h> #include <linux/vmalloc.h> +struct page **dma_common_find_pages(void *cpu_addr) +{ + struct vm_struct *area = find_vm_area(cpu_addr); + + if (!area || area->flags != VM_DMA_COHERENT) + return NULL; + return area->pages; +} + static struct vm_struct *__dma_common_pages_remap(struct page **pages, - size_t size, unsigned long vm_flags, pgprot_t prot, - const void *caller) + size_t size, pgprot_t prot, const void *caller) { struct vm_struct *area; - area = get_vm_area_caller(size, vm_flags, caller); + area = get_vm_area_caller(size, VM_DMA_COHERENT, caller); if (!area) return NULL; @@ -34,12 +42,11 @@ static struct vm_struct *__dma_common_pages_remap(struct page **pages, * Cannot be used in non-sleeping contexts */ void *dma_common_pages_remap(struct page **pages, size_t size, - unsigned long vm_flags, pgprot_t prot, - const void *caller) + pgprot_t prot, const void *caller) { struct vm_struct *area; - area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); + area = __dma_common_pages_remap(pages, size, prot, caller); if (!area) return NULL; @@ -53,7 +60,6 @@ void *dma_common_pages_remap(struct page **pages, size_t size, * Cannot be used in non-sleeping contexts */ void *dma_common_contiguous_remap(struct page *page, size_t size, - unsigned long vm_flags, pgprot_t prot, const void *caller) { int i; @@ -67,7 +73,7 @@ void *dma_common_contiguous_remap(struct page *page, size_t size, for (i = 0; i < (size >> PAGE_SHIFT); i++) pages[i] = nth_page(page, i); - area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); + area = __dma_common_pages_remap(pages, size, prot, caller); kfree(pages); @@ -79,11 +85,11 @@ void *dma_common_contiguous_remap(struct page *page, size_t size, /* * Unmaps a range previously mapped by dma_common_*_remap */ -void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) +void dma_common_free_remap(void *cpu_addr, size_t size) { - struct vm_struct *area = find_vm_area(cpu_addr); + struct page **pages = dma_common_find_pages(cpu_addr); - if (!area || (area->flags & vm_flags) != vm_flags) { + if (!pages) { WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); return; } @@ -105,7 +111,16 @@ static int __init early_coherent_pool(char *p) } early_param("coherent_pool", early_coherent_pool); -int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot) +static gfp_t dma_atomic_pool_gfp(void) +{ + if (IS_ENABLED(CONFIG_ZONE_DMA)) + return GFP_DMA; + if (IS_ENABLED(CONFIG_ZONE_DMA32)) + return GFP_DMA32; + return GFP_KERNEL; +} + +static int __init dma_atomic_pool_init(void) { unsigned int pool_size_order = get_order(atomic_pool_size); unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT; @@ -117,7 +132,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot) page = dma_alloc_from_contiguous(NULL, nr_pages, pool_size_order, false); else - page = alloc_pages(gfp, pool_size_order); + page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order); if (!page) goto out; @@ -127,8 +142,9 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot) if (!atomic_pool) goto free_page; - addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP, - prot, __builtin_return_address(0)); + addr = dma_common_contiguous_remap(page, atomic_pool_size, + pgprot_dmacoherent(PAGE_KERNEL), + __builtin_return_address(0)); if (!addr) goto destroy_genpool; @@ -143,7 +159,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot) return 0; remove_mapping: - dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP); + dma_common_free_remap(addr, atomic_pool_size); destroy_genpool: gen_pool_destroy(atomic_pool); atomic_pool = NULL; @@ -155,6 +171,7 @@ out: atomic_pool_size / 1024); return -ENOMEM; } +postcore_initcall(dma_atomic_pool_init); bool dma_in_atomic_pool(void *start, size_t size) { @@ -217,7 +234,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, arch_dma_prep_coherent(page, size); /* create a coherent mapping */ - ret = dma_common_contiguous_remap(page, size, VM_USERMAP, + ret = dma_common_contiguous_remap(page, size, dma_pgprot(dev, PAGE_KERNEL, attrs), __builtin_return_address(0)); if (!ret) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7ba11e12a11f..c1246d77cf75 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2993,7 +2993,7 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, if (!area) return -EINVAL; - if (!(area->flags & VM_USERMAP)) + if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT))) return -EINVAL; if (kaddr + size > area->addr + get_vm_area_size(area)) @@ -3496,6 +3496,9 @@ static int s_show(struct seq_file *m, void *p) if (v->flags & VM_USERMAP) seq_puts(m, " user"); + if (v->flags & VM_DMA_COHERENT) + seq_puts(m, " dma-coherent"); + if (is_vmalloc_addr(v->pages)) seq_puts(m, " vpages"); diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 11e653c8aa0e..91c6ad58729f 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -220,13 +220,12 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream) { if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP)) return false; - /* architecture supports dma_mmap_coherent()? */ -#if defined(CONFIG_ARCH_NO_COHERENT_DMA_MMAP) || !defined(CONFIG_HAS_DMA) - if (!substream->ops->mmap && - substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV) - return false; -#endif - return true; + + if (substream->ops->mmap || + substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV) + return true; + + return dma_can_mmap(substream->dma_buffer.dev.dev); } static int constrain_mask_params(struct snd_pcm_substream *substream, |