diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 14:12:21 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 14:12:21 -0800 |
commit | af7ddd8a627c62a835524b3f5b471edbbbcce025 (patch) | |
tree | af9777ddef6d394c7cc01fca599328b584ca2bc1 /kernel/dma/direct.c | |
parent | fe2b0cdabcd9e6aeca66a104bc03576946e5fee2 (diff) | |
parent | 8b1cce9f5832a8eda17d37a3c49fb7dd2d650f46 (diff) | |
download | lwn-af7ddd8a627c62a835524b3f5b471edbbbcce025.tar.gz lwn-af7ddd8a627c62a835524b3f5b471edbbbcce025.zip |
Merge tag 'dma-mapping-4.21' of git://git.infradead.org/users/hch/dma-mapping
Pull DMA mapping updates from Christoph Hellwig:
"A huge update this time, but a lot of that is just consolidating or
removing code:
- provide a common DMA_MAPPING_ERROR definition and avoid indirect
calls for dma_map_* error checking
- use direct calls for the DMA direct mapping case, avoiding huge
retpoline overhead for high performance workloads
- merge the swiotlb dma_map_ops into dma-direct
- provide a generic remapping DMA consistent allocator for
architectures that have devices that perform DMA that is not cache
coherent. Based on the existing arm64 implementation and also used
for csky now.
- improve the dma-debug infrastructure, including dynamic allocation
of entries (Robin Murphy)
- default to providing chaining scatterlist everywhere, with opt-outs
for the few architectures (alpha, parisc, most arm32 variants) that
can't cope with it
- misc sparc32 dma-related cleanups
- remove the dma_mark_clean arch hook used by swiotlb on ia64 and
replace it with the generic noncoherent infrastructure
- fix the return type of dma_set_max_seg_size (Niklas Söderlund)
- move the dummy dma ops for not DMA capable devices from arm64 to
common code (Robin Murphy)
- ensure dma_alloc_coherent returns zeroed memory to avoid kernel
data leaks through userspace. We already did this for most common
architectures, but this ensures we do it everywhere.
dma_zalloc_coherent has been deprecated and can hopefully be
removed after -rc1 with a coccinelle script"
* tag 'dma-mapping-4.21' of git://git.infradead.org/users/hch/dma-mapping: (73 commits)
dma-mapping: fix inverted logic in dma_supported
dma-mapping: deprecate dma_zalloc_coherent
dma-mapping: zero memory returned from dma_alloc_*
sparc/iommu: fix ->map_sg return value
sparc/io-unit: fix ->map_sg return value
arm64: default to the direct mapping in get_arch_dma_ops
PCI: Remove unused attr variable in pci_dma_configure
ia64: only select ARCH_HAS_DMA_COHERENT_TO_PFN if swiotlb is enabled
dma-mapping: bypass indirect calls for dma-direct
vmd: use the proper dma_* APIs instead of direct methods calls
dma-direct: merge swiotlb_dma_ops into the dma_direct code
dma-direct: use dma_direct_map_page to implement dma_direct_map_sg
dma-direct: improve addressability error reporting
swiotlb: remove dma_mark_clean
swiotlb: remove SWIOTLB_MAP_ERROR
ACPI / scan: Refactor _CCA enforcement
dma-mapping: factor out dummy DMA ops
dma-mapping: always build the direct mapping code
dma-mapping: move dma_cache_sync out of line
dma-mapping: move various slow path functions out of line
...
Diffstat (limited to 'kernel/dma/direct.c')
-rw-r--r-- | kernel/dma/direct.c | 222 |
1 files changed, 129 insertions, 93 deletions
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 375c77e8d52f..355d16acee6d 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -13,6 +13,7 @@ #include <linux/dma-noncoherent.h> #include <linux/pfn.h> #include <linux/set_memory.h> +#include <linux/swiotlb.h> /* * Most architectures use ZONE_DMA for the first 16 Megabytes, but @@ -30,27 +31,16 @@ static inline bool force_dma_unencrypted(void) return sev_active(); } -static bool -check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, - const char *caller) +static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size) { - if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { - if (!dev->dma_mask) { - dev_err(dev, - "%s: call on device without dma_mask\n", - caller); - return false; - } - - if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) { - dev_err(dev, - "%s: overflow %pad+%zu of device mask %llx bus mask %llx\n", - caller, &dma_addr, size, - *dev->dma_mask, dev->bus_dma_mask); - } - return false; + if (!dev->dma_mask) { + dev_err_once(dev, "DMA map on device without dma_mask\n"); + } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) { + dev_err_once(dev, + "overflow %pad+%zu of DMA mask %llx bus mask %llx\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_mask); } - return true; + WARN_ON_ONCE(1); } static inline dma_addr_t phys_to_dma_direct(struct device *dev, @@ -103,14 +93,13 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask); } -void *dma_direct_alloc_pages(struct device *dev, size_t size, +struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; int page_order = get_order(size); struct page *page = NULL; u64 phys_mask; - void *ret; if (attrs & DMA_ATTR_NO_WARN) gfp |= __GFP_NOWARN; @@ -150,11 +139,34 @@ again: } } + return page; +} + +void *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +{ + struct page *page; + void *ret; + + page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); if (!page) return NULL; + + if (PageHighMem(page)) { + /* + * Depending on the cma= arguments and per-arch setup + * dma_alloc_from_contiguous could return highmem pages. + * Without remapping there is no way to return them here, + * so log an error and fail. + */ + dev_info(dev, "Rejecting highmem page from CMA.\n"); + __dma_direct_free_pages(dev, size, page); + return NULL; + } + ret = page_address(page); if (force_dma_unencrypted()) { - set_memory_decrypted((unsigned long)ret, 1 << page_order); + set_memory_decrypted((unsigned long)ret, 1 << get_order(size)); *dma_handle = __phys_to_dma(dev, page_to_phys(page)); } else { *dma_handle = phys_to_dma(dev, page_to_phys(page)); @@ -163,20 +175,22 @@ again: return ret; } -/* - * NOTE: this function must never look at the dma_addr argument, because we want - * to be able to use it as a helper for iommu implementations as well. - */ +void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + if (!dma_release_from_contiguous(dev, page, count)) + __free_pages(page, get_order(size)); +} + void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned int page_order = get_order(size); if (force_dma_unencrypted()) set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); - if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) - free_pages((unsigned long)cpu_addr, page_order); + __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr)); } void *dma_direct_alloc(struct device *dev, size_t size, @@ -196,67 +210,111 @@ void dma_direct_free(struct device *dev, size_t size, dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); } -static void dma_direct_sync_single_for_device(struct device *dev, +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - if (dev_is_dma_coherent(dev)) - return; - arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(dev, paddr, size, dir); } +EXPORT_SYMBOL(dma_direct_sync_single_for_device); -static void dma_direct_sync_sg_for_device(struct device *dev, +void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { struct scatterlist *sg; int i; - if (dev_is_dma_coherent(dev)) - return; + for_each_sg(sgl, sg, nents, i) { + if (unlikely(is_swiotlb_buffer(sg_phys(sg)))) + swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, + dir, SYNC_FOR_DEVICE); - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, + dir); + } } +EXPORT_SYMBOL(dma_direct_sync_sg_for_device); +#endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) -static void dma_direct_sync_single_for_cpu(struct device *dev, + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - if (dev_is_dma_coherent(dev)) - return; - arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); - arch_sync_dma_for_cpu_all(dev); + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (!dev_is_dma_coherent(dev)) { + arch_sync_dma_for_cpu(dev, paddr, size, dir); + arch_sync_dma_for_cpu_all(dev); + } + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); } +EXPORT_SYMBOL(dma_direct_sync_single_for_cpu); -static void dma_direct_sync_sg_for_cpu(struct device *dev, +void dma_direct_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { struct scatterlist *sg; int i; - if (dev_is_dma_coherent(dev)) - return; + for_each_sg(sgl, sg, nents, i) { + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); + + if (unlikely(is_swiotlb_buffer(sg_phys(sg)))) + swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, dir, + SYNC_FOR_CPU); + } - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); - arch_sync_dma_for_cpu_all(dev); + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu_all(dev); } +EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu); -static void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, +void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { + phys_addr_t phys = dma_to_phys(dev, addr); + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); + + if (unlikely(is_swiotlb_buffer(phys))) + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); } +EXPORT_SYMBOL(dma_direct_unmap_page); -static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, +void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir); + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir, + attrs); } +EXPORT_SYMBOL(dma_direct_unmap_sg); #endif +static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr, + size_t size) +{ + return swiotlb_force != SWIOTLB_FORCE && + (!dev || dma_capable(dev, dma_addr, size)); +} + dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -264,13 +322,17 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, phys_addr_t phys = page_to_phys(page) + offset; dma_addr_t dma_addr = phys_to_dma(dev, phys); - if (!check_addr(dev, dma_addr, size, __func__)) - return DIRECT_MAPPING_ERROR; + if (unlikely(!dma_direct_possible(dev, dma_addr, size)) && + !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) { + report_addr(dev, dma_addr, size); + return DMA_MAPPING_ERROR; + } - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_single_for_device(dev, dma_addr, size, dir); + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(dev, phys, size, dir); return dma_addr; } +EXPORT_SYMBOL(dma_direct_map_page); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) @@ -279,18 +341,20 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, struct scatterlist *sg; for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - - sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); - if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) - return 0; + sg->dma_address = dma_direct_map_page(dev, sg_page(sg), + sg->offset, sg->length, dir, attrs); + if (sg->dma_address == DMA_MAPPING_ERROR) + goto out_unmap; sg_dma_len(sg) = sg->length; } - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_sg_for_device(dev, sgl, nents, dir); return nents; + +out_unmap: + dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + return 0; } +EXPORT_SYMBOL(dma_direct_map_sg); /* * Because 32-bit DMA masks are so common we expect every architecture to be @@ -316,31 +380,3 @@ int dma_direct_supported(struct device *dev, u64 mask) */ return mask >= __phys_to_dma(dev, min_mask); } - -int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == DIRECT_MAPPING_ERROR; -} - -const struct dma_map_ops dma_direct_ops = { - .alloc = dma_direct_alloc, - .free = dma_direct_free, - .map_page = dma_direct_map_page, - .map_sg = dma_direct_map_sg, -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) - .sync_single_for_device = dma_direct_sync_single_for_device, - .sync_sg_for_device = dma_direct_sync_sg_for_device, -#endif -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) - .sync_single_for_cpu = dma_direct_sync_single_for_cpu, - .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, - .unmap_page = dma_direct_unmap_page, - .unmap_sg = dma_direct_unmap_sg, -#endif - .get_required_mask = dma_direct_get_required_mask, - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, - .cache_sync = arch_dma_cache_sync, -}; -EXPORT_SYMBOL(dma_direct_ops); |