From abc5992b9dd0ba599f7a91d5a0f4569a78ae88ac Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Tue, 7 Jun 2022 20:50:23 +0800
Subject: mm: ioremap: Use more sensible name in ioremap_prot()

Use the more meaningful and sensible name phys_addr instead of addr in
ioremap_prot().

Suggested-by: Andrew Morton
Reviewed-by: Anshuman Khandual
Signed-off-by: Kefeng Wang
Reviewed-by: Baoquan He
Reviewed-by: Christoph Hellwig
Link: https://lore.kernel.org/r/20220610092255.32445-1-wangkefeng.wang@huawei.com
Signed-off-by: Will Deacon
---
 mm/ioremap.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'mm')

diff --git a/mm/ioremap.c b/mm/ioremap.c
index 5fe598ecd9b7..2d754b48d230 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -11,20 +11,21 @@
 #include <linux/io.h>
 #include <linux/export.h>
 
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+			   unsigned long prot)
 {
 	unsigned long offset, vaddr;
 	phys_addr_t last_addr;
 	struct vm_struct *area;
 
 	/* Disallow wrap-around or zero size */
-	last_addr = addr + size - 1;
-	if (!size || last_addr < addr)
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
 		return NULL;
 
 	/* Page-align mappings */
-	offset = addr & (~PAGE_MASK);
-	addr -= offset;
+	offset = phys_addr & (~PAGE_MASK);
+	phys_addr -= offset;
 	size = PAGE_ALIGN(size + offset);
 
 	area = get_vm_area_caller(size, VM_IOREMAP,
@@ -33,7 +34,8 @@ void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
 		return NULL;
 	vaddr = (unsigned long)area->addr;
 
-	if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) {
+	if (ioremap_page_range(vaddr, vaddr + size, phys_addr,
+			       __pgprot(prot))) {
 		free_vm_area(area);
 		return NULL;
 	}
--
cgit v1.2.3

From a14fff1c0379d0422c5ca0084b0ab86ed7bedf3e Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Tue, 7 Jun 2022 20:50:24 +0800
Subject: mm: ioremap: Setup phys_addr of struct vm_struct

Show the physical address of each ioremap in /proc/vmallocinfo.

Acked-by: Andrew Morton
Reviewed-by: Christoph Hellwig
Reviewed-by: Anshuman Khandual
Signed-off-by: Kefeng Wang
Link: https://lore.kernel.org/r/20220607125027.44946-4-wangkefeng.wang@huawei.com
Signed-off-by: Will Deacon
---
 mm/ioremap.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'mm')

diff --git a/mm/ioremap.c b/mm/ioremap.c
index 2d754b48d230..e1d008e8f87f 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -33,6 +33,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
 	if (!area)
 		return NULL;
 	vaddr = (unsigned long)area->addr;
+	area->phys_addr = phys_addr;
 
 	if (ioremap_page_range(vaddr, vaddr + size, phys_addr,
 			       __pgprot(prot))) {
--
cgit v1.2.3
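With phys_addr now recorded in the vm_struct, any mapping created through the
generic ioremap() path is reported in /proc/vmallocinfo together with its
physical address. The module below is only a hypothetical illustration of such
a caller; the 0xfe000000 device window and the module itself are made up for
the example and are not part of the patches above:

#include <linux/module.h>
#include <linux/io.h>
#include <linux/sizes.h>

static void __iomem *regs;

static int __init vmallocinfo_demo_init(void)
{
	/* Map a (made-up) 4K device register window. */
	regs = ioremap(0xfe000000, SZ_4K);
	if (!regs)
		return -ENOMEM;

	/*
	 * The region now shows up in /proc/vmallocinfo as an "ioremap"
	 * entry that also carries the physical address, thanks to
	 * area->phys_addr being filled in.
	 */
	return 0;
}

static void __exit vmallocinfo_demo_exit(void)
{
	iounmap(regs);
}

module_init(vmallocinfo_demo_init);
module_exit(vmallocinfo_demo_exit);
MODULE_LICENSE("GPL");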
From 18e780b4e6ab89a3a10f46d151971863562c1c91 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Tue, 7 Jun 2022 20:50:25 +0800
Subject: mm: ioremap: Add ioremap/iounmap_allowed()

Add special hooks for the architecture to verify the addr, size or prot
when doing ioremap() or iounmap(), which will make the generic ioremap
more useful.

ioremap_allowed() returns a bool:
  - true means continue to remap
  - false means skip remap and return directly

iounmap_allowed() returns a bool:
  - true means continue to vunmap
  - false means skip vunmap and return directly

Meanwhile, only vunmap the address when it is in the vmalloc area, as the
generic ioremap only returns vmalloc addresses.

Acked-by: Andrew Morton
Signed-off-by: Kefeng Wang
Reviewed-by: Christoph Hellwig
Reviewed-by: Baoquan He
Link: https://lore.kernel.org/r/20220607125027.44946-5-wangkefeng.wang@huawei.com
Signed-off-by: Will Deacon
---
 include/asm-generic/io.h | 26 ++++++++++++++++++++++++++
 mm/ioremap.c             | 11 ++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index b76379628a02..db5b890eaff7 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -964,6 +964,32 @@ static inline void iounmap(volatile void __iomem *addr)
 #elif defined(CONFIG_GENERIC_IOREMAP)
 #include <linux/pgtable.h>
 
+/*
+ * Arch code can implement the following two hooks when using GENERIC_IOREMAP
+ * ioremap_allowed() return a bool,
+ *   - true means continue to remap
+ *   - false means skip remap and return directly
+ * iounmap_allowed() return a bool,
+ *   - true means continue to vunmap
+ *   - false means skip vunmap and return directly
+ */
+#ifndef ioremap_allowed
+#define ioremap_allowed ioremap_allowed
+static inline bool ioremap_allowed(phys_addr_t phys_addr, size_t size,
+				   unsigned long prot)
+{
+	return true;
+}
+#endif
+
+#ifndef iounmap_allowed
+#define iounmap_allowed iounmap_allowed
+static inline bool iounmap_allowed(void *addr)
+{
+	return true;
+}
+#endif
+
 void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
 			   unsigned long prot);
 void iounmap(volatile void __iomem *addr);
diff --git a/mm/ioremap.c b/mm/ioremap.c
index e1d008e8f87f..8652426282cc 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -28,6 +28,9 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
 	phys_addr -= offset;
 	size = PAGE_ALIGN(size + offset);
 
+	if (!ioremap_allowed(phys_addr, size, prot))
+		return NULL;
+
 	area = get_vm_area_caller(size, VM_IOREMAP,
 			__builtin_return_address(0));
 	if (!area)
@@ -47,6 +50,12 @@ EXPORT_SYMBOL(ioremap_prot);
 
 void iounmap(volatile void __iomem *addr)
 {
-	vunmap((void *)((unsigned long)addr & PAGE_MASK));
+	void *vaddr = (void *)((unsigned long)addr & PAGE_MASK);
+
+	if (!iounmap_allowed(vaddr))
+		return;
+
+	if (is_vmalloc_addr(vaddr))
+		vunmap(vaddr);
 }
 EXPORT_SYMBOL(iounmap);
--
cgit v1.2.3
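To make the hook mechanism concrete, here is a hedged sketch of how an
architecture's asm/io.h could override ioremap_allowed() before pulling in
asm-generic/io.h. It is not taken from this series or from any particular
port; the policy of rejecting attempts to remap normal RAM, and the
pfn_valid() check used to express it, are assumptions made purely for
illustration:

#define ioremap_allowed ioremap_allowed
static inline bool ioremap_allowed(phys_addr_t phys_addr, size_t size,
				   unsigned long prot)
{
	/* Example policy: refuse to remap pages that are normal RAM. */
	if (pfn_valid(__phys_to_pfn(phys_addr)))
		return false;

	return true;
}

#include <asm-generic/io.h>

Returning false makes the generic ioremap_prot() bail out with NULL before any
vmalloc area is set up, and an architecture can veto iounmap() in the same way
through iounmap_allowed().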
From ed0a6d1d973e9763989b44913ae1bd2a5d5d5777 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Fri, 10 Jun 2022 16:21:38 +0100
Subject: mm: kasan: Ensure the tags are visible before the tag in page->flags

__kasan_unpoison_pages() colours the memory with a random tag and stores
it in page->flags in order to re-create the tagged pointer via
page_to_virt() later. When the tag from the page->flags is read, ensure
that the in-memory tags are already visible by re-ordering the
page_kasan_tag_set() after kasan_unpoison(). The former already has
barriers in place through try_cmpxchg(). On the reader side, the order
is ensured by the address dependency between page->flags and the memory
access.

Signed-off-by: Catalin Marinas
Reviewed-by: Andrey Konovalov
Cc: Andrey Ryabinin
Cc: Vincenzo Frascino
Reviewed-by: Vincenzo Frascino
Link: https://lore.kernel.org/r/20220610152141.2148929-2-catalin.marinas@arm.com
Signed-off-by: Will Deacon
---
 mm/kasan/common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index c40c0e7b3b5f..78be2beb7453 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -108,9 +108,10 @@ void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init)
 		return;
 
 	tag = kasan_random_tag();
+	kasan_unpoison(set_tag(page_address(page), tag),
+		       PAGE_SIZE << order, init);
 	for (i = 0; i < (1 << order); i++)
 		page_kasan_tag_set(page + i, tag);
-	kasan_unpoison(page_address(page), PAGE_SIZE << order, init);
 }
 
 void __kasan_poison_pages(struct page *page, unsigned int order, bool init)
--
cgit v1.2.3

From 70c248aca9e7efa85a6664d5ab56c17c326c958f Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Fri, 10 Jun 2022 16:21:39 +0100
Subject: mm: kasan: Skip unpoisoning of user pages

Commit c275c5c6d50a ("kasan: disable freed user page poisoning with HW
tags") added __GFP_SKIP_KASAN_POISON to GFP_HIGHUSER_MOVABLE. A similar
argument can be made about unpoisoning, so also add
__GFP_SKIP_KASAN_UNPOISON to user pages. To ensure the user page is
still accessible via page_address() without a kasan fault, reset the
page->flags tag.

With the above changes, there is no need for the arm64
tag_clear_highpage() to reset the page->flags tag.

Signed-off-by: Catalin Marinas
Cc: Andrey Ryabinin
Cc: Andrey Konovalov
Cc: Peter Collingbourne
Cc: Vincenzo Frascino
Reviewed-by: Vincenzo Frascino
Reviewed-by: Andrey Konovalov
Link: https://lore.kernel.org/r/20220610152141.2148929-3-catalin.marinas@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/mm/fault.c | 1 -
 include/linux/gfp.h   | 2 +-
 mm/page_alloc.c       | 7 +++++--
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'mm')

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c5e11768e5c1..cdf3ffa0c223 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -927,6 +927,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
 void tag_clear_highpage(struct page *page)
 {
 	mte_zero_clear_page_tags(page_address(page));
-	page_kasan_tag_reset(page);
 	set_bit(PG_mte_tagged, &page->flags);
 }
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2d2ccae933c2..0ace7759acd2 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -348,7 +348,7 @@ struct vm_area_struct;
 #define GFP_DMA32	__GFP_DMA32
 #define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE | \
-			 __GFP_SKIP_KASAN_POISON)
+			 __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON)
 #define GFP_TRANSHUGE_LIGHT	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 			 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE	(GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e008a3df0485..f6ed240870bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2397,6 +2397,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 	bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) &&
 			!should_skip_init(gfp_flags);
 	bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS);
+	int i;
 
 	set_page_private(page, 0);
 	set_page_refcounted(page);
@@ -2422,8 +2423,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 	 * should be initialized as well).
 	 */
 	if (init_tags) {
-		int i;
-
 		/* Initialize both memory and tags. */
 		for (i = 0; i != 1 << order; ++i)
 			tag_clear_highpage(page + i);
@@ -2438,6 +2437,10 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 		/* Note that memory is already initialized by KASAN. */
 		if (kasan_has_integrated_init())
 			init = false;
+	} else {
+		/* Ensure page_address() dereferencing does not fault. */
+		for (i = 0; i != 1 << order; ++i)
+			page_kasan_tag_reset(page + i);
 	}
 	/* If memory is still not initialized, do it now. */
 	if (init)
--
cgit v1.2.3
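The reader-side address dependency mentioned in the tag-visibility patch above
comes from the way a tagged pointer is rebuilt for a page. The helper below is
only an illustrative sketch, loosely modelled on how arm64 folds the tag stored
in page->flags back into page_to_virt(); tagged_page_address() is a made-up
name and __tag_set() is an arm64-specific primitive, so treat this as an
assumption-laden illustration rather than kernel code:

static inline void *tagged_page_address(struct page *page)
{
	void *addr = lowmem_page_address(page);	/* untagged linear address */
	u8 tag = page_kasan_tag(page);		/* tag recorded in page->flags */

	/*
	 * The eventual dereference depends on the value loaded from
	 * page->flags (it becomes part of the pointer), so the reader is
	 * ordered by an address dependency for free; only the writer in
	 * __kasan_unpoison_pages() needed the explicit reordering above.
	 */
	return (void *)__tag_set(addr, tag);
}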
From 6d05141a393071e104bf5be5ad4d0c79c6dff343 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Fri, 10 Jun 2022 16:21:40 +0100
Subject: mm: kasan: Skip page unpoisoning only if __GFP_SKIP_KASAN_UNPOISON

Currently post_alloc_hook() skips the kasan unpoisoning if the tags will
be zeroed (__GFP_ZEROTAGS) or __GFP_SKIP_KASAN_UNPOISON is passed. Since
__GFP_ZEROTAGS is now accompanied by __GFP_SKIP_KASAN_UNPOISON, remove
the extra check.

Signed-off-by: Catalin Marinas
Cc: Andrey Ryabinin
Cc: Andrey Konovalov
Cc: Peter Collingbourne
Cc: Vincenzo Frascino
Reviewed-by: Vincenzo Frascino
Reviewed-by: Andrey Konovalov
Link: https://lore.kernel.org/r/20220610152141.2148929-4-catalin.marinas@arm.com
Signed-off-by: Will Deacon
---
 mm/page_alloc.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'mm')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f6ed240870bc..bf45a6aa407a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2361,7 +2361,7 @@ static inline bool check_new_pcp(struct page *page, unsigned int order)
 }
 #endif /* CONFIG_DEBUG_VM */
 
-static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags)
+static inline bool should_skip_kasan_unpoison(gfp_t flags)
 {
 	/* Don't skip if a software KASAN mode is enabled. */
 	if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
@@ -2373,12 +2373,10 @@ static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags)
 		return true;
 
 	/*
-	 * With hardware tag-based KASAN enabled, skip if either:
-	 *
-	 * 1. Memory tags have already been cleared via tag_clear_highpage().
-	 * 2. Skipping has been requested via __GFP_SKIP_KASAN_UNPOISON.
+	 * With hardware tag-based KASAN enabled, skip if this has been
+	 * requested via __GFP_SKIP_KASAN_UNPOISON.
 	 */
-	return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON);
+	return flags & __GFP_SKIP_KASAN_UNPOISON;
 }
 
 static inline bool should_skip_init(gfp_t flags)
@@ -2430,7 +2428,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 		/* Note that memory is already initialized by the loop above. */
 		init = false;
 	}
-	if (!should_skip_kasan_unpoison(gfp_flags, init_tags)) {
+	if (!should_skip_kasan_unpoison(gfp_flags)) {
 		/* Unpoison shadow memory or set memory tags. */
 		kasan_unpoison_pages(page, order, init);
 
--
cgit v1.2.3
From d0637c505f8a1d8c4088642f1f3e9e3b22da14f6 Mon Sep 17 00:00:00 2001
From: Barry Song
Date: Wed, 20 Jul 2022 21:37:37 +1200
Subject: arm64: enable THP_SWAP for arm64

THP_SWAP has been proven to improve the swap throughput significantly
on x86_64 according to commit bd4c82c22c367e ("mm, THP, swap: delay
splitting THP after swapped out"). As long as arm64 uses a 4K page
size, it is quite similar to x86_64 in having 2MB PMD THPs. THP_SWAP is
architecture-independent; thus, enabling it on arm64 will benefit arm64
as well.

A corner case is that MTE has an assumption that only base pages can be
swapped. We won't enable THP_SWAP for ARM64 hardware with MTE support
until MTE is reworked to coexist with THP_SWAP.

A micro-benchmark is written to measure thp swapout throughput as
below,

unsigned long long tv_to_ms(struct timeval tv)
{
	return tv.tv_sec * 1000 + tv.tv_usec / 1000;
}

main()
{
	struct timeval tv_b, tv_e;

#define SIZE 400*1024*1024
	volatile void *p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (!p) {
		perror("fail to get memory");
		exit(-1);
	}

	madvise(p, SIZE, MADV_HUGEPAGE);
	memset(p, 0x11, SIZE); /* write to get mem */

	gettimeofday(&tv_b, NULL);
	madvise(p, SIZE, MADV_PAGEOUT);
	gettimeofday(&tv_e, NULL);

	printf("swp out bandwidth: %ld bytes/ms\n",
	       SIZE/(tv_to_ms(tv_e) - tv_to_ms(tv_b)));
}

Testing is done on rk3568 64bit Quad Core Cortex-A55 platform - ROCK 3A.
thp swp throughput w/o patch: 2734 bytes/ms (mean of 10 tests)
thp swp throughput w/  patch: 3331 bytes/ms (mean of 10 tests)

Cc: "Huang, Ying"
Cc: Minchan Kim
Cc: Johannes Weiner
Cc: Hugh Dickins
Cc: Andrea Arcangeli
Cc: Steven Price
Cc: Yang Shi
Reviewed-by: Anshuman Khandual
Signed-off-by: Barry Song
Link: https://lore.kernel.org/r/20220720093737.133375-1-21cnbao@gmail.com
Signed-off-by: Will Deacon
---
 arch/arm64/Kconfig               |  1 +
 arch/arm64/include/asm/pgtable.h |  6 ++++++
 include/linux/huge_mm.h          | 12 ++++++++++++
 mm/swap_slots.c                  |  2 +-
 4 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1652a9800ebe..e1c540e80eec 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -101,6 +101,7 @@ config ARM64
 	select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANTS_NO_INSTR
+	select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0b6632f18364..78d6f6014bfb 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -45,6 +45,12 @@
 	__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static inline bool arch_thp_swp_supported(void)
+{
+	return !system_supports_mte();
+}
+#define arch_thp_swp_supported arch_thp_swp_supported
+
 /*
  * Outside of a few very special situations (e.g. hibernation), we always
  * use broadcast TLB invalidation instructions, therefore a spurious page
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index de29821231c9..4ddaf6ad73ef 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -461,4 +461,16 @@ static inline int split_folio_to_list(struct folio *folio,
 	return split_huge_page_to_list(&folio->page, list);
 }
 
+/*
+ * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
+ * limitations in the implementation like arm64 MTE can override this to
+ * false
+ */
+#ifndef arch_thp_swp_supported
+static inline bool arch_thp_swp_supported(void)
+{
+	return true;
+}
+#endif
+
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 2a65a89b5b4d..10b94d64cc25 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -307,7 +307,7 @@ swp_entry_t folio_alloc_swap(struct folio *folio)
 	entry.val = 0;
 
 	if (folio_test_large(folio)) {
-		if (IS_ENABLED(CONFIG_THP_SWAP))
+		if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported())
 			get_swap_pages(1, &entry, folio_nr_pages(folio));
 		goto out;
 	}
--
cgit v1.2.3