From abc5992b9dd0ba599f7a91d5a0f4569a78ae88ac Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Tue, 7 Jun 2022 20:50:23 +0800
Subject: mm: ioremap: Use more sensible name in ioremap_prot()

Use the more meaningful and sensible name phys_addr instead of addr in
ioremap_prot().

Suggested-by: Andrew Morton
Reviewed-by: Anshuman Khandual
Signed-off-by: Kefeng Wang
Reviewed-by: Baoquan He
Reviewed-by: Christoph Hellwig
Link: https://lore.kernel.org/r/20220610092255.32445-1-wangkefeng.wang@huawei.com
Signed-off-by: Will Deacon
---
 mm/ioremap.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'mm')

diff --git a/mm/ioremap.c b/mm/ioremap.c
index 5fe598ecd9b7..2d754b48d230 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -11,20 +11,21 @@
 #include <linux/io.h>
 #include <linux/export.h>
 
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+			   unsigned long prot)
 {
 	unsigned long offset, vaddr;
 	phys_addr_t last_addr;
 	struct vm_struct *area;
 
 	/* Disallow wrap-around or zero size */
-	last_addr = addr + size - 1;
-	if (!size || last_addr < addr)
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
 		return NULL;
 
 	/* Page-align mappings */
-	offset = addr & (~PAGE_MASK);
-	addr -= offset;
+	offset = phys_addr & (~PAGE_MASK);
+	phys_addr -= offset;
 	size = PAGE_ALIGN(size + offset);
 
 	area = get_vm_area_caller(size, VM_IOREMAP,
@@ -33,7 +34,8 @@ void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
 		return NULL;
 	vaddr = (unsigned long)area->addr;
 
-	if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) {
+	if (ioremap_page_range(vaddr, vaddr + size, phys_addr,
+			       __pgprot(prot))) {
 		free_vm_area(area);
 		return NULL;
 	}
--
cgit v1.2.3

From a14fff1c0379d0422c5ca0084b0ab86ed7bedf3e Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Tue, 7 Jun 2022 20:50:24 +0800
Subject: mm: ioremap: Setup phys_addr of struct vm_struct

Show the physical address of each ioremap in /proc/vmallocinfo.

Acked-by: Andrew Morton
Reviewed-by: Christoph Hellwig
Reviewed-by: Anshuman Khandual
Signed-off-by: Kefeng Wang
Link: https://lore.kernel.org/r/20220607125027.44946-4-wangkefeng.wang@huawei.com
Signed-off-by: Will Deacon
---
 mm/ioremap.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'mm')

diff --git a/mm/ioremap.c b/mm/ioremap.c
index 2d754b48d230..e1d008e8f87f 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -33,6 +33,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
 	if (!area)
 		return NULL;
 	vaddr = (unsigned long)area->addr;
+	area->phys_addr = phys_addr;
 
 	if (ioremap_page_range(vaddr, vaddr + size, phys_addr,
 			       __pgprot(prot))) {
--
cgit v1.2.3
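With phys_addr now recorded in the vm_struct, any mapping created through the
generic ioremap() path is reported in /proc/vmallocinfo together with its
physical address. The module below is only a hypothetical illustration of such
a caller; the 0xfe000000 device window and the module itself are made up for
the example and are not part of the patches above:

#include <linux/module.h>
#include <linux/io.h>
#include <linux/sizes.h>

static void __iomem *regs;

static int __init vmallocinfo_demo_init(void)
{
	/* Map a (made-up) 4K device register window. */
	regs = ioremap(0xfe000000, SZ_4K);
	if (!regs)
		return -ENOMEM;

	/*
	 * The region now shows up in /proc/vmallocinfo as an "ioremap"
	 * entry that also carries the physical address, thanks to
	 * area->phys_addr being filled in.
	 */
	return 0;
}

static void __exit vmallocinfo_demo_exit(void)
{
	iounmap(regs);
}

module_init(vmallocinfo_demo_init);
module_exit(vmallocinfo_demo_exit);
MODULE_LICENSE("GPL");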
From 18e780b4e6ab89a3a10f46d151971863562c1c91 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Tue, 7 Jun 2022 20:50:25 +0800
Subject: mm: ioremap: Add ioremap/iounmap_allowed()

Add special hooks for the architecture to verify the addr, size or prot
when doing ioremap() or iounmap(), which will make the generic ioremap
more useful.

ioremap_allowed() returns a bool:
  - true means continue to remap
  - false means skip remap and return directly

iounmap_allowed() returns a bool:
  - true means continue to vunmap
  - false means skip vunmap and return directly

Meanwhile, only vunmap the address when it is in the vmalloc area, as the
generic ioremap only returns vmalloc addresses.

Acked-by: Andrew Morton
Signed-off-by: Kefeng Wang
Reviewed-by: Christoph Hellwig
Reviewed-by: Baoquan He
Link: https://lore.kernel.org/r/20220607125027.44946-5-wangkefeng.wang@huawei.com
Signed-off-by: Will Deacon
---
 include/asm-generic/io.h | 26 ++++++++++++++++++++++++++
 mm/ioremap.c             | 11 ++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index b76379628a02..db5b890eaff7 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -964,6 +964,32 @@ static inline void iounmap(volatile void __iomem *addr)
 #elif defined(CONFIG_GENERIC_IOREMAP)
 #include <linux/pgtable.h>
 
+/*
+ * Arch code can implement the following two hooks when using GENERIC_IOREMAP
+ * ioremap_allowed() return a bool,
+ *   - true means continue to remap
+ *   - false means skip remap and return directly
+ * iounmap_allowed() return a bool,
+ *   - true means continue to vunmap
+ *   - false means skip vunmap and return directly
+ */
+#ifndef ioremap_allowed
+#define ioremap_allowed ioremap_allowed
+static inline bool ioremap_allowed(phys_addr_t phys_addr, size_t size,
+				   unsigned long prot)
+{
+	return true;
+}
+#endif
+
+#ifndef iounmap_allowed
+#define iounmap_allowed iounmap_allowed
+static inline bool iounmap_allowed(void *addr)
+{
+	return true;
+}
+#endif
+
 void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
 			   unsigned long prot);
 void iounmap(volatile void __iomem *addr);
diff --git a/mm/ioremap.c b/mm/ioremap.c
index e1d008e8f87f..8652426282cc 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -28,6 +28,9 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
 	phys_addr -= offset;
 	size = PAGE_ALIGN(size + offset);
 
+	if (!ioremap_allowed(phys_addr, size, prot))
+		return NULL;
+
 	area = get_vm_area_caller(size, VM_IOREMAP,
 			__builtin_return_address(0));
 	if (!area)
@@ -47,6 +50,12 @@ EXPORT_SYMBOL(ioremap_prot);
 
 void iounmap(volatile void __iomem *addr)
 {
-	vunmap((void *)((unsigned long)addr & PAGE_MASK));
+	void *vaddr = (void *)((unsigned long)addr & PAGE_MASK);
+
+	if (!iounmap_allowed(vaddr))
+		return;
+
+	if (is_vmalloc_addr(vaddr))
+		vunmap(vaddr);
 }
 EXPORT_SYMBOL(iounmap);
--
cgit v1.2.3
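To make the hook mechanism concrete, here is a hedged sketch of how an
architecture's asm/io.h could override ioremap_allowed() before pulling in
asm-generic/io.h. It is not taken from this series or from any particular
port; the policy of rejecting attempts to remap normal RAM, and the
pfn_valid() check used to express it, are assumptions made purely for
illustration:

#define ioremap_allowed ioremap_allowed
static inline bool ioremap_allowed(phys_addr_t phys_addr, size_t size,
				   unsigned long prot)
{
	/* Example policy: refuse to remap pages that are normal RAM. */
	if (pfn_valid(__phys_to_pfn(phys_addr)))
		return false;

	return true;
}

#include <asm-generic/io.h>

Returning false makes the generic ioremap_prot() bail out with NULL before any
vmalloc area is set up, and an architecture can veto iounmap() in the same way
through iounmap_allowed().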
From ed0a6d1d973e9763989b44913ae1bd2a5d5d5777 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Fri, 10 Jun 2022 16:21:38 +0100
Subject: mm: kasan: Ensure the tags are visible before the tag in page->flags

__kasan_unpoison_pages() colours the memory with a random tag and stores
it in page->flags in order to re-create the tagged pointer via
page_to_virt() later. When the tag from the page->flags is read, ensure
that the in-memory tags are already visible by re-ordering the
page_kasan_tag_set() after kasan_unpoison(). The former already has
barriers in place through try_cmpxchg(). On the reader side, the order
is ensured by the address dependency between page->flags and the memory
access.

Signed-off-by: Catalin Marinas
Reviewed-by: Andrey Konovalov
Cc: Andrey Ryabinin
Cc: Vincenzo Frascino
Reviewed-by: Vincenzo Frascino
Link: https://lore.kernel.org/r/20220610152141.2148929-2-catalin.marinas@arm.com
Signed-off-by: Will Deacon
---
 mm/kasan/common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index c40c0e7b3b5f..78be2beb7453 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -108,9 +108,10 @@ void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init)
 		return;
 
 	tag = kasan_random_tag();
+	kasan_unpoison(set_tag(page_address(page), tag),
+		       PAGE_SIZE << order, init);
 	for (i = 0; i < (1 << order); i++)
 		page_kasan_tag_set(page + i, tag);
-	kasan_unpoison(page_address(page), PAGE_SIZE << order, init);
 }
 
 void __kasan_poison_pages(struct page *page, unsigned int order, bool init)
--
cgit v1.2.3

From 70c248aca9e7efa85a6664d5ab56c17c326c958f Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Fri, 10 Jun 2022 16:21:39 +0100
Subject: mm: kasan: Skip unpoisoning of user pages

Commit c275c5c6d50a ("kasan: disable freed user page poisoning with HW
tags") added __GFP_SKIP_KASAN_POISON to GFP_HIGHUSER_MOVABLE. A similar
argument can be made about unpoisoning, so also add
__GFP_SKIP_KASAN_UNPOISON to user pages. To ensure the user page is
still accessible via page_address() without a kasan fault, reset the
page->flags tag.

With the above changes, there is no need for the arm64
tag_clear_highpage() to reset the page->flags tag.

Signed-off-by: Catalin Marinas
Cc: Andrey Ryabinin
Cc: Andrey Konovalov
Cc: Peter Collingbourne
Cc: Vincenzo Frascino
Reviewed-by: Vincenzo Frascino
Reviewed-by: Andrey Konovalov
Link: https://lore.kernel.org/r/20220610152141.2148929-3-catalin.marinas@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/mm/fault.c | 1 -
 include/linux/gfp.h   | 2 +-
 mm/page_alloc.c       | 7 +++++--
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'mm')

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c5e11768e5c1..cdf3ffa0c223 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -927,6 +927,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
 void tag_clear_highpage(struct page *page)
 {
 	mte_zero_clear_page_tags(page_address(page));
-	page_kasan_tag_reset(page);
 	set_bit(PG_mte_tagged, &page->flags);
 }
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2d2ccae933c2..0ace7759acd2 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -348,7 +348,7 @@ struct vm_area_struct;
 #define GFP_DMA32	__GFP_DMA32
 #define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE | \
-			 __GFP_SKIP_KASAN_POISON)
+			 __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON)
 #define GFP_TRANSHUGE_LIGHT	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 			 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE	(GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e008a3df0485..f6ed240870bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2397,6 +2397,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 	bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) &&
 			!should_skip_init(gfp_flags);
 	bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS);
+	int i;
 
 	set_page_private(page, 0);
 	set_page_refcounted(page);
@@ -2422,8 +2423,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 	 * should be initialized as well).
 	 */
 	if (init_tags) {
-		int i;
-
 		/* Initialize both memory and tags. */
 		for (i = 0; i != 1 << order; ++i)
 			tag_clear_highpage(page + i);
@@ -2438,6 +2437,10 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 		/* Note that memory is already initialized by KASAN. */
 		if (kasan_has_integrated_init())
 			init = false;
+	} else {
+		/* Ensure page_address() dereferencing does not fault. */
+		for (i = 0; i != 1 << order; ++i)
+			page_kasan_tag_reset(page + i);
 	}
 	/* If memory is still not initialized, do it now. */
 	if (init)
--
cgit v1.2.3
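The reader-side address dependency mentioned in the tag-visibility patch above
comes from the way a tagged pointer is rebuilt for a page. The helper below is
only an illustrative sketch, loosely modelled on how arm64 folds the tag stored
in page->flags back into page_to_virt(); tagged_page_address() is a made-up
name and __tag_set() is an arm64-specific primitive, so treat this as an
assumption-laden illustration rather than kernel code:

static inline void *tagged_page_address(struct page *page)
{
	void *addr = lowmem_page_address(page);	/* untagged linear address */
	u8 tag = page_kasan_tag(page);		/* tag recorded in page->flags */

	/*
	 * The eventual dereference depends on the value loaded from
	 * page->flags (it becomes part of the pointer), so the reader is
	 * ordered by an address dependency for free; only the writer in
	 * __kasan_unpoison_pages() needed the explicit reordering above.
	 */
	return (void *)__tag_set(addr, tag);
}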
From 6d05141a393071e104bf5be5ad4d0c79c6dff343 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Fri, 10 Jun 2022 16:21:40 +0100
Subject: mm: kasan: Skip page unpoisoning only if __GFP_SKIP_KASAN_UNPOISON

Currently post_alloc_hook() skips the kasan unpoisoning if the tags will
be zeroed (__GFP_ZEROTAGS) or __GFP_SKIP_KASAN_UNPOISON is passed. Since
__GFP_ZEROTAGS is now accompanied by __GFP_SKIP_KASAN_UNPOISON, remove
the extra check.

Signed-off-by: Catalin Marinas
Cc: Andrey Ryabinin
Cc: Andrey Konovalov
Cc: Peter Collingbourne
Cc: Vincenzo Frascino
Reviewed-by: Vincenzo Frascino
Reviewed-by: Andrey Konovalov
Link: https://lore.kernel.org/r/20220610152141.2148929-4-catalin.marinas@arm.com
Signed-off-by: Will Deacon
---
 mm/page_alloc.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'mm')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f6ed240870bc..bf45a6aa407a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2361,7 +2361,7 @@ static inline bool check_new_pcp(struct page *page, unsigned int order)
 }
 #endif /* CONFIG_DEBUG_VM */
 
-static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags)
+static inline bool should_skip_kasan_unpoison(gfp_t flags)
 {
 	/* Don't skip if a software KASAN mode is enabled. */
 	if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
@@ -2373,12 +2373,10 @@ static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags)
 		return true;
 
 	/*
-	 * With hardware tag-based KASAN enabled, skip if either:
-	 *
-	 * 1. Memory tags have already been cleared via tag_clear_highpage().
-	 * 2. Skipping has been requested via __GFP_SKIP_KASAN_UNPOISON.
+	 * With hardware tag-based KASAN enabled, skip if this has been
+	 * requested via __GFP_SKIP_KASAN_UNPOISON.
 	 */
-	return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON);
+	return flags & __GFP_SKIP_KASAN_UNPOISON;
 }
 
 static inline bool should_skip_init(gfp_t flags)
@@ -2430,7 +2428,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 		/* Note that memory is already initialized by the loop above. */
 		init = false;
 	}
-	if (!should_skip_kasan_unpoison(gfp_flags, init_tags)) {
+	if (!should_skip_kasan_unpoison(gfp_flags)) {
 		/* Unpoison shadow memory or set memory tags. */
 		kasan_unpoison_pages(page, order, init);
 
--
cgit v1.2.3
From d0637c505f8a1d8c4088642f1f3e9e3b22da14f6 Mon Sep 17 00:00:00 2001
From: Barry Song
Date: Wed, 20 Jul 2022 21:37:37 +1200
Subject: arm64: enable THP_SWAP for arm64

THP_SWAP has been proven to improve the swap throughput significantly
on x86_64 according to commit bd4c82c22c367e ("mm, THP, swap: delay
splitting THP after swapped out"). As long as arm64 uses a 4K page
size, it is quite similar to x86_64 in having 2MB PMD THPs. THP_SWAP is
architecture-independent; thus, enabling it on arm64 will benefit arm64
as well.

A corner case is that MTE has an assumption that only base pages can be
swapped. We won't enable THP_SWAP for ARM64 hardware with MTE support
until MTE is reworked to coexist with THP_SWAP.

A micro-benchmark is written to measure thp swapout throughput as
below,

unsigned long long tv_to_ms(struct timeval tv)
{
	return tv.tv_sec * 1000 + tv.tv_usec / 1000;
}

main()
{
	struct timeval tv_b, tv_e;

#define SIZE 400*1024*1024
	volatile void *p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (!p) {
		perror("fail to get memory");
		exit(-1);
	}

	madvise(p, SIZE, MADV_HUGEPAGE);
	memset(p, 0x11, SIZE); /* write to get mem */

	gettimeofday(&tv_b, NULL);
	madvise(p, SIZE, MADV_PAGEOUT);
	gettimeofday(&tv_e, NULL);

	printf("swp out bandwidth: %ld bytes/ms\n",
	       SIZE/(tv_to_ms(tv_e) - tv_to_ms(tv_b)));
}

Testing is done on rk3568 64bit Quad Core Cortex-A55 platform - ROCK 3A.
thp swp throughput w/o patch: 2734 bytes/ms (mean of 10 tests)
thp swp throughput w/  patch: 3331 bytes/ms (mean of 10 tests)

Cc: "Huang, Ying"
Cc: Minchan Kim
Cc: Johannes Weiner
Cc: Hugh Dickins
Cc: Andrea Arcangeli
Cc: Steven Price
Cc: Yang Shi
Reviewed-by: Anshuman Khandual
Signed-off-by: Barry Song
Link: https://lore.kernel.org/r/20220720093737.133375-1-21cnbao@gmail.com
Signed-off-by: Will Deacon
---
 arch/arm64/Kconfig               |  1 +
 arch/arm64/include/asm/pgtable.h |  6 ++++++
 include/linux/huge_mm.h          | 12 ++++++++++++
 mm/swap_slots.c                  |  2 +-
 4 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1652a9800ebe..e1c540e80eec 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -101,6 +101,7 @@ config ARM64
 	select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANTS_NO_INSTR
+	select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0b6632f18364..78d6f6014bfb 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -45,6 +45,12 @@
 	__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static inline bool arch_thp_swp_supported(void)
+{
+	return !system_supports_mte();
+}
+#define arch_thp_swp_supported arch_thp_swp_supported
+
 /*
  * Outside of a few very special situations (e.g. hibernation), we always
  * use broadcast TLB invalidation instructions, therefore a spurious page
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index de29821231c9..4ddaf6ad73ef 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -461,4 +461,16 @@ static inline int split_folio_to_list(struct folio *folio,
 	return split_huge_page_to_list(&folio->page, list);
 }
 
+/*
+ * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
+ * limitations in the implementation like arm64 MTE can override this to
+ * false
+ */
+#ifndef arch_thp_swp_supported
+static inline bool arch_thp_swp_supported(void)
+{
+	return true;
+}
+#endif
+
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 2a65a89b5b4d..10b94d64cc25 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -307,7 +307,7 @@ swp_entry_t folio_alloc_swap(struct folio *folio)
 	entry.val = 0;
 
 	if (folio_test_large(folio)) {
-		if (IS_ENABLED(CONFIG_THP_SWAP))
+		if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported())
 			get_swap_pages(1, &entry, folio_nr_pages(folio));
 		goto out;
 	}
--
cgit v1.2.3