From 06dbbb4d5f7126b6307ab807cbf04ecfc459b933 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:42 +0530 Subject: powerpc/mm/fault: Fix kfence page fault reporting copy_from_kernel_nofault() can be called when doing read of /proc/kcore. /proc/kcore can have some unmapped kfence objects which when read via copy_from_kernel_nofault() can cause page faults. Since *_nofault() functions define their own fixup table for handling fault, use that instead of asking kfence to handle such faults. Hence we search the exception tables for the nip which generated the fault. If there is an entry then we let the fixup table handler handle the page fault by returning an error from within ___do_page_fault(). This can be easily triggered if someone tries to do dd from /proc/kcore. eg. dd if=/proc/kcore of=/dev/null bs=1M Some example false negatives: =============================== BUG: KFENCE: invalid read in copy_from_kernel_nofault+0x9c/0x1a0 Invalid read at 0xc0000000fdff0000: copy_from_kernel_nofault+0x9c/0x1a0 0xc00000000665f950 read_kcore_iter+0x57c/0xa04 proc_reg_read_iter+0xe4/0x16c vfs_read+0x320/0x3ec ksys_read+0x90/0x154 system_call_exception+0x120/0x310 system_call_vectored_common+0x15c/0x2ec BUG: KFENCE: use-after-free read in copy_from_kernel_nofault+0x9c/0x1a0 Use-after-free read at 0xc0000000fe050000 (in kfence-#2): copy_from_kernel_nofault+0x9c/0x1a0 0xc00000000665f950 read_kcore_iter+0x57c/0xa04 proc_reg_read_iter+0xe4/0x16c vfs_read+0x320/0x3ec ksys_read+0x90/0x154 system_call_exception+0x120/0x310 system_call_vectored_common+0x15c/0x2ec Fixes: 90cbac0e995d ("powerpc: Enable KFENCE for PPC32") Suggested-by: Christophe Leroy Reported-by: Disha Goel Signed-off-by: Ritesh Harjani (IBM) Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/a411788081d50e3b136c6270471e35aba3dfafa3.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/mm/fault.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 81c77ddce2e3..c156fe0d53c3 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -439,10 +439,16 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, /* * The kernel should never take an execute fault nor should it * take a page fault to a kernel address or a page fault to a user - * address outside of dedicated places + * address outside of dedicated places. + * + * Rather than kfence directly reporting false negatives, search whether + * the NIP belongs to the fixup table for cases where fault could come + * from functions like copy_from_kernel_nofault(). */ if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) { - if (kfence_handle_page_fault(address, is_write, regs)) + if (is_kfence_address((void *)address) && + !search_exception_tables(instruction_pointer(regs)) && + kfence_handle_page_fault(address, is_write, regs)) return 0; return SIGSEGV; -- cgit v1.2.3 From 47780e7eae783674b557cc16cf6852c0ce9dbbe9 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:43 +0530 Subject: book3s64/hash: Remove kfence support temporarily Kfence on book3s Hash on pseries is anyways broken. It fails to boot due to RMA size limitation. That is because, kfence with Hash uses debug_pagealloc infrastructure. debug_pagealloc allocates linear map for entire dram size instead of just kfence relevant objects. 
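To put rough numbers on that cost (a back-of-the-envelope sketch; it assumes 64K pages, i.e. PAGE_SHIFT = 16, and one byte of slot state per page, as linear_map_hash_slots uses):

	16TB >> PAGE_SHIFT = 2^44 / 2^16 = 2^28 slots
	2^28 slots * 1 byte = 256MB of slot array, which must live in the RMA region
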
This means 16TB of DRAM requires (16TB >> PAGE_SHIFT) bytes of slot array, i.e. 256MB, which is half of the RMA region on P8. The crash kernel reserves 256MB, and we also need 2048 * 16KB * 3 for emergency stacks and some more for paca allocations. That means there is not enough memory to reserve the full linear map in the RMA region if the DRAM size is too big (>= 16TB). (The issue is seen above 8TB with a 256MB crash kernel reservation.) Kfence does not require a linear memory map for the entire DRAM; it only needs one for the kfence objects. So this patch temporarily removes the kfence functionality, since the debug_pagealloc code needs some refactoring first. Kfence support on Hash will be brought back in later patches. Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/1761bc39674473c8878dedca15e0d9a0d3a1b528.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/include/asm/kfence.h | 5 +++++ arch/powerpc/mm/book3s64/hash_utils.c | 16 +++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h index fab124ada1c7..f3a9476a71b3 100644 --- a/arch/powerpc/include/asm/kfence.h +++ b/arch/powerpc/include/asm/kfence.h @@ -10,6 +10,7 @@ #include #include +#include #ifdef CONFIG_PPC64_ELF_ABI_V1 #define ARCH_FUNC_PREFIX "." #endif @@ -25,6 +26,10 @@ static inline void disable_kfence(void) static inline bool arch_kfence_init_pool(void) { +#ifdef CONFIG_PPC64 + if (!radix_enabled()) + return false; +#endif return !kfence_disabled; } #endif diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index e1eadd03f133..296bb74dbf40 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -431,7 +431,7 @@ repeat: break; cond_resched(); - if (debug_pagealloc_enabled_or_kfence() && + if (debug_pagealloc_enabled() && (paddr >> PAGE_SHIFT) < linear_map_hash_count) linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80; } @@ -814,7 +814,7 @@ static void __init htab_init_page_sizes(void) bool aligned = true; init_hpte_page_sizes(); - if (!debug_pagealloc_enabled_or_kfence()) { + if (!debug_pagealloc_enabled()) { /* * Pick a size for the linear mapping.
Currently, we only * support 16M, 1M and 4K which is the default @@ -1134,7 +1134,7 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); - if (debug_pagealloc_enabled_or_kfence()) { + if (debug_pagealloc_enabled()) { linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; linear_map_hash_slots = memblock_alloc_try_nid( linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, @@ -2120,7 +2120,7 @@ void hpt_do_stress(unsigned long ea, unsigned long hpte_group) } } -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +#ifdef CONFIG_DEBUG_PAGEALLOC static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) @@ -2194,7 +2194,13 @@ int hash__kernel_map_pages(struct page *page, int numpages, int enable) local_irq_restore(flags); return 0; } -#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */ +#else /* CONFIG_DEBUG_PAGEALLOC */ +int hash__kernel_map_pages(struct page *page, int numpages, + int enable) +{ + return 0; +} +#endif /* CONFIG_DEBUG_PAGEALLOC */ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) -- cgit v1.2.3 From 8b1085523fd22bf29a097d53c669a7dcf017d5ea Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:44 +0530 Subject: book3s64/hash: Refactor kernel linear map related calls This just brings all linear map related handling at one place instead of having those functions scattered in hash_utils file. Makes it easy for review. No functionality changes in this patch. Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/56c610310aa50b5417976a39c5f15b78bc76c764.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 164 +++++++++++++++++----------------- 1 file changed, 82 insertions(+), 82 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 296bb74dbf40..82151fff9648 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -273,6 +273,88 @@ void hash__tlbiel_all(unsigned int action) WARN(1, "%s called on pre-POWER7 CPU\n", __func__); } +#ifdef CONFIG_DEBUG_PAGEALLOC +static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); + +static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) +{ + unsigned long hash; + unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); + unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); + unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); + long ret; + + hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); + + /* Don't create HPTE entries for bad address */ + if (!vsid) + return; + + if (linear_map_hash_slots[lmi] & 0x80) + return; + + ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode, + HPTE_V_BOLTED, + mmu_linear_psize, mmu_kernel_ssize); + + BUG_ON (ret < 0); + raw_spin_lock(&linear_map_hash_lock); + BUG_ON(linear_map_hash_slots[lmi] & 0x80); + linear_map_hash_slots[lmi] = ret | 0x80; + raw_spin_unlock(&linear_map_hash_lock); +} + +static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) +{ + unsigned long hash, hidx, slot; + unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); + unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); + + hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); + raw_spin_lock(&linear_map_hash_lock); + if (!(linear_map_hash_slots[lmi] & 0x80)) { + 
raw_spin_unlock(&linear_map_hash_lock); + return; + } + hidx = linear_map_hash_slots[lmi] & 0x7f; + linear_map_hash_slots[lmi] = 0; + raw_spin_unlock(&linear_map_hash_lock); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize, + mmu_linear_psize, + mmu_kernel_ssize, 0); +} + +int hash__kernel_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long flags, vaddr, lmi; + int i; + + local_irq_save(flags); + for (i = 0; i < numpages; i++, page++) { + vaddr = (unsigned long)page_address(page); + lmi = __pa(vaddr) >> PAGE_SHIFT; + if (lmi >= linear_map_hash_count) + continue; + if (enable) + kernel_map_linear_page(vaddr, lmi); + else + kernel_unmap_linear_page(vaddr, lmi); + } + local_irq_restore(flags); + return 0; +} +#else /* CONFIG_DEBUG_PAGEALLOC */ +int hash__kernel_map_pages(struct page *page, int numpages, + int enable) +{ + return 0; +} +#endif /* CONFIG_DEBUG_PAGEALLOC */ + /* * 'R' and 'C' update notes: * - Under pHyp or KVM, the updatepp path will not set C, thus it *will* @@ -2120,88 +2202,6 @@ void hpt_do_stress(unsigned long ea, unsigned long hpte_group) } } -#ifdef CONFIG_DEBUG_PAGEALLOC -static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); - -static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) -{ - unsigned long hash; - unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); - unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); - long ret; - - hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); - - /* Don't create HPTE entries for bad address */ - if (!vsid) - return; - - if (linear_map_hash_slots[lmi] & 0x80) - return; - - ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode, - HPTE_V_BOLTED, - mmu_linear_psize, mmu_kernel_ssize); - - BUG_ON (ret < 0); - raw_spin_lock(&linear_map_hash_lock); - BUG_ON(linear_map_hash_slots[lmi] & 0x80); - linear_map_hash_slots[lmi] = ret | 0x80; - raw_spin_unlock(&linear_map_hash_lock); -} - -static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) -{ - unsigned long hash, hidx, slot; - unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); - unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - - hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); - raw_spin_lock(&linear_map_hash_lock); - if (!(linear_map_hash_slots[lmi] & 0x80)) { - raw_spin_unlock(&linear_map_hash_lock); - return; - } - hidx = linear_map_hash_slots[lmi] & 0x7f; - linear_map_hash_slots[lmi] = 0; - raw_spin_unlock(&linear_map_hash_lock); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize, - mmu_linear_psize, - mmu_kernel_ssize, 0); -} - -int hash__kernel_map_pages(struct page *page, int numpages, int enable) -{ - unsigned long flags, vaddr, lmi; - int i; - - local_irq_save(flags); - for (i = 0; i < numpages; i++, page++) { - vaddr = (unsigned long)page_address(page); - lmi = __pa(vaddr) >> PAGE_SHIFT; - if (lmi >= linear_map_hash_count) - continue; - if (enable) - kernel_map_linear_page(vaddr, lmi); - else - kernel_unmap_linear_page(vaddr, lmi); - } - local_irq_restore(flags); - return 0; -} -#else /* CONFIG_DEBUG_PAGEALLOC */ -int hash__kernel_map_pages(struct page *page, int numpages, - int enable) -{ - return 0; 
-} -#endif /* CONFIG_DEBUG_PAGEALLOC */ - void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { -- cgit v1.2.3 From cc5734481b3c24ddee1551f9732d743453bca010 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:45 +0530 Subject: book3s64/hash: Add hash_debug_pagealloc_add_slot() function This adds a hash_debug_pagealloc_add_slot() function instead of open coding it in htab_bolt_mapping(). This is required since we will be separating the kfence functionality to not depend upon debug_pagealloc. No functionality change in this patch. Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/026f0aaa1dddd89154dc8d20ceccfca4f63ccf79.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 82151fff9648..6e3860224351 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -328,6 +328,14 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) mmu_kernel_ssize, 0); } +static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) +{ + if (!debug_pagealloc_enabled()) + return; + if ((paddr >> PAGE_SHIFT) < linear_map_hash_count) + linear_map_hash_slots[paddr >> PAGE_SHIFT] = slot | 0x80; +} + int hash__kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long flags, vaddr, lmi; @@ -353,6 +361,7 @@ int hash__kernel_map_pages(struct page *page, int numpages, { return 0; } +static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) {} #endif /* CONFIG_DEBUG_PAGEALLOC */ /* @@ -513,9 +522,7 @@ repeat: break; cond_resched(); - if (debug_pagealloc_enabled() && - (paddr >> PAGE_SHIFT) < linear_map_hash_count) - linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80; + hash_debug_pagealloc_add_slot(paddr, ret); } return ret < 0 ? ret : 0; } -- cgit v1.2.3 From ff8631cdc23ad42f662a8510c57aeb0555ac3d5f Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:46 +0530 Subject: book3s64/hash: Add hash_debug_pagealloc_alloc_slots() function This adds a hash_debug_pagealloc_alloc_slots() function instead of open coding it in htab_initialize(). This is required since we will be separating the kfence functionality to not depend upon debug_pagealloc. Now that everything required for debug_pagealloc is under an #ifdef config, bring the linear_map_hash_slots and linear_map_hash_count variables under the same config too.
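A minimal userspace rendering of the stub pattern these two patches rely on (toy code, not the kernel's: CONFIG_DEBUG_PAGEALLOC is force-defined here just so the example compiles standalone; the helper name and the 0x80 "slot valid" flag mirror the patch):

#include <stdio.h>

#define CONFIG_DEBUG_PAGEALLOC 1	/* comment out to exercise the stub path */

#ifdef CONFIG_DEBUG_PAGEALLOC
static unsigned char slots[16];

static void hash_debug_pagealloc_add_slot(unsigned long idx, int slot)
{
	if (idx < sizeof(slots))
		slots[idx] = slot | 0x80;	/* 0x80 marks the slot as valid */
}
#else
/* empty stub: call sites compile away without any #ifdef of their own */
static inline void hash_debug_pagealloc_add_slot(unsigned long idx, int slot) {}
#endif

int main(void)
{
	hash_debug_pagealloc_add_slot(3, 5);	/* caller needs no config check */
#ifdef CONFIG_DEBUG_PAGEALLOC
	printf("slot 3 = 0x%x\n", slots[3]);	/* prints 0x85 */
#endif
	return 0;
}
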
Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/d1d5aabe1e4c693a983e59ccf3de08e3c28c5161.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 6e3860224351..030c120d1399 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -123,8 +123,6 @@ EXPORT_SYMBOL_GPL(mmu_slb_size); #ifdef CONFIG_PPC_64K_PAGES int mmu_ci_restrictions; #endif -static u8 *linear_map_hash_slots; -static unsigned long linear_map_hash_count; struct mmu_hash_ops mmu_hash_ops __ro_after_init; EXPORT_SYMBOL(mmu_hash_ops); @@ -274,6 +272,8 @@ void hash__tlbiel_all(unsigned int action) } #ifdef CONFIG_DEBUG_PAGEALLOC +static u8 *linear_map_hash_slots; +static unsigned long linear_map_hash_count; static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) @@ -328,6 +328,19 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) mmu_kernel_ssize, 0); } +static inline void hash_debug_pagealloc_alloc_slots(void) +{ + if (!debug_pagealloc_enabled()) + return; + linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; + linear_map_hash_slots = memblock_alloc_try_nid( + linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, + ppc64_rma_size, NUMA_NO_NODE); + if (!linear_map_hash_slots) + panic("%s: Failed to allocate %lu bytes max_addr=%pa\n", + __func__, linear_map_hash_count, &ppc64_rma_size); +} + static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) { if (!debug_pagealloc_enabled()) @@ -361,6 +374,7 @@ int hash__kernel_map_pages(struct page *page, int numpages, { return 0; } +static inline void hash_debug_pagealloc_alloc_slots(void) {} static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) {} #endif /* CONFIG_DEBUG_PAGEALLOC */ @@ -1223,16 +1237,7 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); - if (debug_pagealloc_enabled()) { - linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; - linear_map_hash_slots = memblock_alloc_try_nid( - linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, - ppc64_rma_size, NUMA_NO_NODE); - if (!linear_map_hash_slots) - panic("%s: Failed to allocate %lu bytes max_addr=%pa\n", - __func__, linear_map_hash_count, &ppc64_rma_size); - } - + hash_debug_pagealloc_alloc_slots(); /* create bolted the linear mapping in the hash table */ for_each_mem_range(i, &base, &end) { size = end - base; -- cgit v1.2.3 From 43919f4154bebbef0a0d3004f1b022643d21082c Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:47 +0530 Subject: book3s64/hash: Refactor hash__kernel_map_pages() function This refactors the hash__kernel_map_pages() function to call hash_debug_pagealloc_map_pages(). This will be useful when we add kfence support. No functionality changes in this patch.
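For a sense of where this indirection is headed, here is a compressed userspace sketch of the dispatch that the later "Add kfence functionality" patch installs behind hash__kernel_map_pages(). Only the dispatch shape is taken from the series; the pool, the address check, and the helper bodies are invented stand-ins:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static char fake_kfence_pool[4096];	/* stand-in for __kfence_pool */

static bool is_kfence_address(const void *addr)
{
	uintptr_t a = (uintptr_t)addr, p = (uintptr_t)fake_kfence_pool;

	return a >= p && a < p + sizeof(fake_kfence_pool);
}

static int hash_kfence_map_pages(void *vaddr, int numpages, int enable)
{
	printf("kfence path: %d page(s), enable=%d\n", numpages, enable);
	return 0;
}

static int hash_debug_pagealloc_map_pages(void *vaddr, int numpages, int enable)
{
	printf("debug_pagealloc path: %d page(s), enable=%d\n", numpages, enable);
	return 0;
}

static int hash__kernel_map_pages(void *vaddr, int numpages, int enable)
{
	if (is_kfence_address(vaddr))
		return hash_kfence_map_pages(vaddr, numpages, enable);
	return hash_debug_pagealloc_map_pages(vaddr, numpages, enable);
}

int main(void)
{
	int outside;

	hash__kernel_map_pages(fake_kfence_pool, 1, 1);	/* takes the kfence path */
	hash__kernel_map_pages(&outside, 1, 0);	/* takes the debug_pagealloc path */
	return 0;
}
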
Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/0cb8ddcccdcf61ea06ab4d92aacd770c16cc0f2c.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 030c120d1399..da9b089c8e8b 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -349,7 +349,8 @@ static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) linear_map_hash_slots[paddr >> PAGE_SHIFT] = slot | 0x80; } -int hash__kernel_map_pages(struct page *page, int numpages, int enable) +static int hash_debug_pagealloc_map_pages(struct page *page, int numpages, + int enable) { unsigned long flags, vaddr, lmi; int i; @@ -368,6 +369,12 @@ int hash__kernel_map_pages(struct page *page, int numpages, int enable) local_irq_restore(flags); return 0; } + +int hash__kernel_map_pages(struct page *page, int numpages, int enable) +{ + return hash_debug_pagealloc_map_pages(page, numpages, enable); +} + #else /* CONFIG_DEBUG_PAGEALLOC */ int hash__kernel_map_pages(struct page *page, int numpages, int enable) -- cgit v1.2.3 From 685d942d00d8b0edf8431869028e23eac6cc4bab Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:48 +0530 Subject: book3s64/hash: Make kernel_map_linear_page() generic Currently the kernel_map_linear_page() function assumes it is working on the linear_map_hash_slots array. But since later patches need a separate linear map array for kfence, make kernel_map_linear_page() take the linear map array and lock as function arguments. This is needed to separate out kfence from the debug_pagealloc infrastructure.
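The essence of the change, as a minimal standalone sketch: the helper is parameterized over a (slot array, lock) pair, so debug_pagealloc and kfence can later keep separate state while sharing one implementation. pthread mutexes stand in for raw_spinlock_t, and only the 0x80 "mapped" flag is taken from the patch; the rest is invented scaffolding:

#include <pthread.h>
#include <stdio.h>

/* one implementation, handed whichever state it should operate on */
static void map_page(unsigned long idx, unsigned char *slots, pthread_mutex_t *lock)
{
	pthread_mutex_lock(lock);
	slots[idx] |= 0x80;	/* mark the page mapped */
	pthread_mutex_unlock(lock);
}

static unsigned char dp_slots[8], kf_slots[8];
static pthread_mutex_t dp_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t kf_lock = PTHREAD_MUTEX_INITIALIZER;

int main(void)
{
	map_page(1, dp_slots, &dp_lock);	/* debug_pagealloc state */
	map_page(1, kf_slots, &kf_lock);	/* kfence state, same helper */
	printf("0x%x 0x%x\n", dp_slots[1], kf_slots[1]);	/* 0x80 0x80 */
	return 0;
}
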
Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/5b67df7b29e68d7c78d6fc1f42d41137299bac6b.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 47 +++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index da9b089c8e8b..cc2eaa97982c 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -272,11 +272,8 @@ void hash__tlbiel_all(unsigned int action) } #ifdef CONFIG_DEBUG_PAGEALLOC -static u8 *linear_map_hash_slots; -static unsigned long linear_map_hash_count; -static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); - -static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) +static void kernel_map_linear_page(unsigned long vaddr, unsigned long idx, + u8 *slots, raw_spinlock_t *lock) { unsigned long hash; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); @@ -290,7 +287,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) if (!vsid) return; - if (linear_map_hash_slots[lmi] & 0x80) + if (slots[idx] & 0x80) return; ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode, @@ -298,36 +295,40 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) mmu_linear_psize, mmu_kernel_ssize); BUG_ON (ret < 0); - raw_spin_lock(&linear_map_hash_lock); - BUG_ON(linear_map_hash_slots[lmi] & 0x80); - linear_map_hash_slots[lmi] = ret | 0x80; - raw_spin_unlock(&linear_map_hash_lock); + raw_spin_lock(lock); + BUG_ON(slots[idx] & 0x80); + slots[idx] = ret | 0x80; + raw_spin_unlock(lock); } -static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) +static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long idx, + u8 *slots, raw_spinlock_t *lock) { - unsigned long hash, hidx, slot; + unsigned long hash, hslot, slot; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); - raw_spin_lock(&linear_map_hash_lock); - if (!(linear_map_hash_slots[lmi] & 0x80)) { - raw_spin_unlock(&linear_map_hash_lock); + raw_spin_lock(lock); + if (!(slots[idx] & 0x80)) { + raw_spin_unlock(lock); return; } - hidx = linear_map_hash_slots[lmi] & 0x7f; - linear_map_hash_slots[lmi] = 0; - raw_spin_unlock(&linear_map_hash_lock); - if (hidx & _PTEIDX_SECONDARY) + hslot = slots[idx] & 0x7f; + slots[idx] = 0; + raw_spin_unlock(lock); + if (hslot & _PTEIDX_SECONDARY) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; + slot += hslot & _PTEIDX_GROUP_IX; mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize, mmu_kernel_ssize, 0); } +static u8 *linear_map_hash_slots; +static unsigned long linear_map_hash_count; +static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); static inline void hash_debug_pagealloc_alloc_slots(void) { if (!debug_pagealloc_enabled()) @@ -362,9 +363,11 @@ static int hash_debug_pagealloc_map_pages(struct page *page, int numpages, if (lmi >= linear_map_hash_count) continue; if (enable) - kernel_map_linear_page(vaddr, lmi); + kernel_map_linear_page(vaddr, lmi, + linear_map_hash_slots, &linear_map_hash_lock); else - kernel_unmap_linear_page(vaddr, lmi); + kernel_unmap_linear_page(vaddr, lmi, + linear_map_hash_slots, &linear_map_hash_lock); } local_irq_restore(flags); return 0; -- 
cgit v1.2.3 From 47dd2e63d42a7a1b0a9c374d3a236f58b97c19e6 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:49 +0530 Subject: book3s64/hash: Disable debug_pagealloc if it requires more memory Cap the size of the linear map to be allocated in the RMA region at ppc64_rma_size / 4. If debug_pagealloc requires more memory than that, do not allocate any memory and disable debug_pagealloc. Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/e1ef66f32a1fe63bcbb89d5c11d86c65beef5ded.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index cc2eaa97982c..cffbb6499ac4 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -331,9 +331,19 @@ static unsigned long linear_map_hash_count; static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); static inline void hash_debug_pagealloc_alloc_slots(void) { + unsigned long max_hash_count = ppc64_rma_size / 4; + if (!debug_pagealloc_enabled()) return; linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; + if (unlikely(linear_map_hash_count > max_hash_count)) { + pr_info("linear map size (%llu) greater than 4 times RMA region (%llu). Disabling debug_pagealloc\n", + ((u64)linear_map_hash_count << PAGE_SHIFT), + ppc64_rma_size); + linear_map_hash_count = 0; + return; + } + linear_map_hash_slots = memblock_alloc_try_nid( linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, ppc64_rma_size, NUMA_NO_NODE); @@ -344,7 +354,7 @@ static inline void hash_debug_pagealloc_alloc_slots(void) static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) { - if (!debug_pagealloc_enabled()) + if (!debug_pagealloc_enabled() || !linear_map_hash_count) return; if ((paddr >> PAGE_SHIFT) < linear_map_hash_count) linear_map_hash_slots[paddr >> PAGE_SHIFT] = slot | 0x80; @@ -356,6 +366,9 @@ static int hash_debug_pagealloc_map_pages(struct page *page, int numpages, unsigned long flags, vaddr, lmi; int i; + if (!debug_pagealloc_enabled() || !linear_map_hash_count) + return 0; + local_irq_save(flags); for (i = 0; i < numpages; i++, page++) { vaddr = (unsigned long)page_address(page); -- cgit v1.2.3 From 8fec58f503b296af87ffca3898965e3054f2b616 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:50 +0530 Subject: book3s64/hash: Add kfence functionality Now that the linear map functionality of debug_pagealloc has been made generic, enable kfence to use this generic infrastructure. 1. Define kfence-related linear map variables. - u8 *linear_map_kf_hash_slots; - unsigned long linear_map_kf_hash_count; - DEFINE_RAW_SPINLOCK(linear_map_kf_hash_lock); 2. The linear map size allocated in the RMA region is quite small: (KFENCE_POOL_SIZE >> PAGE_SHIFT), which is 512 bytes by default. 3. kfence pool memory is reserved using memblock_phys_alloc_range(), which can come from anywhere. (default 255 objects => ((1+255) * 2) << PAGE_SHIFT = 32MB) 4. The hash slot information for the kfence memory gets added to the linear map in hash_linear_map_add_slot() (which also adds it for debug_pagealloc). A worked sizing sketch for these defaults follows below.
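A worked check of the default numbers in points 2 and 3 above (a sketch only: it assumes 64K pages, i.e. PAGE_SHIFT = 16, the default CONFIG_KFENCE_NUM_OBJECTS of 255, and KFENCE's layout of two pages per object including guards, i.e. KFENCE_POOL_SIZE = (1 + nr_objects) * 2 * PAGE_SIZE):

#include <stdio.h>

int main(void)
{
	unsigned long page_shift = 16;	/* 64K pages (assumed) */
	unsigned long nr_objects = 255;	/* CONFIG_KFENCE_NUM_OBJECTS default */
	unsigned long pool_pages = (1 + nr_objects) * 2;
	unsigned long pool_size  = pool_pages << page_shift;

	printf("pool: %lu pages = %lu MB\n", pool_pages, pool_size >> 20);	/* 512 pages = 32 MB */
	printf("slot array in RMA: %lu bytes\n", pool_size >> page_shift);	/* 512 bytes */
	return 0;
}
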
Reported-by: Pavithra Prakash Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/5c2b61941b344077a2b8654dab46efa0322af3af.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/include/asm/kfence.h | 5 -- arch/powerpc/mm/book3s64/hash_utils.c | 162 +++++++++++++++++++++++++++++++--- 2 files changed, 149 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h index f3a9476a71b3..fab124ada1c7 100644 --- a/arch/powerpc/include/asm/kfence.h +++ b/arch/powerpc/include/asm/kfence.h @@ -10,7 +10,6 @@ #include #include -#include #ifdef CONFIG_PPC64_ELF_ABI_V1 #define ARCH_FUNC_PREFIX "." @@ -26,10 +25,6 @@ static inline void disable_kfence(void) static inline bool arch_kfence_init_pool(void) { -#ifdef CONFIG_PPC64 - if (!radix_enabled()) - return false; -#endif return !kfence_disabled; } #endif diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index cffbb6499ac4..5bd87d4b3999 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -66,6 +67,7 @@ #include #include #include +#include #include @@ -271,7 +273,7 @@ void hash__tlbiel_all(unsigned int action) WARN(1, "%s called on pre-POWER7 CPU\n", __func__); } -#ifdef CONFIG_DEBUG_PAGEALLOC +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) static void kernel_map_linear_page(unsigned long vaddr, unsigned long idx, u8 *slots, raw_spinlock_t *lock) { @@ -325,11 +327,13 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long idx, mmu_linear_psize, mmu_kernel_ssize, 0); } +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC static u8 *linear_map_hash_slots; static unsigned long linear_map_hash_count; static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); -static inline void hash_debug_pagealloc_alloc_slots(void) +static void hash_debug_pagealloc_alloc_slots(void) { unsigned long max_hash_count = ppc64_rma_size / 4; @@ -352,7 +356,8 @@ static inline void hash_debug_pagealloc_alloc_slots(void) __func__, linear_map_hash_count, &ppc64_rma_size); } -static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) +static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, + int slot) { if (!debug_pagealloc_enabled() || !linear_map_hash_count) return; @@ -386,20 +391,148 @@ static int hash_debug_pagealloc_map_pages(struct page *page, int numpages, return 0; } -int hash__kernel_map_pages(struct page *page, int numpages, int enable) +#else /* CONFIG_DEBUG_PAGEALLOC */ +static inline void hash_debug_pagealloc_alloc_slots(void) {} +static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) {} +static int __maybe_unused +hash_debug_pagealloc_map_pages(struct page *page, int numpages, int enable) { - return hash_debug_pagealloc_map_pages(page, numpages, enable); + return 0; } +#endif /* CONFIG_DEBUG_PAGEALLOC */ -#else /* CONFIG_DEBUG_PAGEALLOC */ -int hash__kernel_map_pages(struct page *page, int numpages, - int enable) +#ifdef CONFIG_KFENCE +static u8 *linear_map_kf_hash_slots; +static unsigned long linear_map_kf_hash_count; +static DEFINE_RAW_SPINLOCK(linear_map_kf_hash_lock); + +static phys_addr_t kfence_pool; + +static inline void hash_kfence_alloc_pool(void) +{ + + /* allocate linear map for kfence within RMA region */ + linear_map_kf_hash_count = KFENCE_POOL_SIZE >> PAGE_SHIFT; + linear_map_kf_hash_slots = 
memblock_alloc_try_nid( + linear_map_kf_hash_count, 1, + MEMBLOCK_LOW_LIMIT, ppc64_rma_size, + NUMA_NO_NODE); + if (!linear_map_kf_hash_slots) { + pr_err("%s: memblock for linear map (%lu) failed\n", __func__, + linear_map_kf_hash_count); + goto err; + } + + /* allocate kfence pool early */ + kfence_pool = memblock_phys_alloc_range(KFENCE_POOL_SIZE, PAGE_SIZE, + MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ANYWHERE); + if (!kfence_pool) { + pr_err("%s: memblock for kfence pool (%lu) failed\n", __func__, + KFENCE_POOL_SIZE); + memblock_free(linear_map_kf_hash_slots, + linear_map_kf_hash_count); + linear_map_kf_hash_count = 0; + goto err; + } + memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE); + + return; +err: + pr_info("Disabling kfence\n"); + disable_kfence(); +} + +static inline void hash_kfence_map_pool(void) +{ + unsigned long kfence_pool_start, kfence_pool_end; + unsigned long prot = pgprot_val(PAGE_KERNEL); + + if (!kfence_pool) + return; + + kfence_pool_start = (unsigned long) __va(kfence_pool); + kfence_pool_end = kfence_pool_start + KFENCE_POOL_SIZE; + __kfence_pool = (char *) kfence_pool_start; + BUG_ON(htab_bolt_mapping(kfence_pool_start, kfence_pool_end, + kfence_pool, prot, mmu_linear_psize, + mmu_kernel_ssize)); + memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); +} + +static inline void hash_kfence_add_slot(phys_addr_t paddr, int slot) { + unsigned long vaddr = (unsigned long) __va(paddr); + unsigned long lmi = (vaddr - (unsigned long)__kfence_pool) + >> PAGE_SHIFT; + + if (!kfence_pool) + return; + BUG_ON(!is_kfence_address((void *)vaddr)); + BUG_ON(lmi >= linear_map_kf_hash_count); + linear_map_kf_hash_slots[lmi] = slot | 0x80; +} + +static int hash_kfence_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long flags, vaddr, lmi; + int i; + + WARN_ON_ONCE(!linear_map_kf_hash_count); + local_irq_save(flags); + for (i = 0; i < numpages; i++, page++) { + vaddr = (unsigned long)page_address(page); + lmi = (vaddr - (unsigned long)__kfence_pool) >> PAGE_SHIFT; + + /* Ideally this should never happen */ + if (lmi >= linear_map_kf_hash_count) { + WARN_ON_ONCE(1); + continue; + } + + if (enable) + kernel_map_linear_page(vaddr, lmi, + linear_map_kf_hash_slots, + &linear_map_kf_hash_lock); + else + kernel_unmap_linear_page(vaddr, lmi, + linear_map_kf_hash_slots, + &linear_map_kf_hash_lock); + } + local_irq_restore(flags); return 0; } -static inline void hash_debug_pagealloc_alloc_slots(void) {} -static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) {} -#endif /* CONFIG_DEBUG_PAGEALLOC */ +#else +static inline void hash_kfence_alloc_pool(void) {} +static inline void hash_kfence_map_pool(void) {} +static inline void hash_kfence_add_slot(phys_addr_t paddr, int slot) {} +static int __maybe_unused +hash_kfence_map_pages(struct page *page, int numpages, int enable) +{ + return 0; +} +#endif + +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +int hash__kernel_map_pages(struct page *page, int numpages, int enable) +{ + void *vaddr = page_address(page); + + if (is_kfence_address(vaddr)) + return hash_kfence_map_pages(page, numpages, enable); + else + return hash_debug_pagealloc_map_pages(page, numpages, enable); +} + +static void hash_linear_map_add_slot(phys_addr_t paddr, int slot) +{ + if (is_kfence_address(__va(paddr))) + hash_kfence_add_slot(paddr, slot); + else + hash_debug_pagealloc_add_slot(paddr, slot); +} +#else +static void hash_linear_map_add_slot(phys_addr_t paddr, int slot) {} +#endif /* * 'R' and 'C' update notes: @@ -559,7 
+692,8 @@ repeat: break; cond_resched(); - hash_debug_pagealloc_add_slot(paddr, ret); + /* add slot info in debug_pagealloc / kfence linear map */ + hash_linear_map_add_slot(paddr, ret); } return ret < 0 ? ret : 0; } @@ -940,7 +1074,7 @@ static void __init htab_init_page_sizes(void) bool aligned = true; init_hpte_page_sizes(); - if (!debug_pagealloc_enabled()) { + if (!debug_pagealloc_enabled_or_kfence()) { /* * Pick a size for the linear mapping. Currently, we only * support 16M, 1M and 4K which is the default @@ -1261,6 +1395,7 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); hash_debug_pagealloc_alloc_slots(); + hash_kfence_alloc_pool(); /* create bolted the linear mapping in the hash table */ for_each_mem_range(i, &base, &end) { size = end - base; @@ -1277,6 +1412,7 @@ static void __init htab_initialize(void) BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); } + hash_kfence_map_pool(); memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* -- cgit v1.2.3 From b5fbf7e2c6a403344e83139a14322f0c42911f2d Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:51 +0530 Subject: book3s64/radix: Refactoring common kfence related functions Both radix and hash on book3s need to detect whether kfence early init is enabled or not. Hash needs to disable kfence if early init is not enabled, because with kfence the linear map is mapped using PAGE_SIZE rather than the 16M mapping. We don't support multiple page sizes for the SLB entry used for the kernel linear map on book3s64. This patch refactors out the common functions required to detect whether kfence early init is enabled or not. Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/f4a787224fbe5bb787158ace579780c0257f6602.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/include/asm/kfence.h | 8 ++++++-- arch/powerpc/mm/book3s64/pgtable.c | 13 +++++++++++++ arch/powerpc/mm/book3s64/radix_pgtable.c | 12 ------------ arch/powerpc/mm/init-common.c | 1 + 4 files changed, 20 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h index fab124ada1c7..1f7cab58ab2c 100644 --- a/arch/powerpc/include/asm/kfence.h +++ b/arch/powerpc/include/asm/kfence.h @@ -15,7 +15,7 @@ #define ARCH_FUNC_PREFIX "." #endif -#ifdef CONFIG_KFENCE +extern bool kfence_early_init; extern bool kfence_disabled; static inline void disable_kfence(void) @@ -27,7 +27,11 @@ static inline bool arch_kfence_init_pool(void) { return !kfence_disabled; } -#endif + +static inline bool kfence_early_init_enabled(void) +{ + return IS_ENABLED(CONFIG_KFENCE) && kfence_early_init; +} #ifdef CONFIG_PPC64 static inline bool kfence_protect_page(unsigned long addr, bool protect) diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 5a4a75369043..374542528080 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -37,6 +37,19 @@ EXPORT_SYMBOL(__pmd_frag_nr); unsigned long __pmd_frag_size_shift; EXPORT_SYMBOL(__pmd_frag_size_shift); +#ifdef CONFIG_KFENCE +extern bool kfence_early_init; +static int __init parse_kfence_early_init(char *arg) +{ + int val; + + if (get_option(&arg, &val)) + kfence_early_init = !!val; + return 0; +} +early_param("kfence.sample_interval", parse_kfence_early_init); +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * This is called when relaxing access to a hugepage.
It's also called in the page diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index b0d927009af8..311e2112d782 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -363,18 +363,6 @@ static int __meminit create_physical_mapping(unsigned long start, } #ifdef CONFIG_KFENCE -static bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL; - -static int __init parse_kfence_early_init(char *arg) -{ - int val; - - if (get_option(&arg, &val)) - kfence_early_init = !!val; - return 0; -} -early_param("kfence.sample_interval", parse_kfence_early_init); - static inline phys_addr_t alloc_kfence_pool(void) { phys_addr_t kfence_pool; diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 2978fcbe307e..745097554bea 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -33,6 +33,7 @@ bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); #ifdef CONFIG_KFENCE bool __ro_after_init kfence_disabled; +bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL; #endif static int __init parse_nosmep(char *p) -- cgit v1.2.3 From 76b7d6463fc504ac266472f5948b83902dfca4c6 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:52 +0530 Subject: book3s64/hash: Disable kfence if not early init Enable kfence on book3s64 hash only when early init is enabled. This is because kfence could cause the kernel linear map to be mapped at PAGE_SIZE level instead of 16M (which I guess we don't want). Also, currently there is no easy way to: 1. Make multiple page size entries for the SLB used for the kernel linear map. 2. Get the hash slot details after the page table mapping for the kernel linear map is set up. So even if kfence allocated the pool in late init, we would not be able to get the hash slot details for the kfence linear map. Thus this patch disables kfence on hash if kfence early init is not enabled. Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/4a6eea8cfd1cd28fccfae067026bff30cbec1d4b.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 5bd87d4b3999..d6683903fefb 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -410,6 +410,8 @@ static inline void hash_kfence_alloc_pool(void) { + if (!kfence_early_init_enabled()) + goto err; /* allocate linear map for kfence within RMA region */ linear_map_kf_hash_count = KFENCE_POOL_SIZE >> PAGE_SHIFT; @@ -1074,7 +1076,7 @@ static void __init htab_init_page_sizes(void) bool aligned = true; init_hpte_page_sizes(); - if (!debug_pagealloc_enabled_or_kfence()) { + if (!debug_pagealloc_enabled() && !kfence_early_init_enabled()) { /* * Pick a size for the linear mapping. Currently, we only * support 16M, 1M and 4K which is the default -- cgit v1.2.3 From 8846d9683884fa9ef5bb160011a748701216e186 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 18 Oct 2024 22:59:53 +0530 Subject: book3s64/hash: Early detect debug_pagealloc size requirement Add hash_supports_debug_pagealloc() helper to detect whether debug_pagealloc can be supported on hash or not.
This checks both that the debug_pagealloc config is enabled and that the linear map fits within the ppc64_rma_size / 4 region size. It can then be used early during htab_init_page_sizes() to decide the linear map page size if hash supports either debug_pagealloc or kfence. Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/c33c6691b2a2cf619cc74ac100118ca4dbf21a48.1729271995.git.ritesh.list@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index d6683903fefb..db87c2cc2fb6 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -329,25 +329,26 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long idx, } #endif +static inline bool hash_supports_debug_pagealloc(void) +{ + unsigned long max_hash_count = ppc64_rma_size / 4; + unsigned long linear_map_count = memblock_end_of_DRAM() >> PAGE_SHIFT; + + if (!debug_pagealloc_enabled() || linear_map_count > max_hash_count) + return false; + return true; +} + #ifdef CONFIG_DEBUG_PAGEALLOC static u8 *linear_map_hash_slots; static unsigned long linear_map_hash_count; static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); static void hash_debug_pagealloc_alloc_slots(void) { - unsigned long max_hash_count = ppc64_rma_size / 4; - - if (!debug_pagealloc_enabled()) - return; - linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; - if (unlikely(linear_map_hash_count > max_hash_count)) { - pr_info("linear map size (%llu) greater than 4 times RMA region (%llu). Disabling debug_pagealloc\n", - ((u64)linear_map_hash_count << PAGE_SHIFT), - ppc64_rma_size); - linear_map_hash_count = 0; + if (!hash_supports_debug_pagealloc()) return; - } + linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; linear_map_hash_slots = memblock_alloc_try_nid( linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, ppc64_rma_size, NUMA_NO_NODE); @@ -1076,7 +1077,7 @@ static void __init htab_init_page_sizes(void) bool aligned = true; init_hpte_page_sizes(); - if (!debug_pagealloc_enabled() && !kfence_early_init_enabled()) { + if (!hash_supports_debug_pagealloc() && !kfence_early_init_enabled()) { /* * Pick a size for the linear mapping. Currently, we only * support 16M, 1M and 4K which is the default -- cgit v1.2.3
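To make the rma_size/4 bound concrete, here is a standalone sketch of the same check (illustrative only: the real helper reads memblock_end_of_DRAM() and ppc64_rma_size; the 512MB RMA, 64K page size, and DRAM sizes below are assumed example values matching the series' discussion):

#include <stdbool.h>
#include <stdio.h>

/* 1 byte of slot state per page must fit within a quarter of the RMA */
static bool supports_debug_pagealloc(unsigned long dram_bytes,
				     unsigned long rma_bytes,
				     unsigned long page_shift)
{
	unsigned long linear_map_count = dram_bytes >> page_shift;

	return linear_map_count <= rma_bytes / 4;
}

int main(void)
{
	unsigned long rma = 512UL << 20;	/* 512MB RMA (example value) */
	unsigned long page_shift = 16;		/* 64K pages */

	/* 8TB of DRAM -> 128MB slot array: just fits in 512MB / 4 */
	printf("8TB:  %d\n", supports_debug_pagealloc(8UL << 40, rma, page_shift));
	/* 16TB -> 256MB slot array: too big, debug_pagealloc gets disabled */
	printf("16TB: %d\n", supports_debug_pagealloc(16UL << 40, rma, page_shift));
	return 0;
}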