From 55ab834a86a9934c4f17825c115f7dc16a89aae7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 16 Dec 2022 10:46:33 +0100 Subject: Revert "mm: add nodes= arg to memory.reclaim" This reverts commit 12a5d3955227b0d7e04fb793ccceeb2a1dd275c5. Although it is recognized that a finer grained pro-active reclaim is something we need and want the semantic of this implementation is really ambiguous. In a follow up discussion it became clear that there are two essential usecases here. One is to use memory.reclaim to pro-actively reclaim memory and expectation is that the requested and reported amount of memory is uncharged from the memcg. Another usecase focuses on pro-active demotion when the memory is merely shuffled around to demotion targets while the overall charged memory stays unchanged. The current implementation considers demoted pages as reclaimed and that break both usecases. [1] has tried to address the reporting part but there are more issues with that summarized in [2] and follow up emails. Let's revert the nodemask based extension of the memcg pro-active reclaim for now until we settle with a more robust semantic. [1] http://lkml.kernel.org/r/http://lkml.kernel.org/r/20221206023406.3182800-1-almasrymina@google.com [2] http://lkml.kernel.org/r/Y5bsmpCyeryu3Zz1@dhcp22.suse.cz Link: https://lkml.kernel.org/r/Y5xASNe1x8cusiTx@dhcp22.suse.cz Fixes: 12a5d3955227b0d ("mm: add nodes= arg to memory.reclaim") Signed-off-by: Michal Hocko Cc: Bagas Sanjaya Cc: Huang Ying Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Mina Almasry Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tejun Heo Cc: Wei Xu Cc: Yang Shi Cc: Yosry Ahmed Cc: zefan li Signed-off-by: Andrew Morton --- include/linux/swap.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2787b84eaf12..0ceed49516ad 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -418,8 +418,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, - unsigned int reclaim_options, - nodemask_t *nodemask); + unsigned int reclaim_options); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, -- cgit v1.2.3 From 3489dbb696d25602aea8c3e669a6d43b76bd5358 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 26 Jan 2023 14:27:20 -0800 Subject: mm: hugetlb: proc: check for hugetlb shared PMD in /proc/PID/smaps Patch series "Fixes for hugetlb mapcount at most 1 for shared PMDs". This issue of mapcount in hugetlb pages referenced by shared PMDs was discussed in [1]. The following two patches address user visible behavior caused by this issue. [1] https://lore.kernel.org/linux-mm/Y9BF+OCdWnCSilEu@monkey/ This patch (of 2): A hugetlb page will have a mapcount of 1 if mapped by multiple processes via a shared PMD. This is because only the first process increases the map count, and subsequent processes just add the shared PMD page to their page table. page_mapcount is being used to decide if a hugetlb page is shared or private in /proc/PID/smaps. Pages referenced via a shared PMD were incorrectly being counted as private. To fix, check for a shared PMD if mapcount is 1. If a shared PMD is found count the hugetlb page as shared. A new helper to check for a shared PMD is added. [akpm@linux-foundation.org: simplification, per David] [akpm@linux-foundation.org: hugetlb.h: include page_ref.h for page_count()] Link: https://lkml.kernel.org/r/20230126222721.222195-2-mike.kravetz@oracle.com Fixes: 25ee01a2fca0 ("mm: hugetlb: proc: add hugetlb-related fields to /proc/PID/smaps") Signed-off-by: Mike Kravetz Acked-by: Peter Xu Cc: David Hildenbrand Cc: James Houghton Cc: Matthew Wilcox Cc: Michal Hocko Cc: Muchun Song Cc: Naoya Horiguchi Cc: Vishal Moola (Oracle) Cc: Yang Shi Cc: Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 4 +--- include/linux/hugetlb.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e35a0398db63..af1c49ae11b1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -745,9 +745,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, page = pfn_swap_entry_to_page(swpent); } if (page) { - int mapcount = page_mapcount(page); - - if (mapcount >= 2) + if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte)) mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 551834cd5299..db194e2ba69f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -1187,6 +1188,18 @@ static inline __init void hugetlb_cma_reserve(int order) } #endif +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +static inline bool hugetlb_pmd_shared(pte_t *pte) +{ + return page_count(virt_to_page(pte)) > 1; +} +#else +static inline bool hugetlb_pmd_shared(pte_t *pte) +{ + return false; +} +#endif + bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE -- cgit v1.2.3 From 88d7b12068b95731c280af8ce88e8ee9561f96de Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 26 Jan 2023 20:07:27 +0000 Subject: highmem: round down the address passed to kunmap_flush_on_unmap() We already round down the address in kunmap_local_indexed() which is the other implementation of __kunmap_local(). The only implementation of kunmap_flush_on_unmap() is PA-RISC which is expecting a page-aligned address. This may be causing PA-RISC to be flushing the wrong addresses currently. Link: https://lkml.kernel.org/r/20230126200727.1680362-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Fixes: 298fa1ad5571 ("highmem: Provide generic variant of kmap_atomic*") Reviewed-by: Ira Weiny Cc: "Fabio M. De Francesco" Cc: Al Viro Cc: Thomas Gleixner Cc: Helge Deller Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Bagas Sanjaya Cc: David Sterba Cc: Kees Cook Cc: Sebastian Andrzej Siewior Cc: Tony Luck Cc: Signed-off-by: Andrew Morton --- include/linux/highmem-internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 034b1106d022..e098f38422af 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -200,7 +200,7 @@ static inline void *kmap_local_pfn(unsigned long pfn) static inline void __kunmap_local(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(addr); + kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); #endif } @@ -227,7 +227,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn) static inline void __kunmap_atomic(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(addr); + kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); #endif pagefault_enable(); if (IS_ENABLED(CONFIG_PREEMPT_RT)) -- cgit v1.2.3 From ac86f547ca1002aec2ef66b9e64d03f45bbbfbb9 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sun, 29 Jan 2023 12:09:45 +0800 Subject: mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath() As commit 18365225f044 ("hwpoison, memcg: forcibly uncharge LRU pages"), hwpoison will forcibly uncharg a LRU hwpoisoned page, the folio_memcg could be NULl, then, mem_cgroup_track_foreign_dirty_slowpath() could occurs a NULL pointer dereference, let's do not record the foreign writebacks for folio memcg is null in mem_cgroup_track_foreign_dirty() to fix it. Link: https://lkml.kernel.org/r/20230129040945.180629-1-wangkefeng.wang@huawei.com Fixes: 97b27821b485 ("writeback, memcg: Implement foreign dirty flushing") Signed-off-by: Kefeng Wang Reported-by: Ma Wupeng Tested-by: Miko Larsson Acked-by: Michal Hocko Cc: Jan Kara Cc: Jens Axboe Cc: Kefeng Wang Cc: Ma Wupeng Cc: Naoya Horiguchi Cc: Shakeel Butt Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d3c8203cab6c..85dc9b88ea37 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1666,10 +1666,13 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { + struct mem_cgroup *memcg; + if (mem_cgroup_disabled()) return; - if (unlikely(&folio_memcg(folio)->css != wb->memcg_css)) + memcg = folio_memcg(folio); + if (unlikely(memcg && &memcg->css != wb->memcg_css)) mem_cgroup_track_foreign_dirty_slowpath(folio, wb); } -- cgit v1.2.3