From fb5c2029f8221e904e604938171c4a8ef169aadb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 28 Jun 2022 20:15:29 -0400 Subject: mm: Account dirty folios properly during splits If the last folio in a file is split as a result of truncation, we simply clear the dirty bits for the pages we're discarding. That causes NR_FILE_DIRTY (among other counters) to be thrown off and eventually Linux will hang in balance_dirty_pages_ratelimited() Reported-by: Dave Chinner Tested-by: Dave Chinner Tested-by: Darrick J. Wong Fixes: d68eccad3706 ("mm/filemap: Allow large folios to be added to the page cache") Signed-off-by: Matthew Wilcox (Oracle) --- mm/huge_memory.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 834f288b3769..15965084816d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -2440,11 +2441,15 @@ static void __split_huge_page(struct page *page, struct list_head *list, __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond EOF: drop them from page cache */ if (head[i].index >= end) { - ClearPageDirty(head + i); - __delete_from_page_cache(head + i, NULL); + struct folio *tail = page_folio(head + i); + if (shmem_mapping(head->mapping)) shmem_uncharge(head->mapping->host, 1); - put_page(head + i); + else if (folio_test_clear_dirty(tail)) + folio_account_cleaned(tail, + inode_to_wb(folio->mapping->host)); + __filemap_remove_folio(tail, NULL); + folio_put(tail); } else if (!PageAnon(page)) { __xa_store(&head->mapping->i_pages, head[i].index, head + i, 0); -- cgit v1.2.3 From 6ffcd825e7d0416d78fd41cd5b7856a78122cc8c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 28 Jun 2022 20:41:40 -0400 Subject: mm: Remove __delete_from_page_cache() This wrapper is no longer used. Remove it and all references to it. Signed-off-by: Matthew Wilcox (Oracle) --- fs/inode.c | 2 +- include/linux/pagemap.h | 4 ---- mm/memory-failure.c | 2 +- mm/shmem.c | 2 +- mm/truncate.c | 2 +- 5 files changed, 4 insertions(+), 8 deletions(-) (limited to 'mm') diff --git a/fs/inode.c b/fs/inode.c index bd4da9c5207e..d2bdc407c94b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -604,7 +604,7 @@ void clear_inode(struct inode *inode) { /* * We have to cycle the i_pages lock here because reclaim can be in the - * process of removing the last page (in __delete_from_page_cache()) + * process of removing the last page (in __filemap_remove_folio()) * and we must not free the mapping under it. */ xa_lock_irq(&inode->i_data.i_pages); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ce96866fbec4..44b9c265a234 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1107,10 +1107,6 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio, void filemap_remove_folio(struct folio *folio); void delete_from_page_cache(struct page *page); void __filemap_remove_folio(struct folio *folio, void *shadow); -static inline void __delete_from_page_cache(struct page *page, void *shadow) -{ - __filemap_remove_folio(page_folio(page), shadow); -} void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, struct folio_batch *fbatch); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b85661cbdc4a..a859486ddda9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1935,7 +1935,7 @@ try_again: /* * Now take care of user space mappings. - * Abort on fail: __delete_from_page_cache() assumes unmapped page. + * Abort on fail: __filemap_remove_folio() assumes unmapped page. */ if (!hwpoison_user_mappings(p, pfn, flags, p)) { action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); diff --git a/mm/shmem.c b/mm/shmem.c index a6f565308133..b42dfb01c634 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -392,7 +392,7 @@ void shmem_uncharge(struct inode *inode, long pages) struct shmem_inode_info *info = SHMEM_I(inode); unsigned long flags; - /* nrpages adjustment done by __delete_from_page_cache() or caller */ + /* nrpages adjustment done by __filemap_remove_folio() or caller */ spin_lock_irqsave(&info->lock, flags); info->alloced -= pages; diff --git a/mm/truncate.c b/mm/truncate.c index ab50d0d59a2a..0b0708bf935f 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -443,7 +443,7 @@ EXPORT_SYMBOL(truncate_inode_pages_range); * mapping->invalidate_lock. * * Note: When this function returns, there can be a page in the process of - * deletion (inside __delete_from_page_cache()) in the specified range. Thus + * deletion (inside __filemap_remove_folio()) in the specified range. Thus * mapping->nrpages can be non-zero when this function returns even after * truncation of the whole mapping. */ -- cgit v1.2.3 From d9ef44de5d731e1a1fa94ddb5e39ea1b308b1456 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 1 Jun 2022 15:11:01 -0400 Subject: hugetlb: Convert huge_add_to_page_cache() to use a folio Remove the last caller of add_to_page_cache() Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Mike Kravetz Reviewed-by: Muchun Song --- fs/hugetlbfs/inode.c | 2 +- mm/hugetlb.c | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'mm') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 62408047e8d7..ae2524480f23 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -759,7 +759,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, SetHPageMigratable(page); /* - * unlock_page because locked by add_to_page_cache() + * unlock_page because locked by huge_add_to_page_cache() * put_page() due to reference from alloc_huge_page() */ unlock_page(page); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a57e1be41401..33b2c27e7c61 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5414,19 +5414,25 @@ static bool hugetlbfs_pagecache_present(struct hstate *h, int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx) { + struct folio *folio = page_folio(page); struct inode *inode = mapping->host; struct hstate *h = hstate_inode(inode); - int err = add_to_page_cache(page, mapping, idx, GFP_KERNEL); + int err; - if (err) + __folio_set_locked(folio); + err = __filemap_add_folio(mapping, folio, idx, GFP_KERNEL, NULL); + + if (unlikely(err)) { + __folio_clear_locked(folio); return err; + } ClearHPageRestoreReserve(page); /* - * set page dirty so that it will not be removed from cache/file + * mark folio dirty so that it will not be removed from cache/file * by non-hugetlbfs specific code paths. */ - set_page_dirty(page); + folio_mark_dirty(folio); spin_lock(&inode->i_lock); inode->i_blocks += blocks_per_huge_page(h); -- cgit v1.2.3 From 2bb876b58d593d7f2522ec0f41f20a74fde76822 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 1 Jun 2022 15:13:59 -0400 Subject: filemap: Remove add_to_page_cache() and add_to_page_cache_locked() These functions have no more users, so delete them. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Mike Kravetz Reviewed-by: Muchun Song --- Documentation/admin-guide/cgroup-v1/memcg_test.rst | 2 +- include/linux/pagemap.h | 18 ------------------ mm/filemap.c | 20 -------------------- mm/shmem.c | 2 +- mm/swap_state.c | 2 +- 5 files changed, 3 insertions(+), 41 deletions(-) (limited to 'mm') diff --git a/Documentation/admin-guide/cgroup-v1/memcg_test.rst b/Documentation/admin-guide/cgroup-v1/memcg_test.rst index 45b94f7b3beb..a402359abb99 100644 --- a/Documentation/admin-guide/cgroup-v1/memcg_test.rst +++ b/Documentation/admin-guide/cgroup-v1/memcg_test.rst @@ -97,7 +97,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. ============= Page Cache is charged at - - add_to_page_cache_locked(). + - filemap_add_folio(). The logic is very clear. (About migration, see below) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 44b9c265a234..dee519ef7436 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1098,8 +1098,6 @@ size_t fault_in_subpage_writeable(char __user *uaddr, size_t size); size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); size_t fault_in_readable(const char __user *uaddr, size_t size); -int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp); int filemap_add_folio(struct address_space *mapping, struct folio *folio, @@ -1115,22 +1113,6 @@ bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, int whence); -/* - * Like add_to_page_cache_locked, but used to add newly allocated pages: - * the page is new, so we can just run __SetPageLocked() against it. - */ -static inline int add_to_page_cache(struct page *page, - struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) -{ - int error; - - __SetPageLocked(page); - error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); - if (unlikely(error)) - __ClearPageLocked(page); - return error; -} - /* Must be non-static for BPF error injection */ int __filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp); diff --git a/mm/filemap.c b/mm/filemap.c index ffdfbc8b0e3c..22a17ab256f7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -929,26 +929,6 @@ error: } ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO); -/** - * add_to_page_cache_locked - add a locked page to the pagecache - * @page: page to add - * @mapping: the page's address_space - * @offset: page index - * @gfp_mask: page allocation mode - * - * This function is used to add a page to the pagecache. It must be locked. - * This function does not add the page to the LRU. The caller must do that. - * - * Return: %0 on success, negative error code otherwise. - */ -int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t offset, gfp_t gfp_mask) -{ - return __filemap_add_folio(mapping, page_folio(page), offset, - gfp_mask, NULL); -} -EXPORT_SYMBOL(add_to_page_cache_locked); - int filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp) { diff --git a/mm/shmem.c b/mm/shmem.c index b42dfb01c634..6a5e46f1a326 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -693,7 +693,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * Like add_to_page_cache_locked, but error if expected item has gone. + * Like filemap_add_folio, but error if expected item has gone. */ static int shmem_add_to_page_cache(struct folio *folio, struct address_space *mapping, diff --git a/mm/swap_state.c b/mm/swap_state.c index 778d57d2d92d..f5b6f5638908 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -95,7 +95,7 @@ void *get_shadow_from_swap_cache(swp_entry_t entry) } /* - * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, + * add_to_swap_cache resembles filemap_add_folio on swapper_space, * but sets SwapCache flag and private instead of mapping and index. */ int add_to_swap_cache(struct page *page, swp_entry_t entry, -- cgit v1.2.3 From be0ced5e9cb81a4c2edefd081a7794a1814fb60d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 3 Jun 2022 15:30:25 -0400 Subject: filemap: Add filemap_get_folios() This is the equivalent of find_get_pages() but fills a folio_batch instead of an array of pages. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner (Microsoft) --- include/linux/pagemap.h | 2 ++ mm/filemap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'mm') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index dee519ef7436..cfd0e8001b3b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -718,6 +718,8 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) return head + (index & (thp_nr_pages(head) - 1)); } +unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, + pgoff_t end, struct folio_batch *fbatch); unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, pgoff_t end, unsigned int nr_pages, struct page **pages); diff --git a/mm/filemap.c b/mm/filemap.c index 22a17ab256f7..ce3209a8ad49 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2127,6 +2127,65 @@ put: return folio_batch_count(fbatch); } +/** + * filemap_get_folios - Get a batch of folios + * @mapping: The address_space to search + * @start: The starting page index + * @end: The final page index (inclusive) + * @fbatch: The batch to fill. + * + * Search for and return a batch of folios in the mapping starting at + * index @start and up to index @end (inclusive). The folios are returned + * in @fbatch with an elevated reference count. + * + * The first folio may start before @start; if it does, it will contain + * @start. The final folio may extend beyond @end; if it does, it will + * contain @end. The folios have ascending indices. There may be gaps + * between the folios if there are indices which have no folio in the + * page cache. If folios are added to or removed from the page cache + * while this is running, they may or may not be found by this call. + * + * Return: The number of folios which were found. + * We also update @start to index the next folio for the traversal. + */ +unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, + pgoff_t end, struct folio_batch *fbatch) +{ + XA_STATE(xas, &mapping->i_pages, *start); + struct folio *folio; + + rcu_read_lock(); + while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) { + /* Skip over shadow, swap and DAX entries */ + if (xa_is_value(folio)) + continue; + if (!folio_batch_add(fbatch, folio)) { + unsigned long nr = folio_nr_pages(folio); + + if (folio_test_hugetlb(folio)) + nr = 1; + *start = folio->index + nr; + goto out; + } + } + + /* + * We come here when there is no page beyond @end. We take care to not + * overflow the index @start as it confuses some of the callers. This + * breaks the iteration when there is a page at index -1 but that is + * already broken anyway. + */ + if (end == (pgoff_t)-1) + *start = (pgoff_t)-1; + else + *start = end + 1; +out: + rcu_read_unlock(); + + return folio_batch_count(fbatch); +} +EXPORT_SYMBOL(filemap_get_folios); + static inline bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max) { -- cgit v1.2.3 From 77414d195f905dd43f58bce82118775ffa59575c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 4 Jun 2022 17:39:09 -0400 Subject: vmscan: Add check_move_unevictable_folios() Change the guts of check_move_unevictable_pages() over to use folios and add check_move_unevictable_pages() as a wrapper. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Christian Brauner (Microsoft) --- include/linux/swap.h | 3 ++- mm/vmscan.c | 56 +++++++++++++++++++++++++++++++--------------------- 2 files changed, 36 insertions(+), 23 deletions(-) (limited to 'mm') diff --git a/include/linux/swap.h b/include/linux/swap.h index 0c0fed1b348f..8672a7123ccd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -438,7 +438,8 @@ static inline bool node_reclaim_enabled(void) return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); } -extern void check_move_unevictable_pages(struct pagevec *pvec); +void check_move_unevictable_folios(struct folio_batch *fbatch); +void check_move_unevictable_pages(struct pagevec *pvec); extern void kswapd_run(int nid); extern void kswapd_stop(int nid); diff --git a/mm/vmscan.c b/mm/vmscan.c index f7d9a683e3a7..04f8671caad9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4790,45 +4790,57 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) } #endif +void check_move_unevictable_pages(struct pagevec *pvec) +{ + struct folio_batch fbatch; + unsigned i; + + folio_batch_init(&fbatch); + for (i = 0; i < pvec->nr; i++) { + struct page *page = pvec->pages[i]; + + if (PageTransTail(page)) + continue; + folio_batch_add(&fbatch, page_folio(page)); + } + check_move_unevictable_folios(&fbatch); +} +EXPORT_SYMBOL_GPL(check_move_unevictable_pages); + /** - * check_move_unevictable_pages - check pages for evictability and move to - * appropriate zone lru list - * @pvec: pagevec with lru pages to check + * check_move_unevictable_folios - Move evictable folios to appropriate zone + * lru list + * @fbatch: Batch of lru folios to check. * - * Checks pages for evictability, if an evictable page is in the unevictable + * Checks folios for evictability, if an evictable folio is in the unevictable * lru list, moves it to the appropriate evictable lru list. This function - * should be only used for lru pages. + * should be only used for lru folios. */ -void check_move_unevictable_pages(struct pagevec *pvec) +void check_move_unevictable_folios(struct folio_batch *fbatch) { struct lruvec *lruvec = NULL; int pgscanned = 0; int pgrescued = 0; int i; - for (i = 0; i < pvec->nr; i++) { - struct page *page = pvec->pages[i]; - struct folio *folio = page_folio(page); - int nr_pages; - - if (PageTransTail(page)) - continue; + for (i = 0; i < fbatch->nr; i++) { + struct folio *folio = fbatch->folios[i]; + int nr_pages = folio_nr_pages(folio); - nr_pages = thp_nr_pages(page); pgscanned += nr_pages; - /* block memcg migration during page moving between lru */ - if (!TestClearPageLRU(page)) + /* block memcg migration while the folio moves between lrus */ + if (!folio_test_clear_lru(folio)) continue; lruvec = folio_lruvec_relock_irq(folio, lruvec); - if (page_evictable(page) && PageUnevictable(page)) { - del_page_from_lru_list(page, lruvec); - ClearPageUnevictable(page); - add_page_to_lru_list(page, lruvec); + if (folio_evictable(folio) && folio_test_unevictable(folio)) { + lruvec_del_folio(lruvec, folio); + folio_clear_unevictable(folio); + lruvec_add_folio(lruvec, folio); pgrescued += nr_pages; } - SetPageLRU(page); + folio_set_lru(folio); } if (lruvec) { @@ -4839,4 +4851,4 @@ void check_move_unevictable_pages(struct pagevec *pvec) count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned); } } -EXPORT_SYMBOL_GPL(check_move_unevictable_pages); +EXPORT_SYMBOL_GPL(check_move_unevictable_folios); -- cgit v1.2.3 From 105c988f5dd76397616e0392ea85d45225397191 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 4 Jun 2022 17:40:17 -0400 Subject: shmem: Convert shmem_unlock_mapping() to use filemap_get_folios() This is a straightforward conversion. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner (Microsoft) --- mm/shmem.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'mm') diff --git a/mm/shmem.c b/mm/shmem.c index 6a5e46f1a326..28a62be1d41e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -867,18 +867,17 @@ unsigned long shmem_swap_usage(struct vm_area_struct *vma) */ void shmem_unlock_mapping(struct address_space *mapping) { - struct pagevec pvec; + struct folio_batch fbatch; pgoff_t index = 0; - pagevec_init(&pvec); + folio_batch_init(&fbatch); /* * Minor point, but we might as well stop if someone else SHM_LOCKs it. */ - while (!mapping_unevictable(mapping)) { - if (!pagevec_lookup(&pvec, mapping, &index)) - break; - check_move_unevictable_pages(&pvec); - pagevec_release(&pvec); + while (!mapping_unevictable(mapping) && + filemap_get_folios(mapping, &index, ~0UL, &fbatch)) { + check_move_unevictable_folios(&fbatch); + folio_batch_release(&fbatch); cond_resched(); } } -- cgit v1.2.3 From bb4b42ba926224e26ed699e51def164b4b163935 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 4 Jun 2022 17:46:02 -0400 Subject: filemap: Remove find_get_pages_range() and associated functions All callers of find_get_pages_range(), pagevec_lookup_range() and pagevec_lookup() have now been removed. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner (Microsoft) --- include/linux/pagemap.h | 3 --- include/linux/pagevec.h | 10 -------- mm/filemap.c | 67 ------------------------------------------------- mm/swap.c | 29 --------------------- 4 files changed, 109 deletions(-) (limited to 'mm') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index cfd0e8001b3b..87d4ea571240 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -720,9 +720,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); -unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, - pgoff_t end, unsigned int nr_pages, - struct page **pages); unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 67b1246f136b..6649154a2115 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -27,16 +27,6 @@ struct pagevec { void __pagevec_release(struct pagevec *pvec); void __pagevec_lru_add(struct pagevec *pvec); -unsigned pagevec_lookup_range(struct pagevec *pvec, - struct address_space *mapping, - pgoff_t *start, pgoff_t end); -static inline unsigned pagevec_lookup(struct pagevec *pvec, - struct address_space *mapping, - pgoff_t *start) -{ - return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); -} - unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag); diff --git a/mm/filemap.c b/mm/filemap.c index ce3209a8ad49..15399e8cd281 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2196,73 +2196,6 @@ bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max) return index < folio->index + folio_nr_pages(folio) - 1; } -/** - * find_get_pages_range - gang pagecache lookup - * @mapping: The address_space to search - * @start: The starting page index - * @end: The final page index (inclusive) - * @nr_pages: The maximum number of pages - * @pages: Where the resulting pages are placed - * - * find_get_pages_range() will search for and return a group of up to @nr_pages - * pages in the mapping starting at index @start and up to index @end - * (inclusive). The pages are placed at @pages. find_get_pages_range() takes - * a reference against the returned pages. - * - * The search returns a group of mapping-contiguous pages with ascending - * indexes. There may be holes in the indices due to not-present pages. - * We also update @start to index the next page for the traversal. - * - * Return: the number of pages which were found. If this number is - * smaller than @nr_pages, the end of specified range has been - * reached. - */ -unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, - pgoff_t end, unsigned int nr_pages, - struct page **pages) -{ - XA_STATE(xas, &mapping->i_pages, *start); - struct folio *folio; - unsigned ret = 0; - - if (unlikely(!nr_pages)) - return 0; - - rcu_read_lock(); - while ((folio = find_get_entry(&xas, end, XA_PRESENT))) { - /* Skip over shadow, swap and DAX entries */ - if (xa_is_value(folio)) - continue; - -again: - pages[ret] = folio_file_page(folio, xas.xa_index); - if (++ret == nr_pages) { - *start = xas.xa_index + 1; - goto out; - } - if (folio_more_pages(folio, xas.xa_index, end)) { - xas.xa_index++; - folio_ref_inc(folio); - goto again; - } - } - - /* - * We come here when there is no page beyond @end. We take care to not - * overflow the index @start as it confuses some of the callers. This - * breaks the iteration when there is a page at index -1 but that is - * already broken anyway. - */ - if (end == (pgoff_t)-1) - *start = (pgoff_t)-1; - else - *start = end + 1; -out: - rcu_read_unlock(); - - return ret; -} - /** * find_get_pages_contig - gang contiguous pagecache lookup * @mapping: The address_space to search diff --git a/mm/swap.c b/mm/swap.c index f3922a96b2e9..f65e284247b2 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -1086,35 +1086,6 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch) fbatch->nr = j; } -/** - * pagevec_lookup_range - gang pagecache lookup - * @pvec: Where the resulting pages are placed - * @mapping: The address_space to search - * @start: The starting page index - * @end: The final page index - * - * pagevec_lookup_range() will search for & return a group of up to PAGEVEC_SIZE - * pages in the mapping starting from index @start and upto index @end - * (inclusive). The pages are placed in @pvec. pagevec_lookup() takes a - * reference against the pages in @pvec. - * - * The search returns a group of mapping-contiguous pages with ascending - * indexes. There may be holes in the indices due to not-present pages. We - * also update @start to index the next page for the traversal. - * - * pagevec_lookup_range() returns the number of pages which were found. If this - * number is smaller than PAGEVEC_SIZE, the end of specified range has been - * reached. - */ -unsigned pagevec_lookup_range(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *start, pgoff_t end) -{ - pvec->nr = find_get_pages_range(mapping, start, end, PAGEVEC_SIZE, - pvec->pages); - return pagevec_count(pvec); -} -EXPORT_SYMBOL(pagevec_lookup_range); - unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag) -- cgit v1.2.3 From 9bc3e869386bdf5a00ecf71d6592050997e69d53 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 17:12:21 -0400 Subject: filemap: Move 'filler' case to the end of do_read_cache_folio() No functionality change intended; this simply moves code around to disentangle the function a little. Signed-off-by: Matthew Wilcox (Oracle) --- mm/filemap.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 15399e8cd281..c821bc4a648b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3492,20 +3492,7 @@ repeat: return ERR_PTR(err); } -filler: - err = filler(file, folio); - if (err < 0) { - folio_put(folio); - return ERR_PTR(err); - } - - folio_wait_locked(folio); - if (!folio_test_uptodate(folio)) { - folio_put(folio); - return ERR_PTR(-EIO); - } - - goto out; + goto filler; } if (folio_test_uptodate(folio)) goto out; @@ -3535,7 +3522,18 @@ filler: * set again if read page fails. */ folio_clear_error(folio); - goto filler; +filler: + err = filler(file, folio); + if (err < 0) { + folio_put(folio); + return ERR_PTR(err); + } + + folio_wait_locked(folio); + if (!folio_test_uptodate(folio)) { + folio_put(folio); + return ERR_PTR(-EIO); + } out: folio_mark_accessed(folio); -- cgit v1.2.3 From 1dfa24a4bf3b00325d3957019e3ada2ec21d82cf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 17:47:06 -0400 Subject: filemap: Handle AOP_TRUNCATED_PAGE in do_read_cache_folio() If the call to filler() returns AOP_TRUNCATED_PAGE, we need to retry the page cache lookup. Signed-off-by: Matthew Wilcox (Oracle) --- mm/filemap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index c821bc4a648b..4e2601bfff50 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3524,8 +3524,10 @@ repeat: folio_clear_error(folio); filler: err = filler(file, folio); - if (err < 0) { + if (err) { folio_put(folio); + if (err == AOP_TRUNCATED_PAGE) + goto repeat; return ERR_PTR(err); } -- cgit v1.2.3 From 290e1a320437011fc4f26456ab45eaebf9e44f5e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 17:37:01 -0400 Subject: filemap: Use filemap_read_folio() in do_read_cache_folio() By passing ->read_folio to filemap_read_folio(), we can use filemap_read_folio() in do_read_cache_folio(). Signed-off-by: Matthew Wilcox (Oracle) --- mm/filemap.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 4e2601bfff50..8ccb868c3d95 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2381,7 +2381,7 @@ retry: rcu_read_unlock(); } -static int filemap_read_folio(struct file *file, struct address_space *mapping, +static int filemap_read_folio(struct file *file, filler_t filler, struct folio *folio) { int error; @@ -2393,7 +2393,7 @@ static int filemap_read_folio(struct file *file, struct address_space *mapping, */ folio_clear_error(folio); /* Start the actual read. The read will unlock the page. */ - error = mapping->a_ops->read_folio(file, folio); + error = filler(file, folio); if (error) return error; @@ -2402,7 +2402,8 @@ static int filemap_read_folio(struct file *file, struct address_space *mapping, return error; if (folio_test_uptodate(folio)) return 0; - shrink_readahead_size_eio(&file->f_ra); + if (file) + shrink_readahead_size_eio(&file->f_ra); return -EIO; } @@ -2475,7 +2476,8 @@ static int filemap_update_page(struct kiocb *iocb, if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) goto unlock; - error = filemap_read_folio(iocb->ki_filp, mapping, folio); + error = filemap_read_folio(iocb->ki_filp, mapping->a_ops->read_folio, + folio); goto unlock_mapping; unlock: folio_unlock(folio); @@ -2518,7 +2520,7 @@ static int filemap_create_folio(struct file *file, if (error) goto error; - error = filemap_read_folio(file, mapping, folio); + error = filemap_read_folio(file, mapping->a_ops->read_folio, folio); if (error) goto error; @@ -3202,7 +3204,7 @@ page_not_uptodate: * and we need to check for errors. */ fpin = maybe_unlock_mmap_for_io(vmf, fpin); - error = filemap_read_folio(file, mapping, folio); + error = filemap_read_folio(file, mapping->a_ops->read_folio, folio); if (fpin) goto out_retry; folio_put(folio); @@ -3515,15 +3517,8 @@ repeat: goto out; } - /* - * A previous I/O error may have been due to temporary - * failures. - * Clear page error before actual read, PG_error will be - * set again if read page fails. - */ - folio_clear_error(folio); filler: - err = filler(file, folio); + err = filemap_read_folio(file, filler, folio); if (err) { folio_put(folio); if (err == AOP_TRUNCATED_PAGE) @@ -3531,12 +3526,6 @@ filler: return ERR_PTR(err); } - folio_wait_locked(folio); - if (!folio_test_uptodate(folio)) { - folio_put(folio); - return ERR_PTR(-EIO); - } - out: folio_mark_accessed(folio); return folio; -- cgit v1.2.3 From 81218f80a70768589ee30e14a8889336f070a339 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Jun 2022 15:37:50 -0400 Subject: secretmem: Remove isolate_page The isolate_page operation is never called for filesystems, only for device drivers which call SetPageMovable. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand --- mm/secretmem.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'mm') diff --git a/mm/secretmem.c b/mm/secretmem.c index 206ed6b40c1d..1c7f1775b56e 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -133,11 +133,6 @@ static const struct file_operations secretmem_fops = { .mmap = secretmem_mmap, }; -static bool secretmem_isolate_page(struct page *page, isolate_mode_t mode) -{ - return false; -} - static int secretmem_migratepage(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode) @@ -155,7 +150,6 @@ const struct address_space_operations secretmem_aops = { .dirty_folio = noop_dirty_folio, .free_folio = secretmem_free_folio, .migratepage = secretmem_migratepage, - .isolate_page = secretmem_isolate_page, }; static int secretmem_setattr(struct user_namespace *mnt_userns, -- cgit v1.2.3 From 68f2736a858324c3ec852f6c2cddd9d1c777357d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Jun 2022 15:38:48 -0400 Subject: mm: Convert all PageMovable users to movable_operations These drivers are rather uncomfortably hammered into the address_space_operations hole. They aren't filesystems and don't behave like filesystems. They just need their own movable_operations structure, which we can point to directly from page->mapping. Signed-off-by: Matthew Wilcox (Oracle) --- Documentation/filesystems/locking.rst | 4 -- Documentation/filesystems/vfs.rst | 12 ---- Documentation/vm/page_migration.rst | 113 +++------------------------------- arch/powerpc/platforms/pseries/cmm.c | 60 +----------------- drivers/misc/vmw_balloon.c | 61 +----------------- drivers/virtio/virtio_balloon.c | 47 +------------- include/linux/balloon_compaction.h | 6 +- include/linux/fs.h | 2 - include/linux/migrate.h | 56 +++++++++++++++-- include/linux/page-flags.h | 2 +- include/uapi/linux/magic.h | 4 -- mm/balloon_compaction.c | 10 ++- mm/compaction.c | 29 ++++----- mm/migrate.c | 24 ++++---- mm/util.c | 4 +- mm/z3fold.c | 84 +++---------------------- mm/zsmalloc.c | 102 +++++++----------------------- 17 files changed, 134 insertions(+), 486 deletions(-) (limited to 'mm') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index c0fe711f14d3..9963d9600b71 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -252,9 +252,7 @@ prototypes:: bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *); int (*direct_IO)(struct kiocb *, struct iov_iter *iter); - bool (*isolate_page) (struct page *, isolate_mode_t); int (*migratepage)(struct address_space *, struct page *, struct page *); - void (*putback_page) (struct page *); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count); int (*error_remove_page)(struct address_space *, struct page *); @@ -280,9 +278,7 @@ invalidate_folio: yes exclusive release_folio: yes free_folio: yes direct_IO: -isolate_page: yes migratepage: yes (both) -putback_page: yes launder_folio: yes is_partially_uptodate: yes error_remove_page: yes diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index a08c652467d7..b51665cdabc4 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -737,12 +737,8 @@ cache in your filesystem. The following members are defined: bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); - /* isolate a page for migration */ - bool (*isolate_page) (struct page *, isolate_mode_t); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); - /* put migration-failed page back to right list */ - void (*putback_page) (struct page *); int (*launder_folio) (struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, @@ -930,11 +926,6 @@ cache in your filesystem. The following members are defined: data directly between the storage and the application's address space. -``isolate_page`` - Called by the VM when isolating a movable non-lru page. If page - is successfully isolated, VM marks the page as PG_isolated via - __SetPageIsolated. - ``migrate_page`` This is used to compact the physical memory usage. If the VM wants to relocate a page (maybe off a memory card that is @@ -942,9 +933,6 @@ cache in your filesystem. The following members are defined: page to this function. migrate_page should transfer any private data across and update any references that it has to the page. -``putback_page`` - Called by the VM when isolated page's migration fails. - ``launder_folio`` Called before freeing a folio - it writes back the dirty folio. To prevent redirtying the folio, it is kept locked during the diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst index 8c5cb8147e55..11493bad7112 100644 --- a/Documentation/vm/page_migration.rst +++ b/Documentation/vm/page_migration.rst @@ -152,110 +152,15 @@ Steps: Non-LRU page migration ====================== -Although migration originally aimed for reducing the latency of memory accesses -for NUMA, compaction also uses migration to create high-order pages. +Although migration originally aimed for reducing the latency of memory +accesses for NUMA, compaction also uses migration to create high-order +pages. For compaction purposes, it is also useful to be able to move +non-LRU pages, such as zsmalloc and virtio-balloon pages. -Current problem of the implementation is that it is designed to migrate only -*LRU* pages. However, there are potential non-LRU pages which can be migrated -in drivers, for example, zsmalloc, virtio-balloon pages. - -For virtio-balloon pages, some parts of migration code path have been hooked -up and added virtio-balloon specific functions to intercept migration logics. -It's too specific to a driver so other drivers who want to make their pages -movable would have to add their own specific hooks in the migration path. - -To overcome the problem, VM supports non-LRU page migration which provides -generic functions for non-LRU movable pages without driver specific hooks -in the migration path. - -If a driver wants to make its pages movable, it should define three functions -which are function pointers of struct address_space_operations. - -1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);`` - - What VM expects from isolate_page() function of driver is to return *true* - if driver isolates the page successfully. On returning true, VM marks the page - as PG_isolated so concurrent isolation in several CPUs skip the page - for isolation. If a driver cannot isolate the page, it should return *false*. - - Once page is successfully isolated, VM uses page.lru fields so driver - shouldn't expect to preserve values in those fields. - -2. ``int (*migratepage) (struct address_space *mapping,`` -| ``struct page *newpage, struct page *oldpage, enum migrate_mode);`` - - After isolation, VM calls migratepage() of driver with the isolated page. - The function of migratepage() is to move the contents of the old page to the - new page - and set up fields of struct page newpage. Keep in mind that you should - indicate to the VM the oldpage is no longer movable via __ClearPageMovable() - under page_lock if you migrated the oldpage successfully and returned - MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver - can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time - because VM interprets -EAGAIN as "temporary migration failure". On returning - any error except -EAGAIN, VM will give up the page migration without - retrying. - - Driver shouldn't touch the page.lru field while in the migratepage() function. - -3. ``void (*putback_page)(struct page *);`` - - If migration fails on the isolated page, VM should return the isolated page - to the driver so VM calls the driver's putback_page() with the isolated page. - In this function, the driver should put the isolated page back into its own data - structure. - -Non-LRU movable page flags - - There are two page flags for supporting non-LRU movable page. - - * PG_movable - - Driver should use the function below to make page movable under page_lock:: - - void __SetPageMovable(struct page *page, struct address_space *mapping) - - It needs argument of address_space for registering migration - family functions which will be called by VM. Exactly speaking, - PG_movable is not a real flag of struct page. Rather, VM - reuses the page->mapping's lower bits to represent it:: - - #define PAGE_MAPPING_MOVABLE 0x2 - page->mapping = page->mapping | PAGE_MAPPING_MOVABLE; - - so driver shouldn't access page->mapping directly. Instead, driver should - use page_mapping() which masks off the low two bits of page->mapping under - page lock so it can get the right struct address_space. - - For testing of non-LRU movable pages, VM supports __PageMovable() function. - However, it doesn't guarantee to identify non-LRU movable pages because - the page->mapping field is unified with other variables in struct page. - If the driver releases the page after isolation by VM, page->mapping - doesn't have a stable value although it has PAGE_MAPPING_MOVABLE set - (look at __ClearPageMovable). But __PageMovable() is cheap to call whether - page is LRU or non-LRU movable once the page has been isolated because LRU - pages can never have PAGE_MAPPING_MOVABLE set in page->mapping. It is also - good for just peeking to test non-LRU movable pages before more expensive - checking with lock_page() in pfn scanning to select a victim. - - For guaranteeing non-LRU movable page, VM provides PageMovable() function. - Unlike __PageMovable(), PageMovable() validates page->mapping and - mapping->a_ops->isolate_page under lock_page(). The lock_page() prevents - sudden destroying of page->mapping. - - Drivers using __SetPageMovable() should clear the flag via - __ClearMovablePage() under page_lock() before the releasing the page. - - * PG_isolated - - To prevent concurrent isolation among several CPUs, VM marks isolated page - as PG_isolated under lock_page(). So if a CPU encounters PG_isolated - non-LRU movable page, it can skip it. Driver doesn't need to manipulate the - flag because VM will set/clear it automatically. Keep in mind that if the - driver sees a PG_isolated page, it means the page has been isolated by the - VM so it shouldn't touch the page.lru field. - The PG_isolated flag is aliased with the PG_reclaim flag so drivers - shouldn't use PG_isolated for its own purposes. +If a driver wants to make its pages movable, it should define a struct +movable_operations. It then needs to call __SetPageMovable() on each +page that it may be able to move. This uses the ``page->mapping`` field, +so this field is not available for the driver to use for other purposes. Monitoring Migration ===================== @@ -286,3 +191,5 @@ THP_MIGRATION_FAIL and PGMIGRATE_FAIL to increase. Christoph Lameter, May 8, 2006. Minchan Kim, Mar 28, 2016. + +.. kernel-doc:: include/linux/migrate.h diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 15ed8206c463..5f4037c1d7fe 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -19,9 +19,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -500,19 +497,6 @@ static struct notifier_block cmm_mem_nb = { }; #ifdef CONFIG_BALLOON_COMPACTION -static struct vfsmount *balloon_mnt; - -static int cmm_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type balloon_fs = { - .name = "ppc-cmm", - .init_fs_context = cmm_init_fs_context, - .kill_sb = kill_anon_super, -}; - static int cmm_migratepage(struct balloon_dev_info *b_dev_info, struct page *newpage, struct page *page, enum migrate_mode mode) @@ -564,47 +548,13 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info, return MIGRATEPAGE_SUCCESS; } -static int cmm_balloon_compaction_init(void) +static void cmm_balloon_compaction_init(void) { - int rc; - balloon_devinfo_init(&b_dev_info); b_dev_info.migratepage = cmm_migratepage; - - balloon_mnt = kern_mount(&balloon_fs); - if (IS_ERR(balloon_mnt)) { - rc = PTR_ERR(balloon_mnt); - balloon_mnt = NULL; - return rc; - } - - b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); - if (IS_ERR(b_dev_info.inode)) { - rc = PTR_ERR(b_dev_info.inode); - b_dev_info.inode = NULL; - kern_unmount(balloon_mnt); - balloon_mnt = NULL; - return rc; - } - - b_dev_info.inode->i_mapping->a_ops = &balloon_aops; - return 0; -} -static void cmm_balloon_compaction_deinit(void) -{ - if (b_dev_info.inode) - iput(b_dev_info.inode); - b_dev_info.inode = NULL; - kern_unmount(balloon_mnt); - balloon_mnt = NULL; } #else /* CONFIG_BALLOON_COMPACTION */ -static int cmm_balloon_compaction_init(void) -{ - return 0; -} - -static void cmm_balloon_compaction_deinit(void) +static void cmm_balloon_compaction_init(void) { } #endif /* CONFIG_BALLOON_COMPACTION */ @@ -622,9 +572,7 @@ static int cmm_init(void) if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate) return -EOPNOTSUPP; - rc = cmm_balloon_compaction_init(); - if (rc) - return rc; + cmm_balloon_compaction_init(); rc = register_oom_notifier(&cmm_oom_nb); if (rc < 0) @@ -658,7 +606,6 @@ out_reboot_notifier: out_oom_notifier: unregister_oom_notifier(&cmm_oom_nb); out_balloon_compaction: - cmm_balloon_compaction_deinit(); return rc; } @@ -677,7 +624,6 @@ static void cmm_exit(void) unregister_memory_notifier(&cmm_mem_nb); cmm_free_pages(atomic_long_read(&loaned_pages)); cmm_unregister_sysfs(&cmm_dev); - cmm_balloon_compaction_deinit(); } /** diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 086ce77d9074..85dd6aa33df6 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -29,8 +29,6 @@ #include #include #include -#include -#include #include #include #include @@ -1730,20 +1728,6 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b) #ifdef CONFIG_BALLOON_COMPACTION - -static int vmballoon_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type vmballoon_fs = { - .name = "balloon-vmware", - .init_fs_context = vmballoon_init_fs_context, - .kill_sb = kill_anon_super, -}; - -static struct vfsmount *vmballoon_mnt; - /** * vmballoon_migratepage() - migrates a balloon page. * @b_dev_info: balloon device information descriptor. @@ -1862,21 +1846,6 @@ out_unlock: return ret; } -/** - * vmballoon_compaction_deinit() - removes compaction related data. - * - * @b: pointer to the balloon. - */ -static void vmballoon_compaction_deinit(struct vmballoon *b) -{ - if (!IS_ERR(b->b_dev_info.inode)) - iput(b->b_dev_info.inode); - - b->b_dev_info.inode = NULL; - kern_unmount(vmballoon_mnt); - vmballoon_mnt = NULL; -} - /** * vmballoon_compaction_init() - initialized compaction for the balloon. * @@ -1888,33 +1857,15 @@ static void vmballoon_compaction_deinit(struct vmballoon *b) * * Return: zero on success or error code on failure. */ -static __init int vmballoon_compaction_init(struct vmballoon *b) +static __init void vmballoon_compaction_init(struct vmballoon *b) { - vmballoon_mnt = kern_mount(&vmballoon_fs); - if (IS_ERR(vmballoon_mnt)) - return PTR_ERR(vmballoon_mnt); - b->b_dev_info.migratepage = vmballoon_migratepage; - b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb); - - if (IS_ERR(b->b_dev_info.inode)) - return PTR_ERR(b->b_dev_info.inode); - - b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops; - return 0; } #else /* CONFIG_BALLOON_COMPACTION */ - -static void vmballoon_compaction_deinit(struct vmballoon *b) -{ -} - -static int vmballoon_compaction_init(struct vmballoon *b) +static inline void vmballoon_compaction_init(struct vmballoon *b) { - return 0; } - #endif /* CONFIG_BALLOON_COMPACTION */ static int __init vmballoon_init(void) @@ -1939,9 +1890,7 @@ static int __init vmballoon_init(void) * balloon_devinfo_init() . */ balloon_devinfo_init(&balloon.b_dev_info); - error = vmballoon_compaction_init(&balloon); - if (error) - goto fail; + vmballoon_compaction_init(&balloon); INIT_LIST_HEAD(&balloon.huge_pages); spin_lock_init(&balloon.comm_lock); @@ -1958,7 +1907,6 @@ static int __init vmballoon_init(void) return 0; fail: vmballoon_unregister_shrinker(&balloon); - vmballoon_compaction_deinit(&balloon); return error; } @@ -1985,8 +1933,5 @@ static void __exit vmballoon_exit(void) */ vmballoon_send_start(&balloon, 0); vmballoon_pop(&balloon); - - /* Only once we popped the balloon, compaction can be deinit */ - vmballoon_compaction_deinit(&balloon); } module_exit(vmballoon_exit); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index b9737da6c4dd..bd360b91e9d3 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -17,9 +17,6 @@ #include #include #include -#include -#include -#include #include /* @@ -42,10 +39,6 @@ (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT)) #define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER) -#ifdef CONFIG_BALLOON_COMPACTION -static struct vfsmount *balloon_mnt; -#endif - enum virtio_balloon_vq { VIRTIO_BALLOON_VQ_INFLATE, VIRTIO_BALLOON_VQ_DEFLATE, @@ -805,18 +798,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, return MIGRATEPAGE_SUCCESS; } - -static int balloon_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type balloon_fs = { - .name = "balloon-kvm", - .init_fs_context = balloon_init_fs_context, - .kill_sb = kill_anon_super, -}; - #endif /* CONFIG_BALLOON_COMPACTION */ static unsigned long shrink_free_pages(struct virtio_balloon *vb, @@ -909,19 +890,7 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_free_vb; #ifdef CONFIG_BALLOON_COMPACTION - balloon_mnt = kern_mount(&balloon_fs); - if (IS_ERR(balloon_mnt)) { - err = PTR_ERR(balloon_mnt); - goto out_del_vqs; - } - vb->vb_dev_info.migratepage = virtballoon_migratepage; - vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); - if (IS_ERR(vb->vb_dev_info.inode)) { - err = PTR_ERR(vb->vb_dev_info.inode); - goto out_kern_unmount; - } - vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; #endif if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { /* @@ -930,13 +899,13 @@ static int virtballoon_probe(struct virtio_device *vdev) */ if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { err = -ENOSPC; - goto out_iput; + goto out_del_vqs; } vb->balloon_wq = alloc_workqueue("balloon-wq", WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); if (!vb->balloon_wq) { err = -ENOMEM; - goto out_iput; + goto out_del_vqs; } INIT_WORK(&vb->report_free_page_work, report_free_page_func); vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; @@ -1030,13 +999,7 @@ out_unregister_shrinker: out_del_balloon_wq: if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) destroy_workqueue(vb->balloon_wq); -out_iput: -#ifdef CONFIG_BALLOON_COMPACTION - iput(vb->vb_dev_info.inode); -out_kern_unmount: - kern_unmount(balloon_mnt); out_del_vqs: -#endif vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); @@ -1083,12 +1046,6 @@ static void virtballoon_remove(struct virtio_device *vdev) } remove_common(vb); -#ifdef CONFIG_BALLOON_COMPACTION - if (vb->vb_dev_info.inode) - iput(vb->vb_dev_info.inode); - - kern_unmount(balloon_mnt); -#endif kfree(vb); } diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index edb7f6d41faa..5ca2d5699620 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -57,7 +57,6 @@ struct balloon_dev_info { struct list_head pages; /* Pages enqueued & handled to Host */ int (*migratepage)(struct balloon_dev_info *, struct page *newpage, struct page *page, enum migrate_mode mode); - struct inode *inode; }; extern struct page *balloon_page_alloc(void); @@ -75,11 +74,10 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) spin_lock_init(&balloon->pages_lock); INIT_LIST_HEAD(&balloon->pages); balloon->migratepage = NULL; - balloon->inode = NULL; } #ifdef CONFIG_BALLOON_COMPACTION -extern const struct address_space_operations balloon_aops; +extern const struct movable_operations balloon_mops; /* * balloon_page_insert - insert a page into the balloon's page list and make @@ -94,7 +92,7 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, struct page *page) { __SetPageOffline(page); - __SetPageMovable(page, balloon->inode->i_mapping); + __SetPageMovable(page, &balloon_mops); set_page_private(page, (unsigned long)balloon); list_add(&page->lru, &balloon->pages); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..5d8ee3155ca2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -367,8 +367,6 @@ struct address_space_operations { */ int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); - bool (*isolate_page)(struct page *, isolate_mode_t); - void (*putback_page)(struct page *); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 069a89e847f3..82c735ba6109 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -19,6 +19,43 @@ struct migration_target_control; */ #define MIGRATEPAGE_SUCCESS 0 +/** + * struct movable_operations - Driver page migration + * @isolate_page: + * The VM calls this function to prepare the page to be moved. The page + * is locked and the driver should not unlock it. The driver should + * return ``true`` if the page is movable and ``false`` if it is not + * currently movable. After this function returns, the VM uses the + * page->lru field, so the driver must preserve any information which + * is usually stored here. + * + * @migrate_page: + * After isolation, the VM calls this function with the isolated + * @src page. The driver should copy the contents of the + * @src page to the @dst page and set up the fields of @dst page. + * Both pages are locked. + * If page migration is successful, the driver should call + * __ClearPageMovable(@src) and return MIGRATEPAGE_SUCCESS. + * If the driver cannot migrate the page at the moment, it can return + * -EAGAIN. The VM interprets this as a temporary migration failure and + * will retry it later. Any other error value is a permanent migration + * failure and migration will not be retried. + * The driver shouldn't touch the @src->lru field while in the + * migrate_page() function. It may write to @dst->lru. + * + * @putback_page: + * If migration fails on the isolated page, the VM informs the driver + * that the page is no longer a candidate for migration by calling + * this function. The driver should put the isolated page back into + * its own data structure. + */ +struct movable_operations { + bool (*isolate_page)(struct page *, isolate_mode_t); + int (*migrate_page)(struct page *dst, struct page *src, + enum migrate_mode); + void (*putback_page)(struct page *); +}; + /* Defined in mm/debug.c: */ extern const char *migrate_reason_names[MR_TYPES]; @@ -91,13 +128,13 @@ static inline int next_demotion_node(int node) #endif #ifdef CONFIG_COMPACTION -extern int PageMovable(struct page *page); -extern void __SetPageMovable(struct page *page, struct address_space *mapping); -extern void __ClearPageMovable(struct page *page); +bool PageMovable(struct page *page); +void __SetPageMovable(struct page *page, const struct movable_operations *ops); +void __ClearPageMovable(struct page *page); #else -static inline int PageMovable(struct page *page) { return 0; } +static inline bool PageMovable(struct page *page) { return false; } static inline void __SetPageMovable(struct page *page, - struct address_space *mapping) + const struct movable_operations *ops) { } static inline void __ClearPageMovable(struct page *page) @@ -110,6 +147,15 @@ static inline bool folio_test_movable(struct folio *folio) return PageMovable(&folio->page); } +static inline +const struct movable_operations *page_movable_ops(struct page *page) +{ + VM_BUG_ON(!__PageMovable(page)); + + return (const struct movable_operations *) + ((unsigned long)page->mapping - PAGE_MAPPING_MOVABLE); +} + #ifdef CONFIG_NUMA_BALANCING extern int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e66f7aa3191d..3f5490f6f038 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -639,7 +639,7 @@ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) * structure which KSM associates with that merged page. See ksm.h. * * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable - * page and then page->mapping points a struct address_space. + * page and then page->mapping points to a struct movable_operations. * * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f724129c0425..6325d1d0e90f 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -98,12 +98,8 @@ /* Since UDF 2.01 is ISO 13346 based... */ #define UDF_SUPER_MAGIC 0x15013346 -#define BALLOON_KVM_MAGIC 0x13661366 -#define ZSMALLOC_MAGIC 0x58295829 #define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ -#define Z3FOLD_MAGIC 0x33 -#define PPC_CMM_MAGIC 0xc7571590 #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #endif /* __LINUX_MAGIC_H__ */ diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 4b8eab4b3f45..22c96fed70b5 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -228,10 +228,8 @@ static void balloon_page_putback(struct page *page) spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); } - /* move_to_new_page() counterpart for a ballooned page */ -static int balloon_page_migrate(struct address_space *mapping, - struct page *newpage, struct page *page, +static int balloon_page_migrate(struct page *newpage, struct page *page, enum migrate_mode mode) { struct balloon_dev_info *balloon = balloon_page_device(page); @@ -250,11 +248,11 @@ static int balloon_page_migrate(struct address_space *mapping, return balloon->migratepage(balloon, newpage, page, mode); } -const struct address_space_operations balloon_aops = { - .migratepage = balloon_page_migrate, +const struct movable_operations balloon_mops = { + .migrate_page = balloon_page_migrate, .isolate_page = balloon_page_isolate, .putback_page = balloon_page_putback, }; -EXPORT_SYMBOL_GPL(balloon_aops); +EXPORT_SYMBOL_GPL(balloon_mops); #endif /* CONFIG_BALLOON_COMPACTION */ diff --git a/mm/compaction.c b/mm/compaction.c index 1f89b969c12b..f23efba1d118 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -110,28 +110,27 @@ static void split_map_pages(struct list_head *list) } #ifdef CONFIG_COMPACTION - -int PageMovable(struct page *page) +bool PageMovable(struct page *page) { - struct address_space *mapping; + const struct movable_operations *mops; VM_BUG_ON_PAGE(!PageLocked(page), page); if (!__PageMovable(page)) - return 0; + return false; - mapping = page_mapping(page); - if (mapping && mapping->a_ops && mapping->a_ops->isolate_page) - return 1; + mops = page_movable_ops(page); + if (mops) + return true; - return 0; + return false; } EXPORT_SYMBOL(PageMovable); -void __SetPageMovable(struct page *page, struct address_space *mapping) +void __SetPageMovable(struct page *page, const struct movable_operations *mops) { VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page); - page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE); + VM_BUG_ON_PAGE((unsigned long)mops & PAGE_MAPPING_MOVABLE, page); + page->mapping = (void *)((unsigned long)mops | PAGE_MAPPING_MOVABLE); } EXPORT_SYMBOL(__SetPageMovable); @@ -139,12 +138,10 @@ void __ClearPageMovable(struct page *page) { VM_BUG_ON_PAGE(!PageMovable(page), page); /* - * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE - * flag so that VM can catch up released page by driver after isolation. - * With it, VM migration doesn't try to put it back. + * This page still has the type of a movable page, but it's + * actually not movable any more. */ - page->mapping = (void *)((unsigned long)page->mapping & - PAGE_MAPPING_MOVABLE); + page->mapping = (void *)PAGE_MAPPING_MOVABLE; } EXPORT_SYMBOL(__ClearPageMovable); diff --git a/mm/migrate.c b/mm/migrate.c index 6c1ea61f39d8..491f03747832 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -59,7 +59,7 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) { - struct address_space *mapping; + const struct movable_operations *mops; /* * Avoid burning cycles with pages that are yet under __free_pages(), @@ -97,10 +97,10 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) if (!PageMovable(page) || PageIsolated(page)) goto out_no_isolated; - mapping = page_mapping(page); - VM_BUG_ON_PAGE(!mapping, page); + mops = page_movable_ops(page); + VM_BUG_ON_PAGE(!mops, page); - if (!mapping->a_ops->isolate_page(page, mode)) + if (!mops->isolate_page(page, mode)) goto out_no_isolated; /* Driver shouldn't use PG_isolated bit of page->flags */ @@ -120,10 +120,9 @@ out: static void putback_movable_page(struct page *page) { - struct address_space *mapping; + const struct movable_operations *mops = page_movable_ops(page); - mapping = page_mapping(page); - mapping->a_ops->putback_page(page); + mops->putback_page(page); ClearPageIsolated(page); } @@ -846,16 +845,15 @@ static int fallback_migrate_page(struct address_space *mapping, static int move_to_new_folio(struct folio *dst, struct folio *src, enum migrate_mode mode) { - struct address_space *mapping; int rc = -EAGAIN; bool is_lru = !__PageMovable(&src->page); VM_BUG_ON_FOLIO(!folio_test_locked(src), src); VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst); - mapping = folio_mapping(src); - if (likely(is_lru)) { + struct address_space *mapping = folio_mapping(src); + if (!mapping) rc = migrate_page(mapping, &dst->page, &src->page, mode); else if (mapping->a_ops->migratepage) @@ -872,6 +870,8 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, rc = fallback_migrate_page(mapping, &dst->page, &src->page, mode); } else { + const struct movable_operations *mops; + /* * In case of non-lru page, it could be released after * isolation step. In that case, we shouldn't try migration. @@ -883,8 +883,8 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, goto out; } - rc = mapping->a_ops->migratepage(mapping, &dst->page, - &src->page, mode); + mops = page_movable_ops(&src->page); + rc = mops->migrate_page(&dst->page, &src->page, mode); WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS && !folio_test_isolated(src)); } diff --git a/mm/util.c b/mm/util.c index 0837570c9225..53af0e79d3e4 100644 --- a/mm/util.c +++ b/mm/util.c @@ -804,10 +804,10 @@ struct address_space *folio_mapping(struct folio *folio) return swap_address_space(folio_swap_entry(folio)); mapping = folio->mapping; - if ((unsigned long)mapping & PAGE_MAPPING_ANON) + if ((unsigned long)mapping & PAGE_MAPPING_FLAGS) return NULL; - return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS); + return mapping; } EXPORT_SYMBOL(folio_mapping); diff --git a/mm/z3fold.c b/mm/z3fold.c index f41f8b0d9e9a..cf71da10d04e 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -34,15 +34,11 @@ #include #include #include -#include -#include -#include #include #include #include #include #include -#include #include /* @@ -149,7 +145,6 @@ struct z3fold_header { * @compact_wq: workqueue for page layout background optimization * @release_wq: workqueue for safe page release * @work: work_struct for safe page release - * @inode: inode for z3fold pseudo filesystem * * This structure is allocated at pool creation time and maintains metadata * pertaining to a particular z3fold pool. @@ -169,7 +164,6 @@ struct z3fold_pool { struct workqueue_struct *compact_wq; struct workqueue_struct *release_wq; struct work_struct work; - struct inode *inode; }; /* @@ -334,54 +328,6 @@ static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr) } } -static int z3fold_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type z3fold_fs = { - .name = "z3fold", - .init_fs_context = z3fold_init_fs_context, - .kill_sb = kill_anon_super, -}; - -static struct vfsmount *z3fold_mnt; -static int __init z3fold_mount(void) -{ - int ret = 0; - - z3fold_mnt = kern_mount(&z3fold_fs); - if (IS_ERR(z3fold_mnt)) - ret = PTR_ERR(z3fold_mnt); - - return ret; -} - -static void z3fold_unmount(void) -{ - kern_unmount(z3fold_mnt); -} - -static const struct address_space_operations z3fold_aops; -static int z3fold_register_migration(struct z3fold_pool *pool) -{ - pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb); - if (IS_ERR(pool->inode)) { - pool->inode = NULL; - return 1; - } - - pool->inode->i_mapping->private_data = pool; - pool->inode->i_mapping->a_ops = &z3fold_aops; - return 0; -} - -static void z3fold_unregister_migration(struct z3fold_pool *pool) -{ - if (pool->inode) - iput(pool->inode); -} - /* Initializes the z3fold header of a newly allocated z3fold page */ static struct z3fold_header *init_z3fold_page(struct page *page, bool headless, struct z3fold_pool *pool, gfp_t gfp) @@ -1002,14 +948,10 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, pool->release_wq = create_singlethread_workqueue(pool->name); if (!pool->release_wq) goto out_wq; - if (z3fold_register_migration(pool)) - goto out_rwq; INIT_WORK(&pool->work, free_pages_work); pool->ops = ops; return pool; -out_rwq: - destroy_workqueue(pool->release_wq); out_wq: destroy_workqueue(pool->compact_wq); out_unbuddied: @@ -1043,11 +985,12 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool) destroy_workqueue(pool->compact_wq); destroy_workqueue(pool->release_wq); - z3fold_unregister_migration(pool); free_percpu(pool->unbuddied); kfree(pool); } +static const struct movable_operations z3fold_mops; + /** * z3fold_alloc() - allocates a region of a given size * @pool: z3fold pool from which to allocate @@ -1117,11 +1060,11 @@ retry: } if (can_sleep) { lock_page(page); - __SetPageMovable(page, pool->inode->i_mapping); + __SetPageMovable(page, &z3fold_mops); unlock_page(page); } else { WARN_ON(!trylock_page(page)); - __SetPageMovable(page, pool->inode->i_mapping); + __SetPageMovable(page, &z3fold_mops); unlock_page(page); } z3fold_page_lock(zhdr); @@ -1554,12 +1497,11 @@ out: return false; } -static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode) +static int z3fold_page_migrate(struct page *newpage, struct page *page, + enum migrate_mode mode) { struct z3fold_header *zhdr, *new_zhdr; struct z3fold_pool *pool; - struct address_space *new_mapping; VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); @@ -1592,7 +1534,6 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa * so we only have to reinitialize it. */ INIT_LIST_HEAD(&new_zhdr->buddy); - new_mapping = page_mapping(page); __ClearPageMovable(page); get_page(newpage); @@ -1608,7 +1549,7 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa spin_lock(&pool->lock); list_add(&newpage->lru, &pool->lru); spin_unlock(&pool->lock); - __SetPageMovable(newpage, new_mapping); + __SetPageMovable(newpage, &z3fold_mops); z3fold_page_unlock(new_zhdr); queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); @@ -1642,9 +1583,9 @@ static void z3fold_page_putback(struct page *page) z3fold_page_unlock(zhdr); } -static const struct address_space_operations z3fold_aops = { +static const struct movable_operations z3fold_mops = { .isolate_page = z3fold_page_isolate, - .migratepage = z3fold_page_migrate, + .migrate_page = z3fold_page_migrate, .putback_page = z3fold_page_putback, }; @@ -1746,17 +1687,11 @@ MODULE_ALIAS("zpool-z3fold"); static int __init init_z3fold(void) { - int ret; - /* * Make sure the z3fold header is not larger than the page size and * there has remaining spaces for its buddy. */ BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE); - ret = z3fold_mount(); - if (ret) - return ret; - zpool_register_driver(&z3fold_zpool_driver); return 0; @@ -1764,7 +1699,6 @@ static int __init init_z3fold(void) static void __exit exit_z3fold(void) { - z3fold_unmount(); zpool_unregister_driver(&z3fold_zpool_driver); } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 5d5fc04385b8..71d6edcbea48 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -59,8 +58,6 @@ #include #include #include -#include -#include #include #include #include @@ -177,10 +174,6 @@ struct zs_size_stat { static struct dentry *zs_stat_root; #endif -#ifdef CONFIG_COMPACTION -static struct vfsmount *zsmalloc_mnt; -#endif - /* * We assign a page to ZS_ALMOST_EMPTY fullness group when: * n <= N / f, where @@ -252,7 +245,6 @@ struct zs_pool { struct dentry *stat_dentry; #endif #ifdef CONFIG_COMPACTION - struct inode *inode; struct work_struct free_work; #endif /* protect page/zspage migration */ @@ -271,6 +263,7 @@ struct zspage { unsigned int freeobj; struct page *first_page; struct list_head list; /* fullness list */ + struct zs_pool *pool; #ifdef CONFIG_COMPACTION rwlock_t lock; #endif @@ -295,8 +288,6 @@ static bool ZsHugePage(struct zspage *zspage) } #ifdef CONFIG_COMPACTION -static int zs_register_migration(struct zs_pool *pool); -static void zs_unregister_migration(struct zs_pool *pool); static void migrate_lock_init(struct zspage *zspage); static void migrate_read_lock(struct zspage *zspage); static void migrate_read_unlock(struct zspage *zspage); @@ -307,10 +298,6 @@ static void kick_deferred_free(struct zs_pool *pool); static void init_deferred_free(struct zs_pool *pool); static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage); #else -static int zsmalloc_mount(void) { return 0; } -static void zsmalloc_unmount(void) {} -static int zs_register_migration(struct zs_pool *pool) { return 0; } -static void zs_unregister_migration(struct zs_pool *pool) {} static void migrate_lock_init(struct zspage *zspage) {} static void migrate_read_lock(struct zspage *zspage) {} static void migrate_read_unlock(struct zspage *zspage) {} @@ -1083,6 +1070,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool, create_page_chain(class, zspage, pages); init_zspage(class, zspage); + zspage->pool = pool; return zspage; } @@ -1754,33 +1742,6 @@ static void lock_zspage(struct zspage *zspage) migrate_read_unlock(zspage); } -static int zs_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type zsmalloc_fs = { - .name = "zsmalloc", - .init_fs_context = zs_init_fs_context, - .kill_sb = kill_anon_super, -}; - -static int zsmalloc_mount(void) -{ - int ret = 0; - - zsmalloc_mnt = kern_mount(&zsmalloc_fs); - if (IS_ERR(zsmalloc_mnt)) - ret = PTR_ERR(zsmalloc_mnt); - - return ret; -} - -static void zsmalloc_unmount(void) -{ - kern_unmount(zsmalloc_mnt); -} - static void migrate_lock_init(struct zspage *zspage) { rwlock_init(&zspage->lock); @@ -1823,6 +1784,8 @@ static void dec_zspage_isolation(struct zspage *zspage) zspage->isolated--; } +static const struct movable_operations zsmalloc_mops; + static void replace_sub_page(struct size_class *class, struct zspage *zspage, struct page *newpage, struct page *oldpage) { @@ -1843,7 +1806,7 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage, set_first_obj_offset(newpage, get_first_obj_offset(oldpage)); if (unlikely(ZsHugePage(zspage))) newpage->index = oldpage->index; - __SetPageMovable(newpage, page_mapping(oldpage)); + __SetPageMovable(newpage, &zsmalloc_mops); } static bool zs_page_isolate(struct page *page, isolate_mode_t mode) @@ -1865,8 +1828,8 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode) return true; } -static int zs_page_migrate(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode) +static int zs_page_migrate(struct page *newpage, struct page *page, + enum migrate_mode mode) { struct zs_pool *pool; struct size_class *class; @@ -1889,14 +1852,15 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); - pool = mapping->private_data; + /* The page is locked, so this pointer must remain valid */ + zspage = get_zspage(page); + pool = zspage->pool; /* * The pool migrate_lock protects the race between zpage migration * and zs_free. */ write_lock(&pool->migrate_lock); - zspage = get_zspage(page); class = zspage_class(pool, zspage); /* @@ -1964,31 +1928,12 @@ static void zs_page_putback(struct page *page) migrate_write_unlock(zspage); } -static const struct address_space_operations zsmalloc_aops = { +static const struct movable_operations zsmalloc_mops = { .isolate_page = zs_page_isolate, - .migratepage = zs_page_migrate, + .migrate_page = zs_page_migrate, .putback_page = zs_page_putback, }; -static int zs_register_migration(struct zs_pool *pool) -{ - pool->inode = alloc_anon_inode(zsmalloc_mnt->mnt_sb); - if (IS_ERR(pool->inode)) { - pool->inode = NULL; - return 1; - } - - pool->inode->i_mapping->private_data = pool; - pool->inode->i_mapping->a_ops = &zsmalloc_aops; - return 0; -} - -static void zs_unregister_migration(struct zs_pool *pool) -{ - flush_work(&pool->free_work); - iput(pool->inode); -} - /* * Caller should hold page_lock of all pages in the zspage * In here, we cannot use zspage meta data. @@ -2032,6 +1977,11 @@ static void kick_deferred_free(struct zs_pool *pool) schedule_work(&pool->free_work); } +static void zs_flush_migration(struct zs_pool *pool) +{ + flush_work(&pool->free_work); +} + static void init_deferred_free(struct zs_pool *pool) { INIT_WORK(&pool->free_work, async_free_zspage); @@ -2043,10 +1993,12 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) do { WARN_ON(!trylock_page(page)); - __SetPageMovable(page, pool->inode->i_mapping); + __SetPageMovable(page, &zsmalloc_mops); unlock_page(page); } while ((page = get_next_page(page)) != NULL); } +#else +static inline void zs_flush_migration(struct zs_pool *pool) { } #endif /* @@ -2324,9 +2276,6 @@ struct zs_pool *zs_create_pool(const char *name) /* debug only, don't abort if it fails */ zs_pool_stat_create(pool, name); - if (zs_register_migration(pool)) - goto err; - /* * Not critical since shrinker is only used to trigger internal * defragmentation of the pool which is pretty optional thing. If @@ -2348,7 +2297,7 @@ void zs_destroy_pool(struct zs_pool *pool) int i; zs_unregister_shrinker(pool); - zs_unregister_migration(pool); + zs_flush_migration(pool); zs_pool_stat_destroy(pool); for (i = 0; i < ZS_SIZE_CLASSES; i++) { @@ -2380,14 +2329,10 @@ static int __init zs_init(void) { int ret; - ret = zsmalloc_mount(); - if (ret) - goto out; - ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare", zs_cpu_prepare, zs_cpu_dead); if (ret) - goto hp_setup_fail; + goto out; #ifdef CONFIG_ZPOOL zpool_register_driver(&zs_zpool_driver); @@ -2397,8 +2342,6 @@ static int __init zs_init(void) return 0; -hp_setup_fail: - zsmalloc_unmount(); out: return ret; } @@ -2408,7 +2351,6 @@ static void __exit zs_exit(void) #ifdef CONFIG_ZPOOL zpool_unregister_driver(&zs_zpool_driver); #endif - zsmalloc_unmount(); cpuhp_remove_state(CPUHP_MM_ZS_PREPARE); zs_stat_exit(); -- cgit v1.2.3 From 5490da4f06d182ba944706875029e98fe7f6b821 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 09:00:16 -0400 Subject: fs: Add aops->migrate_folio Provide a folio-based replacement for aops->migratepage. Update the documentation to document migrate_folio instead of migratepage. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- Documentation/filesystems/locking.rst | 5 +++-- Documentation/filesystems/vfs.rst | 14 +++++++------- include/linux/fs.h | 4 +++- mm/compaction.c | 4 +++- mm/migrate.c | 11 +++++++---- 5 files changed, 23 insertions(+), 15 deletions(-) (limited to 'mm') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 9963d9600b71..4bb2627026ec 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -252,7 +252,8 @@ prototypes:: bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *); int (*direct_IO)(struct kiocb *, struct iov_iter *iter); - int (*migratepage)(struct address_space *, struct page *, struct page *); + int (*migrate_folio)(struct address_space *, struct folio *dst, + struct folio *src, enum migrate_mode); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count); int (*error_remove_page)(struct address_space *, struct page *); @@ -278,7 +279,7 @@ invalidate_folio: yes exclusive release_folio: yes free_folio: yes direct_IO: -migratepage: yes (both) +migrate_folio: yes (both) launder_folio: yes is_partially_uptodate: yes error_remove_page: yes diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index b51665cdabc4..6cd6953e175b 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -737,8 +737,8 @@ cache in your filesystem. The following members are defined: bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); - /* migrate the contents of a page to the specified target */ - int (*migratepage) (struct page *, struct page *); + int (*migrate_folio)(struct mapping *, struct folio *dst, + struct folio *src, enum migrate_mode); int (*launder_folio) (struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, @@ -926,12 +926,12 @@ cache in your filesystem. The following members are defined: data directly between the storage and the application's address space. -``migrate_page`` +``migrate_folio`` This is used to compact the physical memory usage. If the VM - wants to relocate a page (maybe off a memory card that is - signalling imminent failure) it will pass a new page and an old - page to this function. migrate_page should transfer any private - data across and update any references that it has to the page. + wants to relocate a folio (maybe from a memory device that is + signalling imminent failure) it will pass a new folio and an old + folio to this function. migrate_folio should transfer any private + data across and update any references that it has to the folio. ``launder_folio`` Called before freeing a folio - it writes back the dirty folio. diff --git a/include/linux/fs.h b/include/linux/fs.h index 5d8ee3155ca2..47431cf8fbb3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -362,9 +362,11 @@ struct address_space_operations { void (*free_folio)(struct folio *folio); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* - * migrate the contents of a page to the specified target. If + * migrate the contents of a folio to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. */ + int (*migrate_folio)(struct address_space *, struct folio *dst, + struct folio *src, enum migrate_mode); int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); int (*launder_folio)(struct folio *); diff --git a/mm/compaction.c b/mm/compaction.c index f23efba1d118..458f49f9ab09 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1042,7 +1042,9 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, goto isolate_fail_put; mapping = page_mapping(page); - migrate_dirty = !mapping || mapping->a_ops->migratepage; + migrate_dirty = !mapping || + mapping->a_ops->migrate_folio || + mapping->a_ops->migratepage; unlock_page(page); if (!migrate_dirty) goto isolate_fail_put; diff --git a/mm/migrate.c b/mm/migrate.c index 491f03747832..3c3c168097dd 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -856,14 +856,17 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, if (!mapping) rc = migrate_page(mapping, &dst->page, &src->page, mode); - else if (mapping->a_ops->migratepage) + else if (mapping->a_ops->migrate_folio) /* - * Most pages have a mapping and most filesystems - * provide a migratepage callback. Anonymous pages + * Most folios have a mapping and most filesystems + * provide a migrate_folio callback. Anonymous folios * are part of swap space which also has its own - * migratepage callback. This is the most common path + * migrate_folio callback. This is the most common path * for page migration. */ + rc = mapping->a_ops->migrate_folio(mapping, dst, src, + mode); + else if (mapping->a_ops->migratepage) rc = mapping->a_ops->migratepage(mapping, &dst->page, &src->page, mode); else -- cgit v1.2.3 From 8faa8ef5dd11abe119ad0c8ccd39f2064ca7ed0e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 09:34:36 -0400 Subject: mm/migrate: Convert fallback_migrate_page() to fallback_migrate_folio() Use a folio throughout. migrate_page() will be converted to migrate_folio() later. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- mm/migrate.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'mm') diff --git a/mm/migrate.c b/mm/migrate.c index 3c3c168097dd..c5278440f74d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -805,11 +805,11 @@ static int writeout(struct address_space *mapping, struct page *page) /* * Default handling if a filesystem does not provide a migration function. */ -static int fallback_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) +static int fallback_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { - if (PageDirty(page)) { - /* Only writeback pages in full synchronous migration */ + if (folio_test_dirty(src)) { + /* Only writeback folios in full synchronous migration */ switch (mode) { case MIGRATE_SYNC: case MIGRATE_SYNC_NO_COPY: @@ -817,18 +817,18 @@ static int fallback_migrate_page(struct address_space *mapping, default: return -EBUSY; } - return writeout(mapping, page); + return writeout(mapping, &src->page); } /* * Buffers may be managed in a filesystem specific way. * We must have no buffers or drop them. */ - if (page_has_private(page) && - !try_to_release_page(page, GFP_KERNEL)) + if (folio_test_private(src) && + !filemap_release_folio(src, GFP_KERNEL)) return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY; - return migrate_page(mapping, newpage, page, mode); + return migrate_page(mapping, &dst->page, &src->page, mode); } /* @@ -870,8 +870,7 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, rc = mapping->a_ops->migratepage(mapping, &dst->page, &src->page, mode); else - rc = fallback_migrate_page(mapping, &dst->page, - &src->page, mode); + rc = fallback_migrate_folio(mapping, dst, src, mode); } else { const struct movable_operations *mops; -- cgit v1.2.3 From 2be7fa10c028019f7b2fee11238987762567d41e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 09:41:03 -0400 Subject: mm/migrate: Convert writeout() to take a folio Use a folio throughout this function. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- mm/migrate.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'mm') diff --git a/mm/migrate.c b/mm/migrate.c index c5278440f74d..75b171425c45 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -761,11 +761,10 @@ int buffer_migrate_page_norefs(struct address_space *mapping, #endif /* - * Writeback a page to clean the dirty state + * Writeback a folio to clean the dirty state */ -static int writeout(struct address_space *mapping, struct page *page) +static int writeout(struct address_space *mapping, struct folio *folio) { - struct folio *folio = page_folio(page); struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, .nr_to_write = 1, @@ -779,25 +778,25 @@ static int writeout(struct address_space *mapping, struct page *page) /* No write method for the address space */ return -EINVAL; - if (!clear_page_dirty_for_io(page)) + if (!folio_clear_dirty_for_io(folio)) /* Someone else already triggered a write */ return -EAGAIN; /* - * A dirty page may imply that the underlying filesystem has - * the page on some queue. So the page must be clean for - * migration. Writeout may mean we loose the lock and the - * page state is no longer what we checked for earlier. + * A dirty folio may imply that the underlying filesystem has + * the folio on some queue. So the folio must be clean for + * migration. Writeout may mean we lose the lock and the + * folio state is no longer what we checked for earlier. * At this point we know that the migration attempt cannot * be successful. */ remove_migration_ptes(folio, folio, false); - rc = mapping->a_ops->writepage(page, &wbc); + rc = mapping->a_ops->writepage(&folio->page, &wbc); if (rc != AOP_WRITEPAGE_ACTIVATE) /* unlocked. Relock */ - lock_page(page); + folio_lock(folio); return (rc < 0) ? -EIO : -EAGAIN; } @@ -817,7 +816,7 @@ static int fallback_migrate_folio(struct address_space *mapping, default: return -EBUSY; } - return writeout(mapping, &src->page); + return writeout(mapping, src); } /* -- cgit v1.2.3 From 67235182a41c1bd6b32806a1556a1d299b84212b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:20:31 -0400 Subject: mm/migrate: Convert buffer_migrate_page() to buffer_migrate_folio() Use a folio throughout __buffer_migrate_folio(), add kernel-doc for buffer_migrate_folio() and buffer_migrate_folio_norefs(), move their declarations to buffer.h and switch all filesystems that have wired them up. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- block/fops.c | 2 +- fs/ext2/inode.c | 4 +-- fs/ext4/inode.c | 4 +-- fs/ntfs/aops.c | 6 ++-- fs/ocfs2/aops.c | 2 +- include/linux/buffer_head.h | 10 ++++++ include/linux/fs.h | 12 ------- mm/migrate.c | 76 +++++++++++++++++++++++++++------------------ 8 files changed, 65 insertions(+), 51 deletions(-) (limited to 'mm') diff --git a/block/fops.c b/block/fops.c index d6b3276a6c68..743fc46d0aad 100644 --- a/block/fops.c +++ b/block/fops.c @@ -417,7 +417,7 @@ const struct address_space_operations def_blk_aops = { .write_end = blkdev_write_end, .writepages = blkdev_writepages, .direct_IO = blkdev_direct_IO, - .migratepage = buffer_migrate_page_norefs, + .migrate_folio = buffer_migrate_folio_norefs, .is_dirty_writeback = buffer_check_dirty_writeback, }; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e6b932219803..58a9d061f17d 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -973,7 +973,7 @@ const struct address_space_operations ext2_aops = { .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -989,7 +989,7 @@ const struct address_space_operations ext2_nobh_aops = { .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 06cc68878176..87a8b4382bce 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3633,7 +3633,7 @@ static const struct address_space_operations ext4_aops = { .invalidate_folio = ext4_invalidate_folio, .release_folio = ext4_release_folio, .direct_IO = noop_direct_IO, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .swap_activate = ext4_iomap_swap_activate, @@ -3668,7 +3668,7 @@ static const struct address_space_operations ext4_da_aops = { .invalidate_folio = ext4_invalidate_folio, .release_folio = ext4_release_folio, .direct_IO = noop_direct_IO, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .swap_activate = ext4_iomap_swap_activate, diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 9e3964ea2ea0..5f4fb6ca6f2e 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1659,7 +1659,7 @@ const struct address_space_operations ntfs_normal_aops = { .dirty_folio = block_dirty_folio, #endif /* NTFS_RW */ .bmap = ntfs_bmap, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -1673,7 +1673,7 @@ const struct address_space_operations ntfs_compressed_aops = { .writepage = ntfs_writepage, .dirty_folio = block_dirty_folio, #endif /* NTFS_RW */ - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -1688,7 +1688,7 @@ const struct address_space_operations ntfs_mst_aops = { .writepage = ntfs_writepage, /* Write dirty page to disk. */ .dirty_folio = filemap_dirty_folio, #endif /* NTFS_RW */ - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 767df51f8657..1d489003f99d 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2462,7 +2462,7 @@ const struct address_space_operations ocfs2_aops = { .direct_IO = ocfs2_direct_IO, .invalidate_folio = block_invalidate_folio, .release_folio = ocfs2_release_folio, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c9d1463bb20f..b0366c89d6a4 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -267,6 +267,16 @@ int nobh_truncate_page(struct address_space *, loff_t, get_block_t *); int nobh_writepage(struct page *page, get_block_t *get_block, struct writeback_control *wbc); +#ifdef CONFIG_MIGRATION +extern int buffer_migrate_folio(struct address_space *, + struct folio *dst, struct folio *src, enum migrate_mode); +extern int buffer_migrate_folio_norefs(struct address_space *, + struct folio *dst, struct folio *src, enum migrate_mode); +#else +#define buffer_migrate_folio NULL +#define buffer_migrate_folio_norefs NULL +#endif + void buffer_init(void); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 47431cf8fbb3..9e6b17da4e11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3215,18 +3215,6 @@ extern int generic_check_addressable(unsigned, u64); extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); -#ifdef CONFIG_MIGRATION -extern int buffer_migrate_page(struct address_space *, - struct page *, struct page *, - enum migrate_mode); -extern int buffer_migrate_page_norefs(struct address_space *, - struct page *, struct page *, - enum migrate_mode); -#else -#define buffer_migrate_page NULL -#define buffer_migrate_page_norefs NULL -#endif - int may_setattr(struct user_namespace *mnt_userns, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *); diff --git a/mm/migrate.c b/mm/migrate.c index 75b171425c45..ea5398d0f7f1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -656,23 +656,23 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, return true; } -static int __buffer_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode, +static int __buffer_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode, bool check_refs) { struct buffer_head *bh, *head; int rc; int expected_count; - if (!page_has_buffers(page)) - return migrate_page(mapping, newpage, page, mode); + head = folio_buffers(src); + if (!head) + return migrate_page(mapping, &dst->page, &src->page, mode); /* Check whether page does not have extra refs before we do more work */ - expected_count = expected_page_refs(mapping, page); - if (page_count(page) != expected_count) + expected_count = expected_page_refs(mapping, &src->page); + if (folio_ref_count(src) != expected_count) return -EAGAIN; - head = page_buffers(page); if (!buffer_migrate_lock_buffers(head, mode)) return -EAGAIN; @@ -703,23 +703,22 @@ recheck_buffers: } } - rc = migrate_page_move_mapping(mapping, newpage, page, 0); + rc = folio_migrate_mapping(mapping, dst, src, 0); if (rc != MIGRATEPAGE_SUCCESS) goto unlock_buffers; - attach_page_private(newpage, detach_page_private(page)); + folio_attach_private(dst, folio_detach_private(src)); bh = head; do { - set_bh_page(bh, newpage, bh_offset(bh)); + set_bh_page(bh, &dst->page, bh_offset(bh)); bh = bh->b_this_page; - } while (bh != head); if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(dst, src); else - migrate_page_states(newpage, page); + folio_migrate_flags(dst, src); rc = MIGRATEPAGE_SUCCESS; unlock_buffers: @@ -729,34 +728,51 @@ unlock_buffers: do { unlock_buffer(bh); bh = bh->b_this_page; - } while (bh != head); return rc; } -/* - * Migration function for pages with buffers. This function can only be used - * if the underlying filesystem guarantees that no other references to "page" - * exist. For example attached buffer heads are accessed only under page lock. +/** + * buffer_migrate_folio() - Migration function for folios with buffers. + * @mapping: The address space containing @src. + * @dst: The folio to migrate to. + * @src: The folio to migrate from. + * @mode: How to migrate the folio. + * + * This function can only be used if the underlying filesystem guarantees + * that no other references to @src exist. For example attached buffer + * heads are accessed only under the folio lock. If your filesystem cannot + * provide this guarantee, buffer_migrate_folio_norefs() may be more + * appropriate. + * + * Return: 0 on success or a negative errno on failure. */ -int buffer_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) +int buffer_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { - return __buffer_migrate_page(mapping, newpage, page, mode, false); + return __buffer_migrate_folio(mapping, dst, src, mode, false); } -EXPORT_SYMBOL(buffer_migrate_page); +EXPORT_SYMBOL(buffer_migrate_folio); -/* - * Same as above except that this variant is more careful and checks that there - * are also no buffer head references. This function is the right one for - * mappings where buffer heads are directly looked up and referenced (such as - * block device mappings). +/** + * buffer_migrate_folio_norefs() - Migration function for folios with buffers. + * @mapping: The address space containing @src. + * @dst: The folio to migrate to. + * @src: The folio to migrate from. + * @mode: How to migrate the folio. + * + * Like buffer_migrate_folio() except that this variant is more careful + * and checks that there are also no buffer head references. This function + * is the right one for mappings where buffer heads are directly looked + * up and referenced (such as block device mappings). + * + * Return: 0 on success or a negative errno on failure. */ -int buffer_migrate_page_norefs(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) +int buffer_migrate_folio_norefs(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { - return __buffer_migrate_page(mapping, newpage, page, mode, true); + return __buffer_migrate_folio(mapping, dst, src, mode, true); } #endif -- cgit v1.2.3 From 108ca8358139bec4232319debfb20bafdaf4f877 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 16:25:10 -0400 Subject: mm/migrate: Convert expected_page_refs() to folio_expected_refs() Now that both callers have a folio, convert this function to take a folio & rename it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- mm/migrate.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'mm') diff --git a/mm/migrate.c b/mm/migrate.c index ea5398d0f7f1..61cd8d270b03 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -336,13 +336,18 @@ unlock: } #endif -static int expected_page_refs(struct address_space *mapping, struct page *page) +static int folio_expected_refs(struct address_space *mapping, + struct folio *folio) { - int expected_count = 1; + int refs = 1; + if (!mapping) + return refs; - if (mapping) - expected_count += compound_nr(page) + page_has_private(page); - return expected_count; + refs += folio_nr_pages(folio); + if (folio_test_private(folio)) + refs++; + + return refs; } /* @@ -359,7 +364,7 @@ int folio_migrate_mapping(struct address_space *mapping, XA_STATE(xas, &mapping->i_pages, folio_index(folio)); struct zone *oldzone, *newzone; int dirty; - int expected_count = expected_page_refs(mapping, &folio->page) + extra_count; + int expected_count = folio_expected_refs(mapping, folio) + extra_count; long nr = folio_nr_pages(folio); if (!mapping) { @@ -669,7 +674,7 @@ static int __buffer_migrate_folio(struct address_space *mapping, return migrate_page(mapping, &dst->page, &src->page, mode); /* Check whether page does not have extra refs before we do more work */ - expected_count = expected_page_refs(mapping, &src->page); + expected_count = folio_expected_refs(mapping, src); if (folio_ref_count(src) != expected_count) return -EAGAIN; -- cgit v1.2.3 From 541846502f4fe826cd7c16e4784695ac90736585 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:27:41 -0400 Subject: mm/migrate: Convert migrate_page() to migrate_folio() Convert all callers to pass a folio. Most have the folio already available. Switch all users from aops->migratepage to aops->migrate_folio. Also turn the documentation into kerneldoc. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: David Sterba --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 4 ++-- fs/btrfs/disk-io.c | 2 +- fs/nfs/write.c | 2 +- include/linux/migrate.h | 5 ++-- mm/migrate.c | 37 ++++++++++++++++------------- mm/migrate_device.c | 3 ++- mm/shmem.c | 2 +- mm/swap_state.c | 2 +- 8 files changed, 30 insertions(+), 27 deletions(-) (limited to 'mm') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 094f06b4ce33..8423df021b71 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -216,8 +216,8 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, * However...! * * The mmu-notifier can be invalidated for a - * migrate_page, that is alreadying holding the lock - * on the page. Such a try_to_unmap() will result + * migrate_folio, that is alreadying holding the lock + * on the folio. Such a try_to_unmap() will result * in us calling put_pages() and so recursively try * to lock the page. We avoid that deadlock with * a trylock_page() and in exchange we risk missing diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4aeb68f8450e..64d9299218f2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -968,7 +968,7 @@ static int btree_migrate_folio(struct address_space *mapping, if (folio_get_private(src) && !filemap_release_folio(src, GFP_KERNEL)) return -EAGAIN; - return migrate_page(mapping, &dst->page, &src->page, mode); + return migrate_folio(mapping, dst, src, mode); } #else #define btree_migrate_folio NULL diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 649b9e633459..69569696dde0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -2139,7 +2139,7 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst, folio_wait_fscache(src); } - return migrate_page(mapping, &dst->page, &src->page, mode); + return migrate_folio(mapping, dst, src, mode); } #endif diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 82c735ba6109..c9986d5da335 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -62,9 +62,8 @@ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); -extern int migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, - enum migrate_mode mode); +int migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded); diff --git a/mm/migrate.c b/mm/migrate.c index 61cd8d270b03..77aeb7e12f62 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -593,34 +593,37 @@ EXPORT_SYMBOL(folio_migrate_copy); * Migration functions ***********************************************************/ -/* - * Common logic to directly migrate a single LRU page suitable for - * pages that do not use PagePrivate/PagePrivate2. +/** + * migrate_folio() - Simple folio migration. + * @mapping: The address_space containing the folio. + * @dst: The folio to migrate the data to. + * @src: The folio containing the current data. + * @mode: How to migrate the page. * - * Pages are locked upon entry and exit. + * Common logic to directly migrate a single LRU folio suitable for + * folios that do not use PagePrivate/PagePrivate2. + * + * Folios are locked upon entry and exit. */ -int migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, - enum migrate_mode mode) +int migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode) { - struct folio *newfolio = page_folio(newpage); - struct folio *folio = page_folio(page); int rc; - BUG_ON(folio_test_writeback(folio)); /* Writeback must be complete */ + BUG_ON(folio_test_writeback(src)); /* Writeback must be complete */ - rc = folio_migrate_mapping(mapping, newfolio, folio, 0); + rc = folio_migrate_mapping(mapping, dst, src, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; if (mode != MIGRATE_SYNC_NO_COPY) - folio_migrate_copy(newfolio, folio); + folio_migrate_copy(dst, src); else - folio_migrate_flags(newfolio, folio); + folio_migrate_flags(dst, src); return MIGRATEPAGE_SUCCESS; } -EXPORT_SYMBOL(migrate_page); +EXPORT_SYMBOL(migrate_folio); #ifdef CONFIG_BLOCK /* Returns true if all buffers are successfully locked */ @@ -671,7 +674,7 @@ static int __buffer_migrate_folio(struct address_space *mapping, head = folio_buffers(src); if (!head) - return migrate_page(mapping, &dst->page, &src->page, mode); + return migrate_folio(mapping, dst, src, mode); /* Check whether page does not have extra refs before we do more work */ expected_count = folio_expected_refs(mapping, src); @@ -848,7 +851,7 @@ static int fallback_migrate_folio(struct address_space *mapping, !filemap_release_folio(src, GFP_KERNEL)) return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY; - return migrate_page(mapping, &dst->page, &src->page, mode); + return migrate_folio(mapping, dst, src, mode); } /* @@ -875,7 +878,7 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, struct address_space *mapping = folio_mapping(src); if (!mapping) - rc = migrate_page(mapping, &dst->page, &src->page, mode); + rc = migrate_folio(mapping, dst, src, mode); else if (mapping->a_ops->migrate_folio) /* * Most folios have a mapping and most filesystems diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 5052093d0262..5dd97c39ca6a 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -718,7 +718,8 @@ void migrate_vma_pages(struct migrate_vma *migrate) continue; } - r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY); + r = migrate_folio(mapping, page_folio(newpage), + page_folio(page), MIGRATE_SYNC_NO_COPY); if (r != MIGRATEPAGE_SUCCESS) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; } diff --git a/mm/shmem.c b/mm/shmem.c index 28a62be1d41e..15c61456e087 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3801,7 +3801,7 @@ const struct address_space_operations shmem_aops = { .write_end = shmem_write_end, #endif #ifdef CONFIG_MIGRATION - .migratepage = migrate_page, + .migrate_folio = migrate_folio, #endif .error_remove_page = shmem_error_remove_page, }; diff --git a/mm/swap_state.c b/mm/swap_state.c index f5b6f5638908..0a2021fc55ad 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -33,7 +33,7 @@ static const struct address_space_operations swap_aops = { .writepage = swap_writepage, .dirty_folio = noop_dirty_folio, #ifdef CONFIG_MIGRATION - .migratepage = migrate_page, + .migrate_folio = migrate_folio, #endif }; -- cgit v1.2.3 From 2ec810d59602f0e08847f986ef8e16469722496f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 12:55:08 -0400 Subject: mm/migrate: Add filemap_migrate_folio() There is nothing iomap-specific about iomap_migratepage(), and it fits a pattern used by several other filesystems, so move it to mm/migrate.c, convert it to be filemap_migrate_folio() and convert the iomap filesystems to use it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/gfs2/aops.c | 2 +- fs/iomap/buffered-io.c | 25 ------------------------- fs/xfs/xfs_aops.c | 2 +- fs/zonefs/super.c | 2 +- include/linux/iomap.h | 6 ------ include/linux/pagemap.h | 6 ++++++ mm/migrate.c | 20 ++++++++++++++++++++ 7 files changed, 29 insertions(+), 34 deletions(-) (limited to 'mm') diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 106e90a36583..57ff883d432c 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -774,7 +774,7 @@ static const struct address_space_operations gfs2_aops = { .invalidate_folio = iomap_invalidate_folio, .bmap = gfs2_bmap, .direct_IO = noop_direct_IO, - .migratepage = iomap_migrate_page, + .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 66278a14bfa7..5a91aa1db945 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -489,31 +489,6 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len) } EXPORT_SYMBOL_GPL(iomap_invalidate_folio); -#ifdef CONFIG_MIGRATION -int -iomap_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode) -{ - struct folio *folio = page_folio(page); - struct folio *newfolio = page_folio(newpage); - int ret; - - ret = folio_migrate_mapping(mapping, newfolio, folio, 0); - if (ret != MIGRATEPAGE_SUCCESS) - return ret; - - if (folio_test_private(folio)) - folio_attach_private(newfolio, folio_detach_private(folio)); - - if (mode != MIGRATE_SYNC_NO_COPY) - folio_migrate_copy(newfolio, folio); - else - folio_migrate_flags(newfolio, folio); - return MIGRATEPAGE_SUCCESS; -} -EXPORT_SYMBOL_GPL(iomap_migrate_page); -#endif /* CONFIG_MIGRATION */ - static void iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) { diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 8ec38b25187b..5d1a995b15f8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -570,7 +570,7 @@ const struct address_space_operations xfs_address_space_operations = { .invalidate_folio = iomap_invalidate_folio, .bmap = xfs_vm_bmap, .direct_IO = noop_direct_IO, - .migratepage = iomap_migrate_page, + .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .swap_activate = xfs_iomap_swapfile_activate, diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 053299758deb..cc6d4cf580ac 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -271,7 +271,7 @@ static const struct address_space_operations zonefs_file_aops = { .dirty_folio = filemap_dirty_folio, .release_folio = iomap_release_folio, .invalidate_folio = iomap_invalidate_folio, - .migratepage = iomap_migrate_page, + .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .direct_IO = noop_direct_IO, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e552097c67e0..758a1125e72f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -231,12 +231,6 @@ void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); -#ifdef CONFIG_MIGRATION -int iomap_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode); -#else -#define iomap_migrate_page NULL -#endif int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 87d4ea571240..cc9adbaddb59 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1078,6 +1078,12 @@ static inline int __must_check write_one_page(struct page *page) int __set_page_dirty_nobuffers(struct page *page); bool noop_dirty_folio(struct address_space *mapping, struct folio *folio); +#ifdef CONFIG_MIGRATION +int filemap_migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode); +#else +#define filemap_migrate_folio NULL +#endif void page_endio(struct page *page, bool is_write, int err); void folio_end_private_2(struct folio *folio); diff --git a/mm/migrate.c b/mm/migrate.c index 77aeb7e12f62..4ed8f0d53c77 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -784,6 +784,26 @@ int buffer_migrate_folio_norefs(struct address_space *mapping, } #endif +int filemap_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) +{ + int ret; + + ret = folio_migrate_mapping(mapping, dst, src, 0); + if (ret != MIGRATEPAGE_SUCCESS) + return ret; + + if (folio_get_private(src)) + folio_attach_private(dst, folio_detach_private(src)); + + if (mode != MIGRATE_SYNC_NO_COPY) + folio_migrate_copy(dst, src); + else + folio_migrate_flags(dst, src); + return MIGRATEPAGE_SUCCESS; +} +EXPORT_SYMBOL_GPL(filemap_migrate_folio); + /* * Writeback a folio to clean the dirty state */ -- cgit v1.2.3 From b890ec2a2c2d962f71ba31ae291f8fd252b46258 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:47:21 -0400 Subject: hugetlb: Convert to migrate_folio This involves converting migrate_huge_page_move_mapping(). We also need a folio variant of hugetlb_set_page_subpool(), but that's for a later patch. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Muchun Song Reviewed-by: Mike Kravetz --- fs/hugetlbfs/inode.c | 23 ++++++++++++++--------- include/linux/migrate.h | 6 +++--- mm/migrate.c | 18 +++++++++--------- 3 files changed, 26 insertions(+), 21 deletions(-) (limited to 'mm') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 14d33f725e05..eca1d0fabd7e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -954,28 +954,33 @@ static int hugetlbfs_symlink(struct user_namespace *mnt_userns, return error; } -static int hugetlbfs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, +#ifdef CONFIG_MIGRATION +static int hugetlbfs_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { int rc; - rc = migrate_huge_page_move_mapping(mapping, newpage, page); + rc = migrate_huge_page_move_mapping(mapping, dst, src); if (rc != MIGRATEPAGE_SUCCESS) return rc; - if (hugetlb_page_subpool(page)) { - hugetlb_set_page_subpool(newpage, hugetlb_page_subpool(page)); - hugetlb_set_page_subpool(page, NULL); + if (hugetlb_page_subpool(&src->page)) { + hugetlb_set_page_subpool(&dst->page, + hugetlb_page_subpool(&src->page)); + hugetlb_set_page_subpool(&src->page, NULL); } if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(dst, src); else - migrate_page_states(newpage, page); + folio_migrate_flags(dst, src); return MIGRATEPAGE_SUCCESS; } +#else +#define hugetlbfs_migrate_folio NULL +#endif static int hugetlbfs_error_remove_page(struct address_space *mapping, struct page *page) @@ -1142,7 +1147,7 @@ static const struct address_space_operations hugetlbfs_aops = { .write_begin = hugetlbfs_write_begin, .write_end = hugetlbfs_write_end, .dirty_folio = noop_dirty_folio, - .migratepage = hugetlbfs_migrate_page, + .migrate_folio = hugetlbfs_migrate_folio, .error_remove_page = hugetlbfs_error_remove_page, }; diff --git a/include/linux/migrate.h b/include/linux/migrate.h index c9986d5da335..13f793309b75 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -72,8 +72,8 @@ extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); -extern int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page); +int migrate_huge_page_move_mapping(struct address_space *mapping, + struct folio *dst, struct folio *src); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, @@ -104,7 +104,7 @@ static inline void migrate_page_copy(struct page *newpage, struct page *page) {} static inline int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page) + struct folio *dst, struct folio *src) { return -ENOSYS; } diff --git a/mm/migrate.c b/mm/migrate.c index 4ed8f0d53c77..0dd3ec9525b3 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -474,26 +474,26 @@ EXPORT_SYMBOL(folio_migrate_mapping); * of folio_migrate_mapping(). */ int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page) + struct folio *dst, struct folio *src) { - XA_STATE(xas, &mapping->i_pages, page_index(page)); + XA_STATE(xas, &mapping->i_pages, folio_index(src)); int expected_count; xas_lock_irq(&xas); - expected_count = 2 + page_has_private(page); - if (!page_ref_freeze(page, expected_count)) { + expected_count = 2 + folio_has_private(src); + if (!folio_ref_freeze(src, expected_count)) { xas_unlock_irq(&xas); return -EAGAIN; } - newpage->index = page->index; - newpage->mapping = page->mapping; + dst->index = src->index; + dst->mapping = src->mapping; - get_page(newpage); + folio_get(dst); - xas_store(&xas, newpage); + xas_store(&xas, dst); - page_ref_unfreeze(page, expected_count - 1); + folio_ref_unfreeze(src, expected_count - 1); xas_unlock_irq(&xas); -- cgit v1.2.3 From 5409548df3876a6fa9115bce237c93c3d50d6cb6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 11:30:43 -0400 Subject: secretmem: Convert to migrate_folio This is little more than changing the types over; there's no real work being done in this function. Signed-off-by: Matthew Wilcox (Oracle) --- mm/secretmem.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'mm') diff --git a/mm/secretmem.c b/mm/secretmem.c index 1c7f1775b56e..658a7486efa9 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -133,9 +133,8 @@ static const struct file_operations secretmem_fops = { .mmap = secretmem_mmap, }; -static int secretmem_migratepage(struct address_space *mapping, - struct page *newpage, struct page *page, - enum migrate_mode mode) +static int secretmem_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { return -EBUSY; } @@ -149,7 +148,7 @@ static void secretmem_free_folio(struct folio *folio) const struct address_space_operations secretmem_aops = { .dirty_folio = noop_dirty_folio, .free_folio = secretmem_free_folio, - .migratepage = secretmem_migratepage, + .migrate_folio = secretmem_migrate_folio, }; static int secretmem_setattr(struct user_namespace *mnt_userns, -- cgit v1.2.3 From 9d0ddc0cb575fd41ff16131b06e08e1feac43b81 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 11:53:31 -0400 Subject: fs: Remove aops->migratepage() With all users converted to migrate_folio(), remove this operation. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/fs.h | 2 -- mm/compaction.c | 5 ++--- mm/migrate.c | 3 --- 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'mm') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9e6b17da4e11..7e06919b8f60 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -367,8 +367,6 @@ struct address_space_operations { */ int (*migrate_folio)(struct address_space *, struct folio *dst, struct folio *src, enum migrate_mode); - int (*migratepage) (struct address_space *, - struct page *, struct page *, enum migrate_mode); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); diff --git a/mm/compaction.c b/mm/compaction.c index 458f49f9ab09..a2c53fcf933e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1031,7 +1031,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* * Only pages without mappings or that have a - * ->migratepage callback are possible to migrate + * ->migrate_folio callback are possible to migrate * without blocking. However, we can be racing with * truncation so it's necessary to lock the page * to stabilise the mapping as truncation holds @@ -1043,8 +1043,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, mapping = page_mapping(page); migrate_dirty = !mapping || - mapping->a_ops->migrate_folio || - mapping->a_ops->migratepage; + mapping->a_ops->migrate_folio; unlock_page(page); if (!migrate_dirty) goto isolate_fail_put; diff --git a/mm/migrate.c b/mm/migrate.c index 0dd3ec9525b3..1b4b977809a1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -909,9 +909,6 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, */ rc = mapping->a_ops->migrate_folio(mapping, dst, src, mode); - else if (mapping->a_ops->migratepage) - rc = mapping->a_ops->migratepage(mapping, &dst->page, - &src->page, mode); else rc = fallback_migrate_folio(mapping, dst, src, mode); } else { -- cgit v1.2.3 From 9800562f2ab41656b0bdc2a41c77ab3f6dfdd6fc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 13:29:10 -0400 Subject: mm/folio-compat: Remove migration compatibility functions migrate_page_move_mapping(), migrate_page_copy() and migrate_page_states() are all now unused after converting all the filesystems from aops->migratepage() to aops->migrate_folio(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/migrate.h | 11 ----------- mm/folio-compat.c | 22 ---------------------- mm/ksm.c | 2 +- 3 files changed, 1 insertion(+), 34 deletions(-) (limited to 'mm') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 13f793309b75..ae5bb67a9ba1 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -70,12 +70,8 @@ extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); -extern void migrate_page_states(struct page *newpage, struct page *page); -extern void migrate_page_copy(struct page *newpage, struct page *page); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -extern int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count); void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, spinlock_t *ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); @@ -96,13 +92,6 @@ static inline struct page *alloc_migration_target(struct page *page, static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } -static inline void migrate_page_states(struct page *newpage, struct page *page) -{ -} - -static inline void migrate_page_copy(struct page *newpage, - struct page *page) {} - static inline int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src) { diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 20bc15b57d93..458618c7302c 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -51,28 +51,6 @@ void mark_page_accessed(struct page *page) } EXPORT_SYMBOL(mark_page_accessed); -#ifdef CONFIG_MIGRATION -int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count) -{ - return folio_migrate_mapping(mapping, page_folio(newpage), - page_folio(page), extra_count); -} -EXPORT_SYMBOL(migrate_page_move_mapping); - -void migrate_page_states(struct page *newpage, struct page *page) -{ - folio_migrate_flags(page_folio(newpage), page_folio(page)); -} -EXPORT_SYMBOL(migrate_page_states); - -void migrate_page_copy(struct page *newpage, struct page *page) -{ - folio_migrate_copy(page_folio(newpage), page_folio(page)); -} -EXPORT_SYMBOL(migrate_page_copy); -#endif - bool set_page_writeback(struct page *page) { return folio_start_writeback(page_folio(page)); diff --git a/mm/ksm.c b/mm/ksm.c index 54f78c9eecae..e8f8c1a2bb39 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -712,7 +712,7 @@ again: * however, it might mean that the page is under page_ref_freeze(). * The __remove_mapping() case is easy, again the node is now stale; * the same is in reuse_ksm_page() case; but if page is swapcache - * in migrate_page_move_mapping(), it might still be our page, + * in folio_migrate_mapping(), it might still be our page, * in which case it's essential to keep the node. */ while (!get_page_unless_zero(page)) { -- cgit v1.2.3