summary | refs | log | tree | commit | diff
path: root/mm/migrate.c
diff options
context:
space:
mode:
author: Alistair Popple <apopple@nvidia.com> 2022-01-21 22:10:46 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2022-01-22 08:33:34 +0200
commit: ffa65753c43142f3b803486442813744da71cff2 (patch)
tree: f910bd6b2cdc6a7100ad83d748b0f5dc1324bd4c /mm/migrate.c
parent: 9b57f458985742bd1c585f4c7f36d04634ce1143 (diff)
download: lwn-ffa65753c43142f3b803486442813744da71cff2.tar.gz
          lwn-ffa65753c43142f3b803486442813744da71cff2.zip
mm/migrate.c: rework migration_entry_wait() to not take a pageref
This fixes the FIXME in migrate_vma_check_page().

Before migrating a page, migration code will take a reference and check there are no unexpected page references, failing the migration if there are. When a thread faults on a migration entry, it will take a temporary reference to the page to wait for the page to become unlocked, signifying the migration entry has been removed. This reference is dropped just prior to waiting on the page lock; however, the extra reference can cause migration failures, so it is desirable to avoid taking it.

As migration code already has a reference to the migrating page, an extra reference to wait on PG_locked is unnecessary so long as the reference can't be dropped whilst setting up the wait. When faulting on a migration entry, the ptl is taken to check the migration entry. Removing a migration entry also requires the ptl, and migration code won't drop its page reference until after the migration entry has been removed. Therefore, retaining the ptl of a migration entry is sufficient to ensure the page has a reference. Reworking migration_entry_wait() to hold the ptl until the wait setup is complete means the extra page reference is no longer needed.

[apopple@nvidia.com: v5]
Link: https://lkml.kernel.org/r/20211213033848.1973946-1-apopple@nvidia.com
Link: https://lkml.kernel.org/r/20211118020754.954425-1-apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--mm/migrate.c38
1 files changed, 4 insertions, 34 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 18ce840914f0..c7da064b4781 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -291,7 +291,6 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
{
pte_t pte;
swp_entry_t entry;
- struct folio *folio;
spin_lock(ptl);
pte = *ptep;
@@ -302,17 +301,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
if (!is_migration_entry(entry))
goto out;
- folio = page_folio(pfn_swap_entry_to_page(entry));
-
- /*
- * Once page cache replacement of page migration started, page_count
- * is zero; but we must not call folio_put_wait_locked() without
- * a ref. Use folio_try_get(), and just fault again if it fails.
- */
- if (!folio_try_get(folio))
- goto out;
- pte_unmap_unlock(ptep, ptl);
- folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
+ migration_entry_wait_on_locked(entry, ptep, ptl);
return;
out:
pte_unmap_unlock(ptep, ptl);
@@ -337,16 +326,11 @@ void migration_entry_wait_huge(struct vm_area_struct *vma,
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
spinlock_t *ptl;
- struct folio *folio;
ptl = pmd_lock(mm, pmd);
if (!is_pmd_migration_entry(*pmd))
goto unlock;
- folio = page_folio(pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd)));
- if (!folio_try_get(folio))
- goto unlock;
- spin_unlock(ptl);
- folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
+ migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), NULL, ptl);
return;
unlock:
spin_unlock(ptl);
@@ -2431,22 +2415,8 @@ static bool migrate_vma_check_page(struct page *page)
return false;
/* Page from ZONE_DEVICE have one extra reference */
- if (is_zone_device_page(page)) {
- /*
- * Private page can never be pin as they have no valid pte and
- * GUP will fail for those. Yet if there is a pending migration
- * a thread might try to wait on the pte migration entry and
- * will bump the page reference count. Sadly there is no way to
- * differentiate a regular pin from migration wait. Hence to
- * avoid 2 racing thread trying to migrate back to CPU to enter
- * infinite loop (one stopping migration because the other is
- * waiting on pte migration entry). We always return true here.
- *
- * FIXME proper solution is to rework migration_entry_wait() so
- * it does not need to take a reference on page.
- */
- return is_device_private_page(page);
- }
+ if (is_zone_device_page(page))
+ extra++;
/* For file back page */
if (page_mapping(page))