summaryrefslogtreecommitdiff
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
authorJérôme Glisse <jglisse@redhat.com>2017-11-15 17:34:11 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 18:21:03 -0800
commit4645b9fe84bf4878f04c7959a75df7c3c2d1bbb9 (patch)
tree83fc19e23f907ea229fd8a99aedbbf967057eae4 /mm/huge_memory.c
parent0f10851ea475e08896ee5d9a2036d1bb46a8f3a4 (diff)
downloadlwn-4645b9fe84bf4878f04c7959a75df7c3c2d1bbb9.tar.gz
lwn-4645b9fe84bf4878f04c7959a75df7c3c2d1bbb9.zip
mm/mmu_notifier: avoid call to invalidate_range() in range_end()
This is an optimization patch that only affect mmu_notifier users which rely on the invalidate_range() callback. This patch avoids calling that callback twice in a row from inside __mmu_notifier_invalidate_range_end Existing pattern (before this patch): mmu_notifier_invalidate_range_start() pte/pmd/pud_clear_flush_notify() mmu_notifier_invalidate_range() mmu_notifier_invalidate_range_end() mmu_notifier_invalidate_range() New pattern (after this patch): mmu_notifier_invalidate_range_start() pte/pmd/pud_clear_flush_notify() mmu_notifier_invalidate_range() mmu_notifier_invalidate_range_only_end() We call the invalidate_range callback after clearing the page table under the page table lock and we skip the call to invalidate_range inside the __mmu_notifier_invalidate_range_end() function. Idea from Andrea Arcangeli Link: http://lkml.kernel.org/r/20171017031003.7481-3-jglisse@redhat.com Signed-off-by: Jérôme Glisse <jglisse@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Joerg Roedel <jroedel@suse.de> Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Alistair Popple <alistair@popple.id.au> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Andrew Donnellan <andrew.donnellan@au1.ibm.com> Cc: Nadav Amit <nadav.amit@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--mm/huge_memory.c46
1 files changed, 42 insertions, 4 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 07ae73f4ef91..cc65fb87c9db 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1223,7 +1223,12 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
page_remove_rmap(page, true);
spin_unlock(vmf->ptl);
- mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
+ /*
+ * No need to double call mmu_notifier->invalidate_range() callback as
+ * the above pmdp_huge_clear_flush_notify() did already call it.
+ */
+ mmu_notifier_invalidate_range_only_end(vma->vm_mm, mmun_start,
+ mmun_end);
ret |= VM_FAULT_WRITE;
put_page(page);
@@ -1372,7 +1377,12 @@ alloc:
}
spin_unlock(vmf->ptl);
out_mn:
- mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
+ /*
+ * No need to double call mmu_notifier->invalidate_range() callback as
+ * the above pmdp_huge_clear_flush_notify() did already call it.
+ */
+ mmu_notifier_invalidate_range_only_end(vma->vm_mm, mmun_start,
+ mmun_end);
out:
return ret;
out_unlock:
@@ -2024,7 +2034,12 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
out:
spin_unlock(ptl);
- mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PUD_SIZE);
+ /*
+ * No need to double call mmu_notifier->invalidate_range() callback as
+ * the above pudp_huge_clear_flush_notify() did already call it.
+ */
+ mmu_notifier_invalidate_range_only_end(mm, haddr, haddr +
+ HPAGE_PUD_SIZE);
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
@@ -2099,6 +2114,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
add_mm_counter(mm, MM_FILEPAGES, -HPAGE_PMD_NR);
return;
} else if (is_huge_zero_pmd(*pmd)) {
+ /*
+ * FIXME: Do we want to invalidate secondary mmu by calling
+ * mmu_notifier_invalidate_range() see comments below inside
+ * __split_huge_pmd() ?
+ *
+ * We are going from a zero huge page write protected to zero
+ * small page also write protected so it does not seems useful
+ * to invalidate secondary mmu at this time.
+ */
return __split_huge_zero_page_pmd(vma, haddr, pmd);
}
@@ -2234,7 +2258,21 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
__split_huge_pmd_locked(vma, pmd, haddr, freeze);
out:
spin_unlock(ptl);
- mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
+ /*
+ * No need to double call mmu_notifier->invalidate_range() callback.
+ * They are 3 cases to consider inside __split_huge_pmd_locked():
+ * 1) pmdp_huge_clear_flush_notify() call invalidate_range() obvious
+ * 2) __split_huge_zero_page_pmd() read only zero page and any write
+ * fault will trigger a flush_notify before pointing to a new page
+ * (it is fine if the secondary mmu keeps pointing to the old zero
+ * page in the meantime)
+ * 3) Split a huge pmd into pte pointing to the same page. No need
+ * to invalidate secondary tlb entry they are all still valid.
+ * any further changes to individual pte will notify. So no need
+ * to call mmu_notifier->invalidate_range()
+ */
+ mmu_notifier_invalidate_range_only_end(mm, haddr, haddr +
+ HPAGE_PMD_SIZE);
}
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,