From 4b6ddbf7ed4ef2f40e0a27418146eedaa68953c6 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro
Date: Mon, 25 Jul 2011 17:12:09 -0700
Subject: pagewalk: fix walk_page_range() don't check find_vma() result properly

The doc of find_vma() says:

    /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
    struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
    {
    (snip)

Thus, the caller should confirm that the returned VMA actually matches
the desired one: find_vma() may return a VMA that lies entirely above
addr, so vma->vm_start <= addr must be checked as well.

Signed-off-by: KOSAKI Motohiro
Cc: Naoya Horiguchi
Cc: Hiroyuki Kamezawa
Cc: Andrea Arcangeli
Cc: Matt Mackall
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/pagewalk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/pagewalk.c')

diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index c3450d533611..606bbb4125d0 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -176,7 +176,7 @@ int walk_page_range(unsigned long addr, unsigned long end,
 		 * we can't handled it in the same manner as non-huge pages.
 		 */
 		vma = find_vma(walk->mm, addr);
-		if (vma && is_vm_hugetlb_page(vma)) {
+		if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma)) {
 			if (vma->vm_end < next)
 				next = vma->vm_end;
 			/*
--
cgit v1.2.3
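To see the pitfall concretely, here is a minimal userspace sketch of
find_vma()'s "first VMA with addr < vm_end" semantics (not kernel code;
find_vma_sim() and the example ranges are invented for illustration).
When addr falls in a hole below a VMA, the returned VMA has
vm_start > addr, which is exactly the case the added check catches:

    #include <stdio.h>
    #include <stddef.h>

    struct vma { unsigned long vm_start, vm_end; };

    /* Simplified stand-in: return the first vma with addr < vm_end. */
    static struct vma *find_vma_sim(struct vma *v, size_t n, unsigned long addr)
    {
            for (size_t i = 0; i < n; i++)
                    if (addr < v[i].vm_end)
                            return &v[i];
            return NULL;
    }

    int main(void)
    {
            struct vma vmas[] = { { 0x1000, 0x2000 }, { 0x5000, 0x6000 } };
            unsigned long addr = 0x3000;   /* in the hole between the two */
            struct vma *vma = find_vma_sim(vmas, 2, addr);

            /* Without the vm_start check, the hole would be mistaken
             * for part of the 0x5000-0x6000 vma. */
            if (vma && vma->vm_start <= addr)
                    printf("0x%lx is inside a vma\n", addr);
            else
                    printf("0x%lx is in a hole (lookup still returned %p)\n",
                           addr, (void *)vma);
            return 0;
    }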
From 6c6d5280431544e4036886ea74e3334a98bc5f96 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro
Date: Mon, 25 Jul 2011 17:12:09 -0700
Subject: pagewalk: don't look up vma if walk->hugetlb_entry is unused

Currently, walk_page_range() calls find_vma() on every page-table
iteration of the walk, but that is completely unnecessary if
walk->hugetlb_entry is unused, and we can't assume find_vma() is a
lightweight operation anyway. So this patch checks walk->hugetlb_entry
and avoids the find_vma() call where possible.

This patch also makes some cleanups: 1) remove the ugly
uninitialized_var() and 2) remove the #ifdef from the function body.

Signed-off-by: KOSAKI Motohiro
Cc: Naoya Horiguchi
Cc: Hiroyuki Kamezawa
Cc: Andrea Arcangeli
Cc: Matt Mackall
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/pagewalk.c | 43 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 6 deletions(-)

(limited to 'mm/pagewalk.c')

diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 606bbb4125d0..ee4ff87c58c1 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -126,7 +126,39 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
 	return 0;
 }
 
-#endif
+
+static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
+{
+	struct vm_area_struct *vma;
+
+	/* We don't need vma lookup at all. */
+	if (!walk->hugetlb_entry)
+		return NULL;
+
+	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+	vma = find_vma(walk->mm, addr);
+	if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
+		return vma;
+
+	return NULL;
+}
+
+#else /* CONFIG_HUGETLB_PAGE */
+static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
+{
+	return NULL;
+}
+
+static int walk_hugetlb_range(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end,
+			struct mm_walk *walk)
+{
+	return 0;
+}
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+
 
 /**
  * walk_page_range - walk a memory map's page tables with a callback
@@ -165,18 +197,17 @@ int walk_page_range(unsigned long addr, unsigned long end,
 
 	pgd = pgd_offset(walk->mm, addr);
 	do {
-		struct vm_area_struct *uninitialized_var(vma);
+		struct vm_area_struct *vma;
 
 		next = pgd_addr_end(addr, end);
 
-#ifdef CONFIG_HUGETLB_PAGE
 		/*
 		 * handle hugetlb vma individually because pagetable walk for
 		 * the hugetlb page is dependent on the architecture and
 		 * we can't handled it in the same manner as non-huge pages.
 		 */
-		vma = find_vma(walk->mm, addr);
-		if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma)) {
+		vma = hugetlb_vma(addr, walk);
+		if (vma) {
 			if (vma->vm_end < next)
 				next = vma->vm_end;
 			/*
@@ -189,7 +220,7 @@ int walk_page_range(unsigned long addr, unsigned long end,
 			pgd = pgd_offset(walk->mm, next);
 			continue;
 		}
-#endif
+
 		if (pgd_none_or_clear_bad(pgd)) {
 			if (walk->pte_hole)
 				err = walk->pte_hole(addr, next, walk);
--
cgit v1.2.3

From c27fe4c8942d3ca715986f79cc26f44608d7d9fb Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro
Date: Mon, 25 Jul 2011 17:12:10 -0700
Subject: pagewalk: add locking-rule comments

Originally, walk_hugetlb_range() didn't require the caller to take any
lock. But commit d33b9f45bd ("mm: hugetlb: fix hugepage memory leak in
walk_page_range") changed that rule, because it added a find_vma() call
to walk_hugetlb_range(). Any commit that changes a locking rule should
document it as well.

[akpm@linux-foundation.org: clarify comment]
Signed-off-by: KOSAKI Motohiro
Cc: Naoya Horiguchi
Cc: Hiroyuki Kamezawa
Cc: Andrea Arcangeli
Cc: Matt Mackall
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mm.h | 2 ++
 mm/pagewalk.c      | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'mm/pagewalk.c')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6321e840e21d..3c5505a3bc49 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -911,6 +911,8 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
  * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
  * @pte_hole: if set, called for each hole at all levels
  * @hugetlb_entry: if set, called for each hugetlb entry
+ *		   *Caution*: The caller must hold mmap_sem() if @hugetlb_entry
+ *		   is used.
  *
  * (see walk_page_range for more details)
  */
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index ee4ff87c58c1..f7929406e776 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -181,6 +181,9 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
  *
  * If any callback returns a non-zero value, the walk is aborted and
  * the return value is propagated back to the caller. Otherwise 0 is returned.
+ *
+ * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
+ * is !NULL.
  */
 int walk_page_range(unsigned long addr, unsigned long end,
 		    struct mm_walk *walk)
--
cgit v1.2.3
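As a usage illustration of the locking rule these two patches
establish, here is a hedged sketch of a walk_page_range() caller (not
code from this series; my_hugetlb_entry() and my_walk() are
hypothetical names, and the hugetlb_entry signature shown is the one
mm_walk carried in this era):

    /* Called once per hugetlb entry; return non-zero to abort the walk. */
    static int my_hugetlb_entry(pte_t *pte, unsigned long hmask,
                                unsigned long addr, unsigned long end,
                                struct mm_walk *walk)
    {
            return 0;
    }

    static void my_walk(struct mm_struct *mm,
                        unsigned long start, unsigned long end)
    {
            struct mm_walk walk = {
                    .hugetlb_entry  = my_hugetlb_entry,
                    .mm             = mm,
            };

            /*
             * Because .hugetlb_entry is set, hugetlb_vma() will call
             * find_vma(), so mmap_sem must be held for at least read;
             * otherwise the VM_BUG_ON() in hugetlb_vma() fires.
             */
            down_read(&mm->mmap_sem);
            walk_page_range(start, end, &walk);
            up_read(&mm->mmap_sem);
    }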
From dd78553b5e7a0b34c0b60478d04ee16d8d8f4fa7 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro
Date: Mon, 25 Jul 2011 17:12:11 -0700
Subject: pagewalk: fix code comment for THP

Commit bae9c19bf1 ("thp: split_huge_page_mm/vma") changed the locking
behavior of walk_page_range(), so this patch updates the comment to
match.

Signed-off-by: KOSAKI Motohiro
Cc: Naoya Horiguchi
Cc: Hiroyuki Kamezawa
Cc: Andrea Arcangeli
Cc: Matt Mackall
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/pagewalk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mm/pagewalk.c')

diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index f7929406e776..2f5cf10ff660 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -176,7 +176,8 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
  * associated range, and a copy of the original mm_walk for access to
  * the ->private or ->mm fields.
  *
- * No locks are taken, but the bottom level iterator will map PTE
+ * Usually no locks are taken, but splitting transparent huge page may
+ * take page table lock. And the bottom level iterator will map PTE
  * directories from highmem if necessary.
  *
  * If any callback returns a non-zero value, the walk is aborted and
--
cgit v1.2.3
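Finally, for contrast with the hugetlb case above, a hedged sketch of a
plain PTE walk (again not from this series; my_pte_entry() and
my_count_present() are hypothetical names). With .hugetlb_entry left
NULL, hugetlb_vma() returns early and no find_vma() lookup happens,
which is exactly the fast path the second patch introduces:

    /* Count present PTEs in [start, end); the tally accumulates in
     * walk->private. */
    static int my_pte_entry(pte_t *pte, unsigned long addr,
                            unsigned long end, struct mm_walk *walk)
    {
            unsigned long *present = walk->private;

            if (pte_present(*pte))
                    (*present)++;
            return 0;
    }

    static unsigned long my_count_present(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end)
    {
            unsigned long present = 0;
            struct mm_walk walk = {
                    .pte_entry      = my_pte_entry,
                    .mm             = mm,
                    .private        = &present,
            };

            /* pagewalk itself no longer needs mmap_sem on this path,
             * but most callers still take it to keep the address
             * space stable while walking. */
            down_read(&mm->mmap_sem);
            walk_page_range(start, end, &walk);
            up_read(&mm->mmap_sem);
            return present;
    }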