summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugh Dickins <hugh@veritas.com>2005-06-21 17:15:07 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 18:46:19 -0700
commit91612e0df20a52f61db3cac280c153311b36df7a (patch)
tree44a19e1d03147aabb842cbaac493a7213b836e4a
parent941150a326be88af245034ef4b3e9fa00229aa2d (diff)
downloadlwn-91612e0df20a52f61db3cac280c153311b36df7a.tar.gz
lwn-91612e0df20a52f61db3cac280c153311b36df7a.zip
[PATCH] mbind: check_range use standard ptwalk
Strict mbind's check for currently mapped pages being on node has been using a slow loop which re-evaluates pgd, pud, pmd, pte for each entry: replace that by a standard four-level page table walk like others in mm. Since mmap_sem is held for writing, page_table_lock can be taken at the inner level to limit latency. Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--mm/mempolicy.c115
1 files changed, 70 insertions, 45 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c512cc911e22..cb41c31e7c87 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -238,56 +238,81 @@ static struct mempolicy *mpol_new(int mode, unsigned long *nodes)
}
/* Ensure all existing pages follow the policy. */
-static int
-verify_pages(struct mm_struct *mm,
- unsigned long addr, unsigned long end, unsigned long *nodes)
+static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end, unsigned long *nodes)
{
- int err = 0;
+ pte_t *orig_pte;
+ pte_t *pte;
spin_lock(&mm->page_table_lock);
- while (addr < end) {
- struct page *p;
- pte_t *pte;
- pmd_t *pmd;
- pud_t *pud;
- pgd_t *pgd;
- pgd = pgd_offset(mm, addr);
- if (pgd_none(*pgd)) {
- unsigned long next = (addr + PGDIR_SIZE) & PGDIR_MASK;
- if (next > addr)
- break;
- addr = next;
- continue;
- }
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud)) {
- addr = (addr + PUD_SIZE) & PUD_MASK;
+ orig_pte = pte = pte_offset_map(pmd, addr);
+ do {
+ unsigned long pfn;
+ unsigned int nid;
+
+ if (!pte_present(*pte))
continue;
- }
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- addr = (addr + PMD_SIZE) & PMD_MASK;
+ pfn = pte_pfn(*pte);
+ if (!pfn_valid(pfn))
continue;
- }
- p = NULL;
- pte = pte_offset_map(pmd, addr);
- if (pte_present(*pte)) {
- unsigned long pfn = pte_pfn(*pte);
- if (pfn_valid(pfn))
- p = pfn_to_page(pfn);
- }
- pte_unmap(pte);
- if (p) {
- unsigned nid = page_to_nid(p);
- if (!test_bit(nid, nodes)) {
- err = -EIO;
- break;
- }
- }
- addr += PAGE_SIZE;
- }
+ nid = pfn_to_nid(pfn);
+ if (!test_bit(nid, nodes))
+ break;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(orig_pte);
spin_unlock(&mm->page_table_lock);
- return err;
+ return addr != end;
+}
+
+static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end, unsigned long *nodes)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ if (check_pte_range(mm, pmd, addr, next, nodes))
+ return -EIO;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+}
+
+static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end, unsigned long *nodes)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ if (check_pmd_range(mm, pud, addr, next, nodes))
+ return -EIO;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+}
+
+static inline int check_pgd_range(struct mm_struct *mm,
+ unsigned long addr, unsigned long end, unsigned long *nodes)
+{
+ pgd_t *pgd;
+ unsigned long next;
+
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ if (check_pud_range(mm, pgd, addr, next, nodes))
+ return -EIO;
+ } while (pgd++, addr = next, addr != end);
+ return 0;
}
/* Step 1: check the range */
@@ -308,7 +333,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
if (prev && prev->vm_end < vma->vm_start)
return ERR_PTR(-EFAULT);
if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) {
- err = verify_pages(vma->vm_mm,
+ err = check_pgd_range(vma->vm_mm,
vma->vm_start, vma->vm_end, nodes);
if (err) {
first = ERR_PTR(err);