summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugh Dickins <hugh@veritas.com>2005-10-29 18:15:57 -0700
committerLinus Torvalds <torvalds@g5.osdl.org>2005-10-29 21:40:37 -0700
commita8fb5618dab7e45c8990f3155628d772a9ed45f9 (patch)
tree77977b8fb2f57c855da9e3168977521e8393776a
parent2c0b381467bc2997be9d741a152f3fc75785eedc (diff)
downloadlwn-a8fb5618dab7e45c8990f3155628d772a9ed45f9.tar.gz
lwn-a8fb5618dab7e45c8990f3155628d772a9ed45f9.zip
[PATCH] mm: unlink_file_vma, remove_vma
Divide remove_vm_struct into two parts: first anon_vma_unlink plus unlink_file_vma, to unlink the vma from the list and tree by which rmap or vmtruncate might find it; then remove_vma to close, fput and free. The intention here is to do the anon_vma_unlink and unlink_file_vma earlier, in free_pgtables before freeing any page tables: so we can be sure that any page tables traversed by rmap and vmtruncate are stable (and other, ordinary cases are stabilized by holding mmap_sem). This will be crucial to traversing pgd,pud,pmd without page_table_lock. But testing the split-out patch showed that lifting the page_table_lock is symbiotically necessary to make this change - the lock ordering is wrong to move those unlinks into free_pgtables while it's under ptlock. Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--include/linux/mm.h1
-rw-r--r--mm/mmap.c41
2 files changed, 28 insertions, 14 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 376a466743bc..0c64484d8ae0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -834,6 +834,7 @@ extern int split_vma(struct mm_struct *,
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
struct rb_node **, struct rb_node *);
+extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
unsigned long addr, unsigned long len, pgoff_t pgoff);
extern void exit_mmap(struct mm_struct *);
diff --git a/mm/mmap.c b/mm/mmap.c
index eeefe19a0fac..a3984fad3fc2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -181,26 +181,44 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
}
/*
- * Remove one vm structure and free it.
+ * Unlink a file-based vm structure from its prio_tree, to hide
+ * vma from rmap and vmtruncate before freeing its page tables.
*/
-static void remove_vm_struct(struct vm_area_struct *vma)
+void unlink_file_vma(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
- might_sleep();
if (file) {
struct address_space *mapping = file->f_mapping;
spin_lock(&mapping->i_mmap_lock);
__remove_shared_vm_struct(vma, file, mapping);
spin_unlock(&mapping->i_mmap_lock);
}
+}
+
+/*
+ * Close a vm structure and free it, returning the next.
+ */
+static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+{
+ struct vm_area_struct *next = vma->vm_next;
+
+ /*
+ * Hide vma from rmap and vmtruncate before freeing page tables:
+ * to be moved into free_pgtables once page_table_lock is lifted
+ * from it, but until then lock ordering forbids that move.
+ */
+ anon_vma_unlink(vma);
+ unlink_file_vma(vma);
+
+ might_sleep();
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
- if (file)
- fput(file);
- anon_vma_unlink(vma);
+ if (vma->vm_file)
+ fput(vma->vm_file);
mpol_free(vma_policy(vma));
kmem_cache_free(vm_area_cachep, vma);
+ return next;
}
asmlinkage unsigned long sys_brk(unsigned long brk)
@@ -1612,15 +1630,13 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
{
do {
- struct vm_area_struct *next = vma->vm_next;
long nrpages = vma_pages(vma);
mm->total_vm -= nrpages;
if (vma->vm_flags & VM_LOCKED)
mm->locked_vm -= nrpages;
vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
- remove_vm_struct(vma);
- vma = next;
+ vma = remove_vma(vma);
} while (vma);
validate_mm(mm);
}
@@ -1944,11 +1960,8 @@ void exit_mmap(struct mm_struct *mm)
* Walk the list again, actually closing and freeing it
* without holding any MM locks.
*/
- while (vma) {
- struct vm_area_struct *next = vma->vm_next;
- remove_vm_struct(vma);
- vma = next;
- }
+ while (vma)
+ vma = remove_vma(vma);
BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
}