diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2005-08-01 11:14:49 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-08-01 11:14:49 -0700 |
commit | 4ceb5db9757aaeadcf8fbbf97d76bd42aa4df0d6 (patch) | |
tree | 6a3108ceea457c21130838d49736f5e9de3badc3 /mm | |
parent | 8d894c47975f7222c5537e450e71310b395488c7 (diff) | |
download | lwn-4ceb5db9757aaeadcf8fbbf97d76bd42aa4df0d6.tar.gz lwn-4ceb5db9757aaeadcf8fbbf97d76bd42aa4df0d6.zip |
Fix get_user_pages() race for write access
There's no real guarantee that handle_mm_fault() will always be able to
break a COW situation - if an update from another thread ends up
modifying the page table in some way, handle_mm_fault() may end up
requiring us to re-try the operation.
That's normally fine, but get_user_pages() ended up re-trying it as a
read, and thus a write access could in theory end up losing the dirty
bit or be done on a page that had not been properly COW'ed.
This makes get_user_pages() always retry write accesses as write
accesses by making "follow_page()" require that a writable follow has
the dirty bit set. That simplifies the code and solves the race: if the
COW break fails for some reason, we'll just loop around and try again.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memory.c | 21 |
1 files changed, 4 insertions, 17 deletions
diff --git a/mm/memory.c b/mm/memory.c index 6fe77acbc1cd..4e1c673784db 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -811,18 +811,15 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address, pte = *ptep; pte_unmap(ptep); if (pte_present(pte)) { - if (write && !pte_write(pte)) + if (write && !pte_dirty(pte)) goto out; if (read && !pte_read(pte)) goto out; pfn = pte_pfn(pte); if (pfn_valid(pfn)) { page = pfn_to_page(pfn); - if (accessed) { - if (write && !pte_dirty(pte) &&!PageDirty(page)) - set_page_dirty(page); + if (accessed) mark_page_accessed(page); - } return page; } } @@ -941,10 +938,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, spin_lock(&mm->page_table_lock); do { struct page *page; - int lookup_write = write; cond_resched_lock(&mm->page_table_lock); - while (!(page = follow_page(mm, start, lookup_write))) { + while (!(page = follow_page(mm, start, write))) { /* * Shortcut for anonymous pages. We don't want * to force the creation of pages tables for @@ -952,8 +948,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, * nobody touched so far. This is important * for doing a core dump for these mappings. */ - if (!lookup_write && - untouched_anonymous_page(mm,vma,start)) { + if (!write && untouched_anonymous_page(mm,vma,start)) { page = ZERO_PAGE(start); break; } @@ -972,14 +967,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, default: BUG(); } - /* - * Now that we have performed a write fault - * and surely no longer have a shared page we - * shouldn't write, we shouldn't ignore an - * unwritable page in the page table if - * we are forcing write access. - */ - lookup_write = write && !force; spin_lock(&mm->page_table_lock); } if (pages) { |