diff options
author | Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | 2014-06-04 16:08:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-04 16:54:05 -0700 |
commit | 1674448345cdb56e724483a2a26622771f4e3a10 (patch) | |
tree | 8c1cd5095234eb46ab600cd875a8e39e9f44536a /mm/gup.c | |
parent | 69e68b4f03135da4a09d1215a3942d7dabd1075b (diff) | |
download | lwn-1674448345cdb56e724483a2a26622771f4e3a10.tar.gz lwn-1674448345cdb56e724483a2a26622771f4e3a10.zip |
mm: extract code to fault in a page from __get_user_pages()
Nesting level in __get_user_pages() is just insane. Let's try to fix it
a bit.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 138 |
1 files changed, 71 insertions, 67 deletions
@@ -214,12 +214,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma, return follow_page_pte(vma, address, pmd, flags); } -static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) -{ - return stack_guard_page_start(vma, addr) || - stack_guard_page_end(vma, addr+PAGE_SIZE); -} - static int get_gate_page(struct mm_struct *mm, unsigned long address, unsigned int gup_flags, struct vm_area_struct **vma, struct page **page) @@ -264,6 +258,63 @@ unmap: return ret; } +static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, + unsigned long address, unsigned int *flags, int *nonblocking) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned int fault_flags = 0; + int ret; + + /* For mlock, just skip the stack guard page. */ + if ((*flags & FOLL_MLOCK) && + (stack_guard_page_start(vma, address) || + stack_guard_page_end(vma, address + PAGE_SIZE))) + return -ENOENT; + if (*flags & FOLL_WRITE) + fault_flags |= FAULT_FLAG_WRITE; + if (nonblocking) + fault_flags |= FAULT_FLAG_ALLOW_RETRY; + if (*flags & FOLL_NOWAIT) + fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; + + ret = handle_mm_fault(mm, vma, address, fault_flags); + if (ret & VM_FAULT_ERROR) { + if (ret & VM_FAULT_OOM) + return -ENOMEM; + if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) + return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT; + if (ret & VM_FAULT_SIGBUS) + return -EFAULT; + BUG(); + } + + if (tsk) { + if (ret & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + } + + if (ret & VM_FAULT_RETRY) { + if (nonblocking) + *nonblocking = 0; + return -EBUSY; + } + + /* + * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when + * necessary, even if maybe_mkwrite decided not to set pte_write. We + * can thus safely do subsequent page lookups as if they were reads. + * But only do so when looping for pte_write is futile: in some cases + * userspace may also be wanting to write to the gotten user page, + * which a read fault here might prevent (a readonly page might get + * reCOWed by userspace write). + */ + if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) + *flags &= ~FOLL_WRITE; + return 0; +} + /** * __get_user_pages() - pin user pages in memory * @tsk: task_struct of target task @@ -410,69 +461,22 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, while (!(page = follow_page_mask(vma, start, foll_flags, &page_mask))) { int ret; - unsigned int fault_flags = 0; - - /* For mlock, just skip the stack guard page. */ - if (foll_flags & FOLL_MLOCK) { - if (stack_guard_page(vma, start)) - goto next_page; - } - if (foll_flags & FOLL_WRITE) - fault_flags |= FAULT_FLAG_WRITE; - if (nonblocking) - fault_flags |= FAULT_FLAG_ALLOW_RETRY; - if (foll_flags & FOLL_NOWAIT) - fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT); - - ret = handle_mm_fault(mm, vma, start, - fault_flags); - - if (ret & VM_FAULT_ERROR) { - if (ret & VM_FAULT_OOM) - return i ? i : -ENOMEM; - if (ret & (VM_FAULT_HWPOISON | - VM_FAULT_HWPOISON_LARGE)) { - if (i) - return i; - else if (gup_flags & FOLL_HWPOISON) - return -EHWPOISON; - else - return -EFAULT; - } - if (ret & VM_FAULT_SIGBUS) - goto efault; - BUG(); - } - - if (tsk) { - if (ret & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; - } - - if (ret & VM_FAULT_RETRY) { - if (nonblocking) - *nonblocking = 0; + ret = faultin_page(tsk, vma, start, &foll_flags, + nonblocking); + switch (ret) { + case 0: + break; + case -EFAULT: + case -ENOMEM: + case -EHWPOISON: + return i ? i : ret; + case -EBUSY: return i; + case -ENOENT: + goto next_page; + default: + BUG(); } - - /* - * The VM_FAULT_WRITE bit tells us that - * do_wp_page has broken COW when necessary, - * even if maybe_mkwrite decided not to set - * pte_write. We can thus safely do subsequent - * page lookups as if they were reads. But only - * do so when looping for pte_write is futile: - * in some cases userspace may also be wanting - * to write to the gotten user page, which a - * read fault here might prevent (a readonly - * page might get reCOWed by userspace write). - */ - if ((ret & VM_FAULT_WRITE) && - !(vma->vm_flags & VM_WRITE)) - foll_flags &= ~FOLL_WRITE; - cond_resched(); } if (IS_ERR(page)) |