diff options
author | Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | 2016-07-26 15:25:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-26 16:19:19 -0700 |
commit | bae473a423f65e480db83c85b5e92254f6dfcb28 (patch) | |
tree | 9e09cd8cbcafdcc1a27298700f69f8f86f929392 /mm/memory.c | |
parent | dcddffd41d3f1d3bdcc1dce3f1cd142779b6d4c1 (diff) | |
download | lwn-bae473a423f65e480db83c85b5e92254f6dfcb28.tar.gz lwn-bae473a423f65e480db83c85b5e92254f6dfcb28.zip |
mm: introduce fault_env
The idea borrowed from Peter's patch from patchset on speculative page
faults[1]:
Instead of passing around the endless list of function arguments,
replace the lot with a single structure so we can change context without
endless function signature changes.
The changes are mostly mechanical with exception of faultaround code:
filemap_map_pages() got reworked a bit.
This patch is preparation for the next one.
[1] http://lkml.kernel.org/r/20141020222841.302891540@infradead.org
Link: http://lkml.kernel.org/r/1466021202-61880-9-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 582 |
1 files changed, 278 insertions, 304 deletions
diff --git a/mm/memory.c b/mm/memory.c index 6bf2b8564376..72b520897339 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2070,13 +2070,11 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, * case, all we need to do here is to mark the page as writable and update * any related book-keeping. */ -static inline int wp_page_reuse(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - pte_t *page_table, spinlock_t *ptl, pte_t orig_pte, - struct page *page, int page_mkwrite, - int dirty_shared) - __releases(ptl) +static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte, + struct page *page, int page_mkwrite, int dirty_shared) + __releases(fe->ptl) { + struct vm_area_struct *vma = fe->vma; pte_t entry; /* * Clear the pages cpupid information as the existing @@ -2086,12 +2084,12 @@ static inline int wp_page_reuse(struct mm_struct *mm, if (page) page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); - flush_cache_page(vma, address, pte_pfn(orig_pte)); + flush_cache_page(vma, fe->address, pte_pfn(orig_pte)); entry = pte_mkyoung(orig_pte); entry = maybe_mkwrite(pte_mkdirty(entry), vma); - if (ptep_set_access_flags(vma, address, page_table, entry, 1)) - update_mmu_cache(vma, address, page_table); - pte_unmap_unlock(page_table, ptl); + if (ptep_set_access_flags(vma, fe->address, fe->pte, entry, 1)) + update_mmu_cache(vma, fe->address, fe->pte); + pte_unmap_unlock(fe->pte, fe->ptl); if (dirty_shared) { struct address_space *mapping; @@ -2137,30 +2135,31 @@ static inline int wp_page_reuse(struct mm_struct *mm, * held to the old page, as well as updating the rmap. * - In any case, unlock the PTL and drop the reference we took to the old page. */ -static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, pmd_t *pmd, - pte_t orig_pte, struct page *old_page) +static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, + struct page *old_page) { + struct vm_area_struct *vma = fe->vma; + struct mm_struct *mm = vma->vm_mm; struct page *new_page = NULL; - spinlock_t *ptl = NULL; pte_t entry; int page_copied = 0; - const unsigned long mmun_start = address & PAGE_MASK; /* For mmu_notifiers */ - const unsigned long mmun_end = mmun_start + PAGE_SIZE; /* For mmu_notifiers */ + const unsigned long mmun_start = fe->address & PAGE_MASK; + const unsigned long mmun_end = mmun_start + PAGE_SIZE; struct mem_cgroup *memcg; if (unlikely(anon_vma_prepare(vma))) goto oom; if (is_zero_pfn(pte_pfn(orig_pte))) { - new_page = alloc_zeroed_user_highpage_movable(vma, address); + new_page = alloc_zeroed_user_highpage_movable(vma, fe->address); if (!new_page) goto oom; } else { - new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); + new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, + fe->address); if (!new_page) goto oom; - cow_user_page(new_page, old_page, address, vma); + cow_user_page(new_page, old_page, fe->address, vma); } if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) @@ -2173,8 +2172,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, /* * Re-check the pte - we dropped the lock */ - page_table = pte_offset_map_lock(mm, pmd, address, &ptl); - if (likely(pte_same(*page_table, orig_pte))) { + fe->pte = pte_offset_map_lock(mm, fe->pmd, fe->address, &fe->ptl); + if (likely(pte_same(*fe->pte, orig_pte))) { if (old_page) { if (!PageAnon(old_page)) { dec_mm_counter_fast(mm, @@ -2184,7 +2183,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, } else { inc_mm_counter_fast(mm, MM_ANONPAGES); } - flush_cache_page(vma, address, pte_pfn(orig_pte)); + flush_cache_page(vma, fe->address, pte_pfn(orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* @@ -2193,8 +2192,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, * seen in the presence of one thread doing SMC and another * thread doing COW. */ - ptep_clear_flush_notify(vma, address, page_table); - page_add_new_anon_rmap(new_page, vma, address, false); + ptep_clear_flush_notify(vma, fe->address, fe->pte); + page_add_new_anon_rmap(new_page, vma, fe->address, false); mem_cgroup_commit_charge(new_page, memcg, false, false); lru_cache_add_active_or_unevictable(new_page, vma); /* @@ -2202,8 +2201,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, * mmu page tables (such as kvm shadow page tables), we want the * new page to be mapped directly into the secondary page table. */ - set_pte_at_notify(mm, address, page_table, entry); - update_mmu_cache(vma, address, page_table); + set_pte_at_notify(mm, fe->address, fe->pte, entry); + update_mmu_cache(vma, fe->address, fe->pte); if (old_page) { /* * Only after switching the pte to the new page may @@ -2240,7 +2239,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, if (new_page) put_page(new_page); - pte_unmap_unlock(page_table, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); if (old_page) { /* @@ -2268,44 +2267,43 @@ oom: * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED * mapping */ -static int wp_pfn_shared(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - pte_t *page_table, spinlock_t *ptl, pte_t orig_pte, - pmd_t *pmd) +static int wp_pfn_shared(struct fault_env *fe, pte_t orig_pte) { + struct vm_area_struct *vma = fe->vma; + if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { struct vm_fault vmf = { .page = NULL, - .pgoff = linear_page_index(vma, address), - .virtual_address = (void __user *)(address & PAGE_MASK), + .pgoff = linear_page_index(vma, fe->address), + .virtual_address = + (void __user *)(fe->address & PAGE_MASK), .flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE, }; int ret; - pte_unmap_unlock(page_table, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); ret = vma->vm_ops->pfn_mkwrite(vma, &vmf); if (ret & VM_FAULT_ERROR) return ret; - page_table = pte_offset_map_lock(mm, pmd, address, &ptl); + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, + &fe->ptl); /* * We might have raced with another page fault while we * released the pte_offset_map_lock. */ - if (!pte_same(*page_table, orig_pte)) { - pte_unmap_unlock(page_table, ptl); + if (!pte_same(*fe->pte, orig_pte)) { + pte_unmap_unlock(fe->pte, fe->ptl); return 0; } } - return wp_page_reuse(mm, vma, address, page_table, ptl, orig_pte, - NULL, 0, 0); + return wp_page_reuse(fe, orig_pte, NULL, 0, 0); } -static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, - pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte, - struct page *old_page) - __releases(ptl) +static int wp_page_shared(struct fault_env *fe, pte_t orig_pte, + struct page *old_page) + __releases(fe->ptl) { + struct vm_area_struct *vma = fe->vma; int page_mkwrite = 0; get_page(old_page); @@ -2313,8 +2311,8 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_ops && vma->vm_ops->page_mkwrite) { int tmp; - pte_unmap_unlock(page_table, ptl); - tmp = do_page_mkwrite(vma, old_page, address); + pte_unmap_unlock(fe->pte, fe->ptl); + tmp = do_page_mkwrite(vma, old_page, fe->address); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(old_page); @@ -2326,19 +2324,18 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, * they did, we just return, as we can count on the * MMU to tell us if they didn't also make it writable. */ - page_table = pte_offset_map_lock(mm, pmd, address, - &ptl); - if (!pte_same(*page_table, orig_pte)) { + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, + &fe->ptl); + if (!pte_same(*fe->pte, orig_pte)) { unlock_page(old_page); - pte_unmap_unlock(page_table, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); put_page(old_page); return 0; } page_mkwrite = 1; } - return wp_page_reuse(mm, vma, address, page_table, ptl, - orig_pte, old_page, page_mkwrite, 1); + return wp_page_reuse(fe, orig_pte, old_page, page_mkwrite, 1); } /* @@ -2359,14 +2356,13 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, * but allow concurrent faults), with pte both mapped and locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, pmd_t *pmd, - spinlock_t *ptl, pte_t orig_pte) - __releases(ptl) +static int do_wp_page(struct fault_env *fe, pte_t orig_pte) + __releases(fe->ptl) { + struct vm_area_struct *vma = fe->vma; struct page *old_page; - old_page = vm_normal_page(vma, address, orig_pte); + old_page = vm_normal_page(vma, fe->address, orig_pte); if (!old_page) { /* * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a @@ -2377,12 +2373,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, */ if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)) - return wp_pfn_shared(mm, vma, address, page_table, ptl, - orig_pte, pmd); + return wp_pfn_shared(fe, orig_pte); - pte_unmap_unlock(page_table, ptl); - return wp_page_copy(mm, vma, address, page_table, pmd, - orig_pte, old_page); + pte_unmap_unlock(fe->pte, fe->ptl); + return wp_page_copy(fe, orig_pte, old_page); } /* @@ -2393,13 +2387,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, int total_mapcount; if (!trylock_page(old_page)) { get_page(old_page); - pte_unmap_unlock(page_table, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); lock_page(old_page); - page_table = pte_offset_map_lock(mm, pmd, address, - &ptl); - if (!pte_same(*page_table, orig_pte)) { + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, + fe->address, &fe->ptl); + if (!pte_same(*fe->pte, orig_pte)) { unlock_page(old_page); - pte_unmap_unlock(page_table, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); put_page(old_page); return 0; } @@ -2417,14 +2411,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, page_move_anon_rmap(old_page, vma); } unlock_page(old_page); - return wp_page_reuse(mm, vma, address, page_table, ptl, - orig_pte, old_page, 0, 0); + return wp_page_reuse(fe, orig_pte, old_page, 0, 0); } unlock_page(old_page); } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { - return wp_page_shared(mm, vma, address, page_table, pmd, - ptl, orig_pte, old_page); + return wp_page_shared(fe, orig_pte, old_page); } /* @@ -2432,9 +2424,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, */ get_page(old_page); - pte_unmap_unlock(page_table, ptl); - return wp_page_copy(mm, vma, address, page_table, pmd, - orig_pte, old_page); + pte_unmap_unlock(fe->pte, fe->ptl); + return wp_page_copy(fe, orig_pte, old_page); } static void unmap_mapping_range_vma(struct vm_area_struct *vma, @@ -2522,11 +2513,9 @@ EXPORT_SYMBOL(unmap_mapping_range); * We return with the mmap_sem locked or unlocked in the same cases * as does filemap_fault(). */ -int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, pmd_t *pmd, - unsigned int flags, pte_t orig_pte) +int do_swap_page(struct fault_env *fe, pte_t orig_pte) { - spinlock_t *ptl; + struct vm_area_struct *vma = fe->vma; struct page *page, *swapcache; struct mem_cgroup *memcg; swp_entry_t entry; @@ -2535,17 +2524,17 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, int exclusive = 0; int ret = 0; - if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) + if (!pte_unmap_same(vma->vm_mm, fe->pmd, fe->pte, orig_pte)) goto out; entry = pte_to_swp_entry(orig_pte); if (unlikely(non_swap_entry(entry))) { if (is_migration_entry(entry)) { - migration_entry_wait(mm, pmd, address); + migration_entry_wait(vma->vm_mm, fe->pmd, fe->address); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; } else { - print_bad_pte(vma, address, orig_pte, NULL); + print_bad_pte(vma, fe->address, orig_pte, NULL); ret = VM_FAULT_SIGBUS; } goto out; @@ -2554,14 +2543,15 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, page = lookup_swap_cache(entry); if (!page) { page = swapin_readahead(entry, - GFP_HIGHUSER_MOVABLE, vma, address); + GFP_HIGHUSER_MOVABLE, vma, fe->address); if (!page) { /* * Back out if somebody else faulted in this pte * while we released the pte lock. */ - page_table = pte_offset_map_lock(mm, pmd, address, &ptl); - if (likely(pte_same(*page_table, orig_pte))) + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, + fe->address, &fe->ptl); + if (likely(pte_same(*fe->pte, orig_pte))) ret = VM_FAULT_OOM; delayacct_clear_flag(DELAYACCT_PF_SWAPIN); goto unlock; @@ -2570,7 +2560,7 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Had to read the page from swap area: Major fault */ ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(mm, PGMAJFAULT); + mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); } else if (PageHWPoison(page)) { /* * hwpoisoned dirty swapcache pages are kept for killing @@ -2583,7 +2573,7 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, } swapcache = page; - locked = lock_page_or_retry(page, mm, flags); + locked = lock_page_or_retry(page, vma->vm_mm, fe->flags); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); if (!locked) { @@ -2600,14 +2590,15 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val)) goto out_page; - page = ksm_might_need_to_copy(page, vma, address); + page = ksm_might_need_to_copy(page, vma, fe->address); if (unlikely(!page)) { ret = VM_FAULT_OOM; page = swapcache; goto out_page; } - if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) { + if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, + &memcg, false)) { ret = VM_FAULT_OOM; goto out_page; } @@ -2615,8 +2606,9 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, /* * Back out if somebody else already faulted in this pte. */ - page_table = pte_offset_map_lock(mm, pmd, address, &ptl); - if (unlikely(!pte_same(*page_table, orig_pte))) + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, + &fe->ptl); + if (unlikely(!pte_same(*fe->pte, orig_pte))) goto out_nomap; if (unlikely(!PageUptodate(page))) { @@ -2634,24 +2626,24 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, * must be called after the swap_free(), or it will never succeed. */ - inc_mm_counter_fast(mm, MM_ANONPAGES); - dec_mm_counter_fast(mm, MM_SWAPENTS); + inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); + dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); pte = mk_pte(page, vma->vm_page_prot); - if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { + if ((fe->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); - flags &= ~FAULT_FLAG_WRITE; + fe->flags &= ~FAULT_FLAG_WRITE; ret |= VM_FAULT_WRITE; exclusive = RMAP_EXCLUSIVE; } flush_icache_page(vma, page); if (pte_swp_soft_dirty(orig_pte)) pte = pte_mksoft_dirty(pte); - set_pte_at(mm, address, page_table, pte); + set_pte_at(vma->vm_mm, fe->address, fe->pte, pte); if (page == swapcache) { - do_page_add_anon_rmap(page, vma, address, exclusive); + do_page_add_anon_rmap(page, vma, fe->address, exclusive); mem_cgroup_commit_charge(page, memcg, true, false); } else { /* ksm created a completely new copy */ - page_add_new_anon_rmap(page, vma, address, false); + page_add_new_anon_rmap(page, vma, fe->address, false); mem_cgroup_commit_charge(page, memcg, false, false); lru_cache_add_active_or_unevictable(page, vma); } @@ -2674,22 +2666,22 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, put_page(swapcache); } - if (flags & FAULT_FLAG_WRITE) { - ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); + if (fe->flags & FAULT_FLAG_WRITE) { + ret |= do_wp_page(fe, pte); if (ret & VM_FAULT_ERROR) ret &= VM_FAULT_ERROR; goto out; } /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, address, page_table); + update_mmu_cache(vma, fe->address, fe->pte); unlock: - pte_unmap_unlock(page_table, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); out: return ret; out_nomap: mem_cgroup_cancel_charge(page, memcg, false); - pte_unmap_unlock(page_table, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); out_page: unlock_page(page); out_release: @@ -2740,37 +2732,36 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, pmd_t *pmd, - unsigned int flags) +static int do_anonymous_page(struct fault_env *fe) { + struct vm_area_struct *vma = fe->vma; struct mem_cgroup *memcg; struct page *page; - spinlock_t *ptl; pte_t entry; - pte_unmap(page_table); + pte_unmap(fe->pte); /* File mapping without ->vm_ops ? */ if (vma->vm_flags & VM_SHARED) return VM_FAULT_SIGBUS; /* Check if we need to add a guard page to the stack */ - if (check_stack_guard_page(vma, address) < 0) + if (check_stack_guard_page(vma, fe->address) < 0) return VM_FAULT_SIGSEGV; /* Use the zero-page for reads */ - if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) { - entry = pte_mkspecial(pfn_pte(my_zero_pfn(address), + if (!(fe->flags & FAULT_FLAG_WRITE) && + !mm_forbids_zeropage(vma->vm_mm)) { + entry = pte_mkspecial(pfn_pte(my_zero_pfn(fe->address), vma->vm_page_prot)); - page_table = pte_offset_map_lock(mm, pmd, address, &ptl); - if (!pte_none(*page_table)) + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, + &fe->ptl); + if (!pte_none(*fe->pte)) goto unlock; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { - pte_unmap_unlock(page_table, ptl); - return handle_userfault(vma, address, flags, - VM_UFFD_MISSING); + pte_unmap_unlock(fe->pte, fe->ptl); + return handle_userfault(fe, VM_UFFD_MISSING); } goto setpte; } @@ -2778,11 +2769,11 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Allocate our own private page. */ if (unlikely(anon_vma_prepare(vma))) goto oom; - page = alloc_zeroed_user_highpage_movable(vma, address); + page = alloc_zeroed_user_highpage_movable(vma, fe->address); if (!page) goto oom; - if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) + if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false)) goto oom_free_page; /* @@ -2796,30 +2787,30 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); - page_table = pte_offset_map_lock(mm, pmd, address, &ptl); - if (!pte_none(*page_table)) + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, + &fe->ptl); + if (!pte_none(*fe->pte)) goto release; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { - pte_unmap_unlock(page_table, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); mem_cgroup_cancel_charge(page, memcg, false); put_page(page); - return handle_userfault(vma, address, flags, - VM_UFFD_MISSING); + return handle_userfault(fe, VM_UFFD_MISSING); } - inc_mm_counter_fast(mm, MM_ANONPAGES); - page_add_new_anon_rmap(page, vma, address, false); + inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); + page_add_new_anon_rmap(page, vma, fe->address, false); mem_cgroup_commit_charge(page, memcg, false, false); lru_cache_add_active_or_unevictable(page, vma); setpte: - set_pte_at(mm, address, page_table, entry); + set_pte_at(vma->vm_mm, fe->address, fe->pte, entry); /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, address, page_table); + update_mmu_cache(vma, fe->address, fe->pte); unlock: - pte_unmap_unlock(page_table, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); return 0; release: mem_cgroup_cancel_charge(page, memcg, false); @@ -2836,17 +2827,16 @@ oom: * released depending on flags and vma->vm_ops->fault() return value. * See filemap_fault() and __lock_page_retry(). */ -static int __do_fault(struct vm_area_struct *vma, unsigned long address, - pgoff_t pgoff, unsigned int flags, - struct page *cow_page, struct page **page, - void **entry) +static int __do_fault(struct fault_env *fe, pgoff_t pgoff, + struct page *cow_page, struct page **page, void **entry) { + struct vm_area_struct *vma = fe->vma; struct vm_fault vmf; int ret; - vmf.virtual_address = (void __user *)(address & PAGE_MASK); + vmf.virtual_address = (void __user *)(fe->address & PAGE_MASK); vmf.pgoff = pgoff; - vmf.flags = flags; + vmf.flags = fe->flags; vmf.page = NULL; vmf.gfp_mask = __get_fault_gfp_mask(vma); vmf.cow_page = cow_page; @@ -2878,38 +2868,36 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, /** * do_set_pte - setup new PTE entry for given page and add reverse page mapping. * - * @vma: virtual memory area - * @address: user virtual address + * @fe: fault environment * @page: page to map - * @pte: pointer to target page table entry - * @write: true, if new entry is writable - * @anon: true, if it's anonymous page * - * Caller must hold page table lock relevant for @pte. + * Caller must hold page table lock relevant for @fe->pte. * * Target users are page handler itself and implementations of * vm_ops->map_pages. */ -void do_set_pte(struct vm_area_struct *vma, unsigned long address, - struct page *page, pte_t *pte, bool write, bool anon) +void do_set_pte(struct fault_env *fe, struct page *page) { + struct vm_area_struct *vma = fe->vma; + bool write = fe->flags & FAULT_FLAG_WRITE; pte_t entry; flush_icache_page(vma, page); entry = mk_pte(page, vma->vm_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); - if (anon) { + /* copy-on-write page */ + if (write && !(vma->vm_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); - page_add_new_anon_rmap(page, vma, address, false); + page_add_new_anon_rmap(page, vma, fe->address, false); } else { inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); page_add_file_rmap(page); } - set_pte_at(vma->vm_mm, address, pte, entry); + set_pte_at(vma->vm_mm, fe->address, fe->pte, entry); /* no need to invalidate: a not-present page won't be cached */ - update_mmu_cache(vma, address, pte); + update_mmu_cache(vma, fe->address, fe->pte); } static unsigned long fault_around_bytes __read_mostly = @@ -2976,57 +2964,53 @@ late_initcall(fault_around_debugfs); * fault_around_pages() value (and therefore to page order). This way it's * easier to guarantee that we don't cross page table boundaries. */ -static void do_fault_around(struct vm_area_struct *vma, unsigned long address, - pte_t *pte, pgoff_t pgoff, unsigned int flags) +static void do_fault_around(struct fault_env *fe, pgoff_t start_pgoff) { - unsigned long start_addr, nr_pages, mask; - pgoff_t max_pgoff; - struct vm_fault vmf; + unsigned long address = fe->address, start_addr, nr_pages, mask; + pte_t *pte = fe->pte; + pgoff_t end_pgoff; int off; nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; - start_addr = max(address & mask, vma->vm_start); - off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); - pte -= off; - pgoff -= off; + start_addr = max(fe->address & mask, fe->vma->vm_start); + off = ((fe->address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + fe->pte -= off; + start_pgoff -= off; /* - * max_pgoff is either end of page table or end of vma - * or fault_around_pages() from pgoff, depending what is nearest. + * end_pgoff is either end of page table or end of vma + * or fault_around_pages() from start_pgoff, depending what is nearest. */ - max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + + end_pgoff = start_pgoff - + ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; - max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1, - pgoff + nr_pages - 1); + end_pgoff = min3(end_pgoff, vma_pages(fe->vma) + fe->vma->vm_pgoff - 1, + start_pgoff + nr_pages - 1); /* Check if it makes any sense to call ->map_pages */ - while (!pte_none(*pte)) { - if (++pgoff > max_pgoff) - return; - start_addr += PAGE_SIZE; - if (start_addr >= vma->vm_end) - return; - pte++; + fe->address = start_addr; + while (!pte_none(*fe->pte)) { + if (++start_pgoff > end_pgoff) + goto out; + fe->address += PAGE_SIZE; + if (fe->address >= fe->vma->vm_end) + goto out; + fe->pte++; } - vmf.virtual_address = (void __user *) start_addr; - vmf.pte = pte; - vmf.pgoff = pgoff; - vmf.max_pgoff = max_pgoff; - vmf.flags = flags; - vmf.gfp_mask = __get_fault_gfp_mask(vma); - vma->vm_ops->map_pages(vma, &vmf); + fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff); +out: + /* restore fault_env */ + fe->pte = pte; + fe->address = address; } -static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, - pgoff_t pgoff, unsigned int flags, pte_t orig_pte) +static int do_read_fault(struct fault_env *fe, pgoff_t pgoff, pte_t orig_pte) { + struct vm_area_struct *vma = fe->vma; struct page *fault_page; - spinlock_t *ptl; - pte_t *pte; int ret = 0; /* @@ -3035,66 +3019,68 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * something). */ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { - pte = pte_offset_map_lock(mm, pmd, address, &ptl); - do_fault_around(vma, address, pte, pgoff, flags); - if (!pte_same(*pte, orig_pte)) + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, + &fe->ptl); + if (!pte_same(*fe->pte, orig_pte)) + goto unlock_out; + do_fault_around(fe, pgoff); + /* Check if the fault is handled by faultaround */ + if (!pte_same(*fe->pte, orig_pte)) goto unlock_out; - pte_unmap_unlock(pte, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); } - ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL); + ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; - pte = pte_offset_map_lock(mm, pmd, address, &ptl); - if (unlikely(!pte_same(*pte, orig_pte))) { - pte_unmap_unlock(pte, ptl); + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, &fe->ptl); + if (unlikely(!pte_same(*fe->pte, orig_pte))) { + pte_unmap_unlock(fe->pte, fe->ptl); unlock_page(fault_page); put_page(fault_page); return ret; } - do_set_pte(vma, address, fault_page, pte, false, false); + do_set_pte(fe, fault_page); unlock_page(fault_page); unlock_out: - pte_unmap_unlock(pte, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); return ret; } -static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, - pgoff_t pgoff, unsigned int flags, pte_t orig_pte) +static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff, pte_t orig_pte) { + struct vm_area_struct *vma = fe->vma; struct page *fault_page, *new_page; void *fault_entry; struct mem_cgroup *memcg; - spinlock_t *ptl; - pte_t *pte; int ret; if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; - new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); + new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, fe->address); if (!new_page) return VM_FAULT_OOM; - if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) { + if (mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, + &memcg, false)) { put_page(new_page); return VM_FAULT_OOM; } - ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page, - &fault_entry); + ret = __do_fault(fe, pgoff, new_page, &fault_page, &fault_entry); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; if (!(ret & VM_FAULT_DAX_LOCKED)) - copy_user_highpage(new_page, fault_page, address, vma); + copy_user_highpage(new_page, fault_page, fe->address, vma); __SetPageUptodate(new_page); - pte = pte_offset_map_lock(mm, pmd, address, &ptl); - if (unlikely(!pte_same(*pte, orig_pte))) { - pte_unmap_unlock(pte, ptl); + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, + &fe->ptl); + if (unlikely(!pte_same(*fe->pte, orig_pte))) { + pte_unmap_unlock(fe->pte, fe->ptl); if (!(ret & VM_FAULT_DAX_LOCKED)) { unlock_page(fault_page); put_page(fault_page); @@ -3104,10 +3090,10 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } goto uncharge_out; } - do_set_pte(vma, address, new_page, pte, true, true); + do_set_pte(fe, new_page); mem_cgroup_commit_charge(new_page, memcg, false, false); lru_cache_add_active_or_unevictable(new_page, vma); - pte_unmap_unlock(pte, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); if (!(ret & VM_FAULT_DAX_LOCKED)) { unlock_page(fault_page); put_page(fault_page); @@ -3121,18 +3107,15 @@ uncharge_out: return ret; } -static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, - pgoff_t pgoff, unsigned int flags, pte_t orig_pte) +static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff, pte_t orig_pte) { + struct vm_area_struct *vma = fe->vma; struct page *fault_page; struct address_space *mapping; - spinlock_t *ptl; - pte_t *pte; int dirtied = 0; int ret, tmp; - ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL); + ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; @@ -3142,7 +3125,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, */ if (vma->vm_ops->page_mkwrite) { unlock_page(fault_page); - tmp = do_page_mkwrite(vma, fault_page, address); + tmp = do_page_mkwrite(vma, fault_page, fe->address); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(fault_page); @@ -3150,15 +3133,16 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, } } - pte = pte_offset_map_lock(mm, pmd, address, &ptl); - if (unlikely(!pte_same(*pte, orig_pte))) { - pte_unmap_unlock(pte, ptl); + fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, + &fe->ptl); + if (unlikely(!pte_same(*fe->pte, orig_pte))) { + pte_unmap_unlock(fe->pte, fe->ptl); unlock_page(fault_page); put_page(fault_page); return ret; } - do_set_pte(vma, address, fault_page, pte, true, false); - pte_unmap_unlock(pte, ptl); + do_set_pte(fe, fault_page); + pte_unmap_unlock(fe->pte, fe->ptl); if (set_page_dirty(fault_page)) dirtied = 1; @@ -3190,23 +3174,20 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ -static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, pmd_t *pmd, - unsigned int flags, pte_t orig_pte) +static int do_fault(struct fault_env *fe, pte_t orig_pte) { - pgoff_t pgoff = linear_page_index(vma, address); + struct vm_area_struct *vma = fe->vma; + pgoff_t pgoff = linear_page_index(vma, fe->address); - pte_unmap(page_table); + pte_unmap(fe->pte); /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ if (!vma->vm_ops->fault) return VM_FAULT_SIGBUS; - if (!(flags & FAULT_FLAG_WRITE)) - return do_read_fault(mm, vma, address, pmd, pgoff, flags, - orig_pte); + if (!(fe->flags & FAULT_FLAG_WRITE)) + return do_read_fault(fe, pgoff, orig_pte); if (!(vma->vm_flags & VM_SHARED)) - return do_cow_fault(mm, vma, address, pmd, pgoff, flags, - orig_pte); - return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); + return do_cow_fault(fe, pgoff, orig_pte); + return do_shared_fault(fe, pgoff, orig_pte); } static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, @@ -3224,11 +3205,10 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, return mpol_misplaced(page, vma, addr); } -static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) +static int do_numa_page(struct fault_env *fe, pte_t pte) { + struct vm_area_struct *vma = fe->vma; struct page *page = NULL; - spinlock_t *ptl; int page_nid = -1; int last_cpupid; int target_nid; @@ -3248,10 +3228,10 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, * page table entry is not accessible, so there would be no * concurrent hardware modifications to the PTE. */ - ptl = pte_lockptr(mm, pmd); - spin_lock(ptl); - if (unlikely(!pte_same(*ptep, pte))) { - pte_unmap_unlock(ptep, ptl); + fe->ptl = pte_lockptr(vma->vm_mm, fe->pmd); + spin_lock(fe->ptl); + if (unlikely(!pte_same(*fe->pte, pte))) { + pte_unmap_unlock(fe->pte, fe->ptl); goto out; } @@ -3260,18 +3240,18 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, pte = pte_mkyoung(pte); if (was_writable) pte = pte_mkwrite(pte); - set_pte_at(mm, addr, ptep, pte); - update_mmu_cache(vma, addr, ptep); + set_pte_at(vma->vm_mm, fe->address, fe->pte, pte); + update_mmu_cache(vma, fe->address, fe->pte); - page = vm_normal_page(vma, addr, pte); + page = vm_normal_page(vma, fe->address, pte); if (!page) { - pte_unmap_unlock(ptep, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); return 0; } /* TODO: handle PTE-mapped THP */ if (PageCompound(page)) { - pte_unmap_unlock(ptep, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); return 0; } @@ -3295,8 +3275,9 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, last_cpupid = page_cpupid_last(page); page_nid = page_to_nid(page); - target_nid = numa_migrate_prep(page, vma, addr, page_nid, &flags); - pte_unmap_unlock(ptep, ptl); + target_nid = numa_migrate_prep(page, vma, fe->address, page_nid, + &flags); + pte_unmap_unlock(fe->pte, fe->ptl); if (target_nid == -1) { put_page(page); goto out; @@ -3316,24 +3297,24 @@ out: return 0; } -static int create_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, unsigned int flags) +static int create_huge_pmd(struct fault_env *fe) { + struct vm_area_struct *vma = fe->vma; if (vma_is_anonymous(vma)) - return do_huge_pmd_anonymous_page(mm, vma, address, pmd, flags); + return do_huge_pmd_anonymous_page(fe); if (vma->vm_ops->pmd_fault) - return vma->vm_ops->pmd_fault(vma, address, pmd, flags); + return vma->vm_ops->pmd_fault(vma, fe->address, fe->pmd, + fe->flags); return VM_FAULT_FALLBACK; } -static int wp_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, pmd_t orig_pmd, - unsigned int flags) +static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd) { - if (vma_is_anonymous(vma)) - return do_huge_pmd_wp_page(mm, vma, address, pmd, orig_pmd); - if (vma->vm_ops->pmd_fault) - return vma->vm_ops->pmd_fault(vma, address, pmd, flags); + if (vma_is_anonymous(fe->vma)) + return do_huge_pmd_wp_page(fe, orig_pmd); + if (fe->vma->vm_ops->pmd_fault) + return fe->vma->vm_ops->pmd_fault(fe->vma, fe->address, fe->pmd, + fe->flags); return VM_FAULT_FALLBACK; } @@ -3353,12 +3334,9 @@ static int wp_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma, * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ -static int handle_pte_fault(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - pte_t *pte, pmd_t *pmd, unsigned int flags) +static int handle_pte_fault(struct fault_env *fe) { pte_t entry; - spinlock_t *ptl; /* * some architectures can have larger ptes than wordsize, @@ -3368,37 +3346,34 @@ static int handle_pte_fault(struct mm_struct *mm, * we later double check anyway with the ptl lock held. So here * a barrier will do. */ - entry = *pte; + entry = *fe->pte; barrier(); if (!pte_present(entry)) { if (pte_none(entry)) { - if (vma_is_anonymous(vma)) - return do_anonymous_page(mm, vma, address, - pte, pmd, flags); + if (vma_is_anonymous(fe->vma)) + return do_anonymous_page(fe); else - return do_fault(mm, vma, address, pte, pmd, - flags, entry); + return do_fault(fe, entry); } - return do_swap_page(mm, vma, address, - pte, pmd, flags, entry); + return do_swap_page(fe, entry); } if (pte_protnone(entry)) - return do_numa_page(mm, vma, address, entry, pte, pmd); + return do_numa_page(fe, entry); - ptl = pte_lockptr(mm, pmd); - spin_lock(ptl); - if (unlikely(!pte_same(*pte, entry))) + fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd); + spin_lock(fe->ptl); + if (unlikely(!pte_same(*fe->pte, entry))) goto unlock; - if (flags & FAULT_FLAG_WRITE) { + if (fe->flags & FAULT_FLAG_WRITE) { if (!pte_write(entry)) - return do_wp_page(mm, vma, address, - pte, pmd, ptl, entry); + return do_wp_page(fe, entry); entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); - if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) { - update_mmu_cache(vma, address, pte); + if (ptep_set_access_flags(fe->vma, fe->address, fe->pte, entry, + fe->flags & FAULT_FLAG_WRITE)) { + update_mmu_cache(fe->vma, fe->address, fe->pte); } else { /* * This is needed only for protection faults but the arch code @@ -3406,11 +3381,11 @@ static int handle_pte_fault(struct mm_struct *mm, * This still avoids useless tlb flushes for .text page faults * with threads. */ - if (flags & FAULT_FLAG_WRITE) - flush_tlb_fix_spurious_fault(vma, address); + if (fe->flags & FAULT_FLAG_WRITE) + flush_tlb_fix_spurious_fault(fe->vma, fe->address); } unlock: - pte_unmap_unlock(pte, ptl); + pte_unmap_unlock(fe->pte, fe->ptl); return 0; } @@ -3423,51 +3398,42 @@ unlock: static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { + struct fault_env fe = { + .vma = vma, + .address = address, + .flags = flags, + }; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, - flags & FAULT_FLAG_INSTRUCTION, - flags & FAULT_FLAG_REMOTE)) - return VM_FAULT_SIGSEGV; - - if (unlikely(is_vm_hugetlb_page(vma))) - return hugetlb_fault(mm, vma, address, flags); pgd = pgd_offset(mm, address); pud = pud_alloc(mm, pgd, address); if (!pud) return VM_FAULT_OOM; - pmd = pmd_alloc(mm, pud, address); - if (!pmd) + fe.pmd = pmd_alloc(mm, pud, address); + if (!fe.pmd) return VM_FAULT_OOM; - if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) { - int ret = create_huge_pmd(mm, vma, address, pmd, flags); + if (pmd_none(*fe.pmd) && transparent_hugepage_enabled(vma)) { + int ret = create_huge_pmd(&fe); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { - pmd_t orig_pmd = *pmd; + pmd_t orig_pmd = *fe.pmd; int ret; barrier(); if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { - unsigned int dirty = flags & FAULT_FLAG_WRITE; - if (pmd_protnone(orig_pmd)) - return do_huge_pmd_numa_page(mm, vma, address, - orig_pmd, pmd); + return do_huge_pmd_numa_page(&fe, orig_pmd); - if (dirty && !pmd_write(orig_pmd)) { - ret = wp_huge_pmd(mm, vma, address, pmd, - orig_pmd, flags); + if ((fe.flags & FAULT_FLAG_WRITE) && + !pmd_write(orig_pmd)) { + ret = wp_huge_pmd(&fe, orig_pmd); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { - huge_pmd_set_accessed(mm, vma, address, pmd, - orig_pmd, dirty); + huge_pmd_set_accessed(&fe, orig_pmd); return 0; } } @@ -3478,7 +3444,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, * run pte_offset_map on the pmd, if an huge pmd could * materialize from under us from a different thread. */ - if (unlikely(pte_alloc(mm, pmd, address))) + if (unlikely(pte_alloc(fe.vma->vm_mm, fe.pmd, fe.address))) return VM_FAULT_OOM; /* * If a huge pmd materialized under us just retry later. Use @@ -3491,7 +3457,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, * through an atomic read in C, which is what pmd_trans_unstable() * provides. */ - if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd))) + if (unlikely(pmd_trans_unstable(fe.pmd) || pmd_devmap(*fe.pmd))) return 0; /* * A regular pmd is established and it can't morph into a huge pmd @@ -3499,9 +3465,9 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, * read mode and khugepaged takes it in write mode. So now it's * safe to run pte_offset_map(). */ - pte = pte_offset_map(pmd, address); + fe.pte = pte_offset_map(fe.pmd, fe.address); - return handle_pte_fault(mm, vma, address, pte, pmd, flags); + return handle_pte_fault(&fe); } /* @@ -3530,7 +3496,15 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (flags & FAULT_FLAG_USER) mem_cgroup_oom_enable(); - ret = __handle_mm_fault(vma, address, flags); + if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + flags & FAULT_FLAG_INSTRUCTION, + flags & FAULT_FLAG_REMOTE)) + return VM_FAULT_SIGSEGV; + + if (unlikely(is_vm_hugetlb_page(vma))) + ret = hugetlb_fault(vma->vm_mm, vma, address, flags); + else + ret = __handle_mm_fault(vma, address, flags); if (flags & FAULT_FLAG_USER) { mem_cgroup_oom_disable(); |