summaryrefslogtreecommitdiff
path: root/mm/mmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--mm/mmap.c207
1 files changed, 152 insertions, 55 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index b56a7f0c9f85..1971bfffcc03 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -107,7 +107,7 @@ void vma_set_page_prot(struct vm_area_struct *vma)
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
struct file *file, struct address_space *mapping)
{
- if (vma->vm_flags & VM_SHARED)
+ if (vma_is_shared_maywrite(vma))
mapping_unmap_writable(mapping);
flush_dcache_mmap_lock(mapping);
@@ -384,7 +384,7 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm,
static void __vma_link_file(struct vm_area_struct *vma,
struct address_space *mapping)
{
- if (vma->vm_flags & VM_SHARED)
+ if (vma_is_shared_maywrite(vma))
mapping_allow_writable(mapping);
flush_dcache_mmap_lock(mapping);
@@ -583,11 +583,12 @@ again:
* dup_anon_vma() - Helper function to duplicate anon_vma
* @dst: The destination VMA
* @src: The source VMA
+ * @dup: Pointer to the destination VMA when successful.
*
* Returns: 0 on success.
*/
static inline int dup_anon_vma(struct vm_area_struct *dst,
- struct vm_area_struct *src)
+ struct vm_area_struct *src, struct vm_area_struct **dup)
{
/*
* Easily overlooked: when mprotect shifts the boundary, make sure the
@@ -595,9 +596,15 @@ static inline int dup_anon_vma(struct vm_area_struct *dst,
* anon pages imported.
*/
if (src->anon_vma && !dst->anon_vma) {
+ int ret;
+
vma_assert_write_locked(dst);
dst->anon_vma = src->anon_vma;
- return anon_vma_clone(dst, src);
+ ret = anon_vma_clone(dst, src);
+ if (ret)
+ return ret;
+
+ *dup = dst;
}
return 0;
@@ -624,6 +631,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long start, unsigned long end, pgoff_t pgoff,
struct vm_area_struct *next)
{
+ struct vm_area_struct *anon_dup = NULL;
bool remove_next = false;
struct vma_prepare vp;
@@ -633,7 +641,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
remove_next = true;
vma_start_write(next);
- ret = dup_anon_vma(vma, next);
+ ret = dup_anon_vma(vma, next, &anon_dup);
if (ret)
return ret;
}
@@ -661,6 +669,8 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
return 0;
nomem:
+ if (anon_dup)
+ unlink_anon_vmas(anon_dup);
return -ENOMEM;
}
@@ -850,16 +860,17 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
* **** is not represented - it will be merged and the vma containing the
* area is returned, or the function will return NULL
*/
-struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
- struct vm_area_struct *prev, unsigned long addr,
- unsigned long end, unsigned long vm_flags,
- struct anon_vma *anon_vma, struct file *file,
- pgoff_t pgoff, struct mempolicy *policy,
- struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
- struct anon_vma_name *anon_name)
+static struct vm_area_struct
+*vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
+ struct vm_area_struct *prev, unsigned long addr, unsigned long end,
+ unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file,
+ pgoff_t pgoff, struct mempolicy *policy,
+ struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+ struct anon_vma_name *anon_name)
{
struct vm_area_struct *curr, *next, *res;
struct vm_area_struct *vma, *adjust, *remove, *remove2;
+ struct vm_area_struct *anon_dup = NULL;
struct vma_prepare vp;
pgoff_t vma_pgoff;
int err = 0;
@@ -927,18 +938,23 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
vma_start_write(next);
remove = next; /* case 1 */
vma_end = next->vm_end;
- err = dup_anon_vma(prev, next);
+ err = dup_anon_vma(prev, next, &anon_dup);
if (curr) { /* case 6 */
vma_start_write(curr);
remove = curr;
remove2 = next;
+ /*
+ * Note that the dup_anon_vma below cannot overwrite err
+ * since the first caller would do nothing unless next
+ * has an anon_vma.
+ */
if (!next->anon_vma)
- err = dup_anon_vma(prev, curr);
+ err = dup_anon_vma(prev, curr, &anon_dup);
}
} else if (merge_prev) { /* case 2 */
if (curr) {
vma_start_write(curr);
- err = dup_anon_vma(prev, curr);
+ err = dup_anon_vma(prev, curr, &anon_dup);
if (end == curr->vm_end) { /* case 7 */
remove = curr;
} else { /* case 5 */
@@ -954,7 +970,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
vma_end = addr;
adjust = next;
adj_start = -(prev->vm_end - addr);
- err = dup_anon_vma(next, prev);
+ err = dup_anon_vma(next, prev, &anon_dup);
} else {
/*
* Note that cases 3 and 8 are the ONLY ones where prev
@@ -968,14 +984,14 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
vma_pgoff = curr->vm_pgoff;
vma_start_write(curr);
remove = curr;
- err = dup_anon_vma(next, curr);
+ err = dup_anon_vma(next, curr, &anon_dup);
}
}
}
/* Error in anon_vma clone. */
if (err)
- return NULL;
+ goto anon_vma_fail;
if (vma_start < vma->vm_start || vma_end > vma->vm_end)
vma_expanded = true;
@@ -988,7 +1004,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
}
if (vma_iter_prealloc(vmi, vma))
- return NULL;
+ goto prealloc_fail;
init_multi_vma_prep(&vp, vma, adjust, remove, remove2);
VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma &&
@@ -1016,6 +1032,15 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
vma_complete(&vp, vmi, mm);
khugepaged_enter_vma(res, vm_flags);
return res;
+
+prealloc_fail:
+ if (anon_dup)
+ unlink_anon_vmas(anon_dup);
+
+anon_vma_fail:
+ vma_iter_set(vmi, addr);
+ vma_iter_load(vmi);
+ return NULL;
}
/*
@@ -1198,7 +1223,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
* Does the application expect PROT_READ to imply PROT_EXEC?
*
* (the exception is when the underlying filesystem is noexec
- * mounted, in which case we dont add PROT_EXEC.)
+ * mounted, in which case we don't add PROT_EXEC.)
*/
if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
if (!(file && path_noexec(&file->f_path)))
@@ -1924,9 +1949,9 @@ static int acct_stack_growth(struct vm_area_struct *vma,
return 0;
}
-#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
+#if defined(CONFIG_STACK_GROWSUP)
/*
- * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
+ * PA-RISC uses this for its stack.
* vma is the last one with address > vma->vm_end. Have to extend vma.
*/
static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
@@ -2023,7 +2048,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
validate_mm(mm);
return error;
}
-#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
+#endif /* CONFIG_STACK_GROWSUP */
/*
* vma is the first one with address < vma->vm_start. Have to extend vma.
@@ -2159,8 +2184,6 @@ struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned lon
#else
int expand_stack_locked(struct vm_area_struct *vma, unsigned long address)
{
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
- return -EINVAL;
return expand_downwards(vma, address);
}
@@ -2323,8 +2346,8 @@ static void unmap_region(struct mm_struct *mm, struct ma_state *mas,
* has already been checked or doesn't make sense to fail.
* VMA Iterator will point to the end VMA.
*/
-int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
- unsigned long addr, int new_below)
+static int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long addr, int new_below)
{
struct vma_prepare vp;
struct vm_area_struct *new;
@@ -2405,8 +2428,8 @@ out_free_vma:
* Split a vma into two pieces at address 'addr', a new vma is allocated
* either for the first part or the tail.
*/
-int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
- unsigned long addr, int new_below)
+static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long addr, int new_below)
{
if (vma->vm_mm->map_count >= sysctl_max_map_count)
return -ENOMEM;
@@ -2415,6 +2438,85 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
}
/*
+ * We are about to modify one or multiple of a VMA's flags, policy, userfaultfd
+ * context and anonymous VMA name within the range [start, end).
+ *
+ * As a result, we might be able to merge the newly modified VMA range with an
+ * adjacent VMA with identical properties.
+ *
+ * If no merge is possible and the range does not span the entirety of the VMA,
+ * we then need to split the VMA to accommodate the change.
+ *
+ * The function returns either the merged VMA, the original VMA if a split was
+ * required instead, or an error if the split failed.
+ */
+struct vm_area_struct *vma_modify(struct vma_iterator *vmi,
+ struct vm_area_struct *prev,
+ struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long vm_flags,
+ struct mempolicy *policy,
+ struct vm_userfaultfd_ctx uffd_ctx,
+ struct anon_vma_name *anon_name)
+{
+ pgoff_t pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ struct vm_area_struct *merged;
+
+ merged = vma_merge(vmi, vma->vm_mm, prev, start, end, vm_flags,
+ vma->anon_vma, vma->vm_file, pgoff, policy,
+ uffd_ctx, anon_name);
+ if (merged)
+ return merged;
+
+ if (vma->vm_start < start) {
+ int err = split_vma(vmi, vma, start, 1);
+
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ if (vma->vm_end > end) {
+ int err = split_vma(vmi, vma, end, 0);
+
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+/*
+ * Attempt to merge a newly mapped VMA with those adjacent to it. The caller
+ * must ensure that [start, end) does not overlap any existing VMA.
+ */
+static struct vm_area_struct
+*vma_merge_new_vma(struct vma_iterator *vmi, struct vm_area_struct *prev,
+ struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, pgoff_t pgoff)
+{
+ return vma_merge(vmi, vma->vm_mm, prev, start, end, vma->vm_flags,
+ vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+ vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+}
+
+/*
+ * Expand vma by delta bytes, potentially merging with an immediately adjacent
+ * VMA with identical properties.
+ */
+struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
+ struct vm_area_struct *vma,
+ unsigned long delta)
+{
+ pgoff_t pgoff = vma->vm_pgoff + vma_pages(vma);
+
+ /* vma is specified as prev, so case 1 or 2 will apply. */
+ return vma_merge(vmi, vma->vm_mm, vma, vma->vm_end, vma->vm_end + delta,
+ vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff,
+ vma_policy(vma), vma->vm_userfaultfd_ctx,
+ anon_vma_name(vma));
+}
+
+/*
* do_vmi_align_munmap() - munmap the aligned region from @start to @end.
* @vmi: The vma iterator
* @vma: The starting vm_area_struct
@@ -2650,6 +2752,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long charged = 0;
unsigned long end = addr + len;
unsigned long merge_start = addr, merge_end = end;
+ bool writable_file_mapping = false;
pgoff_t vm_pgoff;
int error;
VMA_ITERATOR(vmi, mm, addr);
@@ -2744,17 +2847,19 @@ cannot_expand:
vma->vm_pgoff = pgoff;
if (file) {
- if (vm_flags & VM_SHARED) {
- error = mapping_map_writable(file->f_mapping);
- if (error)
- goto free_vma;
- }
-
vma->vm_file = get_file(file);
error = call_mmap(file, vma);
if (error)
goto unmap_and_free_vma;
+ if (vma_is_shared_maywrite(vma)) {
+ error = mapping_map_writable(file->f_mapping);
+ if (error)
+ goto close_and_free_vma;
+
+ writable_file_mapping = true;
+ }
+
/*
* Expansion is handled above, merging is handled below.
* Drivers should not alter the address of the VMA.
@@ -2769,10 +2874,9 @@ cannot_expand:
* vma again as we may succeed this time.
*/
if (unlikely(vm_flags != vma->vm_flags && prev)) {
- merge = vma_merge(&vmi, mm, prev, vma->vm_start,
- vma->vm_end, vma->vm_flags, NULL,
- vma->vm_file, vma->vm_pgoff, NULL,
- NULL_VM_UFFD_CTX, NULL);
+ merge = vma_merge_new_vma(&vmi, prev, vma,
+ vma->vm_start, vma->vm_end,
+ vma->vm_pgoff);
if (merge) {
/*
* ->mmap() can change vma->vm_file and fput
@@ -2819,7 +2923,7 @@ cannot_expand:
mm->map_count++;
if (vma->vm_file) {
i_mmap_lock_write(vma->vm_file->f_mapping);
- if (vma->vm_flags & VM_SHARED)
+ if (vma_is_shared_maywrite(vma))
mapping_allow_writable(vma->vm_file->f_mapping);
flush_dcache_mmap_lock(vma->vm_file->f_mapping);
@@ -2836,7 +2940,7 @@ cannot_expand:
/* Once vma denies write, undo our temporary denial count */
unmap_writable:
- if (file && vm_flags & VM_SHARED)
+ if (writable_file_mapping)
mapping_unmap_writable(file->f_mapping);
file = vma->vm_file;
ksm_add_vma(vma);
@@ -2884,7 +2988,7 @@ unmap_and_free_vma:
unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start,
vma->vm_end, vma->vm_end, true);
}
- if (file && (vm_flags & VM_SHARED))
+ if (writable_file_mapping)
mapping_unmap_writable(file->f_mapping);
free_vma:
vm_area_free(vma);
@@ -3143,13 +3247,13 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
if (!len)
return 0;
- if (mmap_write_lock_killable(mm))
- return -EINTR;
-
/* Until we need other flags, refuse anything except VM_EXEC. */
if ((flags & (~VM_EXEC)) != 0)
return -EINVAL;
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
ret = check_brk_limits(addr, len);
if (ret)
goto limits_failed;
@@ -3174,12 +3278,6 @@ limits_failed:
}
EXPORT_SYMBOL(vm_brk_flags);
-int vm_brk(unsigned long addr, unsigned long len)
-{
- return vm_brk_flags(addr, len, 0);
-}
-EXPORT_SYMBOL(vm_brk);
-
/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
@@ -3278,7 +3376,8 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
}
if (vma_link(mm, vma)) {
- vm_unacct_memory(charged);
+ if (vma->vm_flags & VM_ACCOUNT)
+ vm_unacct_memory(charged);
return -ENOMEM;
}
@@ -3313,9 +3412,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
if (new_vma && new_vma->vm_start < addr + len)
return NULL; /* should never get here */
- new_vma = vma_merge(&vmi, mm, prev, addr, addr + len, vma->vm_flags,
- vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+ new_vma = vma_merge_new_vma(&vmi, prev, vma, addr, addr + len, pgoff);
if (new_vma) {
/*
* Source vma may have been merged into new_vma