summaryrefslogtreecommitdiff
path: root/mm/mmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--mm/mmap.c90
1 files changed, 81 insertions, 9 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index ed70a68e882a..ee2298936fe6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1043,6 +1043,46 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
}
EXPORT_SYMBOL(do_mmap_pgoff);
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, unsigned long, pgoff)
+{
+ struct file *file = NULL;
+ unsigned long retval = -EBADF;
+
+ if (!(flags & MAP_ANONYMOUS)) {
+ if (unlikely(flags & MAP_HUGETLB))
+ return -EINVAL;
+ file = fget(fd);
+ if (!file)
+ goto out;
+ } else if (flags & MAP_HUGETLB) {
+ struct user_struct *user = NULL;
+ /*
+ * VM_NORESERVE is used because the reservations will be
+ * taken when vm_ops->mmap() is called
+ * A dummy user value is used because we are not locking
+ * memory so no accounting is necessary
+ */
+ len = ALIGN(len, huge_page_size(&default_hstate));
+ file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+ &user, HUGETLB_ANONHUGE_INODE);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+ }
+
+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+ down_write(&current->mm->mmap_sem);
+ retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+ up_write(&current->mm->mmap_sem);
+
+ if (file)
+ fput(file);
+out:
+ return retval;
+}
+
/*
* Some shared mappigns will want the pages marked read-only
* to track write events. If so, we'll downgrade vm_page_prot
@@ -1198,8 +1238,20 @@ munmap_back:
goto free_vma;
}
- if (vma_wants_writenotify(vma))
+ if (vma_wants_writenotify(vma)) {
+ pgprot_t pprot = vma->vm_page_prot;
+
+ /* Can vma->vm_page_prot have changed??
+ *
+ * Answer: Yes, drivers may have changed it in their
+ * f_op->mmap method.
+ *
+ * Ensures that vmas marked as uncached stay that way.
+ */
vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
+ if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ }
vma_link(mm, vma, prev, rb_link, rb_parent);
file = vma->vm_file;
@@ -1811,10 +1863,10 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
}
/*
- * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the tail.
+ * __split_vma() bypasses sysctl_max_map_count checking. We use this on the
+ * munmap path where it doesn't make sense to fail.
*/
-int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long addr, int new_below)
{
struct mempolicy *pol;
@@ -1824,9 +1876,6 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
~(huge_page_mask(hstate_vma(vma)))))
return -EINVAL;
- if (mm->map_count >= sysctl_max_map_count)
- return -ENOMEM;
-
new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!new)
return -ENOMEM;
@@ -1866,6 +1915,19 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
return 0;
}
+/*
+ * Split a vma into two pieces at address 'addr', a new vma is allocated
+ * either for the first part or the tail.
+ */
+int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, int new_below)
+{
+ if (mm->map_count >= sysctl_max_map_count)
+ return -ENOMEM;
+
+ return __split_vma(mm, vma, addr, new_below);
+}
+
/* Munmap is split into 2 main parts -- this part which finds
* what needs doing, and the areas themselves, which do the
* work. This now handles partial unmappings.
@@ -1901,7 +1963,17 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
* places tmp vma above, and higher split_vma places tmp vma below.
*/
if (start > vma->vm_start) {
- int error = split_vma(mm, vma, start, 0);
+ int error;
+
+ /*
+ * Make sure that map_count on return from munmap() will
+ * not exceed its limit; but let map_count go just above
+ * its limit temporarily, to help free resources as expected.
+ */
+ if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+ return -ENOMEM;
+
+ error = __split_vma(mm, vma, start, 0);
if (error)
return error;
prev = vma;
@@ -1910,7 +1982,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
/* Does it split the last one? */
last = find_vma(mm, end);
if (last && end > last->vm_start) {
- int error = split_vma(mm, last, end, 1);
+ int error = __split_vma(mm, last, end, 1);
if (error)
return error;
}