diff options
author | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-16 09:44:32 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-16 09:44:32 -0800 |
commit | 2f77d107050abc14bc393b34bdb7b91cf670c250 (patch) | |
tree | 6651586fb1b10f60cd6acdb3222bafac9c2d7aa8 | |
parent | 0221872a3b0aa2fa2f3fa60affcbaebd662c4a90 (diff) | |
download | lwn-2f77d107050abc14bc393b34bdb7b91cf670c250.tar.gz lwn-2f77d107050abc14bc393b34bdb7b91cf670c250.zip |
Fix incorrect user space access locking in mincore()
Doug Chapman noticed that mincore() will doa "copy_to_user()" of the
result while holding the mmap semaphore for reading, which is a big
no-no. While a recursive read-lock on a semaphore in the case of a page
fault happens to work, we don't actually allow them due to deadlock
schenarios with writers due to fairness issues.
Doug and Marcel sent in a patch to fix it, but I decided to just rewrite
the mess instead - not just fixing the locking problem, but making the
code smaller and (imho) much easier to understand.
Cc: Doug Chapman <dchapman@redhat.com>
Cc: Marcel Holtmann <holtmann@redhat.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | mm/mincore.c | 190 |
1 files changed, 86 insertions, 104 deletions
diff --git a/mm/mincore.c b/mm/mincore.c index 72890780c1c9..b44d7f875cb6 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -1,7 +1,7 @@ /* * linux/mm/mincore.c * - * Copyright (C) 1994-1999 Linus Torvalds + * Copyright (C) 1994-2006 Linus Torvalds */ /* @@ -38,46 +38,60 @@ static unsigned char mincore_page(struct vm_area_struct * vma, return present; } -static long mincore_vma(struct vm_area_struct * vma, - unsigned long start, unsigned long end, unsigned char __user * vec) +/* + * Do a chunk of "sys_mincore()". We've already checked + * all the arguments, we hold the mmap semaphore: we should + * just return the amount of info we're asked for. + */ +static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages) { - long error, i, remaining; - unsigned char * tmp; + unsigned long i, nr, pgoff; + struct vm_area_struct *vma = find_vma(current->mm, addr); - error = -ENOMEM; - if (!vma->vm_file) - return error; - - start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - if (end > vma->vm_end) - end = vma->vm_end; - end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + /* + * find_vma() didn't find anything: the address + * is above everything we have mapped. + */ + if (!vma) { + memset(vec, 0, pages); + return pages; + } - error = -EAGAIN; - tmp = (unsigned char *) __get_free_page(GFP_KERNEL); - if (!tmp) - return error; + /* + * find_vma() found something, but we might be + * below it: check for that. + */ + if (addr < vma->vm_start) { + unsigned long gap = (vma->vm_start - addr) >> PAGE_SHIFT; + if (gap > pages) + gap = pages; + memset(vec, 0, gap); + return gap; + } - /* (end - start) is # of pages, and also # of bytes in "vec */ - remaining = (end - start), + /* + * Ok, got it. But check whether it's a segment we support + * mincore() on. Right now, we don't do any anonymous mappings. + */ + if (!vma->vm_file) + return -ENOMEM; - error = 0; - for (i = 0; remaining > 0; remaining -= PAGE_SIZE, i++) { - int j = 0; - long thispiece = (remaining < PAGE_SIZE) ? - remaining : PAGE_SIZE; + /* + * Calculate how many pages there are left in the vma, and + * what the pgoff is for our address. + */ + nr = (vma->vm_end - addr) >> PAGE_SHIFT; + if (nr > pages) + nr = pages; - while (j < thispiece) - tmp[j++] = mincore_page(vma, start++); + pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; + pgoff += vma->vm_pgoff; - if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) { - error = -EFAULT; - break; - } - } + /* And then we just fill the sucker in.. */ + for (i = 0 ; i < nr; i++, pgoff++) + vec[i] = mincore_page(vma, pgoff); - free_page((unsigned long) tmp); - return error; + return nr; } /* @@ -107,82 +121,50 @@ static long mincore_vma(struct vm_area_struct * vma, asmlinkage long sys_mincore(unsigned long start, size_t len, unsigned char __user * vec) { - int index = 0; - unsigned long end, limit; - struct vm_area_struct * vma; - size_t max; - int unmapped_error = 0; - long error; - - /* check the arguments */ - if (start & ~PAGE_CACHE_MASK) - goto einval; - - limit = TASK_SIZE; - if (start >= limit) - goto enomem; - - if (!len) - return 0; + long retval; + unsigned long pages; + unsigned char *tmp; - max = limit - start; - len = PAGE_CACHE_ALIGN(len); - if (len > max || !len) - goto enomem; - - end = start + len; + /* Check the start address: needs to be page-aligned.. */ + if (start & ~PAGE_CACHE_MASK) + return -EINVAL; - /* check the output buffer whilst holding the lock */ - error = -EFAULT; - down_read(¤t->mm->mmap_sem); + /* ..and we need to be passed a valid user-space range */ + if (!access_ok(VERIFY_READ, (void __user *) start, len)) + return -ENOMEM; - if (!access_ok(VERIFY_WRITE, vec, len >> PAGE_SHIFT)) - goto out; + /* This also avoids any overflows on PAGE_CACHE_ALIGN */ + pages = len >> PAGE_SHIFT; + pages += (len & ~PAGE_MASK) != 0; - /* - * If the interval [start,end) covers some unmapped address - * ranges, just ignore them, but return -ENOMEM at the end. - */ - error = 0; - - vma = find_vma(current->mm, start); - while (vma) { - /* Here start < vma->vm_end. */ - if (start < vma->vm_start) { - unmapped_error = -ENOMEM; - start = vma->vm_start; - } + if (!access_ok(VERIFY_WRITE, vec, pages)) + return -EFAULT; - /* Here vma->vm_start <= start < vma->vm_end. */ - if (end <= vma->vm_end) { - if (start < end) { - error = mincore_vma(vma, start, end, - &vec[index]); - if (error) - goto out; - } - error = unmapped_error; - goto out; + tmp = (void *) __get_free_page(GFP_USER); + if (!tmp) + return -ENOMEM; + + retval = 0; + while (pages) { + /* + * Do at most PAGE_SIZE entries per iteration, due to + * the temporary buffer size. + */ + down_read(¤t->mm->mmap_sem); + retval = do_mincore(start, tmp, max(pages, PAGE_SIZE)); + up_read(¤t->mm->mmap_sem); + + if (retval <= 0) + break; + if (copy_to_user(vec, tmp, retval)) { + retval = -EFAULT; + break; } - - /* Here vma->vm_start <= start < vma->vm_end < end. */ - error = mincore_vma(vma, start, vma->vm_end, &vec[index]); - if (error) - goto out; - index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT; - start = vma->vm_end; - vma = vma->vm_next; + pages -= retval; + vec += retval; + start += retval << PAGE_SHIFT; + retval = 0; } - - /* we found a hole in the area queried if we arrive here */ - error = -ENOMEM; - -out: - up_read(¤t->mm->mmap_sem); - return error; - -einval: - return -EINVAL; -enomem: - return -ENOMEM; + free_page((unsigned long) tmp); + return retval; } |