diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-01 13:17:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-01 13:17:28 -0700 |
commit | 75723957673bfa10c98b735259f891cc79cf0450 (patch) | |
tree | 7382ea8e4bbda6ddf8d6f29f708aa531e262cd36 /fs/splice.c | |
parent | e2cd68f7cd07cc898581bd736ebdd6f2c2323c2e (diff) | |
download | lwn-75723957673bfa10c98b735259f891cc79cf0450.tar.gz lwn-75723957673bfa10c98b735259f891cc79cf0450.zip |
Fix possible splice() mmap_sem deadlock
Nick Piggin points out that splice isn't being good about the mmap
semaphore: while two readers can nest inside each others, it does leave
a possible deadlock if a writer (ie a new mmap()) comes in during that
nesting.
Original "just move the locking" patch by Nick, replaced by one by me
based on an optimistic pagefault_disable(). And then Jens tested and
updated that patch.
Reported-by: Nick Piggin <npiggin@suse.de>
Tested-by: Jens Axboe <jens.axboe@oracle.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/splice.c')
-rw-r--r-- | fs/splice.c | 46 |
1 files changed, 34 insertions, 12 deletions
diff --git a/fs/splice.c b/fs/splice.c index c010a72ca2d2..e95a36228863 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1224,6 +1224,33 @@ static long do_splice(struct file *in, loff_t __user *off_in, } /* + * Do a copy-from-user while holding the mmap_semaphore for reading, in a + * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem + * for writing) and page faulting on the user memory pointed to by src. + * This assumes that we will very rarely hit the partial != 0 path, or this + * will not be a win. + */ +static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n) +{ + int partial; + + pagefault_disable(); + partial = __copy_from_user_inatomic(dst, src, n); + pagefault_enable(); + + /* + * Didn't copy everything, drop the mmap_sem and do a faulting copy + */ + if (unlikely(partial)) { + up_read(¤t->mm->mmap_sem); + partial = copy_from_user(dst, src, n); + down_read(¤t->mm->mmap_sem); + } + + return partial; +} + +/* * Map an iov into an array of pages and offset/length tupples. With the * partial_page structure, we can map several non-contiguous ranges into * our ones pages[] map instead of splitting that operation into pieces. @@ -1236,31 +1263,26 @@ static int get_iovec_page_array(const struct iovec __user *iov, { int buffers = 0, error = 0; - /* - * It's ok to take the mmap_sem for reading, even - * across a "get_user()". - */ down_read(¤t->mm->mmap_sem); while (nr_vecs) { unsigned long off, npages; + struct iovec entry; void __user *base; size_t len; int i; - /* - * Get user address base and length for this iovec. - */ - error = get_user(base, &iov->iov_base); - if (unlikely(error)) - break; - error = get_user(len, &iov->iov_len); - if (unlikely(error)) + error = -EFAULT; + if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry))) break; + base = entry.iov_base; + len = entry.iov_len; + /* * Sanity check this iovec. 0 read succeeds. */ + error = 0; if (unlikely(!len)) break; error = -EFAULT; |