diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2007-06-14 13:08:55 +0200 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2007-07-10 08:04:12 +0200 |
commit | 6a14b90bb6bc7cd83e2a444bf457a2ea645cbfe7 (patch) | |
tree | c6f2788cbafd29bdf520c0b2a232818f4d62ec9d | |
parent | c66ab6fa705e1b2887a6d9246b798bdc526839e2 (diff) | |
download | lwn-6a14b90bb6bc7cd83e2a444bf457a2ea645cbfe7.tar.gz lwn-6a14b90bb6bc7cd83e2a444bf457a2ea645cbfe7.zip |
vmsplice: add vmsplice-to-user support
A bit of a cheat, it actually just copies the data to userspace. But
this makes the interface nice and symmetric and enables people to build
on splice, with room for future improvement in performance.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | fs/ocfs2/file.c | 2 | ||||
-rw-r--r-- | fs/splice.c | 178 | ||||
-rw-r--r-- | include/linux/pipe_fs_i.h | 8 |
3 files changed, 158 insertions, 30 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 93565c03d315..222f108ee454 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1640,7 +1640,7 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, .total_len = len, .flags = flags, .pos = *ppos, - .file = out, + .u.file = out, }; ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor); diff --git a/fs/splice.c b/fs/splice.c index 68f6328236a6..13846f723d72 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -528,7 +528,7 @@ EXPORT_SYMBOL(generic_file_splice_read); static int pipe_to_sendpage(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { - struct file *file = sd->file; + struct file *file = sd->u.file; loff_t pos = sd->pos; int ret, more; @@ -566,7 +566,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { - struct file *file = sd->file; + struct file *file = sd->u.file; struct address_space *mapping = file->f_mapping; unsigned int offset, this_len; struct page *page; @@ -769,7 +769,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, .total_len = len, .flags = flags, .pos = *ppos, - .file = out, + .u.file = out, }; /* @@ -807,7 +807,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, .total_len = len, .flags = flags, .pos = *ppos, - .file = out, + .u.file = out, }; ssize_t ret; int err; @@ -1087,7 +1087,7 @@ EXPORT_SYMBOL(splice_direct_to_actor); static int direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) { - struct file *file = sd->file; + struct file *file = sd->u.file; return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); } @@ -1100,7 +1100,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, .total_len = len, .flags = flags, .pos = *ppos, - .file = out, + .u.file = out, }; size_t ret; @@ -1289,28 +1289,131 @@ static int get_iovec_page_array(const struct iovec __user *iov, return error; } +static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) +{ + char *src; + int ret; + + ret = buf->ops->pin(pipe, buf); + if (unlikely(ret)) + return ret; + + /* + * See if we can use the atomic maps, by prefaulting in the + * pages and doing an atomic copy + */ + if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { + src = buf->ops->map(pipe, buf, 1); + ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, + sd->len); + buf->ops->unmap(pipe, buf, src); + if (!ret) { + ret = sd->len; + goto out; + } + } + + /* + * No dice, use slow non-atomic map and copy + */ + src = buf->ops->map(pipe, buf, 0); + + ret = sd->len; + if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) + ret = -EFAULT; + +out: + if (ret > 0) + sd->u.userptr += ret; + buf->ops->unmap(pipe, buf, src); + return ret; +} + +/* + * For lack of a better implementation, implement vmsplice() to userspace + * as a simple copy of the pipes pages to the user iov. + */ +static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) +{ + struct pipe_inode_info *pipe; + struct splice_desc sd; + ssize_t size; + int error; + long ret; + + pipe = pipe_info(file->f_path.dentry->d_inode); + if (!pipe) + return -EBADF; + + if (pipe->inode) + mutex_lock(&pipe->inode->i_mutex); + + error = ret = 0; + while (nr_segs) { + void __user *base; + size_t len; + + /* + * Get user address base and length for this iovec. + */ + error = get_user(base, &iov->iov_base); + if (unlikely(error)) + break; + error = get_user(len, &iov->iov_len); + if (unlikely(error)) + break; + + /* + * Sanity check this iovec. 0 read succeeds. + */ + if (unlikely(!len)) + break; + if (unlikely(!base)) { + error = -EFAULT; + break; + } + + sd.len = 0; + sd.total_len = len; + sd.flags = flags; + sd.u.userptr = base; + sd.pos = 0; + + size = __splice_from_pipe(pipe, &sd, pipe_to_user); + if (size < 0) { + if (!ret) + ret = size; + + break; + } + + ret += size; + + if (size < len) + break; + + nr_segs--; + iov++; + } + + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); + + if (!ret) + ret = error; + + return ret; +} + /* * vmsplice splices a user address range into a pipe. It can be thought of * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. - * - * Note that vmsplice only supports splicing _from_ user memory to a pipe, - * not the other way around. Splicing from user memory is a simple operation - * that can be supported without any funky alignment restrictions or nasty - * vm tricks. We simply map in the user memory and fill them into a pipe. - * The reverse isn't quite as easy, though. There are two possible solutions - * for that: - * - * - memcpy() the data internally, at which point we might as well just - * do a regular read() on the buffer anyway. - * - Lots of nasty vm tricks, that are neither fast nor flexible (it - * has restriction limitations on both ends of the pipe). - * - * Alas, it isn't here. - * */ -static long do_vmsplice(struct file *file, const struct iovec __user *iov, - unsigned long nr_segs, unsigned int flags) +static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) { struct pipe_inode_info *pipe; struct page *pages[PIPE_BUFFERS]; @@ -1325,10 +1428,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, pipe = pipe_info(file->f_path.dentry->d_inode); if (!pipe) return -EBADF; - if (unlikely(nr_segs > UIO_MAXIOV)) - return -EINVAL; - else if (unlikely(!nr_segs)) - return 0; spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, flags & SPLICE_F_GIFT); @@ -1338,6 +1437,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, return splice_to_pipe(pipe, &spd); } +/* + * Note that vmsplice only really supports true splicing _from_ user memory + * to a pipe, not the other way around. Splicing from user memory is a simple + * operation that can be supported without any funky alignment restrictions + * or nasty vm tricks. We simply map in the user memory and fill them into + * a pipe. The reverse isn't quite as easy, though. There are two possible + * solutions for that: + * + * - memcpy() the data internally, at which point we might as well just + * do a regular read() on the buffer anyway. + * - Lots of nasty vm tricks, that are neither fast nor flexible (it + * has restriction limitations on both ends of the pipe). + * + * Currently we punt and implement it as a normal copy, see pipe_to_user(). + * + */ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, unsigned long nr_segs, unsigned int flags) { @@ -1345,11 +1460,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, long error; int fput; + if (unlikely(nr_segs > UIO_MAXIOV)) + return -EINVAL; + else if (unlikely(!nr_segs)) + return 0; + error = -EBADF; file = fget_light(fd, &fput); if (file) { if (file->f_mode & FMODE_WRITE) - error = do_vmsplice(file, iov, nr_segs, flags); + error = vmsplice_to_pipe(file, iov, nr_segs, flags); + else if (file->f_mode & FMODE_READ) + error = vmsplice_to_user(file, iov, nr_segs, flags); fput_light(file, fput); } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 883ba9b78d3f..6e7bfc125425 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -88,7 +88,13 @@ int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); struct splice_desc { unsigned int len, total_len; /* current and remaining length */ unsigned int flags; /* splice flags */ - struct file *file; /* file to read/write */ + /* + * actor() private data + */ + union { + void __user *userptr; /* memory to write to */ + struct file *file; /* file to read/write */ + } u; loff_t pos; /* file position */ }; |