diff options
author | Jens Axboe <axboe@suse.de> | 2006-03-30 15:16:46 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-30 12:28:18 -0800 |
commit | 5abc97aa25b2c41413b3a520faee83f2282d9f18 (patch) | |
tree | 4ba13ae0e91f15d02986df7cdca5e9455212d7d4 | |
parent | 5274f052e7b3dbd81935772eb551dfd0325dfa9d (diff) | |
download | lwn-5abc97aa25b2c41413b3a520faee83f2282d9f18.tar.gz lwn-5abc97aa25b2c41413b3a520faee83f2282d9f18.zip |
[PATCH] splice: add support for SPLICE_F_MOVE flag
This enables the caller to migrate pages from one address space's page
cache to another. In buzzword marketing terms, you can do zero-copy file
copies!
Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/pipe.c | 8 | ||||
-rw-r--r-- | fs/splice.c | 121 | ||||
-rw-r--r-- | include/linux/pipe_fs_i.h | 8 |
3 files changed, 100 insertions, 37 deletions
diff --git a/fs/pipe.c b/fs/pipe.c index 2414bf270db6..109a102c150d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -121,11 +121,19 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer kunmap(buf->page); } +static int anon_pipe_buf_steal(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + buf->stolen = 1; + return 0; +} + static struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, .map = anon_pipe_buf_map, .unmap = anon_pipe_buf_unmap, .release = anon_pipe_buf_release, + .steal = anon_pipe_buf_steal, }; static ssize_t diff --git a/fs/splice.c b/fs/splice.c index efa47c1c4e13..4a026f95884f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -21,6 +21,7 @@ #include <linux/pagemap.h> #include <linux/pipe_fs_i.h> #include <linux/mm_inline.h> +#include <linux/swap.h> /* * Passed to the actors @@ -32,11 +33,37 @@ struct splice_desc { loff_t pos; /* file position */ }; +static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + struct page *page = buf->page; + + WARN_ON(!PageLocked(page)); + WARN_ON(!PageUptodate(page)); + + if (!remove_mapping(page_mapping(page), page)) + return 1; + + if (PageLRU(page)) { + struct zone *zone = page_zone(page); + + spin_lock_irq(&zone->lru_lock); + BUG_ON(!PageLRU(page)); + __ClearPageLRU(page); + del_page_from_lru(zone, page); + spin_unlock_irq(&zone->lru_lock); + } + + buf->stolen = 1; + return 0; +} + static void page_cache_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) { page_cache_release(buf->page); buf->page = NULL; + buf->stolen = 0; } static void *page_cache_pipe_buf_map(struct file *file, @@ -63,7 +90,8 @@ static void *page_cache_pipe_buf_map(struct file *file, static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) { - unlock_page(buf->page); + if (!buf->stolen) + unlock_page(buf->page); kunmap(buf->page); } @@ -72,6 +100,7 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = 
{ .map = page_cache_pipe_buf_map, .unmap = page_cache_pipe_buf_unmap, .release = page_cache_pipe_buf_release, + .steal = page_cache_pipe_buf_steal, }; static ssize_t move_to_pipe(struct inode *inode, struct page **pages, @@ -336,8 +365,8 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, struct address_space *mapping = file->f_mapping; unsigned int offset; struct page *page; - char *src, *dst; pgoff_t index; + char *src; int ret; /* @@ -350,40 +379,54 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; -find_page: - ret = -ENOMEM; - page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); - if (!page) - goto out; - /* - * If the page is uptodate, it is also locked. If it isn't - * uptodate, we can mark it uptodate if we are filling the - * full page. Otherwise we need to read it in first... + * reuse buf page, if SPLICE_F_MOVE is set */ - if (!PageUptodate(page)) { - if (sd->len < PAGE_CACHE_SIZE) { - ret = mapping->a_ops->readpage(file, page); - if (unlikely(ret)) - goto out; - - lock_page(page); - - if (!PageUptodate(page)) { - /* - * page got invalidated, repeat - */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto find_page; + if (sd->flags & SPLICE_F_MOVE) { + if (buf->ops->steal(info, buf)) + goto find_page; + + page = buf->page; + if (add_to_page_cache_lru(page, mapping, index, + mapping_gfp_mask(mapping))) + goto find_page; + } else { +find_page: + ret = -ENOMEM; + page = find_or_create_page(mapping, index, + mapping_gfp_mask(mapping)); + if (!page) + goto out; + + /* + * If the page is uptodate, it is also locked. If it isn't + * uptodate, we can mark it uptodate if we are filling the + * full page. Otherwise we need to read it in first... 
+ */ + if (!PageUptodate(page)) { + if (sd->len < PAGE_CACHE_SIZE) { + ret = mapping->a_ops->readpage(file, page); + if (unlikely(ret)) + goto out; + + lock_page(page); + + if (!PageUptodate(page)) { + /* + * page got invalidated, repeat + */ + if (!page->mapping) { + unlock_page(page); + page_cache_release(page); + goto find_page; + } + ret = -EIO; + goto out; } - ret = -EIO; - goto out; + } else { + WARN_ON(!PageLocked(page)); + SetPageUptodate(page); } - } else { - WARN_ON(!PageLocked(page)); - SetPageUptodate(page); } } @@ -391,10 +434,13 @@ find_page: if (ret) goto out; - dst = kmap_atomic(page, KM_USER0); - memcpy(dst + offset, src + buf->offset, sd->len); - flush_dcache_page(page); - kunmap_atomic(dst, KM_USER0); + if (!buf->stolen) { + char *dst = kmap_atomic(page, KM_USER0); + + memcpy(dst + offset, src + buf->offset, sd->len); + flush_dcache_page(page); + kunmap_atomic(dst, KM_USER0); + } ret = mapping->a_ops->commit_write(file, page, 0, sd->len); if (ret < 0) @@ -405,7 +451,8 @@ find_page: out: if (ret < 0) unlock_page(page); - page_cache_release(page); + if (!buf->stolen) + page_cache_release(page); buf->ops->unmap(info, buf); return ret; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index b12e59c75752..75c7f55023ab 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -9,6 +9,7 @@ struct pipe_buffer { struct page *page; unsigned int offset, len; struct pipe_buf_operations *ops; + unsigned int stolen; }; struct pipe_buf_operations { @@ -16,6 +17,7 @@ struct pipe_buf_operations { void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *); void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); + int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); }; struct pipe_inode_info { @@ -53,4 +55,10 @@ void pipe_wait(struct inode * inode); struct inode* pipe_new(struct inode* inode); void free_pipe_info(struct inode* 
inode); +/* + * splice is tied to pipes as a transport (at least for now), so we'll just + * add the splice flags here. + */ +#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ + #endif |