summaryrefslogtreecommitdiff
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fusionio.com>2012-08-29 14:27:18 -0400
committerChris Mason <chris.mason@fusionio.com>2012-10-01 15:19:07 -0400
commit2aaa66558172b017f36bf38ae69372813dedee9d (patch)
treed6150a615935d36166cd06d69e3252451e62e724 /fs/btrfs/file.c
parent2671485d395c07fca104c972785898d7c52fc942 (diff)
downloadlwn-2aaa66558172b017f36bf38ae69372813dedee9d.tar.gz
lwn-2aaa66558172b017f36bf38ae69372813dedee9d.zip
Btrfs: add hole punching
This patch adds hole punching via fallocate. Thanks, Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c332
1 files changed, 328 insertions, 4 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 58598c249951..57026a6e9c94 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -39,6 +39,7 @@
#include "tree-log.h"
#include "locking.h"
#include "compat.h"
+#include "volumes.h"
/*
* when auto defrag is enabled we
@@ -584,7 +585,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
struct btrfs_path *path, u64 start, u64 end,
- int drop_cache)
+ u64 *drop_end, int drop_cache)
{
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
@@ -822,6 +823,8 @@ next_slot:
btrfs_abort_transaction(trans, root, ret);
}
+ if (drop_end)
+ *drop_end = min(end, extent_end);
btrfs_release_path(path);
return ret;
}
@@ -836,7 +839,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- ret = __btrfs_drop_extents(trans, root, inode, path, start, end,
+ ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
drop_cache);
btrfs_free_path(path);
return ret;
@@ -1645,6 +1648,324 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
return 0;
}
+static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
+ int slot, u64 start, u64 end)
+{
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
+
+ if (slot < 0 || slot >= btrfs_header_nritems(leaf))
+ return 0;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != btrfs_ino(inode) ||
+ key.type != BTRFS_EXTENT_DATA_KEY)
+ return 0;
+
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
+ return 0;
+
+ if (btrfs_file_extent_disk_bytenr(leaf, fi))
+ return 0;
+
+ if (key.offset == end)
+ return 1;
+ if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
+ return 1;
+ return 0;
+}
+
+static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
+ struct btrfs_path *path, u64 offset, u64 end)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_buffer *leaf;
+ struct btrfs_file_extent_item *fi;
+ struct extent_map *hole_em;
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct btrfs_key key;
+ int ret;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = offset;
+
+
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ if (ret < 0)
+ return ret;
+ BUG_ON(!ret);
+
+ leaf = path->nodes[0];
+ if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
+ u64 num_bytes;
+
+ path->slots[0]--;
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
+ end - offset;
+ btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_offset(leaf, fi, 0);
+ btrfs_mark_buffer_dirty(leaf);
+ goto out;
+ }
+
+ if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) {
+ u64 num_bytes;
+
+ path->slots[0]++;
+ key.offset = offset;
+ btrfs_set_item_key_safe(trans, root, path, &key);
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
+ offset;
+ btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_offset(leaf, fi, 0);
+ btrfs_mark_buffer_dirty(leaf);
+ goto out;
+ }
+ btrfs_release_path(path);
+
+ ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
+ 0, 0, end - offset, 0, end - offset,
+ 0, 0, 0);
+ if (ret)
+ return ret;
+
+out:
+ btrfs_release_path(path);
+
+ hole_em = alloc_extent_map();
+ if (!hole_em) {
+ btrfs_drop_extent_cache(inode, offset, end - 1, 0);
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags);
+ } else {
+ hole_em->start = offset;
+ hole_em->len = end - offset;
+ hole_em->orig_start = offset;
+
+ hole_em->block_start = EXTENT_MAP_HOLE;
+ hole_em->block_len = 0;
+ hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
+ hole_em->compress_type = BTRFS_COMPRESS_NONE;
+ hole_em->generation = trans->transid;
+
+ do {
+ btrfs_drop_extent_cache(inode, offset, end - 1, 0);
+ write_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, hole_em);
+ if (!ret)
+ list_move(&hole_em->list,
+ &em_tree->modified_extents);
+ write_unlock(&em_tree->lock);
+ } while (ret == -EEXIST);
+ free_extent_map(hole_em);
+ if (ret)
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags);
+ }
+
+ return 0;
+}
+
+static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_state *cached_state = NULL;
+ struct btrfs_path *path;
+ struct btrfs_block_rsv *rsv;
+ struct btrfs_trans_handle *trans;
+ u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+ u64 lockstart = (offset + mask) & ~mask;
+ u64 lockend = ((offset + len) & ~mask) - 1;
+ u64 cur_offset = lockstart;
+ u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
+ u64 drop_end;
+ unsigned long nr;
+ int ret = 0;
+ int err = 0;
+ bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
+ ((offset + len) >> PAGE_CACHE_SHIFT);
+
+ btrfs_wait_ordered_range(inode, offset, len);
+
+ mutex_lock(&inode->i_mutex);
+ if (offset >= inode->i_size) {
+ mutex_unlock(&inode->i_mutex);
+ return 0;
+ }
+
+ /*
+ * Only do this if we are in the same page and we aren't doing the
+ * entire page.
+ */
+ if (same_page && len < PAGE_CACHE_SIZE) {
+ ret = btrfs_truncate_page(inode, offset, len, 0);
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+ }
+
+ /* zero back part of the first page */
+ ret = btrfs_truncate_page(inode, offset, 0, 0);
+ if (ret) {
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+ }
+
+ /* zero the front end of the last page */
+ ret = btrfs_truncate_page(inode, offset + len, 0, 1);
+ if (ret) {
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+ }
+
+ if (lockend < lockstart) {
+ mutex_unlock(&inode->i_mutex);
+ return 0;
+ }
+
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+
+ truncate_pagecache_range(inode, lockstart, lockend);
+
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ 0, &cached_state);
+ ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
+
+ /*
+ * We need to make sure we have no ordered extents in this range
+ * and nobody raced in and read a page in this range, if we did
+ * we need to try again.
+ */
+ if ((!ordered ||
+ (ordered->file_offset + ordered->len < lockstart ||
+ ordered->file_offset > lockend)) &&
+ !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, EXTENT_UPTODATE, 0,
+ cached_state)) {
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ break;
+ }
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, &cached_state, GFP_NOFS);
+ btrfs_wait_ordered_range(inode, lockstart,
+ lockend - lockstart + 1);
+ }
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rsv = btrfs_alloc_block_rsv(root);
+ if (!rsv) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+ rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
+ rsv->failfast = 1;
+
+ /*
+ * 1 - update the inode
+ * 1 - removing the extents in the range
+ * 1 - adding the hole extent
+ */
+ trans = btrfs_start_transaction(root, 3);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto out_free;
+ }
+
+ ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
+ min_size);
+ BUG_ON(ret);
+ trans->block_rsv = rsv;
+
+ while (cur_offset < lockend) {
+ ret = __btrfs_drop_extents(trans, root, inode, path,
+ cur_offset, lockend + 1,
+ &drop_end, 1);
+ if (ret != -ENOSPC)
+ break;
+
+ trans->block_rsv = &root->fs_info->trans_block_rsv;
+
+ ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+ if (ret) {
+ err = ret;
+ break;
+ }
+
+ cur_offset = drop_end;
+
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret) {
+ err = ret;
+ break;
+ }
+
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+
+ trans = btrfs_start_transaction(root, 3);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ break;
+ }
+
+ ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
+ rsv, min_size);
+ BUG_ON(ret); /* shouldn't happen */
+ trans->block_rsv = rsv;
+ }
+
+ if (ret) {
+ err = ret;
+ goto out_trans;
+ }
+
+ trans->block_rsv = &root->fs_info->trans_block_rsv;
+ ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+ if (ret) {
+ err = ret;
+ goto out_trans;
+ }
+
+out_trans:
+ if (!trans)
+ goto out_free;
+
+ trans->block_rsv = &root->fs_info->trans_block_rsv;
+ ret = btrfs_update_inode(trans, root, inode);
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+out_free:
+ btrfs_free_path(path);
+ btrfs_free_block_rsv(root, rsv);
+out:
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ &cached_state, GFP_NOFS);
+ mutex_unlock(&inode->i_mutex);
+ if (ret && !err)
+ err = ret;
+ return err;
+}
+
static long btrfs_fallocate(struct file *file, int mode,
loff_t offset, loff_t len)
{
@@ -1663,10 +1984,13 @@ static long btrfs_fallocate(struct file *file, int mode,
alloc_start = offset & ~mask;
alloc_end = (offset + len + mask) & ~mask;
- /* We only support the FALLOC_FL_KEEP_SIZE mode */
- if (mode & ~FALLOC_FL_KEEP_SIZE)
+ /* Make sure we aren't being give some crap mode */
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ return btrfs_punch_hole(inode, offset, len);
+
/*
* Make sure we have enough space before we do the
* allocation.