summaryrefslogtreecommitdiff
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@redhat.com>2009-02-20 11:00:09 -0500
committerChris Mason <chris.mason@oracle.com>2009-02-20 11:00:09 -0500
commit6a63209fc02d5483371f07e4913ee8abad608051 (patch)
tree7595e0df452928b677b66a64baf0cb3b7ec53dfc /fs/btrfs/extent-tree.c
parent2cfbd50b536c878e58ab3681c4e944fa3d99b415 (diff)
downloadlwn-6a63209fc02d5483371f07e4913ee8abad608051.tar.gz
lwn-6a63209fc02d5483371f07e4913ee8abad608051.zip
Btrfs: add better -ENOSPC handling
This is a step in the direction of better -ENOSPC handling. Instead of checking the global bytes counter we check the space_info bytes counters to make sure we have enough space. If we don't we go ahead and try to allocate a new chunk, and then if that fails we return -ENOSPC. This patch adds two counters to btrfs_space_info, bytes_delalloc and bytes_may_use. bytes_delalloc account for extents we've actually setup for delalloc and will be allocated at some point down the line. bytes_may_use is to keep track of how many bytes we may use for delalloc at some point. When we actually set the extent_bit for the delalloc bytes we subtract the reserved bytes from the bytes_may_use counter. This keeps us from not actually being able to allocate space for any delalloc bytes. Signed-off-by: Josef Bacik <jbacik@redhat.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c215
1 files changed, 200 insertions, 15 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0a5d796c9f7e..e11875e97c2f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -60,6 +60,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);
+static int do_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *extent_root, u64 alloc_bytes,
+ u64 flags, int force);
+
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
return (cache->flags & bits) == bits;
@@ -1909,6 +1913,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->bytes_readonly = 0;
+ found->bytes_delalloc = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
@@ -1972,6 +1977,196 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
return flags;
}
+static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ u64 alloc_profile;
+
+ if (data) {
+ alloc_profile = info->avail_data_alloc_bits &
+ info->data_alloc_profile;
+ data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
+ } else if (root == root->fs_info->chunk_root) {
+ alloc_profile = info->avail_system_alloc_bits &
+ info->system_alloc_profile;
+ data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
+ } else {
+ alloc_profile = info->avail_metadata_alloc_bits &
+ info->metadata_alloc_profile;
+ data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
+ }
+
+ return btrfs_reduce_alloc_profile(root, data);
+}
+
+void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
+{
+ u64 alloc_target;
+
+ alloc_target = btrfs_get_alloc_profile(root, 1);
+ BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
+ alloc_target);
+}
+
+/*
+ * for now this just makes sure we have at least 5% of our metadata space free
+ * for use.
+ */
+int btrfs_check_metadata_free_space(struct btrfs_root *root)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_space_info *meta_sinfo;
+ u64 alloc_target, thresh;
+
+ /* get the space info for where the metadata will live */
+ alloc_target = btrfs_get_alloc_profile(root, 0);
+ meta_sinfo = __find_space_info(info, alloc_target);
+
+ /*
+ * if the metadata area isn't maxed out then there is no sense in
+ * checking how much is used, since we can always allocate a new chunk
+ */
+ if (!meta_sinfo->full)
+ return 0;
+
+ spin_lock(&meta_sinfo->lock);
+ thresh = meta_sinfo->total_bytes * 95;
+
+ do_div(thresh, 100);
+
+ if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+ meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
+ spin_unlock(&meta_sinfo->lock);
+ return -ENOSPC;
+ }
+ spin_unlock(&meta_sinfo->lock);
+
+ return 0;
+}
+
+/*
+ * This will check the space that the inode allocates from to make sure we have
+ * enough space for bytes.
+ */
+int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes)
+{
+ struct btrfs_space_info *data_sinfo;
+ int ret = 0;
+
+ /* make sure bytes are sectorsize aligned */
+ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+
+ data_sinfo = BTRFS_I(inode)->space_info;
+again:
+ /* make sure we have enough space to handle the data first */
+ spin_lock(&data_sinfo->lock);
+ if (data_sinfo->total_bytes - data_sinfo->bytes_used -
+ data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
+ data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
+ data_sinfo->bytes_may_use < bytes) {
+ /*
+ * if we don't have enough free bytes in this space then we need
+ * to alloc a new chunk.
+ */
+ if (!data_sinfo->full) {
+ u64 alloc_target;
+ struct btrfs_trans_handle *trans;
+
+ data_sinfo->force_alloc = 1;
+ spin_unlock(&data_sinfo->lock);
+
+ alloc_target = btrfs_get_alloc_profile(root, 1);
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans)
+ return -ENOMEM;
+
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+ bytes + 2 * 1024 * 1024,
+ alloc_target, 0);
+ btrfs_end_transaction(trans, root);
+ if (ret)
+ return ret;
+ goto again;
+ }
+ spin_unlock(&data_sinfo->lock);
+ printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
+ ", %llu bytes_used, %llu bytes_reserved, "
+ "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
+ "%llu total\n", bytes, data_sinfo->bytes_delalloc,
+ data_sinfo->bytes_used, data_sinfo->bytes_reserved,
+ data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
+ data_sinfo->bytes_may_use, data_sinfo->total_bytes);
+ return -ENOSPC;
+ }
+ data_sinfo->bytes_may_use += bytes;
+ BTRFS_I(inode)->reserved_bytes += bytes;
+ spin_unlock(&data_sinfo->lock);
+
+ return btrfs_check_metadata_free_space(root);
+}
+
+/*
+ * if there was an error for whatever reason after calling
+ * btrfs_check_data_free_space, call this so we can cleanup the counters.
+ */
+void btrfs_free_reserved_data_space(struct btrfs_root *root,
+ struct inode *inode, u64 bytes)
+{
+ struct btrfs_space_info *data_sinfo;
+
+ /* make sure bytes are sectorsize aligned */
+ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+
+ data_sinfo = BTRFS_I(inode)->space_info;
+ spin_lock(&data_sinfo->lock);
+ data_sinfo->bytes_may_use -= bytes;
+ BTRFS_I(inode)->reserved_bytes -= bytes;
+ spin_unlock(&data_sinfo->lock);
+}
+
+/* called when we are adding a delalloc extent to the inode's io_tree */
+void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes)
+{
+ struct btrfs_space_info *data_sinfo;
+
+ /* get the space info for where this inode will be storing its data */
+ data_sinfo = BTRFS_I(inode)->space_info;
+
+ /* make sure we have enough space to handle the data first */
+ spin_lock(&data_sinfo->lock);
+ data_sinfo->bytes_delalloc += bytes;
+
+ /*
+ * we are adding a delalloc extent without calling
+ * btrfs_check_data_free_space first. This happens on a weird
+ * writepage condition, but shouldn't hurt our accounting
+ */
+ if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
+ data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
+ BTRFS_I(inode)->reserved_bytes = 0;
+ } else {
+ data_sinfo->bytes_may_use -= bytes;
+ BTRFS_I(inode)->reserved_bytes -= bytes;
+ }
+
+ spin_unlock(&data_sinfo->lock);
+}
+
+/* called when we are clearing an delalloc extent from the inode's io_tree */
+void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes)
+{
+ struct btrfs_space_info *info;
+
+ info = BTRFS_I(inode)->space_info;
+
+ spin_lock(&info->lock);
+ info->bytes_delalloc -= bytes;
+ spin_unlock(&info->lock);
+}
+
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force)
@@ -3105,6 +3300,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
(unsigned long long)(info->total_bytes - info->bytes_used -
info->bytes_pinned - info->bytes_reserved),
(info->full) ? "" : "not ");
+ printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
+ " may_use=%llu, used=%llu\n", info->total_bytes,
+ info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
+ info->bytes_used);
down_read(&info->groups_sem);
list_for_each_entry(cache, &info->block_groups, list) {
@@ -3131,24 +3330,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
{
int ret;
u64 search_start = 0;
- u64 alloc_profile;
struct btrfs_fs_info *info = root->fs_info;
- if (data) {
- alloc_profile = info->avail_data_alloc_bits &
- info->data_alloc_profile;
- data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
- } else if (root == root->fs_info->chunk_root) {
- alloc_profile = info->avail_system_alloc_bits &
- info->system_alloc_profile;
- data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
- } else {
- alloc_profile = info->avail_metadata_alloc_bits &
- info->metadata_alloc_profile;
- data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
- }
+ data = btrfs_get_alloc_profile(root, data);
again:
- data = btrfs_reduce_alloc_profile(root, data);
/*
* the only place that sets empty_size is btrfs_realloc_node, which
* is not called recursively on allocations