author | Josef Bacik <josef@toxicpanda.com> | 2019-06-20 15:37:59 -0400 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2019-09-09 14:59:09 +0200 |
commit | 26ce2095e03c248759951d81fdff37e2bf32601c (patch) | |
tree | adfce20880e58a5318167e766f42fa9902672b99 /fs/btrfs/block-group.c | |
parent | 8484764e8587dc3defa9579b795e3f7bbf9789c5 (diff) | |
download | lwn-26ce2095e03c248759951d81fdff37e2bf32601c.tar.gz lwn-26ce2095e03c248759951d81fdff37e2bf32601c.zip |
btrfs: migrate inc/dec_block_group_ro code
These functions can easily be moved to block-group.c now.
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ refresh ]
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r-- | fs/btrfs/block-group.c | 212 |
1 file changed, 212 insertions, 0 deletions
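
The first hunk below moves __btrfs_inc_block_group_ro() into block-group.c. As a rough illustration of the check that function performs — this is a standalone userspace sketch with simplified structs and an unconditional 1MiB buffer standing in for the kernel's space_info/block group fields, not kernel code — a block group only flips to read-only if it is already RO (take another reference) or if the owning space_info still has headroom once the group's unused bytes stop being allocatable:

```c
/*
 * Standalone model of the __btrfs_inc_block_group_ro() space check.
 * The structs and the unconditional 1MiB buffer are illustrative
 * stand-ins, not the kernel definitions.
 */
#include <stdint.h>
#include <stdio.h>

#define SZ_1M (1024 * 1024)

struct space_info_model {
	uint64_t total_bytes;	/* size of all block groups in this space_info */
	uint64_t used;		/* roughly btrfs_space_info_used(sinfo, true) */
	uint64_t bytes_readonly;
};

struct block_group_model {
	uint64_t size;		/* cache->key.offset in the kernel */
	uint64_t used;
	uint64_t reserved;
	uint64_t pinned;
	uint64_t bytes_super;
	int ro;			/* read-only reference count */
};

/* Returns 0 on success, -1 (standing in for -ENOSPC) otherwise. */
static int inc_block_group_ro_model(struct space_info_model *sinfo,
				    struct block_group_model *bg, int force)
{
	uint64_t min_allocable = force ? 0 : SZ_1M;
	uint64_t num_bytes;

	if (bg->ro) {		/* already read-only: just take another ref */
		bg->ro++;
		return 0;
	}

	/* unused bytes that become unavailable once the group is read-only */
	num_bytes = bg->size - bg->reserved - bg->pinned - bg->bytes_super -
		    bg->used;

	/* flip to RO only if the rest of the space_info keeps some headroom */
	if (sinfo->used + num_bytes + min_allocable <= sinfo->total_bytes) {
		sinfo->bytes_readonly += num_bytes;
		bg->ro++;
		return 0;
	}
	return -1;
}

int main(void)
{
	struct space_info_model sinfo = {
		.total_bytes = 8ULL * SZ_1M,
		.used = 5ULL * SZ_1M,
	};
	struct block_group_model bg = { .size = SZ_1M, .used = SZ_1M / 2 };
	int ret = inc_block_group_ro_model(&sinfo, &bg, 0);

	/* 5M used + 0.5M unused in this group + 1M buffer <= 8M, so it succeeds */
	printf("ret=%d ro=%d bytes_readonly=%llu\n", ret, bg.ro,
	       (unsigned long long)sinfo.bytes_readonly);
	return 0;
}
```

In the kernel the 1MiB buffer applies only to metadata and system space_infos and is skipped entirely when @force is set.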
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6a70301a587a..a4b76e57680a 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1061,6 +1061,80 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
 }
 
 /*
+ * Mark block group @cache read-only, so later write won't happen to block
+ * group @cache.
+ *
+ * If @force is not set, this function will only mark the block group readonly
+ * if we have enough free space (1M) in other metadata/system block groups.
+ * If @force is not set, this function will mark the block group readonly
+ * without checking free space.
+ *
+ * NOTE: This function doesn't care if other block groups can contain all the
+ * data in this block group. That check should be done by relocation routine,
+ * not this function.
+ */
+int __btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
+{
+	struct btrfs_space_info *sinfo = cache->space_info;
+	u64 num_bytes;
+	u64 sinfo_used;
+	u64 min_allocable_bytes;
+	int ret = -ENOSPC;
+
+	/*
+	 * We need some metadata space and system metadata space for
+	 * allocating chunks in some corner cases until we force to set
+	 * it to be readonly.
+	 */
+	if ((sinfo->flags &
+	     (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+	    !force)
+		min_allocable_bytes = SZ_1M;
+	else
+		min_allocable_bytes = 0;
+
+	spin_lock(&sinfo->lock);
+	spin_lock(&cache->lock);
+
+	if (cache->ro) {
+		cache->ro++;
+		ret = 0;
+		goto out;
+	}
+
+	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
+		    cache->bytes_super - btrfs_block_group_used(&cache->item);
+	sinfo_used = btrfs_space_info_used(sinfo, true);
+
+	/*
+	 * sinfo_used + num_bytes should always <= sinfo->total_bytes.
+	 *
+	 * Here we make sure if we mark this bg RO, we still have enough
+	 * free space as buffer (if min_allocable_bytes is not 0).
+	 */
+	if (sinfo_used + num_bytes + min_allocable_bytes <=
+	    sinfo->total_bytes) {
+		sinfo->bytes_readonly += num_bytes;
+		cache->ro++;
+		list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
+		ret = 0;
+	}
+out:
+	spin_unlock(&cache->lock);
+	spin_unlock(&sinfo->lock);
+	if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
+		btrfs_info(cache->fs_info,
+			"unable to make block group %llu ro",
+			cache->key.objectid);
+		btrfs_info(cache->fs_info,
+			"sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
+			sinfo_used, num_bytes, min_allocable_bytes);
+		btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
+	}
+	return ret;
+}
+
+/*
  * Process the unused_bgs list and remove any that don't have any allocated
  * space inside of them.
  */
@@ -1791,3 +1865,141 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
 	set_avail_alloc_bits(fs_info, type);
 	return 0;
 }
+
+static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
+{
+	u64 num_devices;
+	u64 stripped;
+
+	/*
+	 * if restripe for this chunk_type is on pick target profile and
+	 * return, otherwise do the usual balance
+	 */
+	stripped = btrfs_get_restripe_target(fs_info, flags);
+	if (stripped)
+		return extended_to_chunk(stripped);
+
+	num_devices = fs_info->fs_devices->rw_devices;
+
+	stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
+		BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
+
+	if (num_devices == 1) {
+		stripped |= BTRFS_BLOCK_GROUP_DUP;
+		stripped = flags & ~stripped;
+
+		/* turn raid0 into single device chunks */
+		if (flags & BTRFS_BLOCK_GROUP_RAID0)
+			return stripped;
+
+		/* turn mirroring into duplication */
+		if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
+			     BTRFS_BLOCK_GROUP_RAID10))
+			return stripped | BTRFS_BLOCK_GROUP_DUP;
+	} else {
+		/* they already had raid on here, just return */
+		if (flags & stripped)
+			return flags;
+
+		stripped |= BTRFS_BLOCK_GROUP_DUP;
+		stripped = flags & ~stripped;
+
+		/* switch duplicated blocks with raid1 */
+		if (flags & BTRFS_BLOCK_GROUP_DUP)
+			return stripped | BTRFS_BLOCK_GROUP_RAID1;
+
+		/* this is drive concat, leave it alone */
+	}
+
+	return flags;
+}
+
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
+
+{
+	struct btrfs_fs_info *fs_info = cache->fs_info;
+	struct btrfs_trans_handle *trans;
+	u64 alloc_flags;
+	int ret;
+
+again:
+	trans = btrfs_join_transaction(fs_info->extent_root);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	/*
+	 * we're not allowed to set block groups readonly after the dirty
+	 * block groups cache has started writing. If it already started,
+	 * back off and let this transaction commit
+	 */
+	mutex_lock(&fs_info->ro_block_group_mutex);
+	if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
+		u64 transid = trans->transid;
+
+		mutex_unlock(&fs_info->ro_block_group_mutex);
+		btrfs_end_transaction(trans);
+
+		ret = btrfs_wait_for_commit(fs_info, transid);
+		if (ret)
+			return ret;
+		goto again;
+	}
+
+	/*
+	 * if we are changing raid levels, try to allocate a corresponding
+	 * block group with the new raid level.
+	 */
+	alloc_flags = update_block_group_flags(fs_info, cache->flags);
+	if (alloc_flags != cache->flags) {
+		ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+		/*
+		 * ENOSPC is allowed here, we may have enough space
+		 * already allocated at the new raid level to
+		 * carry on
+		 */
+		if (ret == -ENOSPC)
+			ret = 0;
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = __btrfs_inc_block_group_ro(cache, 0);
+	if (!ret)
+		goto out;
+	alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
+	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+	if (ret < 0)
+		goto out;
+	ret = __btrfs_inc_block_group_ro(cache, 0);
+out:
+	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+		alloc_flags = update_block_group_flags(fs_info, cache->flags);
+		mutex_lock(&fs_info->chunk_mutex);
+		check_system_chunk(trans, alloc_flags);
+		mutex_unlock(&fs_info->chunk_mutex);
+	}
+	mutex_unlock(&fs_info->ro_block_group_mutex);
+
+	btrfs_end_transaction(trans);
+	return ret;
+}
+
+void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
+{
+	struct btrfs_space_info *sinfo = cache->space_info;
+	u64 num_bytes;
+
+	BUG_ON(!cache->ro);
+
+	spin_lock(&sinfo->lock);
+	spin_lock(&cache->lock);
+	if (!--cache->ro) {
+		num_bytes = cache->key.offset - cache->reserved -
+			    cache->pinned - cache->bytes_super -
+			    btrfs_block_group_used(&cache->item);
+		sinfo->bytes_readonly -= num_bytes;
+		list_del_init(&cache->ro_list);
+	}
+	spin_unlock(&cache->lock);
+	spin_unlock(&sinfo->lock);
+}
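
The second hunk's update_block_group_flags() decides which profile a block group should be converted to before it is set read-only, e.g. ahead of relocation after a device has been removed. A minimal sketch of that decision — the flag bits here are made-up stand-ins rather than the kernel's BTRFS_BLOCK_GROUP_* values, and the btrfs_get_restripe_target() shortcut is omitted:

```c
/*
 * Illustrative model of the profile selection in update_block_group_flags().
 * The flag bits are stand-ins, not the kernel's BTRFS_BLOCK_GROUP_* values,
 * and the restripe-target shortcut is left out.
 */
#include <stdint.h>
#include <stdio.h>

enum {
	BG_RAID0  = 1 << 0,
	BG_RAID1  = 1 << 1,	/* standing in for the whole RAID1 mask */
	BG_RAID10 = 1 << 2,
	BG_RAID56 = 1 << 3,	/* standing in for the RAID5/6 mask */
	BG_DUP    = 1 << 4,
};

static uint64_t pick_target_profile(uint64_t flags, uint64_t num_devices)
{
	uint64_t stripped = BG_RAID0 | BG_RAID1 | BG_RAID10 | BG_RAID56;

	if (num_devices == 1) {
		stripped |= BG_DUP;
		stripped = flags & ~stripped;

		/* raid0 degrades to single-device chunks */
		if (flags & BG_RAID0)
			return stripped;
		/* mirroring degrades to duplication on one device */
		if (flags & (BG_RAID1 | BG_RAID10))
			return stripped | BG_DUP;
	} else {
		/* already striped/mirrored across devices: keep the profile */
		if (flags & stripped)
			return flags;

		stripped |= BG_DUP;
		stripped = flags & ~stripped;

		/* duplication becomes raid1 once more than one device exists */
		if (flags & BG_DUP)
			return stripped | BG_RAID1;
	}
	return flags;
}

int main(void)
{
	/* e.g. RAID1 metadata on a filesystem reduced to a single rw device */
	printf("raid1, 1 device -> %#llx (expect DUP = 0x10)\n",
	       (unsigned long long)pick_target_profile(BG_RAID1, 1));
	printf("dup, 2 devices  -> %#llx (expect RAID1 = 0x2)\n",
	       (unsigned long long)pick_target_profile(BG_DUP, 2));
	return 0;
}
```

With a single rw device left, striping degrades to single-device chunks and mirroring degrades to DUP; with more than one device, DUP is promoted back to RAID1 and existing striped or mirrored profiles are left alone.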