From 125ccb0ae6806dbec31abf4a85448971df3b4e39 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 8 Dec 2011 15:07:24 +0800 Subject: Btrfs: don't pass a trans handle unnecessarily in volumes.c Some functions never use the transaction handle passed to them. Signed-off-by: Li Zefan --- fs/btrfs/volumes.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f4b839fd3c9d..73f673c8d8d8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -829,7 +829,6 @@ out: /* * find_free_dev_extent - find free space in the specified device - * @trans: transaction handler * @device: the device which we search the free space in * @num_bytes: the size of the free space that we need * @start: store the start of the free space. @@ -848,8 +847,7 @@ out: * But if we don't find suitable free space, it is used to store the size of * the max free space. */ -int find_free_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, u64 num_bytes, +int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *len) { struct btrfs_key key; @@ -893,7 +891,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; - ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; if (ret > 0) { @@ -1469,8 +1467,7 @@ error_undo: /* * does all the dirty work required for changing file system's UUID. */ -static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static int btrfs_prepare_sprout(struct btrfs_root *root) { struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_fs_devices *old_devices; @@ -1695,7 +1692,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (seeding_dev) { sb->s_flags &= ~MS_RDONLY; - ret = btrfs_prepare_sprout(trans, root); + ret = btrfs_prepare_sprout(root); BUG_ON(ret); } @@ -2323,8 +2320,7 @@ done: return ret; } -static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int btrfs_add_system_chunk(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_chunk *chunk, int item_size) { @@ -2496,7 +2492,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (total_avail == 0) continue; - ret = find_free_dev_extent(trans, device, + ret = find_free_dev_extent(device, max_stripe_size * dev_stripes, &dev_offset, &max_avail); if (ret && ret != -ENOSPC) @@ -2687,7 +2683,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, BUG_ON(ret); if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { - ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk, + ret = btrfs_add_system_chunk(chunk_root, &key, chunk, item_size); BUG_ON(ret); } -- cgit v1.2.3 From de11cc12df17337979e0929d2831887432f236ca Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 1 Dec 2011 12:55:47 +0800 Subject: Btrfs: don't pre-allocate btrfs bio We pre-allocate a btrfs bio with fixed size, and then may re-allocate memory if we find stripes are bigger than the fixed size. But this pre-allocation is not necessary. Also we don't have to calcuate the stripe number twice. Signed-off-by: Li Zefan --- fs/btrfs/volumes.c | 67 +++++++++++++++++------------------------------------- 1 file changed, 21 insertions(+), 46 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 73f673c8d8d8..540fdd25fb5e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2897,26 +2897,13 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 stripe_nr; u64 stripe_nr_orig; u64 stripe_nr_end; - int stripes_allocated = 8; - int stripes_required = 1; int stripe_index; int i; + int ret = 0; int num_stripes; int max_errors = 0; struct btrfs_bio *bbio = NULL; - if (bbio_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) - stripes_allocated = 1; -again: - if (bbio_ret) { - bbio = kzalloc(btrfs_bio_size(stripes_allocated), - GFP_NOFS); - if (!bbio) - return -ENOMEM; - - atomic_set(&bbio->error, 0); - } - read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, *length); read_unlock(&em_tree->lock); @@ -2935,32 +2922,6 @@ again: if (mirror_num > map->num_stripes) mirror_num = 0; - /* if our btrfs_bio struct is too small, back off and try again */ - if (rw & REQ_WRITE) { - if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_DUP)) { - stripes_required = map->num_stripes; - max_errors = 1; - } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { - stripes_required = map->sub_stripes; - max_errors = 1; - } - } - if (rw & REQ_DISCARD) { - if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID10)) { - stripes_required = map->num_stripes; - } - } - if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && - stripes_allocated < stripes_required) { - stripes_allocated = map->num_stripes; - free_extent_map(em); - kfree(bbio); - goto again; - } stripe_nr = offset; /* * stripe_nr counts the total number of stripes we have to stride @@ -3055,6 +3016,13 @@ again: } BUG_ON(stripe_index >= map->num_stripes); + bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS); + if (!bbio) { + ret = -ENOMEM; + goto out; + } + atomic_set(&bbio->error, 0); + if (rw & REQ_DISCARD) { for (i = 0; i < num_stripes; i++) { bbio->stripes[i].physical = @@ -3151,15 +3119,22 @@ again: stripe_index++; } } - if (bbio_ret) { - *bbio_ret = bbio; - bbio->num_stripes = num_stripes; - bbio->max_errors = max_errors; - bbio->mirror_num = mirror_num; + + if (rw & REQ_WRITE) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_DUP)) { + max_errors = 1; + } } + + *bbio_ret = bbio; + bbio->num_stripes = num_stripes; + bbio->max_errors = max_errors; + bbio->mirror_num = mirror_num; out: free_extent_map(em); - return 0; + return ret; } int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, -- cgit v1.2.3 From ec9ef7a13be4dcce964c8503e8999087945e5b9e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 1 Dec 2011 14:06:42 +0800 Subject: Btrfs: simplfy calculation of stripe length for discard operation For btrfs raid, while discarding a range of space, we'll need to know the start offset and length to discard for each device, and it's done in btrfs_map_block(). However the calculation is a bit complex for raid0 and raid10, so I reimplement it based on a fact that: dev1 dev2 dev3 (raid0) ----------------------------------- s0 s3 s6 s1 s4 s7 s2 s5 Each device has (total_stripes / nr_dev) stripes, or plus one. Signed-off-by: Li Zefan --- fs/btrfs/volumes.c | 95 ++++++++++++++++++------------------------------------ 1 file changed, 31 insertions(+), 64 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 540fdd25fb5e..563ef650850e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3024,80 +3024,47 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, atomic_set(&bbio->error, 0); if (rw & REQ_DISCARD) { + int factor = 0; + int sub_stripes = 0; + u64 stripes_per_dev = 0; + u32 remaining_stripes = 0; + + if (map->type & + (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { + if (map->type & BTRFS_BLOCK_GROUP_RAID0) + sub_stripes = 1; + else + sub_stripes = map->sub_stripes; + + factor = map->num_stripes / sub_stripes; + stripes_per_dev = div_u64_rem(stripe_nr_end - + stripe_nr_orig, + factor, + &remaining_stripes); + } + for (i = 0; i < num_stripes; i++) { bbio->stripes[i].physical = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; bbio->stripes[i].dev = map->stripes[stripe_index].dev; - if (map->type & BTRFS_BLOCK_GROUP_RAID0) { - u64 stripes; - u32 last_stripe = 0; - int j; - - div_u64_rem(stripe_nr_end - 1, - map->num_stripes, - &last_stripe); - - for (j = 0; j < map->num_stripes; j++) { - u32 test; - - div_u64_rem(stripe_nr_end - 1 - j, - map->num_stripes, &test); - if (test == stripe_index) - break; - } - stripes = stripe_nr_end - 1 - j; - do_div(stripes, map->num_stripes); - bbio->stripes[i].length = map->stripe_len * - (stripes - stripe_nr + 1); - - if (i == 0) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID10)) { + bbio->stripes[i].length = stripes_per_dev * + map->stripe_len; + if (i / sub_stripes < remaining_stripes) + bbio->stripes[i].length += + map->stripe_len; + if (i < sub_stripes) bbio->stripes[i].length -= stripe_offset; - stripe_offset = 0; - } - if (stripe_index == last_stripe) - bbio->stripes[i].length -= - stripe_end_offset; - } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { - u64 stripes; - int j; - int factor = map->num_stripes / - map->sub_stripes; - u32 last_stripe = 0; - - div_u64_rem(stripe_nr_end - 1, - factor, &last_stripe); - last_stripe *= map->sub_stripes; - - for (j = 0; j < factor; j++) { - u32 test; - - div_u64_rem(stripe_nr_end - 1 - j, - factor, &test); - - if (test == - stripe_index / map->sub_stripes) - break; - } - stripes = stripe_nr_end - 1 - j; - do_div(stripes, factor); - bbio->stripes[i].length = map->stripe_len * - (stripes - stripe_nr + 1); - - if (i < map->sub_stripes) { - bbio->stripes[i].length -= - stripe_offset; - if (i == map->sub_stripes - 1) - stripe_offset = 0; - } - if (stripe_index >= last_stripe && - stripe_index <= (last_stripe + - map->sub_stripes - 1)) { + if ((i / sub_stripes + 1) % + sub_stripes == remaining_stripes) bbio->stripes[i].length -= stripe_end_offset; - } + if (i == sub_stripes - 1) + stripe_offset = 0; } else bbio->stripes[i].length = *length; -- cgit v1.2.3 From b367e47fb3a70f5d24ebd6faf7d42436d485fb2d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 7 Dec 2011 11:38:24 +0800 Subject: Btrfs: fix possible deadlock when opening a seed device The correct lock order is uuid_mutex -> volume_mutex -> chunk_mutex, but when we mount a filesystem which has backing seed devices, we have this lock chain: open_ctree() lock(chunk_mutex); read_chunk_tree(); read_one_dev(); open_seed_devices(); lock(uuid_mutex); and then we hit a lockdep splat. Signed-off-by: Li Zefan --- fs/btrfs/disk-io.c | 2 -- fs/btrfs/volumes.c | 9 +++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3f9d5551e582..858ab347413e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2270,9 +2270,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE); - mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_chunk_tree(chunk_root); - mutex_unlock(&fs_info->chunk_mutex); if (ret) { printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", sb->s_id); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 563ef650850e..fbb493b28d5a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3506,7 +3506,7 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) struct btrfs_fs_devices *fs_devices; int ret; - mutex_lock(&uuid_mutex); + BUG_ON(!mutex_is_locked(&uuid_mutex)); fs_devices = root->fs_info->fs_devices->seed; while (fs_devices) { @@ -3544,7 +3544,6 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) fs_devices->seed = root->fs_info->fs_devices->seed; root->fs_info->fs_devices->seed = fs_devices; out: - mutex_unlock(&uuid_mutex); return ret; } @@ -3687,6 +3686,9 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) if (!path) return -ENOMEM; + mutex_lock(&uuid_mutex); + lock_chunks(root); + /* first we search for all of the device items, and then we * read in all of the chunk items. This way we can create chunk * mappings that reference all of the devices that are afound @@ -3737,6 +3739,9 @@ again: } ret = 0; error: + unlock_chunks(root); + mutex_unlock(&uuid_mutex); + btrfs_free_path(path); return ret; } -- cgit v1.2.3