Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--	fs/btrfs/volumes.c	141
1 file changed, 95 insertions(+), 46 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0d6e785bcb98..d7670e2a9f39 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -245,7 +245,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
  *
  * global::fs_devs - add, remove, updates to the global list
  *
- * does not protect: manipulation of the fs_devices::devices list!
+ * does not protect: manipulation of the fs_devices::devices list in general
+ * but in mount context it could be used to exclude list modifications by eg.
+ * scan ioctl
  *
  * btrfs_device::name - renames (write side), read is RCU
  *
@@ -258,6 +260,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
  * may be used to exclude some operations from running concurrently without any
  * modifications to the list (see write_all_supers)
  *
+ * Is not required at mount and close times, because our device list is
+ * protected by the uuid_mutex at that point.
+ *
  * balance_mutex
  * -------------
  * protects balance structures (status, state) and context accessed from
@@ -602,6 +607,11 @@ static int btrfs_free_stale_devices(const char *path,
 	return ret;
 }
 
+/*
+ * This is only used on mount, and we are protected from competing things
+ * messing with our fs_devices by the uuid_mutex, thus we do not need the
+ * fs_devices->device_list_mutex here.
+ */
 static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 			struct btrfs_device *device, fmode_t flags,
 			void *holder)
@@ -1229,8 +1239,14 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 	int ret;
 
 	lockdep_assert_held(&uuid_mutex);
+	/*
+	 * The device_list_mutex cannot be taken here in case opening the
+	 * underlying device takes further locks like bd_mutex.
+	 *
+	 * We also don't need the lock here as this is called during mount and
+	 * exclusion is provided by uuid_mutex
+	 */
 
-	mutex_lock(&fs_devices->device_list_mutex);
 	if (fs_devices->opened) {
 		fs_devices->opened++;
 		ret = 0;
@@ -1238,7 +1254,6 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		list_sort(NULL, &fs_devices->devices, devid_cmp);
 		ret = open_fs_devices(fs_devices, flags, holder);
 	}
-	mutex_unlock(&fs_devices->device_list_mutex);
 
 	return ret;
 }
@@ -3231,7 +3246,7 @@ static int del_balance_item(struct btrfs_fs_info *fs_info)
 	if (!path)
		return -ENOMEM;
 
-	trans = btrfs_start_transaction(root, 0);
+	trans = btrfs_start_transaction_fallback_global_rsv(root, 0);
 	if (IS_ERR(trans)) {
 		btrfs_free_path(path);
 		return PTR_ERR(trans);
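[Editor's note] The hunks above drop the fine-grained device_list_mutex from the mount path and rely on uuid_mutex alone, both to avoid lock ordering problems with locks taken while opening block devices and because the coarse mutex already excludes every list writer (e.g. the scan ioctl). Below is a minimal userspace sketch of that rule, assuming illustrative pthreads code; none of these names are kernel APIs.

/*
 * Sketch only: an outer "uuid" mutex serializes the whole mount/scan
 * path, so the exclusive mount-time walk of the device list does not
 * need the per-list mutex. All names here are hypothetical.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t uuid_mutex = PTHREAD_MUTEX_INITIALIZER;	/* serializes mount and scan */
static pthread_mutex_t device_list_mutex = PTHREAD_MUTEX_INITIALIZER;	/* protects the list elsewhere */

struct device { const char *name; struct device *next; };
static struct device sda = { "sda", NULL }, sdb = { "sdb", &sda };
static struct device *devices = &sdb;

/* scan path: may run concurrently with anything, so take both locks */
static void scan_add_device(struct device *dev)
{
	pthread_mutex_lock(&uuid_mutex);
	pthread_mutex_lock(&device_list_mutex);
	dev->next = devices;
	devices = dev;
	pthread_mutex_unlock(&device_list_mutex);
	pthread_mutex_unlock(&uuid_mutex);
}

/* mount path: uuid_mutex alone already excludes all list writers */
static void open_devices(void)
{
	pthread_mutex_lock(&uuid_mutex);
	for (struct device *d = devices; d; d = d->next)
		printf("opening %s\n", d->name);	/* opening may take further locks, e.g. bd_mutex */
	pthread_mutex_unlock(&uuid_mutex);
}

int main(void)
{
	static struct device sdc = { "sdc", NULL };

	scan_add_device(&sdc);
	open_devices();
	return 0;
}

The design point is lock ordering: since opening a device can take bd_mutex-like locks, holding the list lock across the open would nest it under them; the coarse mutex sidesteps that entirely on this exclusive path.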
@@ -4135,7 +4150,22 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
 	mutex_lock(&fs_info->balance_mutex);
 	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
 		btrfs_info(fs_info, "balance: paused");
-	else if (ret == -ECANCELED && atomic_read(&fs_info->balance_cancel_req))
+	/*
+	 * Balance can be canceled by:
+	 *
+	 * - Regular cancel request
+	 *   Then ret == -ECANCELED and balance_cancel_req > 0
+	 *
+	 * - Fatal signal to "btrfs" process
+	 *   Either the signal caught by wait_reserve_ticket() and callers
+	 *   got -EINTR, or caught by btrfs_should_cancel_balance() and
+	 *   got -ECANCELED.
+	 *   Either way, in this case balance_cancel_req = 0, and
+	 *   ret == -EINTR or ret == -ECANCELED.
+	 *
+	 * So here we only check the return value to catch canceled balance.
+	 */
+	else if (ret == -ECANCELED || ret == -EINTR)
 		btrfs_info(fs_info, "balance: canceled");
 	else
 		btrfs_info(fs_info, "balance: ended with status: %d", ret);
@@ -5522,6 +5552,9 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
 	atomic_set(&bbio->error, 0);
 	refcount_set(&bbio->refs, 1);
 
+	bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes);
+	bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes);
+
 	return bbio;
 }
 
@@ -6144,8 +6177,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		ret = -ENOMEM;
 		goto out;
 	}
-	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
-		bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
+
+	for (i = 0; i < num_stripes; i++) {
+		bbio->stripes[i].physical = map->stripes[stripe_index].physical +
+			stripe_offset + stripe_nr * map->stripe_len;
+		bbio->stripes[i].dev = map->stripes[stripe_index].dev;
+		stripe_index++;
+	}
 
 	/* build raid_map */
 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
@@ -6153,11 +6191,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		u64 tmp;
 		unsigned rot;
 
-		bbio->raid_map = (u64 *)((void *)bbio->stripes +
-			 sizeof(struct btrfs_bio_stripe) *
-			 num_alloc_stripes +
-			 sizeof(int) * tgtdev_indexes);
-
 		/* Work out the disk rotation on this stripe-set */
 		div_u64_rem(stripe_nr, num_stripes, &rot);
 
@@ -6171,25 +6204,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		if (map->type & BTRFS_BLOCK_GROUP_RAID6)
 			bbio->raid_map[(i+rot+1) % num_stripes] =
 					RAID6_Q_STRIPE;
-	}
-
-	for (i = 0; i < num_stripes; i++) {
-		bbio->stripes[i].physical =
-			map->stripes[stripe_index].physical +
-			stripe_offset +
-			stripe_nr * map->stripe_len;
-		bbio->stripes[i].dev =
-			map->stripes[stripe_index].dev;
-		stripe_index++;
+		sort_parity_stripes(bbio, num_stripes);
 	}
 
 	if (need_full_stripe(op))
 		max_errors = btrfs_chunk_max_errors(map);
 
-	if (bbio->raid_map)
-		sort_parity_stripes(bbio, num_stripes);
-
 	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
 	    need_full_stripe(op)) {
 		handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
@@ -6261,23 +6282,18 @@ static void btrfs_end_bio(struct bio *bio)
 		atomic_inc(&bbio->error);
 		if (bio->bi_status == BLK_STS_IOERR ||
 		    bio->bi_status == BLK_STS_TARGET) {
-			unsigned int stripe_index =
-				btrfs_io_bio(bio)->stripe_index;
-			struct btrfs_device *dev;
-
-			BUG_ON(stripe_index >= bbio->num_stripes);
-			dev = bbio->stripes[stripe_index].dev;
-			if (dev->bdev) {
-				if (bio_op(bio) == REQ_OP_WRITE)
-					btrfs_dev_stat_inc_and_print(dev,
+			struct btrfs_device *dev = btrfs_io_bio(bio)->device;
+
+			ASSERT(dev->bdev);
+			if (bio_op(bio) == REQ_OP_WRITE)
+				btrfs_dev_stat_inc_and_print(dev,
 						BTRFS_DEV_STAT_WRITE_ERRS);
-				else if (!(bio->bi_opf & REQ_RAHEAD))
-					btrfs_dev_stat_inc_and_print(dev,
+			else if (!(bio->bi_opf & REQ_RAHEAD))
+				btrfs_dev_stat_inc_and_print(dev,
						BTRFS_DEV_STAT_READ_ERRS);
-				if (bio->bi_opf & REQ_PREFLUSH)
-					btrfs_dev_stat_inc_and_print(dev,
+			if (bio->bi_opf & REQ_PREFLUSH)
+				btrfs_dev_stat_inc_and_print(dev,
						BTRFS_DEV_STAT_FLUSH_ERRS);
-			}
 		}
 	}
 
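[Editor's note] The alloc_btrfs_bio() hunk above now carves tgtdev_map and raid_map out of the single bbio allocation unconditionally, which is what lets __btrfs_map_block() drop its conditional pointer arithmetic. A minimal sketch of that packing follows, assuming simplified stand-in types and sizes for the kernel structures; it is not btrfs code.

/*
 * Sketch only: one buffer holds the header, the flexible stripe array,
 * then the tgtdev map, then the raid map, with the two tail pointers
 * carved out of the same allocation. Types are hypothetical stand-ins.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct bio_stripe { uint64_t physical; int dev; };

struct bbio {
	int num_stripes;
	int *tgtdev_map;		/* points just past stripes[] */
	uint64_t *raid_map;		/* points just past tgtdev_map[] */
	struct bio_stripe stripes[];	/* flexible array at the tail */
};

static struct bbio *alloc_bbio(int total_stripes, int real_stripes)
{
	struct bbio *b = calloc(1, sizeof(*b) +
				sizeof(struct bio_stripe) * total_stripes +
				sizeof(int) * real_stripes +
				sizeof(uint64_t) * total_stripes);
	if (!b)
		return NULL;

	b->num_stripes = total_stripes;
	/* carve both arrays out of the tail of the one allocation */
	b->tgtdev_map = (int *)(b->stripes + total_stripes);
	b->raid_map = (uint64_t *)(b->tgtdev_map + real_stripes);
	return b;
}

int main(void)
{
	struct bbio *b = alloc_bbio(4, 4);

	if (!b)
		return 1;
	b->raid_map[3] = 42;	/* usable immediately, no second allocation */
	printf("raid_map[3] = %llu\n", (unsigned long long)b->raid_map[3]);
	free(b);
	return 0;
}

Setting the pointers at allocation time means callers can test map->type instead of "is raid_map non-NULL", which is exactly the simplification the __btrfs_map_block() hunks perform.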
@@ -6313,13 +6329,12 @@ static void btrfs_end_bio(struct bio *bio)
 }
 
 static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
-			      u64 physical, int dev_nr)
+			      u64 physical, struct btrfs_device *dev)
 {
-	struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
 	struct btrfs_fs_info *fs_info = bbio->fs_info;
 
 	bio->bi_private = bbio;
-	btrfs_io_bio(bio)->stripe_index = dev_nr;
+	btrfs_io_bio(bio)->device = dev;
 	bio->bi_end_io = btrfs_end_bio;
 	bio->bi_iter.bi_sector = physical >> 9;
 	btrfs_debug_in_rcu(fs_info,
@@ -6420,8 +6435,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 		else
 			bio = first_bio;
 
-		submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical,
-				  dev_nr);
+		submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev);
 	}
 	btrfs_bio_counter_dec(fs_info);
 	return BLK_STS_OK;
@@ -7029,6 +7043,19 @@ out:
 	return ret;
 }
 
+static void readahead_tree_node_children(struct extent_buffer *node)
+{
+	int i;
+	const int nr_items = btrfs_header_nritems(node);
+
+	for (i = 0; i < nr_items; i++) {
+		u64 start;
+
+		start = btrfs_node_blockptr(node, i);
+		readahead_tree_block(node->fs_info, start);
+	}
+}
+
 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_root *root = fs_info->chunk_root;
@@ -7039,6 +7066,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 	int ret;
 	int slot;
 	u64 total_dev = 0;
+	u64 last_ra_node = 0;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -7049,7 +7077,14 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 	 * otherwise we don't need it.
 	 */
 	mutex_lock(&uuid_mutex);
-	mutex_lock(&fs_info->chunk_mutex);
+
+	/*
+	 * It is possible for mount and umount to race in such a way that
+	 * we execute this code path, but open_fs_devices failed to clear
+	 * total_rw_bytes. We certainly want it cleared before reading the
+	 * device items, so clear it here.
+	 */
+	fs_info->fs_devices->total_rw_bytes = 0;
 
 	/*
 	 * Read all device items, and then all the chunk items. All
@@ -7064,6 +7099,8 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 	if (ret < 0)
 		goto error;
 	while (1) {
+		struct extent_buffer *node;
+
 		leaf = path->nodes[0];
 		slot = path->slots[0];
 		if (slot >= btrfs_header_nritems(leaf)) {
@@ -7074,6 +7111,17 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 				goto error;
 			break;
 		}
+		/*
+		 * The nodes on level 1 are not locked but we don't need to do
+		 * that during mount time as nothing else can access the tree
+		 */
+		node = path->nodes[1];
+		if (node) {
+			if (last_ra_node != node->start) {
+				readahead_tree_node_children(node);
+				last_ra_node = node->start;
+			}
+		}
 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
 		if (found_key.type == BTRFS_DEV_ITEM_KEY) {
 			struct btrfs_dev_item *dev_item;
@@ -7086,7 +7134,9 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
 			struct btrfs_chunk *chunk;
 			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+			mutex_lock(&fs_info->chunk_mutex);
 			ret = read_one_chunk(&found_key, leaf, chunk);
+			mutex_unlock(&fs_info->chunk_mutex);
 			if (ret)
 				goto error;
 		}
@@ -7116,7 +7166,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 	}
 	ret = 0;
 error:
-	mutex_unlock(&fs_info->chunk_mutex);
 	mutex_unlock(&uuid_mutex);
 	btrfs_free_path(path);
 	return ret;
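[Editor's note] The chunk-tree hunks above add readahead_tree_node_children() plus a last_ra_node check: while iterating leaf items, prefetch all children of the current level-1 node once, and skip the readahead when consecutive leaves share the same parent. A minimal userspace sketch of that dedupe pattern, with stand-in tree and prefetch helpers (not kernel APIs):

/*
 * Sketch only: remember the start offset of the last node whose
 * children were prefetched, so repeated visits to the same parent do
 * not re-issue readahead. All names here are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

struct node { uint64_t start; int nr_items; };

static void readahead_children(const struct node *n)
{
	/* stand-in for issuing readahead on each child block */
	printf("readahead %d children of node at %llu\n",
	       n->nr_items, (unsigned long long)n->start);
}

int main(void)
{
	/* two consecutive leaves under node a, then one under node b */
	struct node a = { 4096, 2 }, b = { 8192, 1 };
	const struct node *walk[] = { &a, &a, &b };
	uint64_t last_ra_node = 0;

	for (int i = 0; i < 3; i++) {
		const struct node *n = walk[i];

		if (last_ra_node != n->start) {	/* dedupe consecutive hits */
			readahead_children(n);
			last_ra_node = n->start;
		}
	}
	return 0;
}

A single cached key suffices here because the mount-time walk visits leaves in order, so duplicate parents are always consecutive; the same reasoning is why the unlocked access to path->nodes[1] is safe, as nothing else can touch the tree during mount.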