diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/bio.c | 37 | ||||
-rw-r--r-- | fs/btrfs/bio.h | 3 | ||||
-rw-r--r-- | fs/btrfs/block-group.c | 2 | ||||
-rw-r--r-- | fs/btrfs/defrag.c | 10 | ||||
-rw-r--r-- | fs/btrfs/dir-item.c | 4 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 17 | ||||
-rw-r--r-- | fs/btrfs/extent_map.c | 38 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 7 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 2 | ||||
-rw-r--r-- | fs/btrfs/qgroup.h | 2 | ||||
-rw-r--r-- | fs/btrfs/super.c | 12 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 1 |
13 files changed, 73 insertions, 64 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index fec5c6cde0a7..7e0f9600b80c 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -49,6 +49,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info, bbio->end_io = end_io; bbio->private = private; atomic_set(&bbio->pending_ios, 1); + WRITE_ONCE(bbio->status, BLK_STS_OK); } /* @@ -113,41 +114,29 @@ static void __btrfs_bio_end_io(struct btrfs_bio *bbio) } } -static void btrfs_orig_write_end_io(struct bio *bio); - -static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio, - struct btrfs_bio *orig_bbio) -{ - /* - * For writes we tolerate nr_mirrors - 1 write failures, so we can't - * just blindly propagate a write failure here. Instead increment the - * error count in the original I/O context so that it is guaranteed to - * be larger than the error tolerance. - */ - if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) { - struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private; - struct btrfs_io_context *orig_bioc = orig_stripe->bioc; - - atomic_add(orig_bioc->max_errors, &orig_bioc->error); - } else { - orig_bbio->bio.bi_status = bbio->bio.bi_status; - } -} - void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status) { bbio->bio.bi_status = status; if (bbio->bio.bi_pool == &btrfs_clone_bioset) { struct btrfs_bio *orig_bbio = bbio->private; - if (bbio->bio.bi_status) - btrfs_bbio_propagate_error(bbio, orig_bbio); btrfs_cleanup_bio(bbio); bbio = orig_bbio; } - if (atomic_dec_and_test(&bbio->pending_ios)) + /* + * At this point, bbio always points to the original btrfs_bio. Save + * the first error in it. + */ + if (status != BLK_STS_OK) + cmpxchg(&bbio->status, BLK_STS_OK, status); + + if (atomic_dec_and_test(&bbio->pending_ios)) { + /* Load split bio's error which might be set above. */ + if (status == BLK_STS_OK) + bbio->bio.bi_status = READ_ONCE(bbio->status); __btrfs_bio_end_io(bbio); + } } static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror) diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h index e48612340745..e2fe16074ad6 100644 --- a/fs/btrfs/bio.h +++ b/fs/btrfs/bio.h @@ -79,6 +79,9 @@ struct btrfs_bio { /* File system that this I/O operates on. */ struct btrfs_fs_info *fs_info; + /* Save the first error status of split bio. */ + blk_status_t status; + /* * This member must come last, bio_alloc_bioset will allocate enough * bytes for entire btrfs_bio but relies on bio being last. diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 7980b2e33a92..4423d8b716a5 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3819,6 +3819,8 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, spin_lock(&cache->lock); if (cache->ro) space_info->bytes_readonly += num_bytes; + else if (btrfs_is_zoned(cache->fs_info)) + space_info->bytes_zone_unusable += num_bytes; cache->reserved -= num_bytes; space_info->bytes_reserved -= num_bytes; space_info->max_extent_size = 0; diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index b95ef44c326b..968dae953948 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -763,12 +763,12 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start, * We can get a merged extent, in that case, we need to re-search * tree to get the original em for defrag. * - * If @newer_than is 0 or em::generation < newer_than, we can trust - * this em, as either we don't care about the generation, or the - * merged extent map will be rejected anyway. + * This is because even if we have adjacent extents that are contiguous + * and compatible (same type and flags), we still want to defrag them + * so that we use less metadata (extent items in the extent tree and + * file extent items in the inode's subvolume tree). */ - if (em && (em->flags & EXTENT_FLAG_MERGED) && - newer_than && em->generation >= newer_than) { + if (em && (em->flags & EXTENT_FLAG_MERGED)) { free_extent_map(em); em = NULL; } diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 001c0c2f872c..1e8cd7c9472e 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -347,8 +347,8 @@ btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path, return di; } /* Adjust return code if the key was not found in the next leaf. */ - if (ret > 0) - ret = 0; + if (ret >= 0) + ret = -ENOENT; return ERR_PTR(ret); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ad5db619b00..b11bfe68dd65 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1959,7 +1959,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) fs_info->qgroup_seq = 1; fs_info->qgroup_ulist = NULL; fs_info->qgroup_rescan_running = false; - fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL; + fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT; mutex_init(&fs_info->qgroup_rescan_lock); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 309a8ae48434..872cca54cc6c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -262,22 +262,23 @@ static noinline int lock_delalloc_folios(struct inode *inode, for (i = 0; i < found_folios; i++) { struct folio *folio = fbatch.folios[i]; - u32 len = end + 1 - start; + u64 range_start; + u32 range_len; if (folio == locked_folio) continue; - if (btrfs_folio_start_writer_lock(fs_info, folio, start, - len)) - goto out; - + folio_lock(folio); if (!folio_test_dirty(folio) || folio->mapping != mapping) { - btrfs_folio_end_writer_lock(fs_info, folio, start, - len); + folio_unlock(folio); goto out; } + range_start = max_t(u64, folio_pos(folio), start); + range_len = min_t(u64, folio_pos(folio) + folio_size(folio), + end + 1) - range_start; + btrfs_folio_set_writer_lock(fs_info, folio, range_start, range_len); - processed_end = folio_pos(folio) + folio_size(folio) - 1; + processed_end = range_start + range_len - 1; } folio_batch_release(&fbatch); cond_resched(); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 25d191f1ac10..1d93e1202c33 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -230,7 +230,12 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma if (extent_map_end(prev) != next->start) return false; - if (prev->flags != next->flags) + /* + * The merged flag is not an on-disk flag, it just indicates we had the + * extent maps of 2 (or more) adjacent extents merged, so factor it out. + */ + if ((prev->flags & ~EXTENT_FLAG_MERGED) != + (next->flags & ~EXTENT_FLAG_MERGED)) return false; if (next->disk_bytenr < EXTENT_MAP_LAST_BYTE - 1) @@ -243,13 +248,19 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma /* * Handle the on-disk data extents merge for @prev and @next. * + * @prev: left extent to merge + * @next: right extent to merge + * @merged: the extent we will not discard after the merge; updated with new values + * + * After this, one of the two extents is the new merged extent and the other is + * removed from the tree and likely freed. Note that @merged is one of @prev/@next + * so there is const/non-const aliasing occurring here. + * * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes. * For now only uncompressed regular extent can be merged. - * - * @prev and @next will be both updated to point to the new merged range. - * Thus one of them should be removed by the caller. */ -static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next) +static void merge_ondisk_extents(const struct extent_map *prev, const struct extent_map *next, + struct extent_map *merged) { u64 new_disk_bytenr; u64 new_disk_num_bytes; @@ -284,15 +295,10 @@ static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *nex new_disk_bytenr; new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr; - prev->disk_bytenr = new_disk_bytenr; - prev->disk_num_bytes = new_disk_num_bytes; - prev->ram_bytes = new_disk_num_bytes; - prev->offset = new_offset; - - next->disk_bytenr = new_disk_bytenr; - next->disk_num_bytes = new_disk_num_bytes; - next->ram_bytes = new_disk_num_bytes; - next->offset = new_offset; + merged->disk_bytenr = new_disk_bytenr; + merged->disk_num_bytes = new_disk_num_bytes; + merged->ram_bytes = new_disk_num_bytes; + merged->offset = new_offset; } static void dump_extent_map(struct btrfs_fs_info *fs_info, const char *prefix, @@ -361,7 +367,7 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) em->generation = max(em->generation, merge->generation); if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) - merge_ondisk_extents(merge, em); + merge_ondisk_extents(merge, em, em); em->flags |= EXTENT_FLAG_MERGED; validate_extent_map(fs_info, em); @@ -378,7 +384,7 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) { em->len += merge->len; if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) - merge_ondisk_extents(em, merge); + merge_ondisk_extents(em, merge, em); validate_extent_map(fs_info, em); rb_erase(&merge->rb_node, &tree->root); RB_CLEAR_NODE(&merge->rb_node); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5618ca02934a..da51edbad6a0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4368,11 +4368,8 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, */ if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) { di = btrfs_search_dir_index_item(root, path, dir_ino, &fname.disk_name); - if (IS_ERR_OR_NULL(di)) { - if (!di) - ret = -ENOENT; - else - ret = PTR_ERR(di); + if (IS_ERR(di)) { + ret = PTR_ERR(di); btrfs_abort_transaction(trans, ret); goto out; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1332ec59c539..a0e8deca87a7 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1407,7 +1407,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) fs_info->quota_root = NULL; fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE; - fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL; + fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT; spin_unlock(&fs_info->qgroup_lock); btrfs_free_qgroup_config(fs_info); diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 98adf4ec7b01..c229256d6fd5 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -121,6 +121,8 @@ struct btrfs_inode; #define BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN (1ULL << 63) #define BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING (1ULL << 62) +#define BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT (3) + /* * Record a dirty extent, and info qgroup to update quota on it */ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 98fa0f382480..926d7a9ed99d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -340,6 +340,15 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param) fallthrough; case Opt_compress: case Opt_compress_type: + /* + * Provide the same semantics as older kernels that don't use fs + * context, specifying the "compress" option clears + * "force-compress" without the need to pass + * "compress-force=[no|none]" before specifying "compress". + */ + if (opt != Opt_compress_force && opt != Opt_compress_force_type) + btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); + if (opt == Opt_compress || opt == Opt_compress_force) { ctx->compress_type = BTRFS_COMPRESS_ZLIB; ctx->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; @@ -1498,8 +1507,7 @@ static int btrfs_reconfigure(struct fs_context *fc) sync_filesystem(sb); set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); - if (!mount_reconfigure && - !btrfs_check_options(fs_info, &ctx->mount_opt, fc->sb_flags)) + if (!btrfs_check_options(fs_info, &ctx->mount_opt, fc->sb_flags)) return -EINVAL; ret = btrfs_check_features(fs_info, !(fc->sb_flags & SB_RDONLY)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8f340ad1d938..eb51b609190f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1105,6 +1105,7 @@ static void btrfs_close_one_device(struct btrfs_device *device) if (device->bdev) { fs_devices->open_devices--; device->bdev = NULL; + device->bdev_file = NULL; } clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); btrfs_destroy_dev_zone_info(device); |