diff options
author | Josef Bacik <josef@toxicpanda.com> | 2021-12-03 17:18:04 -0500 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2022-01-07 14:18:24 +0100 |
commit | 54f03ab1e19b04dea546f83ae70b3285bc61b9f8 (patch) | |
tree | c5af64a97a7de3e5cb5fe3d2346d3eb71a434ca8 /fs/btrfs/inode-item.c | |
parent | 26c2c4540d6d5c85a22a857ccda304361f1afeaf (diff) | |
download | lwn-54f03ab1e19b04dea546f83ae70b3285bc61b9f8.tar.gz lwn-54f03ab1e19b04dea546f83ae70b3285bc61b9f8.zip |
btrfs: move btrfs_truncate_inode_items to inode-item.c
This is an inode item related manipulation with a few vfs related
adjustments. I'm going to remove the vfs related code from this helper
and simplify it a lot, but I want those changes to be easily seen via
git blame, so move this function now and then the simplification work
can be done.
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/inode-item.c')
-rw-r--r-- | fs/btrfs/inode-item.c | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 72593a93c43c..c43a3610f443 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -418,3 +418,375 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root } return ret; } + +/* + * Remove inode items from a given root. + * + * @trans: A transaction handle. + * @root: The root from which to remove items. + * @inode: The inode whose items we want to remove. + * @new_size: The new i_size for the inode. This is only applicable when + * @min_type is BTRFS_EXTENT_DATA_KEY, must be 0 otherwise. + * @min_type: The minimum key type to remove. All keys with a type + * greater than this value are removed and all keys with + * this type are removed only if their offset is >= @new_size. + * @extents_found: Output parameter that will contain the number of file + * extent items that were removed or adjusted to the new + * inode i_size. The caller is responsible for initializing + * the counter. Also, it can be NULL if the caller does not + * need this counter. + * + * Remove all keys associated with the inode from the given root that have a key + * with a type greater than or equals to @min_type. When @min_type has a value of + * BTRFS_EXTENT_DATA_KEY, only remove file extent items that have an offset value + * greater than or equals to @new_size. If a file extent item that starts before + * @new_size and ends after it is found, its length is adjusted. + * + * Returns: 0 on success, < 0 on error and NEED_TRUNCATE_BLOCK when @min_type is + * BTRFS_EXTENT_DATA_KEY and the caller must truncate the last block. + */ +int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_inode *inode, + u64 new_size, u32 min_type, + u64 *extents_found) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_key found_key; + u64 extent_start = 0; + u64 extent_num_bytes = 0; + u64 extent_offset = 0; + u64 item_end = 0; + u64 last_size = new_size; + u32 found_type = (u8)-1; + int found_extent; + int del_item; + int pending_del_nr = 0; + int pending_del_slot = 0; + int extent_type = -1; + int ret; + u64 ino = btrfs_ino(inode); + u64 bytes_deleted = 0; + bool be_nice = false; + bool should_throttle = false; + const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize); + struct extent_state *cached_state = NULL; + + BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); + + /* + * For non-free space inodes and non-shareable roots, we want to back + * off from time to time. This means all inodes in subvolume roots, + * reloc roots, and data reloc roots. + */ + if (!btrfs_is_free_space_inode(inode) && + test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) + be_nice = true; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = READA_BACK; + + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + lock_extent_bits(&inode->io_tree, lock_start, (u64)-1, + &cached_state); + + /* + * We want to drop from the next block forward in case this + * new size is not block aligned since we will be keeping the + * last block of the extent just the way it is. + */ + btrfs_drop_extent_cache(inode, ALIGN(new_size, + fs_info->sectorsize), + (u64)-1, 0); + } + + /* + * This function is also used to drop the items in the log tree before + * we relog the inode, so if root != BTRFS_I(inode)->root, it means + * it is used to drop the logged items. So we shouldn't kill the delayed + * items. + */ + if (min_type == 0 && root == inode->root) + btrfs_kill_delayed_inode_items(inode); + + key.objectid = ino; + key.offset = (u64)-1; + key.type = (u8)-1; + +search_again: + /* + * With a 16K leaf size and 128MiB extents, you can actually queue up a + * huge file in a single leaf. Most of the time that bytes_deleted is + * > 0, it will be huge by the time we get here + */ + if (be_nice && bytes_deleted > SZ_32M && + btrfs_should_end_transaction(trans)) { + ret = -EAGAIN; + goto out; + } + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = 0; + /* There are no items in the tree for us to truncate, we're done */ + if (path->slots[0] == 0) + goto out; + path->slots[0]--; + } + + while (1) { + u64 clear_start = 0, clear_len = 0; + + fi = NULL; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = found_key.type; + + if (found_key.objectid != ino) + break; + + if (found_type < min_type) + break; + + item_end = found_key.offset; + if (found_type == BTRFS_EXTENT_DATA_KEY) { + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(leaf, fi); + if (extent_type != BTRFS_FILE_EXTENT_INLINE) { + item_end += + btrfs_file_extent_num_bytes(leaf, fi); + + trace_btrfs_truncate_show_fi_regular( + inode, leaf, fi, found_key.offset); + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + item_end += btrfs_file_extent_ram_bytes(leaf, fi); + + trace_btrfs_truncate_show_fi_inline( + inode, leaf, fi, path->slots[0], + found_key.offset); + } + item_end--; + } + if (found_type > min_type) { + del_item = 1; + } else { + if (item_end < new_size) + break; + if (found_key.offset >= new_size) + del_item = 1; + else + del_item = 0; + } + found_extent = 0; + /* FIXME, shrink the extent if the ref count is only 1 */ + if (found_type != BTRFS_EXTENT_DATA_KEY) + goto delete; + + if (extents_found != NULL) + (*extents_found)++; + + if (extent_type != BTRFS_FILE_EXTENT_INLINE) { + u64 num_dec; + + clear_start = found_key.offset; + extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); + if (!del_item) { + u64 orig_num_bytes = + btrfs_file_extent_num_bytes(leaf, fi); + extent_num_bytes = ALIGN(new_size - + found_key.offset, + fs_info->sectorsize); + clear_start = ALIGN(new_size, fs_info->sectorsize); + + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_num_bytes); + num_dec = (orig_num_bytes - extent_num_bytes); + if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && + extent_start != 0) + inode_sub_bytes(&inode->vfs_inode, + num_dec); + btrfs_mark_buffer_dirty(leaf); + } else { + extent_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, fi); + extent_offset = found_key.offset - + btrfs_file_extent_offset(leaf, fi); + + /* FIXME blocksize != 4096 */ + num_dec = btrfs_file_extent_num_bytes(leaf, fi); + if (extent_start != 0) { + found_extent = 1; + if (test_bit(BTRFS_ROOT_SHAREABLE, + &root->state)) + inode_sub_bytes(&inode->vfs_inode, + num_dec); + } + } + clear_len = num_dec; + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + /* + * We can't truncate inline items that have had + * special encodings + */ + if (!del_item && + btrfs_file_extent_encryption(leaf, fi) == 0 && + btrfs_file_extent_other_encoding(leaf, fi) == 0 && + btrfs_file_extent_compression(leaf, fi) == 0) { + u32 size = (u32)(new_size - found_key.offset); + + btrfs_set_file_extent_ram_bytes(leaf, fi, size); + size = btrfs_file_extent_calc_inline_size(size); + btrfs_truncate_item(path, size, 1); + } else if (!del_item) { + /* + * We have to bail so the last_size is set to + * just before this extent. + */ + ret = BTRFS_NEED_TRUNCATE_BLOCK; + break; + } else { + /* + * Inline extents are special, we just treat + * them as a full sector worth in the file + * extent tree just for simplicity sake. + */ + clear_len = fs_info->sectorsize; + } + + if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) + inode_sub_bytes(&inode->vfs_inode, + item_end + 1 - new_size); + } +delete: + /* + * We use btrfs_truncate_inode_items() to clean up log trees for + * multiple fsyncs, and in this case we don't want to clear the + * file extent range because it's just the log. + */ + if (root == inode->root) { + ret = btrfs_inode_clear_file_extent_range(inode, + clear_start, clear_len); + if (ret) { + btrfs_abort_transaction(trans, ret); + break; + } + } + + if (del_item) + last_size = found_key.offset; + else + last_size = new_size; + if (del_item) { + if (!pending_del_nr) { + /* No pending yet, add ourselves */ + pending_del_slot = path->slots[0]; + pending_del_nr = 1; + } else if (pending_del_nr && + path->slots[0] + 1 == pending_del_slot) { + /* Hop on the pending chunk */ + pending_del_nr++; + pending_del_slot = path->slots[0]; + } else { + BUG(); + } + } else { + break; + } + should_throttle = false; + + if (found_extent && + root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + struct btrfs_ref ref = { 0 }; + + bytes_deleted += extent_num_bytes; + + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, + extent_start, extent_num_bytes, 0); + btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), + ino, extent_offset, + root->root_key.objectid, false); + ret = btrfs_free_extent(trans, &ref); + if (ret) { + btrfs_abort_transaction(trans, ret); + break; + } + if (be_nice) { + if (btrfs_should_throttle_delayed_refs(trans)) + should_throttle = true; + } + } + + if (found_type == BTRFS_INODE_ITEM_KEY) + break; + + if (path->slots[0] == 0 || + path->slots[0] != pending_del_slot || + should_throttle) { + if (pending_del_nr) { + ret = btrfs_del_items(trans, root, path, + pending_del_slot, + pending_del_nr); + if (ret) { + btrfs_abort_transaction(trans, ret); + break; + } + pending_del_nr = 0; + } + btrfs_release_path(path); + + /* + * We can generate a lot of delayed refs, so we need to + * throttle every once and a while and make sure we're + * adding enough space to keep up with the work we are + * generating. Since we hold a transaction here we + * can't flush, and we don't want to FLUSH_LIMIT because + * we could have generated too many delayed refs to + * actually allocate, so just bail if we're short and + * let the normal reservation dance happen higher up. + */ + if (should_throttle) { + ret = btrfs_delayed_refs_rsv_refill(fs_info, + BTRFS_RESERVE_NO_FLUSH); + if (ret) { + ret = -EAGAIN; + break; + } + } + goto search_again; + } else { + path->slots[0]--; + } + } +out: + if (ret >= 0 && pending_del_nr) { + int err; + + err = btrfs_del_items(trans, root, path, pending_del_slot, + pending_del_nr); + if (err) { + btrfs_abort_transaction(trans, err); + ret = err; + } + } + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + ASSERT(last_size >= new_size); + if (!ret && last_size > new_size) + last_size = new_size; + btrfs_inode_safe_disk_i_size_write(inode, last_size); + unlock_extent_cached(&inode->io_tree, lock_start, (u64)-1, + &cached_state); + } + + btrfs_free_path(path); + return ret; +} |