summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2022-02-17 12:12:04 +0000
committerDavid Sterba <dsterba@suse.com>2022-03-14 13:13:52 +0100
commit5b7ce5e287f030d1d3c799abea769b1a308067ba (patch)
tree535544522761fa7b63a75f3b36630553d151401d /fs/btrfs
parent7f30c07288bb9e20463182d0db56416025f85e08 (diff)
downloadlwn-5b7ce5e287f030d1d3c799abea769b1a308067ba.tar.gz
lwn-5b7ce5e287f030d1d3c799abea769b1a308067ba.zip
btrfs: hold on to less memory when logging checksums during full fsync
When doing a full fsync, at copy_items(), we iterate over all extents and then collect their checksums into a list. After copying all the extents to the log tree, we then log all the previously collected checksums. Before the previous patch in the series (subject "btrfs: stop copying old file extents when doing a full fsync"), we had to do it this way, because while we were iterating over the items in the leaf of the subvolume tree, we were holding a write lock on a leaf of the log tree, so logging the checksums for an extent right after we collected them could result in a deadlock, in case the checksum items ended up in the same leaf. However after the previous patch in the series we now do a first iteration over all the items in the leaf of the subvolume tree before locking a path in the log tree, so we can now log the checksums right after we have obtained them. This avoids holding in memory all checksums for all extents in the leaf while copying items from the source leaf to the log tree. The amount of memory used to hold all checksums of the extents in a leaf can be significant. For example if a leaf has 200 file extent items referring to 1M extents, using the default crc32c checksums, would result in using over 200K of memory (not accounting for the extra overhead of struct btrfs_ordered_sum), with smaller or less extents it would be less, but it could be much more with more extents per leaf and/or much larger extents. So change copy_items() to log the checksums for an extent after looking them up, and then free their memory, as they are no longer necessary. Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/tree-log.c29
1 files changed, 12 insertions, 17 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index bb3781fed8da..0b841eab6169 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4428,12 +4428,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
char *ins_data;
int i;
int dst_index;
- struct list_head ordered_sums;
const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
const u64 i_size = i_size_read(&inode->vfs_inode);
- INIT_LIST_HEAD(&ordered_sums);
-
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
if (!ins_data)
@@ -4450,6 +4447,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
for (i = 0; i < nr; i++) {
const int src_slot = start_slot + i;
struct btrfs_root *csum_root;
+ struct btrfs_ordered_sum *sums;
+ struct btrfs_ordered_sum *sums_next;
+ LIST_HEAD(ordered_sums);
u64 disk_bytenr;
u64 disk_num_bytes;
u64 extent_offset;
@@ -4523,6 +4523,15 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
if (ret)
goto out;
+ list_for_each_entry_safe(sums, sums_next, &ordered_sums, list) {
+ if (!ret)
+ ret = log_csums(trans, inode, log, sums);
+ list_del(&sums->list);
+ kfree(sums);
+ }
+ if (ret)
+ goto out;
+
add_to_batch:
ins_sizes[dst_index] = btrfs_item_size(src, src_slot);
batch.total_data_size += ins_sizes[dst_index];
@@ -4596,20 +4605,6 @@ copy_item:
out:
kfree(ins_data);
- /*
- * we have to do this after the loop above to avoid changing the
- * log tree while trying to change the log tree.
- */
- while (!list_empty(&ordered_sums)) {
- struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
- struct btrfs_ordered_sum,
- list);
- if (!ret)
- ret = log_csums(trans, inode, log, sums);
- list_del(&sums->list);
- kfree(sums);
- }
-
return ret;
}