diff options
author | Filipe Manana <fdmanana@suse.com> | 2022-02-17 12:12:04 +0000 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2022-03-14 13:13:52 +0100 |
commit | 5b7ce5e287f030d1d3c799abea769b1a308067ba (patch) | |
tree | 535544522761fa7b63a75f3b36630553d151401d /fs/btrfs | |
parent | 7f30c07288bb9e20463182d0db56416025f85e08 (diff) | |
download | lwn-5b7ce5e287f030d1d3c799abea769b1a308067ba.tar.gz lwn-5b7ce5e287f030d1d3c799abea769b1a308067ba.zip |
btrfs: hold on to less memory when logging checksums during full fsync
When doing a full fsync, at copy_items(), we iterate over all extents and
then collect their checksums into a list. After copying all the extents to
the log tree, we then log all the previously collected checksums.
Before the previous patch in the series (subject "btrfs: stop copying old
file extents when doing a full fsync"), we had to do it this way, because
while we were iterating over the items in the leaf of the subvolume tree,
we were holding a write lock on a leaf of the log tree, so logging the
checksums for an extent right after we collected them could result in a
deadlock, in case the checksum items ended up in the same leaf.
However after the previous patch in the series we now do a first iteration
over all the items in the leaf of the subvolume tree before locking a path
in the log tree, so we can now log the checksums right after we have
obtained them. This avoids holding in memory all checksums for all extents
in the leaf while copying items from the source leaf to the log tree. The
amount of memory used to hold all checksums of the extents in a leaf can
be significant. For example if a leaf has 200 file extent items referring
to 1M extents, using the default crc32c checksums, would result in using
over 200K of memory (not accounting for the extra overhead of struct
btrfs_ordered_sum), with smaller or less extents it would be less, but
it could be much more with more extents per leaf and/or much larger
extents.
So change copy_items() to log the checksums for an extent after looking
them up, and then free their memory, as they are no longer necessary.
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/tree-log.c | 29 |
1 files changed, 12 insertions, 17 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index bb3781fed8da..0b841eab6169 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4428,12 +4428,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, char *ins_data; int i; int dst_index; - struct list_head ordered_sums; const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM); const u64 i_size = i_size_read(&inode->vfs_inode); - INIT_LIST_HEAD(&ordered_sums); - ins_data = kmalloc(nr * sizeof(struct btrfs_key) + nr * sizeof(u32), GFP_NOFS); if (!ins_data) @@ -4450,6 +4447,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, for (i = 0; i < nr; i++) { const int src_slot = start_slot + i; struct btrfs_root *csum_root; + struct btrfs_ordered_sum *sums; + struct btrfs_ordered_sum *sums_next; + LIST_HEAD(ordered_sums); u64 disk_bytenr; u64 disk_num_bytes; u64 extent_offset; @@ -4523,6 +4523,15 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (ret) goto out; + list_for_each_entry_safe(sums, sums_next, &ordered_sums, list) { + if (!ret) + ret = log_csums(trans, inode, log, sums); + list_del(&sums->list); + kfree(sums); + } + if (ret) + goto out; + add_to_batch: ins_sizes[dst_index] = btrfs_item_size(src, src_slot); batch.total_data_size += ins_sizes[dst_index]; @@ -4596,20 +4605,6 @@ copy_item: out: kfree(ins_data); - /* - * we have to do this after the loop above to avoid changing the - * log tree while trying to change the log tree. - */ - while (!list_empty(&ordered_sums)) { - struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, - struct btrfs_ordered_sum, - list); - if (!ret) - ret = log_csums(trans, inode, log, sums); - list_del(&sums->list); - kfree(sums); - } - return ret; } |