diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 139 |
1 files changed, 92 insertions, 47 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4f874d02f310..aaa346562df6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5954,42 +5954,31 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, } /** - * drop_outstanding_extent - drop an outstanding extent + * drop_over_reserved_extents - drop our extra extent reservations * @inode: the inode we're dropping the extent for - * @num_bytes: the number of bytes we're releasing. * - * This is called when we are freeing up an outstanding extent, either called - * after an error or after an extent is written. This will return the number of - * reserved extents that need to be freed. This must be called with - * BTRFS_I(inode)->lock held. + * We reserve extents we may use, but they may have been merged with other + * extents and we may not need the extra reservation. + * + * We also call this when we've completed io to an extent or had an error and + * cleared the outstanding extent, in either case we no longer need our + * reservation and can drop the excess. */ -static unsigned drop_outstanding_extent(struct btrfs_inode *inode, - u64 num_bytes) +static unsigned drop_over_reserved_extents(struct btrfs_inode *inode) { - unsigned drop_inode_space = 0; - unsigned dropped_extents = 0; - unsigned num_extents; + unsigned num_extents = 0; - num_extents = count_max_extents(num_bytes); - ASSERT(num_extents); - ASSERT(inode->outstanding_extents >= num_extents); - inode->outstanding_extents -= num_extents; + if (inode->reserved_extents > inode->outstanding_extents) { + num_extents = inode->reserved_extents - + inode->outstanding_extents; + btrfs_mod_reserved_extents(inode, -num_extents); + } if (inode->outstanding_extents == 0 && test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, &inode->runtime_flags)) - drop_inode_space = 1; - - /* - * If we have more or the same amount of outstanding extents than we have - * reserved then we need to leave the reserved extents count alone. - */ - if (inode->outstanding_extents >= inode->reserved_extents) - return drop_inode_space; - - dropped_extents = inode->reserved_extents - inode->outstanding_extents; - inode->reserved_extents -= dropped_extents; - return dropped_extents + drop_inode_space; + num_extents++; + return num_extents; } /** @@ -6044,13 +6033,15 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv; u64 to_reserve = 0; u64 csum_bytes; - unsigned nr_extents; + unsigned nr_extents, reserve_extents; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; int ret = 0; bool delalloc_lock = true; u64 to_free = 0; unsigned dropped; bool release_extra = false; + bool underflow = false; + bool did_retry = false; /* If we are a free space inode we need to not flush since we will be in * the middle of a transaction commit. We also don't need the delalloc @@ -6075,18 +6066,31 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) mutex_lock(&inode->delalloc_mutex); num_bytes = ALIGN(num_bytes, fs_info->sectorsize); - +retry: spin_lock(&inode->lock); - nr_extents = count_max_extents(num_bytes); - inode->outstanding_extents += nr_extents; + reserve_extents = nr_extents = count_max_extents(num_bytes); + btrfs_mod_outstanding_extents(inode, nr_extents); - nr_extents = 0; - if (inode->outstanding_extents > inode->reserved_extents) - nr_extents += inode->outstanding_extents - + /* + * Because we add an outstanding extent for ordered before we clear + * delalloc we will double count our outstanding extents slightly. This + * could mean that we transiently over-reserve, which could result in an + * early ENOSPC if our timing is unlucky. Keep track of the case that + * we had a reservation underflow so we can retry if we fail. + * + * Keep in mind we can legitimately have more outstanding extents than + * reserved because of fragmentation, so only allow a retry once. + */ + if (inode->outstanding_extents > + inode->reserved_extents + nr_extents) { + reserve_extents = inode->outstanding_extents - inode->reserved_extents; + underflow = true; + } /* We always want to reserve a slot for updating the inode. */ - to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1); + to_reserve = btrfs_calc_trans_metadata_size(fs_info, + reserve_extents + 1); to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); csum_bytes = inode->csum_bytes; spin_unlock(&inode->lock); @@ -6111,7 +6115,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1); release_extra = true; } - inode->reserved_extents += nr_extents; + btrfs_mod_reserved_extents(inode, reserve_extents); spin_unlock(&inode->lock); if (delalloc_lock) @@ -6127,7 +6131,10 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) out_fail: spin_lock(&inode->lock); - dropped = drop_outstanding_extent(inode, num_bytes); + nr_extents = count_max_extents(num_bytes); + btrfs_mod_outstanding_extents(inode, -nr_extents); + + dropped = drop_over_reserved_extents(inode); /* * If the inodes csum_bytes is the same as the original * csum_bytes then we know we haven't raced with any free()ers @@ -6184,6 +6191,11 @@ out_fail: trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), to_free, 0); } + if (underflow && !did_retry) { + did_retry = true; + underflow = false; + goto retry; + } if (delalloc_lock) mutex_unlock(&inode->delalloc_mutex); return ret; @@ -6191,12 +6203,12 @@ out_fail: /** * btrfs_delalloc_release_metadata - release a metadata reservation for an inode - * @inode: the inode to release the reservation for - * @num_bytes: the number of bytes we're releasing + * @inode: the inode to release the reservation for. + * @num_bytes: the number of bytes we are releasing. * * This will release the metadata reservation for an inode. This can be called * once we complete IO for a given set of bytes to release their metadata - * reservations. + * reservations, or on error for the same reason. */ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) { @@ -6206,8 +6218,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&inode->lock); - dropped = drop_outstanding_extent(inode, num_bytes); - + dropped = drop_over_reserved_extents(inode); if (num_bytes) to_free = calc_csum_metadata_size(inode, num_bytes, 0); spin_unlock(&inode->lock); @@ -6224,6 +6235,42 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) } /** + * btrfs_delalloc_release_extents - release our outstanding_extents + * @inode: the inode to balance the reservation for. + * @num_bytes: the number of bytes we originally reserved with + * + * When we reserve space we increase outstanding_extents for the extents we may + * add. Once we've set the range as delalloc or created our ordered extents we + * have outstanding_extents to track the real usage, so we use this to free our + * temporarily tracked outstanding_extents. This _must_ be used in conjunction + * with btrfs_delalloc_reserve_metadata. + */ +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + unsigned num_extents; + u64 to_free; + unsigned dropped; + + spin_lock(&inode->lock); + num_extents = count_max_extents(num_bytes); + btrfs_mod_outstanding_extents(inode, -num_extents); + dropped = drop_over_reserved_extents(inode); + spin_unlock(&inode->lock); + + if (!dropped) + return; + + if (btrfs_is_testing(fs_info)) + return; + + to_free = btrfs_calc_trans_metadata_size(fs_info, dropped); + trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), + to_free, 0); + btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free); +} + +/** * btrfs_delalloc_reserve_space - reserve data and metadata space for * delalloc * @inode: inode we're writing to @@ -6267,10 +6314,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, * @inode: inode we're releasing space for * @start: start position of the space already reserved * @len: the len of the space already reserved - * - * This must be matched with a call to btrfs_delalloc_reserve_space. This is - * called in the case that we don't need the metadata AND data reservations - * anymore. So if there is an error or we insert an inline extent. + * @release_bytes: the len of the space we consumed or didn't use * * This function will release the metadata space that was not used and will * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes @@ -6278,7 +6322,8 @@ int btrfs_delalloc_reserve_space(struct inode *inode, * Also it will handle the qgroup reserved space. */ void btrfs_delalloc_release_space(struct inode *inode, - struct extent_changeset *reserved, u64 start, u64 len) + struct extent_changeset *reserved, + u64 start, u64 len) { btrfs_delalloc_release_metadata(BTRFS_I(inode), len); btrfs_free_reserved_data_space(inode, reserved, start, len); |