diff options
-rw-r--r-- | fs/btrfs/block-rsv.c | 6 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.c | 21 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 134 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 2 |
4 files changed, 132 insertions, 31 deletions
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 6ccd91bbff3e..6a8f9629bbbd 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -281,10 +281,10 @@ u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *target = NULL; /* - * If we are the delayed_rsv then push to the global rsv, otherwise dump - * into the delayed rsv if it is not full. + * If we are a delayed block reserve then push to the global rsv, + * otherwise dump into the global delayed reserve if it is not full. */ - if (block_rsv == delayed_rsv) + if (block_rsv->type == BTRFS_BLOCK_RSV_DELOPS) target = global_rsv; else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv)) target = delayed_rsv; diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index ecfbc2d3f11a..25d0cdf85a91 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -89,7 +89,9 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_block_rsv *local_rsv = &trans->delayed_rsv; u64 num_bytes; + u64 reserved_bytes; num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates); num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, @@ -98,9 +100,26 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) if (num_bytes == 0) return; + /* + * Try to take num_bytes from the transaction's local delayed reserve. + * If not possible, try to take as much as it's available. If the local + * reserve doesn't have enough reserved space, the delayed refs reserve + * will be refilled next time btrfs_delayed_refs_rsv_refill() is called + * by someone or if a transaction commit is triggered before that, the + * global block reserve will be used. 
We want to minimize using the + * global block reserve for cases we can account for in advance, to + * avoid exhausting it and reaching -ENOSPC during a transaction commit. + */ + spin_lock(&local_rsv->lock); + reserved_bytes = min(num_bytes, local_rsv->reserved); + local_rsv->reserved -= reserved_bytes; + local_rsv->full = (local_rsv->reserved >= local_rsv->size); + spin_unlock(&local_rsv->lock); + spin_lock(&delayed_rsv->lock); delayed_rsv->size += num_bytes; - delayed_rsv->full = false; + delayed_rsv->reserved += reserved_bytes; + delayed_rsv->full = (delayed_rsv->reserved >= delayed_rsv->size); spin_unlock(&delayed_rsv->lock); trans->delayed_ref_updates = 0; trans->delayed_ref_csum_deletions = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a627a4d555b5..74b0c3b9dc03 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -561,6 +561,69 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root) return true; } +static int btrfs_reserve_trans_metadata(struct btrfs_fs_info *fs_info, + enum btrfs_reserve_flush_enum flush, + u64 num_bytes, + u64 *delayed_refs_bytes) +{ + struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_space_info *si = fs_info->trans_block_rsv.space_info; + u64 extra_delayed_refs_bytes = 0; + u64 bytes; + int ret; + + /* + * If there's a gap between the size of the delayed refs reserve and + * its reserved space, then some tasks have added delayed refs or bumped + * its size otherwise (due to block group creation or removal, or block + * group item update). Also try to allocate that gap in order to prevent + * using (and possibly abusing) the global reserve when committing the + * transaction. 
+ */ + if (flush == BTRFS_RESERVE_FLUSH_ALL && + !btrfs_block_rsv_full(delayed_refs_rsv)) { + spin_lock(&delayed_refs_rsv->lock); + if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) + extra_delayed_refs_bytes = delayed_refs_rsv->size - + delayed_refs_rsv->reserved; + spin_unlock(&delayed_refs_rsv->lock); + } + + bytes = num_bytes + *delayed_refs_bytes + extra_delayed_refs_bytes; + + /* + * We want to reserve all the bytes we may need all at once, so we only + * do 1 enospc flushing cycle per transaction start. + */ + ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); + if (ret == 0) { + if (extra_delayed_refs_bytes > 0) + btrfs_migrate_to_delayed_refs_rsv(fs_info, + extra_delayed_refs_bytes); + return 0; + } + + if (extra_delayed_refs_bytes > 0) { + bytes -= extra_delayed_refs_bytes; + ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); + if (ret == 0) + return 0; + } + + /* + * If we are an emergency flush, which can steal from the global block + * reserve, then attempt to not reserve space for the delayed refs, as + * we will consume space for them from the global block reserve. 
+ */ + if (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) { + bytes -= *delayed_refs_bytes; + *delayed_refs_bytes = 0; + ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); + } + + return ret; +} + static struct btrfs_trans_handle * start_transaction(struct btrfs_root *root, unsigned int num_items, unsigned int type, enum btrfs_reserve_flush_enum flush, @@ -568,10 +631,12 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv; struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; u64 num_bytes = 0; u64 qgroup_reserved = 0; + u64 delayed_refs_bytes = 0; bool reloc_reserved = false; bool do_chunk_alloc = false; int ret; @@ -594,9 +659,6 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, * the appropriate flushing if need be. */ if (num_items && root != fs_info->chunk_root) { - struct btrfs_block_rsv *rsv = &fs_info->trans_block_rsv; - u64 delayed_refs_bytes = 0; - qgroup_reserved = num_items * fs_info->nodesize; /* * Use prealloc for now, as there might be a currently running @@ -608,20 +670,16 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, if (ret) return ERR_PTR(ret); + num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items); /* - * We want to reserve all the bytes we may need all at once, so - * we only do 1 enospc flushing cycle per transaction start. We - * accomplish this by simply assuming we'll do num_items worth - * of delayed refs updates in this trans handle, and refill that - * amount for whatever is missing in the reserve. + * If we plan to insert/update/delete "num_items" from a btree, + * we will also generate delayed refs for extent buffers in the + * respective btree paths, so reserve space for the delayed refs + * that will be generated by the caller as it modifies btrees. 
+ * Try to reserve them to avoid excessive use of the global + * block reserve. */ - num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items); - if (flush == BTRFS_RESERVE_FLUSH_ALL && - !btrfs_block_rsv_full(delayed_refs_rsv)) { - delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info, - num_items); - num_bytes += delayed_refs_bytes; - } + delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info, num_items); /* * Do the reservation for the relocation root creation @@ -631,17 +689,14 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, reloc_reserved = true; } - ret = btrfs_reserve_metadata_bytes(fs_info, rsv->space_info, - num_bytes, flush); + ret = btrfs_reserve_trans_metadata(fs_info, flush, num_bytes, + &delayed_refs_bytes); if (ret) goto reserve_fail; - if (delayed_refs_bytes) { - btrfs_migrate_to_delayed_refs_rsv(fs_info, delayed_refs_bytes); - num_bytes -= delayed_refs_bytes; - } - btrfs_block_rsv_add_bytes(rsv, num_bytes, true); - if (rsv->space_info->force_alloc) + btrfs_block_rsv_add_bytes(trans_rsv, num_bytes, true); + + if (trans_rsv->space_info->force_alloc) do_chunk_alloc = true; } else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL && !btrfs_block_rsv_full(delayed_refs_rsv)) { @@ -701,6 +756,7 @@ again: h->type = type; INIT_LIST_HEAD(&h->new_bgs); + btrfs_init_metadata_block_rsv(fs_info, &h->delayed_rsv, BTRFS_BLOCK_RSV_DELOPS); smp_mb(); if (cur_trans->state >= TRANS_STATE_COMMIT_START && @@ -713,8 +769,17 @@ again: if (num_bytes) { trace_btrfs_space_reservation(fs_info, "transaction", h->transid, num_bytes, 1); - h->block_rsv = &fs_info->trans_block_rsv; + h->block_rsv = trans_rsv; h->bytes_reserved = num_bytes; + if (delayed_refs_bytes > 0) { + trace_btrfs_space_reservation(fs_info, + "local_delayed_refs_rsv", + h->transid, + delayed_refs_bytes, 1); + h->delayed_refs_bytes_reserved = delayed_refs_bytes; + btrfs_block_rsv_add_bytes(&h->delayed_rsv, delayed_refs_bytes, true); + delayed_refs_bytes = 0; + } 
h->reloc_reserved = reloc_reserved; } @@ -770,8 +835,10 @@ join_fail: kmem_cache_free(btrfs_trans_handle_cachep, h); alloc_fail: if (num_bytes) - btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv, - num_bytes, NULL); + btrfs_block_rsv_release(fs_info, trans_rsv, num_bytes, NULL); + if (delayed_refs_bytes) + btrfs_space_info_free_bytes_may_use(fs_info, trans_rsv->space_info, + delayed_refs_bytes); reserve_fail: btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); return ERR_PTR(ret); @@ -992,11 +1059,14 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans) if (!trans->block_rsv) { ASSERT(!trans->bytes_reserved); + ASSERT(!trans->delayed_refs_bytes_reserved); return; } - if (!trans->bytes_reserved) + if (!trans->bytes_reserved) { + ASSERT(!trans->delayed_refs_bytes_reserved); return; + } ASSERT(trans->block_rsv == &fs_info->trans_block_rsv); trace_btrfs_space_reservation(fs_info, "transaction", @@ -1004,6 +1074,16 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans) btrfs_block_rsv_release(fs_info, trans->block_rsv, trans->bytes_reserved, NULL); trans->bytes_reserved = 0; + + if (!trans->delayed_refs_bytes_reserved) + return; + + trace_btrfs_space_reservation(fs_info, "local_delayed_refs_rsv", + trans->transid, + trans->delayed_refs_bytes_reserved, 0); + btrfs_block_rsv_release(fs_info, &trans->delayed_rsv, + trans->delayed_refs_bytes_reserved, NULL); + trans->delayed_refs_bytes_reserved = 0; } static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 68c89b183c2e..44c5cdcd500f 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -118,6 +118,7 @@ enum { struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; + u64 delayed_refs_bytes_reserved; u64 chunk_bytes_reserved; unsigned long delayed_ref_updates; unsigned long delayed_ref_csum_deletions; @@ -140,6 +141,7 @@ struct btrfs_trans_handle { bool in_fsync; 
struct btrfs_fs_info *fs_info; struct list_head new_bgs; + struct btrfs_block_rsv delayed_rsv; }; /* |