// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_on_stack.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "debug.h"
#include "extents.h"
#include "extent_update.h"

/*
 * This counts the number of iterators into the alloc & ec btrees we'll
 * need when inserting/removing this extent:
 */
static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	unsigned ret = 0;

	bkey_extent_entry_for_each(ptrs, entry) {
		switch (__extent_entry_type(entry)) {
		case BCH_EXTENT_ENTRY_ptr:
		case BCH_EXTENT_ENTRY_stripe_ptr:
			ret++;
		}
	}

	return ret;
}
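
/*
 * Example, following the walk above: an extent with two replicated pointers,
 * one of which is part of an erasure coded stripe, carries the entries
 * (ptr, ptr, stripe_ptr) and thus counts as 3: each pointer needs an alloc
 * btree iterator, and the stripe pointer an ec btree iterator as well.
 */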

static int count_iters_for_insert(struct btree_trans *trans,
				  struct bkey_s_c k,
				  unsigned offset,
				  struct bpos *end,
				  unsigned *nr_iters,
				  unsigned max_iters,
				  bool overwrite)
{
	int ret = 0;

	switch (k.k->type) {
	case KEY_TYPE_extent:
	case KEY_TYPE_reflink_v:
		*nr_iters += bch2_bkey_nr_alloc_ptrs(k);

		if (*nr_iters >= max_iters) {
			*end = bpos_min(*end, k.k->p);
			ret = 1;
		}

		break;
	case KEY_TYPE_reflink_p: {
		struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
		u64 idx = le64_to_cpu(p.v->idx);
		unsigned sectors = bpos_min(*end, p.k->p).offset -
			bkey_start_offset(p.k);
		struct btree_iter *iter;
		struct bkey_s_c r_k;

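		/*
		 * A reflink pointer fans out into indirect extents: walk the
		 * range it points to in the reflink btree, counting an
		 * iterator for each indirect extent plus iterators for its
		 * alloc/ec pointers:
		 */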
		for_each_btree_key(trans, iter,
				   BTREE_ID_REFLINK, POS(0, idx + offset),
				   BTREE_ITER_SLOTS, r_k, ret) {
			if (bkey_cmp(bkey_start_pos(r_k.k),
				     POS(0, idx + sectors)) >= 0)
				break;

			*nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);

			if (*nr_iters >= max_iters) {
				struct bpos pos = bkey_start_pos(k.k);
				pos.offset += r_k.k->p.offset - idx;

				*end = bpos_min(*end, pos);
				ret = 1;
				break;
			}
		}

		bch2_trans_iter_put(trans, iter);
		break;
	}
	}

	return ret;
}

/*
 * Cap on the number of iterators (for the extent itself plus the alloc & ec
 * btree iterators counted above) that one extent update may require:
 */
#define EXTENT_ITERS_MAX	(BTREE_ITER_MAX / 3)

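/*
 * Compute *end: the position @insert should be trimmed back to so that the
 * update (its own pointers plus everything it overwrites) stays within the
 * iterator budget above:
 */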
int bch2_extent_atomic_end(struct btree_iter *iter,
			   struct bkey_i *insert,
			   struct bpos *end)
{
	struct btree_trans *trans = iter->trans;
	struct btree *b;
	struct btree_node_iter	node_iter;
	struct bkey_packed	*_k;
	unsigned		nr_iters = 0;
	int ret;

	ret = bch2_btree_iter_traverse(iter);
	if (ret)
		return ret;

	b = iter->l[0].b;
	node_iter = iter->l[0].iter;

	BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);

	*end = bpos_min(insert->k.p, b->key.k.p);

	ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
				     &nr_iters, EXTENT_ITERS_MAX / 2, false);
	if (ret < 0)
		return ret;

	while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
						      KEY_TYPE_discard))) {
		struct bkey	unpacked;
		struct bkey_s_c	k = bkey_disassemble(b, _k, &unpacked);
		unsigned offset = 0;

		if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)
			break;

		if (bkey_cmp(bkey_start_pos(&insert->k),
			     bkey_start_pos(k.k)) > 0)
			offset = bkey_start_offset(&insert->k) -
				bkey_start_offset(k.k);

		ret = count_iters_for_insert(trans, k, offset, end,
					&nr_iters, EXTENT_ITERS_MAX, true);
		if (ret)
			break;

		bch2_btree_node_iter_advance(&node_iter, b);
	}

	return ret < 0 ? ret : 0;
}

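/*
 * Trim @k back to the atomic end, so that inserting it stays within the
 * iterator budget (hence "atomic"):
 */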
int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
{
	struct bpos end;
	int ret;

	ret = bch2_extent_atomic_end(iter, k, &end);
	if (ret)
		return ret;

	bch2_cut_back(end, k);
	return 0;
}

int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
{
	struct bpos end;
	int ret;

	ret = bch2_extent_atomic_end(iter, k, &end);
	if (ret)
		return ret;

	return !bkey_cmp(end, k->k.p);
}

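/*
 * Account, in *u64s, for the worst case extra space @insert may require in
 * the node (whiteouts, rewritten extents, splits), and take a disk
 * reservation if @insert splits a compressed extent; returns
 * BTREE_INSERT_ENOSPC if that reservation fails:
 */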
enum btree_insert_ret
bch2_extent_can_insert(struct btree_trans *trans,
		       struct btree_iter *iter,
		       struct bkey_i *insert,
		       unsigned *u64s)
{
	struct btree_iter_level *l = &iter->l[0];
	struct btree_node_iter node_iter = l->iter;
	struct bkey_packed *_k;
	struct bkey unpacked;
	int sectors;

	while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
						      KEY_TYPE_discard))) {
		struct bkey_s_c k = bkey_disassemble(l->b, _k, &unpacked);
		enum bch_extent_overlap overlap =
			bch2_extent_overlap(&insert->k, k.k);

		if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
			break;

		/*
		 * If we're overwriting an existing extent, we may need to emit
		 * a whiteout - unless we're inserting a new extent at the same
		 * position:
		 */
		if (k.k->needs_whiteout &&
		    (!bkey_whiteout(&insert->k) ||
		     bkey_cmp(k.k->p, insert->k.p)))
			*u64s += BKEY_U64s;

		/*
		 * If we're partially overwriting an existing extent which has
		 * been written out to disk, we'll need to emit a new version of
		 * that extent:
		 */
		if (bkey_written(l->b, _k) &&
		    overlap != BCH_EXTENT_OVERLAP_ALL)
			*u64s += _k->u64s;

		/* And we may be splitting an existing extent: */
		if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
			*u64s += _k->u64s;

		if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
		    (sectors = bch2_bkey_sectors_compressed(k))) {
			int flags = trans->flags & BTREE_INSERT_NOFAIL
				? BCH_DISK_RESERVATION_NOFAIL : 0;

			switch (bch2_disk_reservation_add(trans->c,
					trans->disk_res,
					sectors, flags)) {
			case 0:
				break;
			case -ENOSPC:
				return BTREE_INSERT_ENOSPC;
			default:
				BUG();
			}
		}

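		/*
		 * If @insert ends within @k, nothing past @k can overlap
		 * @insert:
		 */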
		if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
		    overlap == BCH_EXTENT_OVERLAP_MIDDLE)
			break;

		bch2_btree_node_iter_advance(&node_iter, l->b);
	}

	return BTREE_INSERT_OK;
}

static void verify_extent_nonoverlapping(struct bch_fs *c,
					 struct btree *b,
					 struct btree_node_iter *_iter,
					 struct bkey_i *insert)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct btree_node_iter iter;
	struct bkey_packed *k;
	struct bkey uk;

	if (!expensive_debug_checks(c))
		return;

	iter = *_iter;
	k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard);
	BUG_ON(k &&
	       (uk = bkey_unpack_key(b, k),
		bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));

	iter = *_iter;
	k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard);
#if 0
	BUG_ON(k &&
	       (uk = bkey_unpack_key(b, k),
		bkey_cmp(insert->k.p, bkey_start_pos(&uk)) > 0));
#else
	if (k &&
	    (uk = bkey_unpack_key(b, k),
	     bkey_cmp(insert->k.p, bkey_start_pos(&uk)) > 0)) {
		char buf1[100];
		char buf2[100];

		bch2_bkey_to_text(&PBUF(buf1), &insert->k);
		bch2_bkey_to_text(&PBUF(buf2), &uk);

		bch2_dump_btree_node(b);
		panic("insert > next :\n"
		      "insert %s\n"
		      "next   %s\n",
		      buf1, buf2);
	}
#endif

#endif
}

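/*
 * Insert @insert into the last bset of the node, fixing up the node
 * iterator to account for the new key:
 */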
static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
			       struct bkey_i *insert)
{
	struct btree_iter_level *l = &iter->l[0];
	struct bkey_packed *k =
		bch2_btree_node_iter_bset_pos(&l->iter, l->b, bset_tree_last(l->b));

	BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b));

	EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
	verify_extent_nonoverlapping(c, l->b, &l->iter, insert);

	if (debug_check_bkeys(c))
		bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert));

	bch2_bset_insert(l->b, &l->iter, k, insert, 0);
	bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
}

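/*
 * Construct a whiteout for @pos (packed, if @pos packs into @b's format)
 * and queue it up to be written out with the btree node:
 */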
static void pack_push_whiteout(struct bch_fs *c, struct btree *b,
			       struct bpos pos)
{
	struct bkey_packed k;

	if (!bkey_pack_pos(&k, pos, b)) {
		struct bkey_i tmp;

		bkey_init(&tmp.k);
		tmp.k.p = pos;
		bkey_copy(&k, &tmp);
	}

	k.needs_whiteout = true;
	push_whiteout(c, b, &k);
}

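/*
 * Delete @k from the node, first queueing a whiteout if the deletion needs
 * one. A key in the unwritten bset is deleted outright; a key that has
 * already been written can only be zeroed out in place:
 */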
static void
extent_drop(struct bch_fs *c, struct btree_iter *iter,
	    struct bkey_packed *_k, struct bkey_s k)
{
	struct btree_iter_level *l = &iter->l[0];

	if (!bkey_whiteout(k.k))
		btree_account_key_drop(l->b, _k);

	k.k->size = 0;
	k.k->type = KEY_TYPE_deleted;

	if (!btree_node_old_extent_overwrite(l->b) &&
	    k.k->needs_whiteout) {
		pack_push_whiteout(c, l->b, k.k->p);
		k.k->needs_whiteout = false;
	}

	if (_k >= btree_bset_last(l->b)->start) {
		unsigned u64s = _k->u64s;

		bch2_bset_delete(l->b, _k, _k->u64s);
		bch2_btree_node_iter_fix(iter, l->b, &l->iter, _k, u64s, 0);
	} else {
		extent_save(l->b, _k, k.k);
		bch2_btree_iter_fix_key_modified(iter, l->b, _k);
	}
}

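/*
 * Trim or split the existing extent @k to make room for @insert. The four
 * ways @insert can overlap @k:
 *
 *   FRONT:  insert  |=====|           BACK:   insert        |=====|
 *           k          |------|               k         |------|
 *
 *   ALL:    insert  |=========|       MIDDLE: insert      |===|
 *           k          |-----|                k         |---------|
 */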
static void
extent_squash(struct bch_fs *c, struct btree_iter *iter,
	      struct bkey_i *insert,
	      struct bkey_packed *_k, struct bkey_s k,
	      enum bch_extent_overlap overlap)
{
	struct btree_iter_level *l = &iter->l[0];
	struct bkey_on_stack tmp, split;

	bkey_on_stack_init(&tmp);
	bkey_on_stack_init(&split);

	if (!btree_node_old_extent_overwrite(l->b)) {
		if (!bkey_whiteout(&insert->k) &&
		    !bkey_cmp(k.k->p, insert->k.p)) {
			insert->k.needs_whiteout = k.k->needs_whiteout;
			k.k->needs_whiteout = false;
		}
	} else {
		insert->k.needs_whiteout |= k.k->needs_whiteout;
	}

	switch (overlap) {
	case BCH_EXTENT_OVERLAP_FRONT:
		if (bkey_written(l->b, _k)) {
			bkey_on_stack_reassemble(&tmp, c, k.s_c);
			bch2_cut_front(insert->k.p, tmp.k);

			/*
			 * needs_whiteout was propagated to new version of @k,
			 * @tmp:
			 */
			if (!btree_node_old_extent_overwrite(l->b))
				k.k->needs_whiteout = false;

			extent_drop(c, iter, _k, k);
			extent_bset_insert(c, iter, tmp.k);
		} else {
			btree_keys_account_val_delta(l->b, _k,
				bch2_cut_front_s(insert->k.p, k));

			extent_save(l->b, _k, k.k);
			/*
			 * No need to call bset_fix_invalidated_key, start of
			 * extent changed but extents are indexed by where they
			 * end
			 */
			bch2_btree_iter_fix_key_modified(iter, l->b, _k);
		}
		break;
	case BCH_EXTENT_OVERLAP_BACK:
		if (bkey_written(l->b, _k)) {
			bkey_on_stack_reassemble(&tmp, c, k.s_c);
			bch2_cut_back(bkey_start_pos(&insert->k), tmp.k);

			/*
			 * @tmp has different position than @k, needs_whiteout
			 * should not be propagated:
			 */
			if (!btree_node_old_extent_overwrite(l->b))
				tmp.k->k.needs_whiteout = false;

			extent_drop(c, iter, _k, k);
			extent_bset_insert(c, iter, tmp.k);
		} else {
			/*
			 * position of @k is changing, emit a whiteout if
			 * needs_whiteout is set:
			 */
			if (!btree_node_old_extent_overwrite(l->b) &&
			    k.k->needs_whiteout) {
				pack_push_whiteout(c, l->b, k.k->p);
				k.k->needs_whiteout = false;
			}

			btree_keys_account_val_delta(l->b, _k,
				bch2_cut_back_s(bkey_start_pos(&insert->k), k));
			extent_save(l->b, _k, k.k);

			bch2_bset_fix_invalidated_key(l->b, _k);
			bch2_btree_node_iter_fix(iter, l->b, &l->iter,
						 _k, _k->u64s, _k->u64s);
		}
		break;
	case BCH_EXTENT_OVERLAP_ALL:
		extent_drop(c, iter, _k, k);
		break;
	case BCH_EXTENT_OVERLAP_MIDDLE:
		bkey_on_stack_reassemble(&split, c, k.s_c);
		bch2_cut_back(bkey_start_pos(&insert->k), split.k);

		if (!btree_node_old_extent_overwrite(l->b))
			split.k->k.needs_whiteout = false;

		/* this is identical to BCH_EXTENT_OVERLAP_FRONT: */
		if (bkey_written(l->b, _k)) {
			bkey_on_stack_reassemble(&tmp, c, k.s_c);
			bch2_cut_front(insert->k.p, tmp.k);

			if (!btree_node_old_extent_overwrite(l->b))
				k.k->needs_whiteout = false;

			extent_drop(c, iter, _k, k);
			extent_bset_insert(c, iter, tmp.k);
		} else {
			btree_keys_account_val_delta(l->b, _k,
				bch2_cut_front_s(insert->k.p, k));

			extent_save(l->b, _k, k.k);
			bch2_btree_iter_fix_key_modified(iter, l->b, _k);
		}

		extent_bset_insert(c, iter, split.k);
		break;
	}

	bkey_on_stack_exit(&split, c);
	bkey_on_stack_exit(&tmp, c);
}

/**
 * bch2_insert_fixup_extent - insert a new extent and deal with overlaps
 *
 * this may result in not actually doing the insert, or inserting some subset
 * of the insert key. For cmpxchg operations this is where that logic lives.
 *
 * All subsets of @insert that need to be inserted are inserted with
 * extent_bset_insert() and journalled with bch2_btree_journal_key();
 * @iter->pos is set to the prefix of @insert that has actually been
 * processed (see below).
 *
 * BSET INVARIANTS: this function is responsible for maintaining all the
 * invariants for bsets of extents in memory. things get really hairy with 0
 * size extents
 *
 * within one bset:
 *
 * bkey_start_pos(bkey_next(k)) >= k->p
 * or bkey_start_offset(bkey_next(k)) >= k->p.offset
 *
 * i.e. strict ordering, no overlapping extents.
 *
 * multiple bsets (i.e. full btree node):
 *
 * ∀ k, j
 *   k.size != 0 ∧ j.size != 0 →
 *     ¬ (k.p > bkey_start_pos(j) ∧ k.p < j.p)
 *
 * i.e. no two overlapping keys _of nonzero size_
 *
 * We can't realistically maintain this invariant for zero size keys because of
 * the key merging done in bch2_btree_insert_key() - for two mergeable keys k, j
 * there may be another 0 size key between them in another bset, and it will
 * thus overlap with the merged key.
 *
 * In addition, @iter->pos indicates how much has been processed: if it is
 * not at the end of @insert, key insertion needs to continue/be retried.
 */
void bch2_insert_fixup_extent(struct btree_trans *trans,
			      struct btree_iter *iter,
			      struct bkey_i *insert)
{
	struct bch_fs *c = trans->c;
	struct btree_iter_level *l = &iter->l[0];
	struct btree_node_iter node_iter = l->iter;
	bool do_update		= !bkey_whiteout(&insert->k);
	struct bkey_packed *_k;
	struct bkey unpacked;

	EBUG_ON(iter->level);
	EBUG_ON(!insert->k.size);
	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));

	while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
						      KEY_TYPE_discard))) {
		struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
		enum bch_extent_overlap overlap =
			bch2_extent_overlap(&insert->k, k.k);

		if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
			break;

		if (!bkey_whiteout(k.k))
			do_update = true;

		if (!do_update) {
			struct bpos cur_end = bpos_min(insert->k.p, k.k->p);

			bch2_cut_front(cur_end, insert);
			bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
		} else {
			extent_squash(c, iter, insert, _k, k, overlap);
		}

		node_iter = l->iter;

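		/* @insert ends within @k; nothing past @k can overlap it: */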
		if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
		    overlap == BCH_EXTENT_OVERLAP_MIDDLE)
			break;
	}

	l->iter = node_iter;
	bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p);

	if (do_update) {
		if (insert->k.type == KEY_TYPE_deleted)
			insert->k.type = KEY_TYPE_discard;

		if (!bkey_whiteout(&insert->k) ||
		    btree_node_old_extent_overwrite(l->b))
			extent_bset_insert(c, iter, insert);

		bch2_btree_journal_key(trans, iter, insert);
	}

	bch2_cut_front(insert->k.p, insert);
}