summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2019-11-09 16:01:15 -0500
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-10-22 17:08:32 -0400
commit 35189e09ab46785746df7007ed2a57ee78b56191 (patch)
tree 8ef014d34714c88f3d0942a7ec3efb7358f07144 /fs
parent 03c8c747a0f2ee5f2b45ad692d51f6e2bdce21cb (diff)
downloadlwn-35189e09ab46785746df7007ed2a57ee78b56191.tar.gz
lwn-35189e09ab46785746df7007ed2a57ee78b56191.zip
bcachefs: bkey_on_stack
This implements code for storing small bkeys on the stack and allocating out of a mempool if they're too big.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r-- fs/bcachefs/bcachefs.h 2
-rw-r--r-- fs/bcachefs/bkey_on_stack.h 35
-rw-r--r-- fs/bcachefs/bkey_sort.c 13
-rw-r--r-- fs/bcachefs/ec.c 12
-rw-r--r-- fs/bcachefs/extents.c 18
-rw-r--r-- fs/bcachefs/fs-io.c 92
-rw-r--r-- fs/bcachefs/fs.c 29
-rw-r--r-- fs/bcachefs/io.c 63
-rw-r--r-- fs/bcachefs/migrate.c 16
-rw-r--r-- fs/bcachefs/move.c 10
-rw-r--r-- fs/bcachefs/reflink.c 17
-rw-r--r-- fs/bcachefs/super.c 2
12 files changed, 205 insertions, 104 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index f8a040115fd1..344cf982124f 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -729,6 +729,8 @@ struct bch_fs {
atomic64_t key_version;
+ mempool_t large_bkey_pool;
+
/* REBALANCE */
struct bch_fs_rebalance rebalance;
diff --git a/fs/bcachefs/bkey_on_stack.h b/fs/bcachefs/bkey_on_stack.h
new file mode 100644
index 000000000000..d4739038323f
--- /dev/null
+++ b/fs/bcachefs/bkey_on_stack.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_ON_STACK_H
+#define _BCACHEFS_BKEY_ON_STACK_H
+
+#include "bcachefs.h"
+
+struct bkey_on_stack { /* key buffer with inline storage and a mempool fallback */
+ struct bkey_i *k; /* current key storage: either onstack[] below or a large_bkey_pool element */
+ u64 onstack[12]; /* inline buffer, 12 u64s; used until a larger key is requested */
+};
+
+static inline void bkey_on_stack_realloc(struct bkey_on_stack *s, /* ensure s->k can hold a key of @u64s u64s */
+ struct bch_fs *c, unsigned u64s)
+{
+ if (s->k == (void *) s->onstack && /* only act while still on the inline buffer ... */
+ u64s > ARRAY_SIZE(s->onstack)) { /* ... and the requested size exceeds it */
+ s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); /* NOTE(review): result used unchecked - presumably cannot fail with GFP_NOFS; confirm */
+ memcpy(s->k, s->onstack, sizeof(s->onstack)); /* carry the inline buffer's contents over to the pool element */
+ }
+}
+
+static inline void bkey_on_stack_init(struct bkey_on_stack *s) /* set up s to use its inline buffer */
+{
+ s->k = (void *) s->onstack; /* onstack[] contents are left uninitialised */
+}
+
+static inline void bkey_on_stack_exit(struct bkey_on_stack *s, /* release whatever storage s->k currently uses */
+ struct bch_fs *c)
+{
+ if (s->k != (void *) s->onstack) /* not the inline buffer, so it came from large_bkey_pool */
+ mempool_free(s->k, &c->large_bkey_pool);
+ s->k = NULL; /* s->k is no longer usable after this point */
+}
+
+#endif /* _BCACHEFS_BKEY_ON_STACK_H */
diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c
index 2cac269b386f..f5c0507ad79d 100644
--- a/fs/bcachefs/bkey_sort.c
+++ b/fs/bcachefs/bkey_sort.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bkey_on_stack.h"
#include "bkey_sort.h"
#include "bset.h"
#include "extents.h"
@@ -292,8 +293,10 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
struct bkey l_unpacked, r_unpacked;
struct bkey_s l, r;
struct btree_nr_keys nr;
+ struct bkey_on_stack split;
memset(&nr, 0, sizeof(nr));
+ bkey_on_stack_init(&split);
heap_resort(iter, extent_sort_cmp, NULL);
@@ -349,13 +352,13 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
extent_sort_sift(iter, b, _r - iter->data);
} else if (bkey_cmp(l.k->p, r.k->p) > 0) {
- BKEY_PADDED(k) tmp;
+ bkey_on_stack_realloc(&split, c, l.k->u64s);
/*
* r wins, but it overlaps in the middle of l - split l:
*/
- bkey_reassemble(&tmp.k, l.s_c);
- bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k);
+ bkey_reassemble(split.k, l.s_c);
+ bch2_cut_back(bkey_start_pos(r.k), &split.k->k);
__bch2_cut_front(r.k->p, l);
extent_save(b, lk, l.k);
@@ -363,7 +366,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
extent_sort_sift(iter, b, 0);
extent_sort_append(c, f, &nr, dst->start,
- &prev, bkey_i_to_s(&tmp.k));
+ &prev, bkey_i_to_s(split.k));
} else {
bch2_cut_back(bkey_start_pos(r.k), l.k);
extent_save(b, lk, l.k);
@@ -373,6 +376,8 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
extent_sort_advance_prev(f, &nr, dst->start, &prev);
dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
+
+ bkey_on_stack_exit(&split, c);
return nr;
}
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index f32b8e6bf2ce..b24f867520c3 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -4,6 +4,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_on_stack.h"
#include "bset.h"
#include "btree_gc.h"
#include "btree_update.h"
@@ -777,9 +778,10 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
struct bkey_s_c k;
struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
- BKEY_PADDED(k) tmp;
+ struct bkey_on_stack sk;
int ret = 0, dev, idx;
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -804,8 +806,9 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
dev = s->key.v.ptrs[idx].dev;
- bkey_reassemble(&tmp.k, k);
- e = bkey_i_to_s_extent(&tmp.k);
+ bkey_on_stack_realloc(&sk, c, k.k->u64s);
+ bkey_reassemble(sk.k, k);
+ e = bkey_i_to_s_extent(sk.k);
extent_for_each_ptr(e, ptr)
if (ptr->dev != dev)
@@ -816,7 +819,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
extent_stripe_ptr_add(e, s, ptr, idx);
- bch2_trans_update(&trans, iter, &tmp.k);
+ bch2_trans_update(&trans, iter, sk.k);
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
@@ -829,6 +832,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
}
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&sk, c);
return ret;
}
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index b12798103763..46eeaa574e86 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -8,6 +8,7 @@
#include "bcachefs.h"
#include "bkey_methods.h"
+#include "bkey_on_stack.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
@@ -1132,7 +1133,11 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
break;
}
case BCH_EXTENT_OVERLAP_MIDDLE: {
- BKEY_PADDED(k) split;
+ struct bkey_on_stack split;
+
+ bkey_on_stack_init(&split);
+ bkey_on_stack_realloc(&split, c, k.k->u64s);
+
/*
* The insert key falls 'in the middle' of k
* The insert key splits k in 3:
@@ -1147,18 +1152,19 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
* modify k _before_ doing the insert (which will move
* what k points to)
*/
- bkey_reassemble(&split.k, k.s_c);
- split.k.k.needs_whiteout |= bkey_written(l->b, _k);
+ bkey_reassemble(split.k, k.s_c);
+ split.k->k.needs_whiteout |= bkey_written(l->b, _k);
- bch2_cut_back(bkey_start_pos(&insert->k), &split.k.k);
- BUG_ON(bkey_deleted(&split.k.k));
+ bch2_cut_back(bkey_start_pos(&insert->k), &split.k->k);
+ BUG_ON(bkey_deleted(&split.k->k));
__bch2_cut_front(insert->k.p, k);
BUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
bch2_btree_iter_fix_key_modified(iter, l->b, _k);
- extent_bset_insert(c, iter, &split.k);
+ extent_bset_insert(c, iter, split.k);
+ bkey_on_stack_exit(&split, c);
break;
}
}
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 657559c2db14..478630fdf643 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_on_stack.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
@@ -691,6 +692,18 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
}
}
+static bool extent_partial_reads_expensive(struct bkey_s_c k) /* true if any crc entry of k has a checksum or compression type set */
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *i;
+
+ bkey_for_each_crc(k.k, ptrs, crc, i) /* walk the crc entries of k, unpacked into crc */
+ if (crc.csum_type || crc.compression_type) /* either field non-zero is enough */
+ return true;
+ return false; /* no crc entry had csum_type or compression_type set */
+}
+
static void readpage_bio_extend(struct readpages_iter *iter,
struct bio *bio,
unsigned sectors_this_extent,
@@ -744,15 +757,17 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
struct readpages_iter *readpages_iter)
{
struct bch_fs *c = trans->c;
+ struct bkey_on_stack sk;
int flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE;
int ret = 0;
rbio->c = c;
rbio->start_time = local_clock();
+
+ bkey_on_stack_init(&sk);
retry:
while (1) {
- BKEY_PADDED(k) tmp;
struct bkey_s_c k;
unsigned bytes, sectors, offset_into_extent;
@@ -764,15 +779,16 @@ retry:
if (ret)
break;
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
+ bkey_on_stack_realloc(&sk, c, k.k->u64s);
+ bkey_reassemble(sk.k, k);
+ k = bkey_i_to_s_c(sk.k);
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
ret = bch2_read_indirect_extent(trans,
- &offset_into_extent, &tmp.k);
+ &offset_into_extent, sk.k);
if (ret)
break;
@@ -780,22 +796,9 @@ retry:
bch2_trans_unlock(trans);
- if (readpages_iter) {
- bool want_full_extent = false;
-
- if (bkey_extent_is_data(k.k)) {
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const union bch_extent_entry *i;
- struct extent_ptr_decoded p;
-
- bkey_for_each_ptr_decode(k.k, ptrs, p, i)
- want_full_extent |= ((p.crc.csum_type != 0) |
- (p.crc.compression_type != 0));
- }
-
- readpage_bio_extend(readpages_iter, &rbio->bio,
- sectors, want_full_extent);
- }
+ if (readpages_iter)
+ readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
+ extent_partial_reads_expensive(k));
bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
swap(rbio->bio.bi_iter.bi_size, bytes);
@@ -809,7 +812,7 @@ retry:
bch2_read_extent(c, rbio, k, offset_into_extent, flags);
if (flags & BCH_READ_LAST_FRAGMENT)
- return;
+ break;
swap(rbio->bio.bi_iter.bi_size, bytes);
bio_advance(&rbio->bio, bytes);
@@ -818,8 +821,12 @@ retry:
if (ret == -EINTR)
goto retry;
- bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
- bio_endio(&rbio->bio);
+ if (ret) {
+ bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
+ bio_endio(&rbio->bio);
+ }
+
+ bkey_on_stack_exit(&sk, c);
}
void bch2_readahead(struct readahead_control *ractl)
@@ -2353,6 +2360,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
+ struct bkey_on_stack copy;
struct btree_trans trans;
struct btree_iter *src, *dst, *del = NULL;
loff_t shift, new_size;
@@ -2362,6 +2370,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
if ((offset | len) & (block_bytes(c) - 1))
return -EINVAL;
+ bkey_on_stack_init(&copy);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
/*
@@ -2430,7 +2439,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
while (1) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
- BKEY_PADDED(k) copy;
struct bkey_i delete;
struct bkey_s_c k;
struct bpos next_pos;
@@ -2455,34 +2463,35 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
break;
reassemble:
- bkey_reassemble(&copy.k, k);
+ bkey_on_stack_realloc(&copy, c, k.k->u64s);
+ bkey_reassemble(copy.k, k);
if (insert &&
bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) {
- bch2_cut_front(move_pos, &copy.k);
- bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k.k));
+ bch2_cut_front(move_pos, copy.k);
+ bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k->k));
}
- copy.k.k.p.offset += shift >> 9;
- bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k.k));
+ copy.k->k.p.offset += shift >> 9;
+ bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k->k));
- ret = bch2_extent_atomic_end(dst, &copy.k, &atomic_end);
+ ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end);
if (ret)
goto bkey_err;
- if (bkey_cmp(atomic_end, copy.k.k.p)) {
+ if (bkey_cmp(atomic_end, copy.k->k.p)) {
if (insert) {
move_pos = atomic_end;
move_pos.offset -= shift >> 9;
goto reassemble;
} else {
- bch2_cut_back(atomic_end, &copy.k.k);
+ bch2_cut_back(atomic_end, &copy.k->k);
}
}
bkey_init(&delete.k);
delete.k.p = src->pos;
- bch2_key_resize(&delete.k, copy.k.k.size);
+ bch2_key_resize(&delete.k, copy.k->k.size);
next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
@@ -2495,12 +2504,12 @@ reassemble:
* by the triggers machinery:
*/
if (insert &&
- bkey_cmp(bkey_start_pos(&copy.k.k), delete.k.p) < 0) {
- bch2_cut_back(bkey_start_pos(&copy.k.k), &delete.k);
+ bkey_cmp(bkey_start_pos(&copy.k->k), delete.k.p) < 0) {
+ bch2_cut_back(bkey_start_pos(&copy.k->k), &delete.k);
} else if (!insert &&
- bkey_cmp(copy.k.k.p,
+ bkey_cmp(copy.k->k.p,
bkey_start_pos(&delete.k)) > 0) {
- bch2_cut_front(copy.k.k.p, &delete);
+ bch2_cut_front(copy.k->k.p, &delete);
del = bch2_trans_copy_iter(&trans, src);
BUG_ON(IS_ERR_OR_NULL(del));
@@ -2509,10 +2518,10 @@ reassemble:
bkey_start_pos(&delete.k));
}
- bch2_trans_update(&trans, dst, &copy.k);
+ bch2_trans_update(&trans, dst, copy.k);
bch2_trans_update(&trans, del ?: src, &delete);
- if (copy.k.k.size == k.k->size) {
+ if (copy.k->k.size == k.k->size) {
/*
* If we're moving the entire extent, we can skip
* running triggers:
@@ -2521,10 +2530,10 @@ reassemble:
} else {
/* We might end up splitting compressed extents: */
unsigned nr_ptrs =
- bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k));
+ bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(copy.k));
ret = bch2_disk_reservation_get(c, &disk_res,
- copy.k.k.size, nr_ptrs,
+ copy.k->k.size, nr_ptrs,
BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret);
}
@@ -2559,6 +2568,7 @@ bkey_err:
}
err:
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&copy, c);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index b241164f6f7e..e8cdae3c114b 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "acl.h"
+#include "bkey_on_stack.h"
#include "btree_update.h"
#include "buckets.h"
#include "chardev.h"
@@ -875,7 +876,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- BKEY_PADDED(k) cur, prev;
+ struct bkey_on_stack cur, prev;
struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
unsigned offset_into_extent, sectors;
bool have_extent = false;
@@ -888,6 +889,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
if (start + len < start)
return -EINVAL;
+ bkey_on_stack_init(&cur);
+ bkey_on_stack_init(&prev);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -902,15 +905,17 @@ retry:
continue;
}
- bkey_reassemble(&cur.k, k);
- k = bkey_i_to_s_c(&cur.k);
+ bkey_on_stack_realloc(&cur, c, k.k->u64s);
+ bkey_on_stack_realloc(&prev, c, k.k->u64s);
+ bkey_reassemble(cur.k, k);
+ k = bkey_i_to_s_c(cur.k);
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
ret = bch2_read_indirect_extent(&trans,
- &offset_into_extent, &cur.k);
+ &offset_into_extent, cur.k);
if (ret)
break;
@@ -920,19 +925,19 @@ retry:
bch2_cut_front(POS(k.k->p.inode,
bkey_start_offset(k.k) +
offset_into_extent),
- &cur.k);
- bch2_key_resize(&cur.k.k, sectors);
- cur.k.k.p = iter->pos;
- cur.k.k.p.offset += cur.k.k.size;
+ cur.k);
+ bch2_key_resize(&cur.k->k, sectors);
+ cur.k->k.p = iter->pos;
+ cur.k->k.p.offset += cur.k->k.size;
if (have_extent) {
ret = bch2_fill_extent(c, info,
- bkey_i_to_s_c(&prev.k), 0);
+ bkey_i_to_s_c(prev.k), 0);
if (ret)
break;
}
- bkey_copy(&prev.k, &cur.k);
+ bkey_copy(prev.k, cur.k);
have_extent = true;
if (k.k->type == KEY_TYPE_reflink_v)
@@ -945,10 +950,12 @@ retry:
goto retry;
if (!ret && have_extent)
- ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k),
+ ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
FIEMAP_EXTENT_LAST);
ret = bch2_trans_exit(&trans) ?: ret;
+ bkey_on_stack_exit(&cur, c);
+ bkey_on_stack_exit(&prev, c);
return ret < 0 ? ret : 0;
}
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index f53eee7accc8..4fe61705ae75 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -8,6 +8,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_on_stack.h"
#include "bset.h"
#include "btree_update.h"
#include "buckets.h"
@@ -394,12 +395,14 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end,
int bch2_write_index_default(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
+ struct bkey_on_stack sk;
struct keylist *keys = &op->insert_keys;
struct bkey_i *k = bch2_keylist_front(keys);
struct btree_trans trans;
struct btree_iter *iter;
int ret;
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -407,13 +410,14 @@ int bch2_write_index_default(struct bch_write_op *op)
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
do {
- BKEY_PADDED(k) tmp;
+ k = bch2_keylist_front(keys);
- bkey_copy(&tmp.k, bch2_keylist_front(keys));
+ bkey_on_stack_realloc(&sk, c, k->k.u64s);
+ bkey_copy(sk.k, k);
bch2_trans_begin_updates(&trans);
- ret = bch2_extent_update(&trans, iter, &tmp.k,
+ ret = bch2_extent_update(&trans, iter, sk.k,
&op->res, op_journal_seq(op),
op->new_i_size, &op->i_sectors_delta);
if (ret == -EINTR)
@@ -421,13 +425,14 @@ int bch2_write_index_default(struct bch_write_op *op)
if (ret)
break;
- if (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) < 0)
- bch2_cut_front(iter->pos, bch2_keylist_front(keys));
+ if (bkey_cmp(iter->pos, k->k.p) < 0)
+ bch2_cut_front(iter->pos, k);
else
bch2_keylist_pop_front(keys);
} while (!bch2_keylist_empty(keys));
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&sk, c);
return ret;
}
@@ -1463,13 +1468,14 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
{
struct btree_trans trans;
struct btree_iter *iter;
- BKEY_PADDED(k) tmp;
+ struct bkey_on_stack sk;
struct bkey_s_c k;
int ret;
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -1481,11 +1487,12 @@ retry:
if (bkey_err(k))
goto err;
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
+ bkey_on_stack_realloc(&sk, c, k.k->u64s);
+ bkey_reassemble(sk.k, k);
+ k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
- if (!bch2_bkey_matches_ptr(c, bkey_i_to_s_c(&tmp.k),
+ if (!bch2_bkey_matches_ptr(c, k,
rbio->pick.ptr,
rbio->pos.offset -
rbio->pick.crc.offset)) {
@@ -1502,6 +1509,7 @@ retry:
out:
bch2_rbio_done(rbio);
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&sk, c);
return;
err:
rbio->bio.bi_status = BLK_STS_IOERR;
@@ -1514,12 +1522,14 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
{
struct btree_trans trans;
struct btree_iter *iter;
+ struct bkey_on_stack sk;
struct bkey_s_c k;
int ret;
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@@ -1527,18 +1537,18 @@ retry:
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(inode, bvec_iter.bi_sector),
BTREE_ITER_SLOTS, k, ret) {
- BKEY_PADDED(k) tmp;
unsigned bytes, sectors, offset_into_extent;
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
+ bkey_on_stack_realloc(&sk, c, k.k->u64s);
+ bkey_reassemble(sk.k, k);
+ k = bkey_i_to_s_c(sk.k);
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
ret = bch2_read_indirect_extent(&trans,
- &offset_into_extent, &tmp.k);
+ &offset_into_extent, sk.k);
if (ret)
break;
@@ -1577,6 +1587,7 @@ err:
rbio->bio.bi_status = BLK_STS_IOERR;
out:
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&sk, c);
bch2_rbio_done(rbio);
}
@@ -1633,7 +1644,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- BKEY_PADDED(k) new;
+ struct bkey_on_stack new;
struct bch_extent_crc_unpacked new_crc;
u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
int ret;
@@ -1641,6 +1652,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
if (rbio->pick.crc.compression_type)
return;
+ bkey_on_stack_init(&new);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@@ -1651,8 +1663,9 @@ retry:
if (IS_ERR_OR_NULL(k.k))
goto out;
- bkey_reassemble(&new.k, k);
- k = bkey_i_to_s_c(&new.k);
+ bkey_on_stack_realloc(&new, c, k.k->u64s);
+ bkey_reassemble(new.k, k);
+ k = bkey_i_to_s_c(new.k);
if (bversion_cmp(k.k->version, rbio->version) ||
!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
@@ -1671,10 +1684,10 @@ retry:
goto out;
}
- if (!bch2_bkey_narrow_crcs(&new.k, new_crc))
+ if (!bch2_bkey_narrow_crcs(new.k, new_crc))
goto out;
- bch2_trans_update(&trans, iter, &new.k);
+ bch2_trans_update(&trans, iter, new.k);
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
@@ -1683,6 +1696,7 @@ retry:
goto retry;
out:
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&new, c);
}
/* Inner part that may run in process context */
@@ -2114,6 +2128,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
{
struct btree_trans trans;
struct btree_iter *iter;
+ struct bkey_on_stack sk;
struct bkey_s_c k;
unsigned flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE|
@@ -2127,6 +2142,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
rbio->c = c;
rbio->start_time = local_clock();
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@@ -2135,7 +2151,6 @@ retry:
POS(inode, rbio->bio.bi_iter.bi_sector),
BTREE_ITER_SLOTS);
while (1) {
- BKEY_PADDED(k) tmp;
unsigned bytes, sectors, offset_into_extent;
bch2_btree_iter_set_pos(iter,
@@ -2146,15 +2161,16 @@ retry:
if (ret)
goto err;
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
-
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
+ bkey_on_stack_realloc(&sk, c, k.k->u64s);
+ bkey_reassemble(sk.k, k);
+ k = bkey_i_to_s_c(sk.k);
+
ret = bch2_read_indirect_extent(&trans,
- &offset_into_extent, &tmp.k);
+ &offset_into_extent, sk.k);
if (ret)
goto err;
@@ -2186,6 +2202,7 @@ retry:
}
out:
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&sk, c);
return;
err:
if (ret == -EINTR)
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index de8522f754e2..4dacbd637d02 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -4,6 +4,7 @@
*/
#include "bcachefs.h"
+#include "bkey_on_stack.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
@@ -40,9 +41,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- BKEY_PADDED(key) tmp;
+ struct bkey_on_stack sk;
int ret = 0;
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
@@ -58,9 +60,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
continue;
}
- bkey_reassemble(&tmp.key, k);
+ bkey_on_stack_realloc(&sk, c, k.k->u64s);
+ bkey_reassemble(sk.k, k);
- ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.key),
+ ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k),
dev_idx, flags, false);
if (ret)
break;
@@ -70,11 +73,11 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
* will do the appropriate thing with it (turning it into a
* KEY_TYPE_error key, or just a discard if it was a cached extent)
*/
- bch2_extent_normalize(c, bkey_i_to_s(&tmp.key));
+ bch2_extent_normalize(c, bkey_i_to_s(sk.k));
- bch2_btree_iter_set_pos(iter, bkey_start_pos(&tmp.key.k));
+ bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
- bch2_trans_update(&trans, iter, &tmp.key);
+ bch2_trans_update(&trans, iter, sk.k);
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
@@ -92,6 +95,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
}
ret = bch2_trans_exit(&trans) ?: ret;
+ bkey_on_stack_exit(&sk, c);
BUG_ON(ret == -EINTR);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index c5d3375882d7..dbe35d16e7dd 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_on_stack.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
@@ -489,7 +490,7 @@ static int __bch2_move_data(struct bch_fs *c,
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
- BKEY_PADDED(k) tmp;
+ struct bkey_on_stack sk;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
@@ -498,6 +499,7 @@ static int __bch2_move_data(struct bch_fs *c,
u64 delay, cur_inum = U64_MAX;
int ret = 0, ret2;
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
stats->data_type = BCH_DATA_USER;
@@ -577,8 +579,9 @@ peek:
}
/* unlock before doing IO: */
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
+ bkey_on_stack_realloc(&sk, c, k.k->u64s);
+ bkey_reassemble(sk.k, k);
+ k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k,
@@ -605,6 +608,7 @@ next_nondata:
}
out:
ret = bch2_trans_exit(&trans) ?: ret;
+ bkey_on_stack_exit(&sk, c);
return ret;
}
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 6e71c5e8f9a2..6d21086c3254 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bkey_on_stack.h"
#include "btree_update.h"
#include "extents.h"
#include "inode.h"
@@ -160,7 +161,8 @@ s64 bch2_remap_range(struct bch_fs *c,
struct btree_trans trans;
struct btree_iter *dst_iter, *src_iter;
struct bkey_s_c src_k;
- BKEY_PADDED(k) new_dst, new_src;
+ BKEY_PADDED(k) new_dst;
+ struct bkey_on_stack new_src;
struct bpos dst_end = dst_start, src_end = src_start;
struct bpos dst_want, src_want;
u64 src_done, dst_done;
@@ -183,6 +185,7 @@ s64 bch2_remap_range(struct bch_fs *c,
dst_end.offset += remap_sectors;
src_end.offset += remap_sectors;
+ bkey_on_stack_init(&new_src);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start,
@@ -222,14 +225,15 @@ s64 bch2_remap_range(struct bch_fs *c,
break;
if (src_k.k->type == KEY_TYPE_extent) {
- bkey_reassemble(&new_src.k, src_k);
- src_k = bkey_i_to_s_c(&new_src.k);
+ bkey_on_stack_realloc(&new_src, c, src_k.k->u64s);
+ bkey_reassemble(new_src.k, src_k);
+ src_k = bkey_i_to_s_c(new_src.k);
- bch2_cut_front(src_iter->pos, &new_src.k);
- bch2_cut_back(src_end, &new_src.k.k);
+ bch2_cut_front(src_iter->pos, new_src.k);
+ bch2_cut_back(src_end, &new_src.k->k);
ret = bch2_make_extent_indirect(&trans, src_iter,
- bkey_i_to_extent(&new_src.k));
+ bkey_i_to_extent(new_src.k));
if (ret)
goto btree_err;
@@ -299,6 +303,7 @@ err:
} while (ret2 == -EINTR);
ret = bch2_trans_exit(&trans) ?: ret;
+ bkey_on_stack_exit(&new_src, c);
percpu_ref_put(&c->writes);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 14e2f6828cc6..8c7b56a95f4b 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -475,6 +475,7 @@ static void bch2_fs_free(struct bch_fs *c)
free_percpu(c->usage[0]);
kfree(c->usage_base);
free_percpu(c->pcpu);
+ mempool_exit(&c->large_bkey_pool);
mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio);
mempool_exit(&c->btree_interior_update_pool);
@@ -729,6 +730,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
!(c->online_reserved = alloc_percpu(u64)) ||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
btree_bytes(c)) ||
+ mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
bch2_io_clock_init(&c->io_clock[READ]) ||
bch2_io_clock_init(&c->io_clock[WRITE]) ||
bch2_fs_journal_init(&c->journal) ||