summaryrefslogtreecommitdiff
path: root/io_uring/rsrc.c
diff options
context:
space:
mode:
author: Pavel Begunkov <asml.silence@gmail.com> 2023-04-04 13:39:45 +0100
committer: Jens Axboe <axboe@kernel.dk> 2023-04-04 09:30:39 -0600
commit: b8fb5b4fdd67f9d18109c5d21d44a8bd4ddb608b (patch)
tree: d88a6c8049aeb16442461afa53182a2c603a5eb6 /io_uring/rsrc.c
parent: e3ef728ff07b42668e7e12f49cd2f9055e064ec1 (diff)
download: lwn-b8fb5b4fdd67f9d18109c5d21d44a8bd4ddb608b.tar.gz
lwn-b8fb5b4fdd67f9d18109c5d21d44a8bd4ddb608b.zip
io_uring/rsrc: use non-pcpu refcounts for nodes
One problem with the current rsrc infra is that updates will often generate lots of rsrc nodes, each carrying pcpu refs. That takes quite a lot of memory, especially if there is a stall, and takes lots of CPU cycles. The pcpu allocations alone take >50% of CPU with a naive benchmark updating files in a loop.

Replace pcpu refs with normal refcounting. There is already a hot path avoiding atomics / refs, but following patches will further improve it.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e9ed8a9457b331a26555ff9443afc64cdaab7247.1680576071.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring/rsrc.c')
-rw-r--r-- io_uring/rsrc.c 15
1 files changed, 5 insertions, 10 deletions
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 7a43aed8e395..f2da9e251e3f 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -155,7 +155,7 @@ void io_rsrc_refs_refill(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
ctx->rsrc_cached_refs += IO_RSRC_REF_BATCH;
- percpu_ref_get_many(&ctx->rsrc_node->refs, IO_RSRC_REF_BATCH);
+ refcount_add(IO_RSRC_REF_BATCH, &ctx->rsrc_node->refs);
}
static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
@@ -220,13 +220,11 @@ void io_wait_rsrc_data(struct io_rsrc_data *data)
void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
{
- percpu_ref_exit(&ref_node->refs);
kfree(ref_node);
}
-static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
+__cold void io_rsrc_node_ref_zero(struct io_rsrc_node *node)
{
- struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
struct io_ring_ctx *ctx = node->rsrc_data->ctx;
unsigned long flags;
bool first_add = false;
@@ -269,11 +267,7 @@ static struct io_rsrc_node *io_rsrc_node_alloc(void)
if (!ref_node)
return NULL;
- if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero,
- 0, GFP_KERNEL)) {
- kfree(ref_node);
- return NULL;
- }
+ refcount_set(&ref_node->refs, 1);
INIT_LIST_HEAD(&ref_node->node);
INIT_LIST_HEAD(&ref_node->rsrc_list);
ref_node->done = false;
@@ -298,7 +292,8 @@ void io_rsrc_node_switch(struct io_ring_ctx *ctx,
spin_unlock_irq(&ctx->rsrc_ref_lock);
atomic_inc(&data_to_kill->refs);
- percpu_ref_kill(&rsrc_node->refs);
+ /* put master ref */
+ io_rsrc_put_node(rsrc_node, 1);
ctx->rsrc_node = NULL;
}