summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author: Amery Hung <ameryhung@gmail.com> 2026-04-10 18:54:17 -0700
committer: Alexei Starovoitov <ast@kernel.org> 2026-04-10 21:22:32 -0700
commit: 5063e775889948c0475ccdf21c74a6191b7b6482 (patch)
tree: 13d391ce9a74e8b003873a13511b700810c122c5 /include
parent: 78ee02a966ad76966be516ed3d56860d7a58fe7e (diff)
download: lwn-5063e775889948c0475ccdf21c74a6191b7b6482.tar.gz
lwn-5063e775889948c0475ccdf21c74a6191b7b6482.zip
bpf: Use kmalloc_nolock() universally in local storage
Switch to kmalloc_nolock() universally in local storage. Socket local
storage didn't move to kmalloc_nolock() when BPF memory allocator was
replaced by it for performance reasons. Now that kfree_rcu() supports
freeing memory allocated by kmalloc_nolock(), we can move the remaining
local storages to use kmalloc_nolock() and cleanup the cluttered free
paths.

Use kfree() instead of kfree_nolock() in bpf_selem_free_trace_rcu() and
bpf_local_storage_free_trace_rcu(). Both callbacks run in process
context where spinning is allowed, so kfree_nolock() is unnecessary.

Benchmark:

  ./bench -p 1 local-storage-create --storage-type socket \
          --batch-size {16,32,64}

The benchmark is a microbenchmark stress-testing how fast local storage
can be created. There is no measurable throughput change for socket
local storage after switching from kzalloc() to kmalloc_nolock().

Socket local storage

                   batch  creation speed      diff
  ---------------  ----   ------------------  ----
  Baseline         16     433.9 ± 0.6 k/s
                   32     434.3 ± 1.4 k/s
                   64     434.2 ± 0.7 k/s
  After            16     439.0 ± 1.9 k/s     +1.2%
                   32     437.3 ± 2.0 k/s     +0.7%
                   64     435.8 ± 2.5 k/s     +0.4%

Also worth noting that the baseline got a 5% throughput boost when sheaf
replaces percpu partial slab recently [0].

[0] https://lore.kernel.org/bpf/20260123-sheaves-for-all-v4-0-041323d506f7@suse.cz/

Signed-off-by: Amery Hung <ameryhung@gmail.com>
Link: https://lore.kernel.org/r/20260411015419.114016-3-ameryhung@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/bpf_local_storage.h 8
1 files changed, 2 insertions, 6 deletions
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 8157e8da61d4..dced54e9265f 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -54,7 +54,6 @@ struct bpf_local_storage_map {
u32 bucket_log;
u16 elem_size;
u16 cache_idx;
- bool use_kmalloc_nolock;
};
struct bpf_local_storage_data {
@@ -86,8 +85,7 @@ struct bpf_local_storage_elem {
*/
};
atomic_t state;
- bool use_kmalloc_nolock;
- /* 3 bytes hole */
+ /* 4 bytes hole */
/* The data is stored in another cacheline to minimize
* the number of cachelines access during a cache hit.
*/
@@ -104,7 +102,6 @@ struct bpf_local_storage {
rqspinlock_t lock; /* Protect adding/removing from the "list" */
u64 mem_charge; /* Copy of mem charged to owner. Protected by "lock" */
refcount_t owner_refcnt;/* Used to pin owner when map_free is uncharging */
- bool use_kmalloc_nolock;
};
/* U16_MAX is much more than enough for sk local storage
@@ -137,8 +134,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
- struct bpf_local_storage_cache *cache,
- bool use_kmalloc_nolock);
+ struct bpf_local_storage_cache *cache);
void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,