summary | refs | log | tree | commit | diff
path: root/mm
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2026-04-13 10:10:28 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2026-04-13 10:10:28 -0700
commit: c8db08110cbeff12a1f3990a31730936b092f62b (patch)
tree: aadc293cbbcad4cc08b2dabe386b9395c108c2bf /mm
parent: 0e58e3f1c57850f62afd40a642a7fe3417d80b21 (diff)
parent: 98779186aa0b3367489a87c6d8bc0911f577444e (diff)
download: lwn-c8db08110cbeff12a1f3990a31730936b092f62b.tar.gz
lwn-c8db08110cbeff12a1f3990a31730936b092f62b.zip
Merge tag 'vfs-7.1-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs xattr updates from Christian Brauner: "This reworks the simple_xattr infrastructure and adds support for user.* extended attributes on sockets. The simple_xattr subsystem currently uses an rbtree protected by a reader-writer spinlock. This series replaces the rbtree with an rhashtable giving O(1) average-case lookup with RCU-based lockless reads. This sped up concurrent access patterns on tmpfs quite a bit and it's an overall easy enough conversion to do and gets rid of rwlock_t. The conversion is done incrementally: a new rhashtable path is added alongside the existing rbtree, consumers are migrated one at a time (shmem, kernfs, pidfs), and then the rbtree code is removed. All three consumers switch from embedded structs to pointer-based lazy allocation so the rhashtable overhead is only paid for inodes that actually use xattrs. With this infrastructure in place the series adds support for user.* xattrs on sockets. Path-based AF_UNIX sockets inherit xattr support from the underlying filesystem (e.g. tmpfs) but sockets in sockfs - that is everything created via socket() including abstract namespace AF_UNIX sockets - had no xattr support at all. The xattr_permission() checks are reworked to allow user.* xattrs on S_IFSOCK inodes. Sockfs sockets get per-inode limits of 128 xattrs and 128KB total value size matching the limits already in use for kernfs. The practical motivation comes from several directions. systemd and GNOME are expanding their use of Varlink as an IPC mechanism. For D-Bus there are tools like dbus-monitor that can observe IPC traffic across the system but this only works because D-Bus has a central broker. For Varlink there is no broker and there is currently no way to identify which sockets speak Varlink. With user.* xattrs on sockets a service can label its socket with the IPC protocol it speaks (e.g., user.varlink=1) and an eBPF program can then selectively capture traffic on those sockets.
Enumerating bound sockets via netlink combined with these xattr labels gives a way to discover all Varlink IPC entrypoints for debugging and introspection. Similarly, systemd-journald wants to use xattrs on the /dev/log socket for protocol negotiation to indicate whether RFC 5424 structured syslog is supported or whether only the legacy RFC 3164 format should be used. In containers these labels are particularly useful as high-privilege or more complicated solutions for socket identification aren't available. The series comes with comprehensive selftests covering path-based AF_UNIX sockets, sockfs socket operations, per-inode limit enforcement, and xattr operations across multiple address families (AF_INET, AF_INET6, AF_NETLINK, AF_PACKET)"
* tag 'vfs-7.1-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  selftests/xattr: test xattrs on various socket families
  selftests/xattr: sockfs socket xattr tests
  selftests/xattr: path-based AF_UNIX socket xattr tests
  xattr: support extended attributes on sockets
  xattr,net: support limited amount of extended attributes on sockfs sockets
  xattr: move user limits for xattrs to generic infra
  xattr: switch xattr_permission() to switch statement
  xattr: add xattr_permission_error()
  xattr: remove rbtree-based simple_xattr infrastructure
  pidfs: adapt to rhashtable-based simple_xattrs
  kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation
  shmem: adapt to rhashtable-based simple_xattrs with lazy allocation
  xattr: add rhashtable-based simple_xattr infrastructure
  xattr: add rcu_head and rhash_head to struct simple_xattr
Diffstat (limited to 'mm')
-rw-r--r-- mm/shmem.c 46
1 file changed, 32 insertions, 14 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index b40f3cd48961..0b0e577e880a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1425,7 +1425,10 @@ static void shmem_evict_inode(struct inode *inode)
}
}
- simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+ if (info->xattrs) {
+ simple_xattrs_free(info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+ kfree(info->xattrs);
+ }
shmem_free_inode(inode->i_sb, freed);
WARN_ON(inode->i_blocks);
clear_inode(inode);
@@ -3101,7 +3104,6 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
shmem_set_inode_flags(inode, info->fsflags, NULL);
INIT_LIST_HEAD(&info->shrinklist);
INIT_LIST_HEAD(&info->swaplist);
- simple_xattrs_init(&info->xattrs);
cache_no_acl(inode);
if (sbinfo->noswap)
mapping_set_unevictable(inode->i_mapping);
@@ -4255,10 +4257,13 @@ static int shmem_initxattrs(struct inode *inode,
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
const struct xattr *xattr;
- struct simple_xattr *new_xattr;
size_t ispace = 0;
size_t len;
+ CLASS(simple_xattrs, xattrs)();
+ if (IS_ERR(xattrs))
+ return PTR_ERR(xattrs);
+
if (sbinfo->max_inodes) {
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
ispace += simple_xattr_space(xattr->name,
@@ -4277,24 +4282,24 @@ static int shmem_initxattrs(struct inode *inode,
}
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
- if (!new_xattr)
+ CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len);
+ if (IS_ERR(new_xattr))
break;
len = strlen(xattr->name) + 1;
new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
GFP_KERNEL_ACCOUNT);
- if (!new_xattr->name) {
- kvfree(new_xattr);
+ if (!new_xattr->name)
break;
- }
memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN);
memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
xattr->name, len);
- simple_xattr_add(&info->xattrs, new_xattr);
+ if (simple_xattr_add(xattrs, new_xattr))
+ break;
+ retain_and_null_ptr(new_xattr);
}
if (xattr->name != NULL) {
@@ -4303,10 +4308,10 @@ static int shmem_initxattrs(struct inode *inode,
sbinfo->free_ispace += ispace;
raw_spin_unlock(&sbinfo->stat_lock);
}
- simple_xattrs_free(&info->xattrs, NULL);
return -ENOMEM;
}
+ smp_store_release(&info->xattrs, no_free_ptr(xattrs));
return 0;
}
@@ -4315,9 +4320,14 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler,
const char *name, void *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(inode);
+ struct simple_xattrs *xattrs;
+
+ xattrs = READ_ONCE(info->xattrs);
+ if (!xattrs)
+ return -ENODATA;
name = xattr_full_name(handler, name);
- return simple_xattr_get(&info->xattrs, name, buffer, size);
+ return simple_xattr_get(xattrs, name, buffer, size);
}
static int shmem_xattr_handler_set(const struct xattr_handler *handler,
@@ -4328,10 +4338,16 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct simple_xattrs *xattrs;
struct simple_xattr *old_xattr;
size_t ispace = 0;
name = xattr_full_name(handler, name);
+
+ xattrs = simple_xattrs_lazy_alloc(&info->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
+
if (value && sbinfo->max_inodes) {
ispace = simple_xattr_space(name, size);
raw_spin_lock(&sbinfo->stat_lock);
@@ -4344,13 +4360,13 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
return -ENOSPC;
}
- old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
+ old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
if (!IS_ERR(old_xattr)) {
ispace = 0;
if (old_xattr && sbinfo->max_inodes)
ispace = simple_xattr_space(old_xattr->name,
old_xattr->size);
- simple_xattr_free(old_xattr);
+ simple_xattr_free_rcu(old_xattr);
old_xattr = NULL;
inode_set_ctime_current(inode);
inode_inc_iversion(inode);
@@ -4391,7 +4407,9 @@ static const struct xattr_handler * const shmem_xattr_handlers[] = {
static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
+
+ return simple_xattr_list(d_inode(dentry), READ_ONCE(info->xattrs),
+ buffer, size);
}
#endif /* CONFIG_TMPFS_XATTR */