diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-13 10:10:28 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-13 10:10:28 -0700 |
| commit | c8db08110cbeff12a1f3990a31730936b092f62b (patch) | |
| tree | aadc293cbbcad4cc08b2dabe386b9395c108c2bf /mm | |
| parent | 0e58e3f1c57850f62afd40a642a7fe3417d80b21 (diff) | |
| parent | 98779186aa0b3367489a87c6d8bc0911f577444e (diff) | |
| download | lwn-c8db08110cbeff12a1f3990a31730936b092f62b.tar.gz lwn-c8db08110cbeff12a1f3990a31730936b092f62b.zip | |
Merge tag 'vfs-7.1-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs xattr updates from Christian Brauner:
"This reworks the simple_xattr infrastructure and adds support for
user.* extended attributes on sockets.
The simple_xattr subsystem currently uses an rbtree protected by a
reader-writer spinlock. This series replaces the rbtree with an
rhashtable giving O(1) average-case lookup with RCU-based lockless
reads. This sped up concurrent access patterns on tmpfs quite a bit
and it's an overall easy enough conversion to do and gets rid or
rwlock_t.
The conversion is done incrementally: a new rhashtable path is added
alongside the existing rbtree, consumers are migrated one at a time
(shmem, kernfs, pidfs), and then the rbtree code is removed. All three
consumers switch from embedded structs to pointer-based lazy
allocation so the rhashtable overhead is only paid for inodes that
actually use xattrs.
With this infrastructure in place the series adds support for user.*
xattrs on sockets. Path-based AF_UNIX sockets inherit xattr support
from the underlying filesystem (e.g. tmpfs) but sockets in sockfs -
that is everything created via socket() including abstract namespace
AF_UNIX sockets - had no xattr support at all.
The xattr_permission() checks are reworked to allow user.* xattrs on
S_IFSOCK inodes. Sockfs sockets get per-inode limits of 128 xattrs and
128KB total value size matching the limits already in use for kernfs.
The practical motivation comes from several directions. systemd and
GNOME are expanding their use of Varlink as an IPC mechanism.
For D-Bus there are tools like dbus-monitor that can observe IPC
traffic across the system but this only works because D-Bus has a
central broker.
For Varlink there is no broker and there is currently no way to
identify which sockets speak Varlink. With user.* xattrs on sockets a
service can label its socket with the IPC protocol it speaks (e.g.,
user.varlink=1) and an eBPF program can then selectively capture
traffic on those sockets. Enumerating bound sockets via netlink
combined with these xattr labels gives a way to discover all Varlink
IPC entrypoints for debugging and introspection.
Similarly, systemd-journald wants to use xattrs on the /dev/log socket
for protocol negotiation to indicate whether RFC 5424 structured
syslog is supported or whether only the legacy RFC 3164 format should
be used.
In containers these labels are particularly useful as high-privilege
or more complicated solutions for socket identification aren't
available.
The series comes with comprehensive selftests covering path-based
AF_UNIX sockets, sockfs socket operations, per-inode limit
enforcement, and xattr operations across multiple address families
(AF_INET, AF_INET6, AF_NETLINK, AF_PACKET)"
* tag 'vfs-7.1-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
selftests/xattr: test xattrs on various socket families
selftests/xattr: sockfs socket xattr tests
selftests/xattr: path-based AF_UNIX socket xattr tests
xattr: support extended attributes on sockets
xattr,net: support limited amount of extended attributes on sockfs sockets
xattr: move user limits for xattrs to generic infra
xattr: switch xattr_permission() to switch statement
xattr: add xattr_permission_error()
xattr: remove rbtree-based simple_xattr infrastructure
pidfs: adapt to rhashtable-based simple_xattrs
kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation
shmem: adapt to rhashtable-based simple_xattrs with lazy allocation
xattr: add rhashtable-based simple_xattr infrastructure
xattr: add rcu_head and rhash_head to struct simple_xattr
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/shmem.c | 46 |
1 files changed, 32 insertions, 14 deletions
diff --git a/mm/shmem.c b/mm/shmem.c index b40f3cd48961..0b0e577e880a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1425,7 +1425,10 @@ static void shmem_evict_inode(struct inode *inode) } } - simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL); + if (info->xattrs) { + simple_xattrs_free(info->xattrs, sbinfo->max_inodes ? &freed : NULL); + kfree(info->xattrs); + } shmem_free_inode(inode->i_sb, freed); WARN_ON(inode->i_blocks); clear_inode(inode); @@ -3101,7 +3104,6 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap, shmem_set_inode_flags(inode, info->fsflags, NULL); INIT_LIST_HEAD(&info->shrinklist); INIT_LIST_HEAD(&info->swaplist); - simple_xattrs_init(&info->xattrs); cache_no_acl(inode); if (sbinfo->noswap) mapping_set_unevictable(inode->i_mapping); @@ -4255,10 +4257,13 @@ static int shmem_initxattrs(struct inode *inode, struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); const struct xattr *xattr; - struct simple_xattr *new_xattr; size_t ispace = 0; size_t len; + CLASS(simple_xattrs, xattrs)(); + if (IS_ERR(xattrs)) + return PTR_ERR(xattrs); + if (sbinfo->max_inodes) { for (xattr = xattr_array; xattr->name != NULL; xattr++) { ispace += simple_xattr_space(xattr->name, @@ -4277,24 +4282,24 @@ static int shmem_initxattrs(struct inode *inode, } for (xattr = xattr_array; xattr->name != NULL; xattr++) { - new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); - if (!new_xattr) + CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len); + if (IS_ERR(new_xattr)) break; len = strlen(xattr->name) + 1; new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, GFP_KERNEL_ACCOUNT); - if (!new_xattr->name) { - kvfree(new_xattr); + if (!new_xattr->name) break; - } memcpy(new_xattr->name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN, xattr->name, len); - simple_xattr_add(&info->xattrs, new_xattr); + if (simple_xattr_add(xattrs, new_xattr)) + break; + retain_and_null_ptr(new_xattr); } if (xattr->name != NULL) { @@ -4303,10 +4308,10 @@ static int shmem_initxattrs(struct inode *inode, sbinfo->free_ispace += ispace; raw_spin_unlock(&sbinfo->stat_lock); } - simple_xattrs_free(&info->xattrs, NULL); return -ENOMEM; } + smp_store_release(&info->xattrs, no_free_ptr(xattrs)); return 0; } @@ -4315,9 +4320,14 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler, const char *name, void *buffer, size_t size) { struct shmem_inode_info *info = SHMEM_I(inode); + struct simple_xattrs *xattrs; + + xattrs = READ_ONCE(info->xattrs); + if (!xattrs) + return -ENODATA; name = xattr_full_name(handler, name); - return simple_xattr_get(&info->xattrs, name, buffer, size); + return simple_xattr_get(xattrs, name, buffer, size); } static int shmem_xattr_handler_set(const struct xattr_handler *handler, @@ -4328,10 +4338,16 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler, { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct simple_xattrs *xattrs; struct simple_xattr *old_xattr; size_t ispace = 0; name = xattr_full_name(handler, name); + + xattrs = simple_xattrs_lazy_alloc(&info->xattrs, value, flags); + if (IS_ERR_OR_NULL(xattrs)) + return PTR_ERR(xattrs); + if (value && sbinfo->max_inodes) { ispace = simple_xattr_space(name, size); raw_spin_lock(&sbinfo->stat_lock); @@ -4344,13 +4360,13 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler, return -ENOSPC; } - old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags); + old_xattr = simple_xattr_set(xattrs, name, value, size, flags); if (!IS_ERR(old_xattr)) { ispace = 0; if (old_xattr && sbinfo->max_inodes) ispace = simple_xattr_space(old_xattr->name, old_xattr->size); - simple_xattr_free(old_xattr); + simple_xattr_free_rcu(old_xattr); old_xattr = NULL; inode_set_ctime_current(inode); inode_inc_iversion(inode); @@ -4391,7 +4407,9 @@ static const struct xattr_handler * const shmem_xattr_handlers[] = { static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) { struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); - return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size); + + return simple_xattr_list(d_inode(dentry), READ_ONCE(info->xattrs), + buffer, size); } #endif /* CONFIG_TMPFS_XATTR */ |
