diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-13 10:10:28 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-13 10:10:28 -0700 |
| commit | c8db08110cbeff12a1f3990a31730936b092f62b (patch) | |
| tree | aadc293cbbcad4cc08b2dabe386b9395c108c2bf /include/linux | |
| parent | 0e58e3f1c57850f62afd40a642a7fe3417d80b21 (diff) | |
| parent | 98779186aa0b3367489a87c6d8bc0911f577444e (diff) | |
| download | lwn-c8db08110cbeff12a1f3990a31730936b092f62b.tar.gz lwn-c8db08110cbeff12a1f3990a31730936b092f62b.zip | |
Merge tag 'vfs-7.1-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs xattr updates from Christian Brauner:
"This reworks the simple_xattr infrastructure and adds support for
user.* extended attributes on sockets.
The simple_xattr subsystem currently uses an rbtree protected by a
reader-writer spinlock. This series replaces the rbtree with an
rhashtable giving O(1) average-case lookup with RCU-based lockless
reads. This sped up concurrent access patterns on tmpfs quite a bit
and it's an overall easy enough conversion to do and gets rid or
rwlock_t.
The conversion is done incrementally: a new rhashtable path is added
alongside the existing rbtree, consumers are migrated one at a time
(shmem, kernfs, pidfs), and then the rbtree code is removed. All three
consumers switch from embedded structs to pointer-based lazy
allocation so the rhashtable overhead is only paid for inodes that
actually use xattrs.
With this infrastructure in place the series adds support for user.*
xattrs on sockets. Path-based AF_UNIX sockets inherit xattr support
from the underlying filesystem (e.g. tmpfs) but sockets in sockfs -
that is everything created via socket() including abstract namespace
AF_UNIX sockets - had no xattr support at all.
The xattr_permission() checks are reworked to allow user.* xattrs on
S_IFSOCK inodes. Sockfs sockets get per-inode limits of 128 xattrs and
128KB total value size matching the limits already in use for kernfs.
The practical motivation comes from several directions. systemd and
GNOME are expanding their use of Varlink as an IPC mechanism.
For D-Bus there are tools like dbus-monitor that can observe IPC
traffic across the system but this only works because D-Bus has a
central broker.
For Varlink there is no broker and there is currently no way to
identify which sockets speak Varlink. With user.* xattrs on sockets a
service can label its socket with the IPC protocol it speaks (e.g.,
user.varlink=1) and an eBPF program can then selectively capture
traffic on those sockets. Enumerating bound sockets via netlink
combined with these xattr labels gives a way to discover all Varlink
IPC entrypoints for debugging and introspection.
Similarly, systemd-journald wants to use xattrs on the /dev/log socket
for protocol negotiation to indicate whether RFC 5424 structured
syslog is supported or whether only the legacy RFC 3164 format should
be used.
In containers these labels are particularly useful as high-privilege
or more complicated solutions for socket identification aren't
available.
The series comes with comprehensive selftests covering path-based
AF_UNIX sockets, sockfs socket operations, per-inode limit
enforcement, and xattr operations across multiple address families
(AF_INET, AF_INET6, AF_NETLINK, AF_PACKET)"
* tag 'vfs-7.1-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
selftests/xattr: test xattrs on various socket families
selftests/xattr: sockfs socket xattr tests
selftests/xattr: path-based AF_UNIX socket xattr tests
xattr: support extended attributes on sockets
xattr,net: support limited amount of extended attributes on sockfs sockets
xattr: move user limits for xattrs to generic infra
xattr: switch xattr_permission() to switch statement
xattr: add xattr_permission_error()
xattr: remove rbtree-based simple_xattr infrastructure
pidfs: adapt to rhashtable-based simple_xattrs
kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation
shmem: adapt to rhashtable-based simple_xattrs with lazy allocation
xattr: add rhashtable-based simple_xattr infrastructure
xattr: add rcu_head and rhash_head to struct simple_xattr
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/kernfs.h | 2 | ||||
| -rw-r--r-- | include/linux/shmem_fs.h | 2 | ||||
| -rw-r--r-- | include/linux/xattr.h | 47 |
3 files changed, 42 insertions, 9 deletions
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 4f0ab88a1b31..e21b2f7f4159 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -100,8 +100,6 @@ enum kernfs_node_type { #define KERNFS_TYPE_MASK 0x000f #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK -#define KERNFS_MAX_USER_XATTRS 128 -#define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10) enum kernfs_node_flag { KERNFS_ACTIVATED = 0x0010, diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index a8273b32e041..f6a2d3402d76 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -48,7 +48,7 @@ struct shmem_inode_info { }; struct timespec64 i_crtime; /* file creation time */ struct shared_policy policy; /* NUMA memory alloc policy */ - struct simple_xattrs xattrs; /* list of xattrs */ + struct simple_xattrs *xattrs; /* list of xattrs */ pgoff_t fallocend; /* highest fallocate endindex */ unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */ atomic_t stop_eviction; /* hold when working on inode */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 296b5ee5c979..8b6601367eae 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/mm.h> +#include <linux/rhashtable-types.h> #include <linux/user_namespace.h> #include <uapi/linux/xattr.h> @@ -106,31 +107,65 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler) } struct simple_xattrs { - struct rb_root rb_root; - rwlock_t lock; + struct rhashtable ht; }; struct simple_xattr { - struct rb_node rb_node; + struct rhash_head hash_node; + struct rcu_head rcu; char *name; size_t size; char value[] __counted_by(size); }; -void simple_xattrs_init(struct simple_xattrs *xattrs); +#define SIMPLE_XATTR_MAX_NR 128 +#define SIMPLE_XATTR_MAX_SIZE (128 << 10) + +struct simple_xattr_limits { + atomic_t nr_xattrs; /* current user.* xattr count */ + atomic_t xattr_size; /* current total user.* value bytes */ +}; + +static inline void simple_xattr_limits_init(struct simple_xattr_limits *limits) +{ + atomic_set(&limits->nr_xattrs, 0); + atomic_set(&limits->xattr_size, 0); +} + +int simple_xattrs_init(struct simple_xattrs *xattrs); +struct simple_xattrs *simple_xattrs_alloc(void); +struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp, + const void *value, int flags); void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space); size_t simple_xattr_space(const char *name, size_t size); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); void simple_xattr_free(struct simple_xattr *xattr); +void simple_xattr_free_rcu(struct simple_xattr *xattr); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags); +int simple_xattr_set_limited(struct simple_xattrs *xattrs, + struct simple_xattr_limits *limits, + const char *name, const void *value, + size_t size, int flags); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); -void simple_xattr_add(struct simple_xattrs *xattrs, - struct simple_xattr *new_xattr); +int simple_xattr_add(struct simple_xattrs *xattrs, + struct simple_xattr *new_xattr); int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name); +DEFINE_CLASS(simple_xattr, + struct simple_xattr *, + if (!IS_ERR_OR_NULL(_T)) simple_xattr_free(_T), + simple_xattr_alloc(value, size), + const void *value, size_t size) + +DEFINE_CLASS(simple_xattrs, + struct simple_xattrs *, + if (!IS_ERR_OR_NULL(_T)) { simple_xattrs_free(_T, NULL); kfree(_T); }, + simple_xattrs_alloc(), + void) + #endif /* _LINUX_XATTR_H */ |
