summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-04-13 10:10:28 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-04-13 10:10:28 -0700
commitc8db08110cbeff12a1f3990a31730936b092f62b (patch)
treeaadc293cbbcad4cc08b2dabe386b9395c108c2bf /include/linux
parent0e58e3f1c57850f62afd40a642a7fe3417d80b21 (diff)
parent98779186aa0b3367489a87c6d8bc0911f577444e (diff)
downloadlwn-c8db08110cbeff12a1f3990a31730936b092f62b.tar.gz
lwn-c8db08110cbeff12a1f3990a31730936b092f62b.zip
Merge tag 'vfs-7.1-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs xattr updates from Christian Brauner: "This reworks the simple_xattr infrastructure and adds support for user.* extended attributes on sockets. The simple_xattr subsystem currently uses an rbtree protected by a reader-writer spinlock. This series replaces the rbtree with an rhashtable giving O(1) average-case lookup with RCU-based lockless reads. This sped up concurrent access patterns on tmpfs quite a bit and it's an overall easy enough conversion to do and gets rid or rwlock_t. The conversion is done incrementally: a new rhashtable path is added alongside the existing rbtree, consumers are migrated one at a time (shmem, kernfs, pidfs), and then the rbtree code is removed. All three consumers switch from embedded structs to pointer-based lazy allocation so the rhashtable overhead is only paid for inodes that actually use xattrs. With this infrastructure in place the series adds support for user.* xattrs on sockets. Path-based AF_UNIX sockets inherit xattr support from the underlying filesystem (e.g. tmpfs) but sockets in sockfs - that is everything created via socket() including abstract namespace AF_UNIX sockets - had no xattr support at all. The xattr_permission() checks are reworked to allow user.* xattrs on S_IFSOCK inodes. Sockfs sockets get per-inode limits of 128 xattrs and 128KB total value size matching the limits already in use for kernfs. The practical motivation comes from several directions. systemd and GNOME are expanding their use of Varlink as an IPC mechanism. For D-Bus there are tools like dbus-monitor that can observe IPC traffic across the system but this only works because D-Bus has a central broker. For Varlink there is no broker and there is currently no way to identify which sockets speak Varlink. With user.* xattrs on sockets a service can label its socket with the IPC protocol it speaks (e.g., user.varlink=1) and an eBPF program can then selectively capture traffic on those sockets. Enumerating bound sockets via netlink combined with these xattr labels gives a way to discover all Varlink IPC entrypoints for debugging and introspection. Similarly, systemd-journald wants to use xattrs on the /dev/log socket for protocol negotiation to indicate whether RFC 5424 structured syslog is supported or whether only the legacy RFC 3164 format should be used. In containers these labels are particularly useful as high-privilege or more complicated solutions for socket identification aren't available. The series comes with comprehensive selftests covering path-based AF_UNIX sockets, sockfs socket operations, per-inode limit enforcement, and xattr operations across multiple address families (AF_INET, AF_INET6, AF_NETLINK, AF_PACKET)" * tag 'vfs-7.1-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: selftests/xattr: test xattrs on various socket families selftests/xattr: sockfs socket xattr tests selftests/xattr: path-based AF_UNIX socket xattr tests xattr: support extended attributes on sockets xattr,net: support limited amount of extended attributes on sockfs sockets xattr: move user limits for xattrs to generic infra xattr: switch xattr_permission() to switch statement xattr: add xattr_permission_error() xattr: remove rbtree-based simple_xattr infrastructure pidfs: adapt to rhashtable-based simple_xattrs kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation shmem: adapt to rhashtable-based simple_xattrs with lazy allocation xattr: add rhashtable-based simple_xattr infrastructure xattr: add rcu_head and rhash_head to struct simple_xattr
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/kernfs.h2
-rw-r--r--include/linux/shmem_fs.h2
-rw-r--r--include/linux/xattr.h47
3 files changed, 42 insertions, 9 deletions
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 4f0ab88a1b31..e21b2f7f4159 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -100,8 +100,6 @@ enum kernfs_node_type {
#define KERNFS_TYPE_MASK 0x000f
#define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK
-#define KERNFS_MAX_USER_XATTRS 128
-#define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10)
enum kernfs_node_flag {
KERNFS_ACTIVATED = 0x0010,
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a8273b32e041..f6a2d3402d76 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -48,7 +48,7 @@ struct shmem_inode_info {
};
struct timespec64 i_crtime; /* file creation time */
struct shared_policy policy; /* NUMA memory alloc policy */
- struct simple_xattrs xattrs; /* list of xattrs */
+ struct simple_xattrs *xattrs; /* list of xattrs */
pgoff_t fallocend; /* highest fallocate endindex */
unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */
atomic_t stop_eviction; /* hold when working on inode */
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 296b5ee5c979..8b6601367eae 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
+#include <linux/rhashtable-types.h>
#include <linux/user_namespace.h>
#include <uapi/linux/xattr.h>
@@ -106,31 +107,65 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
}
struct simple_xattrs {
- struct rb_root rb_root;
- rwlock_t lock;
+ struct rhashtable ht;
};
struct simple_xattr {
- struct rb_node rb_node;
+ struct rhash_head hash_node;
+ struct rcu_head rcu;
char *name;
size_t size;
char value[] __counted_by(size);
};
-void simple_xattrs_init(struct simple_xattrs *xattrs);
+#define SIMPLE_XATTR_MAX_NR 128
+#define SIMPLE_XATTR_MAX_SIZE (128 << 10)
+
+struct simple_xattr_limits {
+ atomic_t nr_xattrs; /* current user.* xattr count */
+ atomic_t xattr_size; /* current total user.* value bytes */
+};
+
+static inline void simple_xattr_limits_init(struct simple_xattr_limits *limits)
+{
+ atomic_set(&limits->nr_xattrs, 0);
+ atomic_set(&limits->xattr_size, 0);
+}
+
+int simple_xattrs_init(struct simple_xattrs *xattrs);
+struct simple_xattrs *simple_xattrs_alloc(void);
+struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
+ const void *value, int flags);
void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
size_t simple_xattr_space(const char *name, size_t size);
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
void simple_xattr_free(struct simple_xattr *xattr);
+void simple_xattr_free_rcu(struct simple_xattr *xattr);
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
const char *name, const void *value,
size_t size, int flags);
+int simple_xattr_set_limited(struct simple_xattrs *xattrs,
+ struct simple_xattr_limits *limits,
+ const char *name, const void *value,
+ size_t size, int flags);
ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size);
-void simple_xattr_add(struct simple_xattrs *xattrs,
- struct simple_xattr *new_xattr);
+int simple_xattr_add(struct simple_xattrs *xattrs,
+ struct simple_xattr *new_xattr);
int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name);
+DEFINE_CLASS(simple_xattr,
+ struct simple_xattr *,
+ if (!IS_ERR_OR_NULL(_T)) simple_xattr_free(_T),
+ simple_xattr_alloc(value, size),
+ const void *value, size_t size)
+
+DEFINE_CLASS(simple_xattrs,
+ struct simple_xattrs *,
+ if (!IS_ERR_OR_NULL(_T)) { simple_xattrs_free(_T, NULL); kfree(_T); },
+ simple_xattrs_alloc(),
+ void)
+
#endif /* _LINUX_XATTR_H */