summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/kernfs/dir.c15
-rw-r--r--fs/kernfs/inode.c99
-rw-r--r--fs/kernfs/kernfs-internal.h5
-rw-r--r--fs/pidfs.c64
-rw-r--r--fs/xattr.c423
-rw-r--r--include/linux/kernfs.h2
-rw-r--r--include/linux/shmem_fs.h2
-rw-r--r--include/linux/xattr.h47
-rw-r--r--mm/shmem.c46
-rw-r--r--net/socket.c119
-rw-r--r--tools/testing/selftests/filesystems/xattr/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/xattr/Makefile6
-rw-r--r--tools/testing/selftests/filesystems/xattr/xattr_socket_test.c470
-rw-r--r--tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c177
-rw-r--r--tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c363
15 files changed, 1546 insertions, 295 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 22a4dff2a3af..cdacdc1dcdd9 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -564,10 +564,8 @@ static void kernfs_free_rcu(struct rcu_head *rcu)
/* If the whole node goes away, then name can't be used outside */
kfree_const(rcu_access_pointer(kn->name));
- if (kn->iattr) {
- simple_xattrs_free(&kn->iattr->xattrs, NULL);
+ if (kn->iattr)
kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
- }
kmem_cache_free(kernfs_node_cache, kn);
}
@@ -601,6 +599,12 @@ void kernfs_put(struct kernfs_node *kn)
if (kernfs_type(kn) == KERNFS_LINK)
kernfs_put(kn->symlink.target_kn);
+ if (kn->iattr && kn->iattr->xattrs) {
+ simple_xattrs_free(kn->iattr->xattrs, NULL);
+ kfree(kn->iattr->xattrs);
+ kn->iattr->xattrs = NULL;
+ }
+
spin_lock(&root->kernfs_idr_lock);
idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
spin_unlock(&root->kernfs_idr_lock);
@@ -699,7 +703,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
err_out4:
if (kn->iattr) {
- simple_xattrs_free(&kn->iattr->xattrs, NULL);
+ if (kn->iattr->xattrs) {
+ simple_xattrs_free(kn->iattr->xattrs, NULL);
+ kfree(kn->iattr->xattrs);
+ }
kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
}
err_out3:
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index a36aaee98dce..1de10500842d 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -45,9 +45,7 @@ static struct kernfs_iattrs *__kernfs_iattrs(struct kernfs_node *kn, bool alloc)
ret->ia_mtime = ret->ia_atime;
ret->ia_ctime = ret->ia_atime;
- simple_xattrs_init(&ret->xattrs);
- atomic_set(&ret->nr_user_xattrs, 0);
- atomic_set(&ret->user_xattr_size, 0);
+ simple_xattr_limits_init(&ret->xattr_limits);
/* If someone raced us, recognize it. */
if (!try_cmpxchg(&kn->iattr, &attr, ret))
@@ -146,7 +144,8 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
if (!attrs)
return -ENOMEM;
- return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
+ return simple_xattr_list(d_inode(dentry), READ_ONCE(attrs->xattrs),
+ buf, size);
}
static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
@@ -298,27 +297,38 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
void *value, size_t size)
{
struct kernfs_iattrs *attrs = kernfs_iattrs_noalloc(kn);
+ struct simple_xattrs *xattrs;
+
if (!attrs)
return -ENODATA;
- return simple_xattr_get(&attrs->xattrs, name, value, size);
+ xattrs = READ_ONCE(attrs->xattrs);
+ if (!xattrs)
+ return -ENODATA;
+
+ return simple_xattr_get(xattrs, name, value, size);
}
int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
const void *value, size_t size, int flags)
{
struct simple_xattr *old_xattr;
+ struct simple_xattrs *xattrs;
struct kernfs_iattrs *attrs;
attrs = kernfs_iattrs(kn);
if (!attrs)
return -ENOMEM;
- old_xattr = simple_xattr_set(&attrs->xattrs, name, value, size, flags);
+ xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
+
+ old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
if (IS_ERR(old_xattr))
return PTR_ERR(old_xattr);
- simple_xattr_free(old_xattr);
+ simple_xattr_free_rcu(old_xattr);
return 0;
}
@@ -344,69 +354,6 @@ static int kernfs_vfs_xattr_set(const struct xattr_handler *handler,
return kernfs_xattr_set(kn, name, value, size, flags);
}
-static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
- const char *full_name,
- struct simple_xattrs *xattrs,
- const void *value, size_t size, int flags)
-{
- struct kernfs_iattrs *attr = kernfs_iattrs_noalloc(kn);
- atomic_t *sz = &attr->user_xattr_size;
- atomic_t *nr = &attr->nr_user_xattrs;
- struct simple_xattr *old_xattr;
- int ret;
-
- if (atomic_inc_return(nr) > KERNFS_MAX_USER_XATTRS) {
- ret = -ENOSPC;
- goto dec_count_out;
- }
-
- if (atomic_add_return(size, sz) > KERNFS_USER_XATTR_SIZE_LIMIT) {
- ret = -ENOSPC;
- goto dec_size_out;
- }
-
- old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
- if (!old_xattr)
- return 0;
-
- if (IS_ERR(old_xattr)) {
- ret = PTR_ERR(old_xattr);
- goto dec_size_out;
- }
-
- ret = 0;
- size = old_xattr->size;
- simple_xattr_free(old_xattr);
-dec_size_out:
- atomic_sub(size, sz);
-dec_count_out:
- atomic_dec(nr);
- return ret;
-}
-
-static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
- const char *full_name,
- struct simple_xattrs *xattrs,
- const void *value, size_t size, int flags)
-{
- struct kernfs_iattrs *attr = kernfs_iattrs_noalloc(kn);
- atomic_t *sz = &attr->user_xattr_size;
- atomic_t *nr = &attr->nr_user_xattrs;
- struct simple_xattr *old_xattr;
-
- old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
- if (!old_xattr)
- return 0;
-
- if (IS_ERR(old_xattr))
- return PTR_ERR(old_xattr);
-
- atomic_sub(old_xattr->size, sz);
- atomic_dec(nr);
- simple_xattr_free(old_xattr);
- return 0;
-}
-
static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
@@ -415,6 +362,7 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
{
const char *full_name = xattr_full_name(handler, suffix);
struct kernfs_node *kn = inode->i_private;
+ struct simple_xattrs *xattrs;
struct kernfs_iattrs *attrs;
if (!(kernfs_root(kn)->flags & KERNFS_ROOT_SUPPORT_USER_XATTR))
@@ -424,13 +372,12 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
if (!attrs)
return -ENOMEM;
- if (value)
- return kernfs_vfs_user_xattr_add(kn, full_name, &attrs->xattrs,
- value, size, flags);
- else
- return kernfs_vfs_user_xattr_rm(kn, full_name, &attrs->xattrs,
- value, size, flags);
+ xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
+ return simple_xattr_set_limited(xattrs, &attrs->xattr_limits,
+ full_name, value, size, flags);
}
static const struct xattr_handler kernfs_trusted_xattr_handler = {
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index b1fd9622a5e3..8d8912f50b05 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -26,9 +26,8 @@ struct kernfs_iattrs {
struct timespec64 ia_mtime;
struct timespec64 ia_ctime;
- struct simple_xattrs xattrs;
- atomic_t nr_user_xattrs;
- atomic_t user_xattr_size;
+ struct simple_xattrs *xattrs;
+ struct simple_xattr_limits xattr_limits;
};
struct kernfs_root {
diff --git a/fs/pidfs.c b/fs/pidfs.c
index e3825ee246be..dc82741becb1 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -22,6 +22,7 @@
#include <net/net_namespace.h>
#include <linux/coredump.h>
#include <linux/rhashtable.h>
+#include <linux/llist.h>
#include <linux/xattr.h>
#include <linux/cookie.h>
@@ -31,7 +32,6 @@
#define PIDFS_PID_DEAD ERR_PTR(-ESRCH)
static struct kmem_cache *pidfs_attr_cachep __ro_after_init;
-static struct kmem_cache *pidfs_xattr_cachep __ro_after_init;
static struct path pidfs_root_path = {};
@@ -46,9 +46,8 @@ enum pidfs_attr_mask_bits {
PIDFS_ATTR_BIT_COREDUMP = 1,
};
-struct pidfs_attr {
+struct pidfs_anon_attr {
unsigned long attr_mask;
- struct simple_xattrs *xattrs;
struct /* exit info */ {
__u64 cgroupid;
__s32 exit_code;
@@ -93,6 +92,13 @@ static const struct rhashtable_params pidfs_ino_ht_params = {
* inode number and the inode generation number to compare or
* use file handles.
*/
+struct pidfs_attr {
+ struct simple_xattrs *xattrs;
+ union {
+ struct pidfs_anon_attr;
+ struct llist_node pidfs_llist;
+ };
+};
#if BITS_PER_LONG == 32
@@ -178,10 +184,30 @@ void pidfs_remove_pid(struct pid *pid)
pidfs_ino_ht_params);
}
+static LLIST_HEAD(pidfs_free_list);
+
+static void pidfs_free_attr_work(struct work_struct *work)
+{
+ struct pidfs_attr *attr, *next;
+ struct llist_node *head;
+
+ head = llist_del_all(&pidfs_free_list);
+ llist_for_each_entry_safe(attr, next, head, pidfs_llist) {
+ struct simple_xattrs *xattrs = attr->xattrs;
+
+ if (xattrs) {
+ simple_xattrs_free(xattrs, NULL);
+ kfree(xattrs);
+ }
+ kfree(attr);
+ }
+}
+
+static DECLARE_WORK(pidfs_free_work, pidfs_free_attr_work);
+
void pidfs_free_pid(struct pid *pid)
{
- struct pidfs_attr *attr __free(kfree) = no_free_ptr(pid->attr);
- struct simple_xattrs *xattrs __free(kfree) = NULL;
+ struct pidfs_attr *attr = pid->attr;
/*
* Any dentry must've been wiped from the pid by now.
@@ -200,9 +226,10 @@ void pidfs_free_pid(struct pid *pid)
if (IS_ERR(attr))
return;
- xattrs = no_free_ptr(attr->xattrs);
- if (xattrs)
- simple_xattrs_free(xattrs, NULL);
+ if (likely(!attr->xattrs))
+ kfree(attr);
+ else if (llist_add(&attr->pidfs_llist, &pidfs_free_list))
+ schedule_work(&pidfs_free_work);
}
#ifdef CONFIG_PROC_FS
@@ -1009,7 +1036,7 @@ static int pidfs_xattr_get(const struct xattr_handler *handler,
xattrs = READ_ONCE(attr->xattrs);
if (!xattrs)
- return 0;
+ return -ENODATA;
name = xattr_full_name(handler, suffix);
return simple_xattr_get(xattrs, name, value, size);
@@ -1029,22 +1056,16 @@ static int pidfs_xattr_set(const struct xattr_handler *handler,
/* Ensure we're the only one to set @attr->xattrs. */
WARN_ON_ONCE(!inode_is_locked(inode));
- xattrs = READ_ONCE(attr->xattrs);
- if (!xattrs) {
- xattrs = kmem_cache_zalloc(pidfs_xattr_cachep, GFP_KERNEL);
- if (!xattrs)
- return -ENOMEM;
-
- simple_xattrs_init(xattrs);
- smp_store_release(&pid->attr->xattrs, xattrs);
- }
+ xattrs = simple_xattrs_lazy_alloc(&attr->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
name = xattr_full_name(handler, suffix);
old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
if (IS_ERR(old_xattr))
return PTR_ERR(old_xattr);
- simple_xattr_free(old_xattr);
+ simple_xattr_free_rcu(old_xattr);
return 0;
}
@@ -1122,11 +1143,6 @@ void __init pidfs_init(void)
(SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
SLAB_ACCOUNT | SLAB_PANIC), NULL);
- pidfs_xattr_cachep = kmem_cache_create("pidfs_xattr_cache",
- sizeof(struct simple_xattrs), 0,
- (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
- SLAB_ACCOUNT | SLAB_PANIC), NULL);
-
pidfs_mnt = kern_mount(&pidfs_type);
if (IS_ERR(pidfs_mnt))
panic("Failed to mount pidfs pseudo filesystem");
diff --git a/fs/xattr.c b/fs/xattr.c
index 3e49e612e1ba..09ecbaaa1660 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -22,6 +22,7 @@
#include <linux/audit.h>
#include <linux/vmalloc.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/rhashtable.h>
#include <linux/uaccess.h>
@@ -105,6 +106,13 @@ int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode)
return 0;
}
+static inline int xattr_permission_error(int mask)
+{
+ if (mask & MAY_WRITE)
+ return -EPERM;
+ return -ENODATA;
+}
+
/*
* Check permissions for extended attribute access. This is a bit complicated
* because different namespaces have very different rules.
@@ -134,7 +142,7 @@ xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
*/
if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
if (!capable(CAP_SYS_ADMIN))
- return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
+ return xattr_permission_error(mask);
return 0;
}
@@ -144,12 +152,22 @@ xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
* privileged users can write attributes.
*/
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
- if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
- (mask & MAY_WRITE) &&
- !inode_owner_or_capable(idmap, inode))
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFREG:
+ break;
+ case S_IFDIR:
+ if (!(inode->i_mode & S_ISVTX))
+ break;
+ if (!(mask & MAY_WRITE))
+ break;
+ if (inode_owner_or_capable(idmap, inode))
+ break;
return -EPERM;
+ case S_IFSOCK:
+ break;
+ default:
+ return xattr_permission_error(mask);
+ }
}
return inode_permission(idmap, inode, mask);
@@ -1197,6 +1215,27 @@ void simple_xattr_free(struct simple_xattr *xattr)
kvfree(xattr);
}
+static void simple_xattr_rcu_free(struct rcu_head *head)
+{
+ struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu);
+
+ simple_xattr_free(xattr);
+}
+
+/**
+ * simple_xattr_free_rcu - free an xattr object with RCU delay
+ * @xattr: the xattr object
+ *
+ * Free the xattr object after an RCU grace period. This must be used when
+ * the xattr was removed from a data structure that concurrent RCU readers
+ * may still be traversing. Can handle @xattr being NULL.
+ */
+void simple_xattr_free_rcu(struct simple_xattr *xattr)
+{
+ if (xattr)
+ call_rcu(&xattr->rcu, simple_xattr_rcu_free);
+}
+
/**
* simple_xattr_alloc - allocate new xattr object
* @value: value of the xattr object
@@ -1205,65 +1244,58 @@ void simple_xattr_free(struct simple_xattr *xattr)
* Allocate a new xattr object and initialize respective members. The caller is
* responsible for handling the name of the xattr.
*
- * Return: On success a new xattr object is returned. On failure NULL is
- * returned.
+ * Return: New xattr object on success, NULL if @value is NULL, ERR_PTR on
+ * failure.
*/
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
{
struct simple_xattr *new_xattr;
size_t len;
+ if (!value)
+ return NULL;
+
/* wrap around? */
len = sizeof(*new_xattr) + size;
if (len < sizeof(*new_xattr))
- return NULL;
+ return ERR_PTR(-ENOMEM);
new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT);
if (!new_xattr)
- return NULL;
+ return ERR_PTR(-ENOMEM);
new_xattr->size = size;
memcpy(new_xattr->value, value, size);
return new_xattr;
}
-/**
- * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry
- * @key: xattr name
- * @node: current node
- *
- * Compare the xattr name with the xattr name attached to @node in the rbtree.
- *
- * Return: Negative value if continuing left, positive if continuing right, 0
- * if the xattr attached to @node matches @key.
- */
-static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node)
+static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
{
- const char *xattr_name = key;
- const struct simple_xattr *xattr;
+ const char *name = data;
+ return jhash(name, strlen(name), seed);
+}
- xattr = rb_entry(node, struct simple_xattr, rb_node);
- return strcmp(xattr->name, xattr_name);
+static u32 simple_xattr_obj_hashfn(const void *obj, u32 len, u32 seed)
+{
+ const struct simple_xattr *xattr = obj;
+ return jhash(xattr->name, strlen(xattr->name), seed);
}
-/**
- * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes
- * @new_node: new node
- * @node: current node
- *
- * Compare the xattr attached to @new_node with the xattr attached to @node.
- *
- * Return: Negative value if continuing left, positive if continuing right, 0
- * if the xattr attached to @new_node matches the xattr attached to @node.
- */
-static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
- const struct rb_node *node)
+static int simple_xattr_obj_cmpfn(struct rhashtable_compare_arg *arg,
+ const void *obj)
{
- struct simple_xattr *xattr;
- xattr = rb_entry(new_node, struct simple_xattr, rb_node);
- return rbtree_simple_xattr_cmp(xattr->name, node);
+ const struct simple_xattr *xattr = obj;
+ return strcmp(xattr->name, arg->key);
}
+static const struct rhashtable_params simple_xattr_params = {
+ .head_offset = offsetof(struct simple_xattr, hash_node),
+ .hashfn = simple_xattr_hashfn,
+ .obj_hashfn = simple_xattr_obj_hashfn,
+ .obj_cmpfn = simple_xattr_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
/**
* simple_xattr_get - get an xattr object
* @xattrs: the header of the xattr object
@@ -1282,14 +1314,12 @@ static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size)
{
- struct simple_xattr *xattr = NULL;
- struct rb_node *rbp;
+ struct simple_xattr *xattr;
int ret = -ENODATA;
- read_lock(&xattrs->lock);
- rbp = rb_find(name, &xattrs->rb_root, rbtree_simple_xattr_cmp);
- if (rbp) {
- xattr = rb_entry(rbp, struct simple_xattr, rb_node);
+ guard(rcu)();
+ xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params);
+ if (xattr) {
ret = xattr->size;
if (buffer) {
if (size < xattr->size)
@@ -1298,7 +1328,6 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
memcpy(buffer, xattr->value, xattr->size);
}
}
- read_unlock(&xattrs->lock);
return ret;
}
@@ -1325,6 +1354,11 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
* nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
* XATTR_REPLACE we fail as mentioned above.
*
+ * Note: Callers must externally serialize writes. All current callers hold
+ * the inode lock for write operations. The lookup->replace/remove sequence
+ * is not atomic with respect to the rhashtable's per-bucket locking, but
+ * is safe because writes are serialized by the caller.
+ *
* Return: On success, the removed or replaced xattr is returned, to be freed
* by the caller; or NULL if none. On failure a negative error code is returned.
*/
@@ -1332,64 +1366,57 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
const char *name, const void *value,
size_t size, int flags)
{
- struct simple_xattr *old_xattr = NULL, *new_xattr = NULL;
- struct rb_node *parent = NULL, **rbp;
- int err = 0, ret;
+ struct simple_xattr *old_xattr = NULL;
+ int err;
- /* value == NULL means remove */
- if (value) {
- new_xattr = simple_xattr_alloc(value, size);
- if (!new_xattr)
- return ERR_PTR(-ENOMEM);
+ CLASS(simple_xattr, new_xattr)(value, size);
+ if (IS_ERR(new_xattr))
+ return new_xattr;
+ if (new_xattr) {
new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
- if (!new_xattr->name) {
- simple_xattr_free(new_xattr);
+ if (!new_xattr->name)
return ERR_PTR(-ENOMEM);
- }
}
- write_lock(&xattrs->lock);
- rbp = &xattrs->rb_root.rb_node;
- while (*rbp) {
- parent = *rbp;
- ret = rbtree_simple_xattr_cmp(name, *rbp);
- if (ret < 0)
- rbp = &(*rbp)->rb_left;
- else if (ret > 0)
- rbp = &(*rbp)->rb_right;
- else
- old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
- if (old_xattr)
- break;
- }
+ /* Lookup is safe without RCU here since writes are serialized. */
+ old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
+ simple_xattr_params);
if (old_xattr) {
/* Fail if XATTR_CREATE is requested and the xattr exists. */
- if (flags & XATTR_CREATE) {
- err = -EEXIST;
- goto out_unlock;
- }
+ if (flags & XATTR_CREATE)
+ return ERR_PTR(-EEXIST);
- if (new_xattr)
- rb_replace_node(&old_xattr->rb_node,
- &new_xattr->rb_node, &xattrs->rb_root);
- else
- rb_erase(&old_xattr->rb_node, &xattrs->rb_root);
+ if (new_xattr) {
+ err = rhashtable_replace_fast(&xattrs->ht,
+ &old_xattr->hash_node,
+ &new_xattr->hash_node,
+ simple_xattr_params);
+ if (err)
+ return ERR_PTR(err);
+ } else {
+ err = rhashtable_remove_fast(&xattrs->ht,
+ &old_xattr->hash_node,
+ simple_xattr_params);
+ if (err)
+ return ERR_PTR(err);
+ }
} else {
/* Fail if XATTR_REPLACE is requested but no xattr is found. */
- if (flags & XATTR_REPLACE) {
- err = -ENODATA;
- goto out_unlock;
- }
+ if (flags & XATTR_REPLACE)
+ return ERR_PTR(-ENODATA);
/*
* If XATTR_CREATE or no flags are specified together with a
* new value simply insert it.
*/
if (new_xattr) {
- rb_link_node(&new_xattr->rb_node, parent, rbp);
- rb_insert_color(&new_xattr->rb_node, &xattrs->rb_root);
+ err = rhashtable_insert_fast(&xattrs->ht,
+ &new_xattr->hash_node,
+ simple_xattr_params);
+ if (err)
+ return ERR_PTR(err);
}
/*
@@ -1398,12 +1425,73 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
*/
}
-out_unlock:
- write_unlock(&xattrs->lock);
- if (!err)
- return old_xattr;
- simple_xattr_free(new_xattr);
- return ERR_PTR(err);
+ retain_and_null_ptr(new_xattr);
+ return old_xattr;
+}
+
+static inline void simple_xattr_limits_dec(struct simple_xattr_limits *limits,
+ size_t size)
+{
+ atomic_sub(size, &limits->xattr_size);
+ atomic_dec(&limits->nr_xattrs);
+}
+
+static inline int simple_xattr_limits_inc(struct simple_xattr_limits *limits,
+ size_t size)
+{
+ if (atomic_inc_return(&limits->nr_xattrs) > SIMPLE_XATTR_MAX_NR) {
+ atomic_dec(&limits->nr_xattrs);
+ return -ENOSPC;
+ }
+
+ if (atomic_add_return(size, &limits->xattr_size) <= SIMPLE_XATTR_MAX_SIZE)
+ return 0;
+
+ simple_xattr_limits_dec(limits, size);
+ return -ENOSPC;
+}
+
+/**
+ * simple_xattr_set_limited - set an xattr with per-inode user.* limits
+ * @xattrs: the header of the xattr object
+ * @limits: per-inode limit counters for user.* xattrs
+ * @name: the name of the xattr to set or remove
+ * @value: the value to store (NULL to remove)
+ * @size: the size of @value
+ * @flags: XATTR_CREATE, XATTR_REPLACE, or 0
+ *
+ * Like simple_xattr_set(), but enforces per-inode count and total value size
+ * limits for user.* xattrs. Uses speculative pre-increment of the atomic
+ * counters to avoid races without requiring external locks.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
+ */
+int simple_xattr_set_limited(struct simple_xattrs *xattrs,
+ struct simple_xattr_limits *limits,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct simple_xattr *old_xattr;
+ int ret;
+
+ if (value) {
+ ret = simple_xattr_limits_inc(limits, size);
+ if (ret)
+ return ret;
+ }
+
+ old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
+ if (IS_ERR(old_xattr)) {
+ if (value)
+ simple_xattr_limits_dec(limits, size);
+ return PTR_ERR(old_xattr);
+ }
+ if (old_xattr) {
+ simple_xattr_limits_dec(limits, old_xattr->size);
+ simple_xattr_free_rcu(old_xattr);
+ }
+ return 0;
}
static bool xattr_is_trusted(const char *name)
@@ -1443,8 +1531,8 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size)
{
bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+ struct rhashtable_iter iter;
struct simple_xattr *xattr;
- struct rb_node *rbp;
ssize_t remaining_size = size;
int err = 0;
@@ -1464,9 +1552,19 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
remaining_size -= err;
err = 0;
- read_lock(&xattrs->lock);
- for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
- xattr = rb_entry(rbp, struct simple_xattr, rb_node);
+ if (!xattrs)
+ return size - remaining_size;
+
+ rhashtable_walk_enter(&xattrs->ht, &iter);
+ rhashtable_walk_start(&iter);
+
+ while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
+ if (IS_ERR(xattr)) {
+ if (PTR_ERR(xattr) == -EAGAIN)
+ continue;
+ err = PTR_ERR(xattr);
+ break;
+ }
/* skip "trusted." attributes for unprivileged callers */
if (!trusted && xattr_is_trusted(xattr->name))
@@ -1480,25 +1578,11 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (err)
break;
}
- read_unlock(&xattrs->lock);
- return err ? err : size - remaining_size;
-}
+ rhashtable_walk_stop(&iter);
+ rhashtable_walk_exit(&iter);
-/**
- * rbtree_simple_xattr_less - compare two xattr rbtree nodes
- * @new_node: new node
- * @node: current node
- *
- * Compare the xattr attached to @new_node with the xattr attached to @node.
- * Note that this function technically tolerates duplicate entries.
- *
- * Return: True if insertion point in the rbtree is found.
- */
-static bool rbtree_simple_xattr_less(struct rb_node *new_node,
- const struct rb_node *node)
-{
- return rbtree_simple_xattr_node_cmp(new_node, node) < 0;
+ return err ? err : size - remaining_size;
}
/**
@@ -1509,25 +1593,100 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
* Add an xattr object to @xattrs. This assumes no replacement or removal
* of matching xattrs is wanted. Should only be called during inode
* initialization when a few distinct initial xattrs are supposed to be set.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
*/
-void simple_xattr_add(struct simple_xattrs *xattrs,
- struct simple_xattr *new_xattr)
+int simple_xattr_add(struct simple_xattrs *xattrs,
+ struct simple_xattr *new_xattr)
{
- write_lock(&xattrs->lock);
- rb_add(&new_xattr->rb_node, &xattrs->rb_root, rbtree_simple_xattr_less);
- write_unlock(&xattrs->lock);
+ return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node,
+ simple_xattr_params);
}
/**
* simple_xattrs_init - initialize new xattr header
* @xattrs: header to initialize
*
- * Initialize relevant fields of a an xattr header.
+ * Initialize the rhashtable used to store xattr objects.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
+ */
+int simple_xattrs_init(struct simple_xattrs *xattrs)
+{
+ return rhashtable_init(&xattrs->ht, &simple_xattr_params);
+}
+
+/**
+ * simple_xattrs_alloc - allocate and initialize a new xattr header
+ *
+ * Dynamically allocate a simple_xattrs header and initialize the
+ * underlying rhashtable. This is intended for consumers that want
+ * to lazily allocate xattr storage only when the first xattr is set,
+ * avoiding the per-inode rhashtable overhead when no xattrs are used.
+ *
+ * Return: On success a new simple_xattrs is returned. On failure an
+ * ERR_PTR is returned.
+ */
+struct simple_xattrs *simple_xattrs_alloc(void)
+{
+ struct simple_xattrs *xattrs __free(kfree) = NULL;
+ int ret;
+
+ xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL);
+ if (!xattrs)
+ return ERR_PTR(-ENOMEM);
+
+ ret = simple_xattrs_init(xattrs);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return no_free_ptr(xattrs);
+}
+
+/**
+ * simple_xattrs_lazy_alloc - get or allocate xattrs for a set operation
+ * @xattrsp: pointer to the xattrs pointer (may point to NULL)
+ * @value: value being set (NULL means remove)
+ * @flags: xattr set flags
+ *
+ * For lazily-allocated xattrs on the write path. If no xattrs exist yet
+ * and this is a remove operation, returns the appropriate result without
+ * allocating. Otherwise ensures xattrs is allocated and published with
+ * store-release semantics.
+ *
+ * Return: On success a valid pointer to the xattrs is returned. On
+ * failure or early-exit an ERR_PTR or NULL is returned. Callers should
+ * check with IS_ERR_OR_NULL() and propagate with PTR_ERR() which
+ * correctly returns 0 for the NULL no-op case.
*/
-void simple_xattrs_init(struct simple_xattrs *xattrs)
+struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
+ const void *value, int flags)
{
- xattrs->rb_root = RB_ROOT;
- rwlock_init(&xattrs->lock);
+ struct simple_xattrs *xattrs;
+
+ xattrs = READ_ONCE(*xattrsp);
+ if (xattrs)
+ return xattrs;
+
+ if (!value)
+ return (flags & XATTR_REPLACE) ? ERR_PTR(-ENODATA) : NULL;
+
+ xattrs = simple_xattrs_alloc();
+ if (!IS_ERR(xattrs))
+ smp_store_release(xattrsp, xattrs);
+ return xattrs;
+}
+
+static void simple_xattr_ht_free(void *ptr, void *arg)
+{
+ struct simple_xattr *xattr = ptr;
+ size_t *freed_space = arg;
+
+ if (freed_space)
+ *freed_space += simple_xattr_space(xattr->name, xattr->size);
+ simple_xattr_free(xattr);
}
/**
@@ -1540,22 +1699,10 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
*/
void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
{
- struct rb_node *rbp;
+ might_sleep();
if (freed_space)
*freed_space = 0;
- rbp = rb_first(&xattrs->rb_root);
- while (rbp) {
- struct simple_xattr *xattr;
- struct rb_node *rbp_next;
-
- rbp_next = rb_next(rbp);
- xattr = rb_entry(rbp, struct simple_xattr, rb_node);
- rb_erase(&xattr->rb_node, &xattrs->rb_root);
- if (freed_space)
- *freed_space += simple_xattr_space(xattr->name,
- xattr->size);
- simple_xattr_free(xattr);
- rbp = rbp_next;
- }
+ rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free,
+ freed_space);
}
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 4f0ab88a1b31..e21b2f7f4159 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -100,8 +100,6 @@ enum kernfs_node_type {
#define KERNFS_TYPE_MASK 0x000f
#define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK
-#define KERNFS_MAX_USER_XATTRS 128
-#define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10)
enum kernfs_node_flag {
KERNFS_ACTIVATED = 0x0010,
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a8273b32e041..f6a2d3402d76 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -48,7 +48,7 @@ struct shmem_inode_info {
};
struct timespec64 i_crtime; /* file creation time */
struct shared_policy policy; /* NUMA memory alloc policy */
- struct simple_xattrs xattrs; /* list of xattrs */
+ struct simple_xattrs *xattrs; /* list of xattrs */
pgoff_t fallocend; /* highest fallocate endindex */
unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */
atomic_t stop_eviction; /* hold when working on inode */
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 296b5ee5c979..8b6601367eae 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
+#include <linux/rhashtable-types.h>
#include <linux/user_namespace.h>
#include <uapi/linux/xattr.h>
@@ -106,31 +107,65 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
}
struct simple_xattrs {
- struct rb_root rb_root;
- rwlock_t lock;
+ struct rhashtable ht;
};
struct simple_xattr {
- struct rb_node rb_node;
+ struct rhash_head hash_node;
+ struct rcu_head rcu;
char *name;
size_t size;
char value[] __counted_by(size);
};
-void simple_xattrs_init(struct simple_xattrs *xattrs);
+#define SIMPLE_XATTR_MAX_NR 128
+#define SIMPLE_XATTR_MAX_SIZE (128 << 10)
+
+struct simple_xattr_limits {
+ atomic_t nr_xattrs; /* current user.* xattr count */
+ atomic_t xattr_size; /* current total user.* value bytes */
+};
+
+static inline void simple_xattr_limits_init(struct simple_xattr_limits *limits)
+{
+ atomic_set(&limits->nr_xattrs, 0);
+ atomic_set(&limits->xattr_size, 0);
+}
+
+int simple_xattrs_init(struct simple_xattrs *xattrs);
+struct simple_xattrs *simple_xattrs_alloc(void);
+struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
+ const void *value, int flags);
void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
size_t simple_xattr_space(const char *name, size_t size);
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
void simple_xattr_free(struct simple_xattr *xattr);
+void simple_xattr_free_rcu(struct simple_xattr *xattr);
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
const char *name, const void *value,
size_t size, int flags);
+int simple_xattr_set_limited(struct simple_xattrs *xattrs,
+ struct simple_xattr_limits *limits,
+ const char *name, const void *value,
+ size_t size, int flags);
ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size);
-void simple_xattr_add(struct simple_xattrs *xattrs,
- struct simple_xattr *new_xattr);
+int simple_xattr_add(struct simple_xattrs *xattrs,
+ struct simple_xattr *new_xattr);
int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name);
+DEFINE_CLASS(simple_xattr,
+ struct simple_xattr *,
+ if (!IS_ERR_OR_NULL(_T)) simple_xattr_free(_T),
+ simple_xattr_alloc(value, size),
+ const void *value, size_t size)
+
+DEFINE_CLASS(simple_xattrs,
+ struct simple_xattrs *,
+ if (!IS_ERR_OR_NULL(_T)) { simple_xattrs_free(_T, NULL); kfree(_T); },
+ simple_xattrs_alloc(),
+ void)
+
#endif /* _LINUX_XATTR_H */
diff --git a/mm/shmem.c b/mm/shmem.c
index b40f3cd48961..0b0e577e880a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1425,7 +1425,10 @@ static void shmem_evict_inode(struct inode *inode)
}
}
- simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+ if (info->xattrs) {
+ simple_xattrs_free(info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+ kfree(info->xattrs);
+ }
shmem_free_inode(inode->i_sb, freed);
WARN_ON(inode->i_blocks);
clear_inode(inode);
@@ -3101,7 +3104,6 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
shmem_set_inode_flags(inode, info->fsflags, NULL);
INIT_LIST_HEAD(&info->shrinklist);
INIT_LIST_HEAD(&info->swaplist);
- simple_xattrs_init(&info->xattrs);
cache_no_acl(inode);
if (sbinfo->noswap)
mapping_set_unevictable(inode->i_mapping);
@@ -4255,10 +4257,13 @@ static int shmem_initxattrs(struct inode *inode,
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
const struct xattr *xattr;
- struct simple_xattr *new_xattr;
size_t ispace = 0;
size_t len;
+ CLASS(simple_xattrs, xattrs)();
+ if (IS_ERR(xattrs))
+ return PTR_ERR(xattrs);
+
if (sbinfo->max_inodes) {
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
ispace += simple_xattr_space(xattr->name,
@@ -4277,24 +4282,24 @@ static int shmem_initxattrs(struct inode *inode,
}
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
- if (!new_xattr)
+ CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len);
+ if (IS_ERR(new_xattr))
break;
len = strlen(xattr->name) + 1;
new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
GFP_KERNEL_ACCOUNT);
- if (!new_xattr->name) {
- kvfree(new_xattr);
+ if (!new_xattr->name)
break;
- }
memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN);
memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
xattr->name, len);
- simple_xattr_add(&info->xattrs, new_xattr);
+ if (simple_xattr_add(xattrs, new_xattr))
+ break;
+ retain_and_null_ptr(new_xattr);
}
if (xattr->name != NULL) {
@@ -4303,10 +4308,10 @@ static int shmem_initxattrs(struct inode *inode,
sbinfo->free_ispace += ispace;
raw_spin_unlock(&sbinfo->stat_lock);
}
- simple_xattrs_free(&info->xattrs, NULL);
return -ENOMEM;
}
+ smp_store_release(&info->xattrs, no_free_ptr(xattrs));
return 0;
}
@@ -4315,9 +4320,14 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler,
const char *name, void *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(inode);
+ struct simple_xattrs *xattrs;
+
+ xattrs = READ_ONCE(info->xattrs);
+ if (!xattrs)
+ return -ENODATA;
name = xattr_full_name(handler, name);
- return simple_xattr_get(&info->xattrs, name, buffer, size);
+ return simple_xattr_get(xattrs, name, buffer, size);
}
static int shmem_xattr_handler_set(const struct xattr_handler *handler,
@@ -4328,10 +4338,16 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct simple_xattrs *xattrs;
struct simple_xattr *old_xattr;
size_t ispace = 0;
name = xattr_full_name(handler, name);
+
+ xattrs = simple_xattrs_lazy_alloc(&info->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
+
if (value && sbinfo->max_inodes) {
ispace = simple_xattr_space(name, size);
raw_spin_lock(&sbinfo->stat_lock);
@@ -4344,13 +4360,13 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
return -ENOSPC;
}
- old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
+ old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
if (!IS_ERR(old_xattr)) {
ispace = 0;
if (old_xattr && sbinfo->max_inodes)
ispace = simple_xattr_space(old_xattr->name,
old_xattr->size);
- simple_xattr_free(old_xattr);
+ simple_xattr_free_rcu(old_xattr);
old_xattr = NULL;
inode_set_ctime_current(inode);
inode_inc_iversion(inode);
@@ -4391,7 +4407,9 @@ static const struct xattr_handler * const shmem_xattr_handlers[] = {
static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
+
+ return simple_xattr_list(d_inode(dentry), READ_ONCE(info->xattrs),
+ buffer, size);
}
#endif /* CONFIG_TMPFS_XATTR */
diff --git a/net/socket.c b/net/socket.c
index 05952188127f..d25be67e4b84 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -315,45 +315,70 @@ efault_end:
static struct kmem_cache *sock_inode_cachep __ro_after_init;
+struct sockfs_inode {
+ struct simple_xattrs *xattrs;
+ struct simple_xattr_limits xattr_limits;
+ struct socket_alloc;
+};
+
+static struct sockfs_inode *SOCKFS_I(struct inode *inode)
+{
+ return container_of(inode, struct sockfs_inode, vfs_inode);
+}
+
static struct inode *sock_alloc_inode(struct super_block *sb)
{
- struct socket_alloc *ei;
+ struct sockfs_inode *si;
- ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
- if (!ei)
+ si = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
+ if (!si)
return NULL;
- init_waitqueue_head(&ei->socket.wq.wait);
- ei->socket.wq.fasync_list = NULL;
- ei->socket.wq.flags = 0;
+ si->xattrs = NULL;
+ simple_xattr_limits_init(&si->xattr_limits);
+
+ init_waitqueue_head(&si->socket.wq.wait);
+ si->socket.wq.fasync_list = NULL;
+ si->socket.wq.flags = 0;
+
+ si->socket.state = SS_UNCONNECTED;
+ si->socket.flags = 0;
+ si->socket.ops = NULL;
+ si->socket.sk = NULL;
+ si->socket.file = NULL;
- ei->socket.state = SS_UNCONNECTED;
- ei->socket.flags = 0;
- ei->socket.ops = NULL;
- ei->socket.sk = NULL;
- ei->socket.file = NULL;
+ return &si->vfs_inode;
+}
+
+static void sock_evict_inode(struct inode *inode)
+{
+ struct sockfs_inode *si = SOCKFS_I(inode);
+ struct simple_xattrs *xattrs = si->xattrs;
- return &ei->vfs_inode;
+ if (xattrs) {
+ simple_xattrs_free(xattrs, NULL);
+ kfree(xattrs);
+ }
+ clear_inode(inode);
}
static void sock_free_inode(struct inode *inode)
{
- struct socket_alloc *ei;
+ struct sockfs_inode *si = SOCKFS_I(inode);
- ei = container_of(inode, struct socket_alloc, vfs_inode);
- kmem_cache_free(sock_inode_cachep, ei);
+ kmem_cache_free(sock_inode_cachep, si);
}
static void init_once(void *foo)
{
- struct socket_alloc *ei = (struct socket_alloc *)foo;
+ struct sockfs_inode *si = (struct sockfs_inode *)foo;
- inode_init_once(&ei->vfs_inode);
+ inode_init_once(&si->vfs_inode);
}
static void init_inodecache(void)
{
sock_inode_cachep = kmem_cache_create("sock_inode_cache",
- sizeof(struct socket_alloc),
+ sizeof(struct sockfs_inode),
0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
@@ -365,6 +390,7 @@ static void init_inodecache(void)
static const struct super_operations sockfs_ops = {
.alloc_inode = sock_alloc_inode,
.free_inode = sock_free_inode,
+ .evict_inode = sock_evict_inode,
.statfs = simple_statfs,
};
@@ -417,9 +443,48 @@ static const struct xattr_handler sockfs_security_xattr_handler = {
.set = sockfs_security_xattr_set,
};
+static int sockfs_user_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *suffix, void *value, size_t size)
+{
+ const char *name = xattr_full_name(handler, suffix);
+ struct simple_xattrs *xattrs;
+
+ xattrs = READ_ONCE(SOCKFS_I(inode)->xattrs);
+ if (!xattrs)
+ return -ENODATA;
+
+ return simple_xattr_get(xattrs, name, value, size);
+}
+
+static int sockfs_user_xattr_set(const struct xattr_handler *handler,
+ struct mnt_idmap *idmap,
+ struct dentry *dentry, struct inode *inode,
+ const char *suffix, const void *value,
+ size_t size, int flags)
+{
+ const char *name = xattr_full_name(handler, suffix);
+ struct sockfs_inode *si = SOCKFS_I(inode);
+ struct simple_xattrs *xattrs;
+
+ xattrs = simple_xattrs_lazy_alloc(&si->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
+
+ return simple_xattr_set_limited(xattrs, &si->xattr_limits,
+ name, value, size, flags);
+}
+
+static const struct xattr_handler sockfs_user_xattr_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = sockfs_user_xattr_get,
+ .set = sockfs_user_xattr_set,
+};
+
static const struct xattr_handler * const sockfs_xattr_handlers[] = {
&sockfs_xattr_handler,
&sockfs_security_xattr_handler,
+ &sockfs_user_xattr_handler,
NULL
};
@@ -572,26 +637,26 @@ EXPORT_SYMBOL(sockfd_lookup);
static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
size_t size)
{
- ssize_t len;
- ssize_t used = 0;
+ struct sockfs_inode *si = SOCKFS_I(d_inode(dentry));
+ ssize_t len, used;
- len = security_inode_listsecurity(d_inode(dentry), buffer, size);
+ len = simple_xattr_list(d_inode(dentry), READ_ONCE(si->xattrs),
+ buffer, size);
if (len < 0)
return len;
- used += len;
+
+ used = len;
if (buffer) {
- if (size < used)
- return -ERANGE;
buffer += len;
+ size -= len;
}
- len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
+ len = XATTR_NAME_SOCKPROTONAME_LEN + 1;
used += len;
if (buffer) {
- if (size < used)
+ if (size < len)
return -ERANGE;
memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
- buffer += len;
}
return used;
diff --git a/tools/testing/selftests/filesystems/xattr/.gitignore b/tools/testing/selftests/filesystems/xattr/.gitignore
new file mode 100644
index 000000000000..092d14094c0f
--- /dev/null
+++ b/tools/testing/selftests/filesystems/xattr/.gitignore
@@ -0,0 +1,3 @@
+xattr_socket_test
+xattr_sockfs_test
+xattr_socket_types_test
diff --git a/tools/testing/selftests/filesystems/xattr/Makefile b/tools/testing/selftests/filesystems/xattr/Makefile
new file mode 100644
index 000000000000..95364ffb10e9
--- /dev/null
+++ b/tools/testing/selftests/filesystems/xattr/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += $(KHDR_INCLUDES)
+TEST_GEN_PROGS := xattr_socket_test xattr_sockfs_test xattr_socket_types_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c b/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c
new file mode 100644
index 000000000000..fac0a4c6bc05
--- /dev/null
+++ b/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c
@@ -0,0 +1,470 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2026 Christian Brauner <brauner@kernel.org>
+/*
+ * Test extended attributes on path-based Unix domain sockets.
+ *
+ * Path-based Unix domain sockets are bound to a filesystem path and their
+ * inodes live on the underlying filesystem (e.g. tmpfs). These tests verify
+ * that user.* and trusted.* xattr operations work correctly on them using
+ * path-based syscalls (setxattr, getxattr, etc.).
+ *
+ * Covers SOCK_STREAM, SOCK_DGRAM, and SOCK_SEQPACKET socket types.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "../../kselftest_harness.h"
+
+#define TEST_XATTR_NAME "user.testattr"
+#define TEST_XATTR_VALUE "testvalue"
+#define TEST_XATTR_VALUE2 "newvalue"
+
+/*
+ * Fixture for path-based Unix domain socket tests.
+ * Creates a SOCK_STREAM socket bound to a path in /tmp (typically tmpfs).
+ */
+FIXTURE(xattr_socket)
+{
+ char socket_path[PATH_MAX];
+ int sockfd;
+};
+
+FIXTURE_VARIANT(xattr_socket)
+{
+ int sock_type;
+ const char *name;
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket, stream) {
+ .sock_type = SOCK_STREAM,
+ .name = "stream",
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket, dgram) {
+ .sock_type = SOCK_DGRAM,
+ .name = "dgram",
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket, seqpacket) {
+ .sock_type = SOCK_SEQPACKET,
+ .name = "seqpacket",
+};
+
+FIXTURE_SETUP(xattr_socket)
+{
+ struct sockaddr_un addr;
+ int ret;
+
+ self->sockfd = -1;
+
+ snprintf(self->socket_path, sizeof(self->socket_path),
+ "/tmp/xattr_socket_test_%s.%d", variant->name, getpid());
+ unlink(self->socket_path);
+
+ self->sockfd = socket(AF_UNIX, variant->sock_type, 0);
+ ASSERT_GE(self->sockfd, 0) {
+ TH_LOG("Failed to create socket: %s", strerror(errno));
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ strncpy(addr.sun_path, self->socket_path, sizeof(addr.sun_path) - 1);
+
+ ret = bind(self->sockfd, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("Failed to bind socket to %s: %s",
+ self->socket_path, strerror(errno));
+ }
+}
+
+FIXTURE_TEARDOWN(xattr_socket)
+{
+ if (self->sockfd >= 0)
+ close(self->sockfd);
+ unlink(self->socket_path);
+}
+
+TEST_F(xattr_socket, set_user_xattr)
+{
+ int ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr failed: %s (errno=%d)", strerror(errno), errno);
+ }
+}
+
+TEST_F(xattr_socket, get_user_xattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr failed: %s", strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) {
+ TH_LOG("getxattr returned %zd, expected %zu: %s",
+ ret, strlen(TEST_XATTR_VALUE), strerror(errno));
+ }
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+TEST_F(xattr_socket, list_user_xattr)
+{
+ char list[1024];
+ ssize_t ret;
+ bool found = false;
+ char *ptr;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr failed: %s", strerror(errno));
+ }
+
+ memset(list, 0, sizeof(list));
+ ret = listxattr(self->socket_path, list, sizeof(list));
+ ASSERT_GT(ret, 0) {
+ TH_LOG("listxattr failed: %s", strerror(errno));
+ }
+
+ for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) {
+ if (strcmp(ptr, TEST_XATTR_NAME) == 0) {
+ found = true;
+ break;
+ }
+ }
+ ASSERT_TRUE(found) {
+ TH_LOG("xattr %s not found in list", TEST_XATTR_NAME);
+ }
+}
+
+TEST_F(xattr_socket, remove_user_xattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr failed: %s", strerror(errno));
+ }
+
+ ret = removexattr(self->socket_path, TEST_XATTR_NAME);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("removexattr failed: %s", strerror(errno));
+ }
+
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA) {
+ TH_LOG("Expected ENODATA, got %s", strerror(errno));
+ }
+}
+
+/*
+ * Test that xattrs persist across socket close and reopen.
+ * The xattr is on the filesystem inode, not the socket fd.
+ */
+TEST_F(xattr_socket, xattr_persistence)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr failed: %s", strerror(errno));
+ }
+
+ close(self->sockfd);
+ self->sockfd = -1;
+
+ memset(buf, 0, sizeof(buf));
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) {
+ TH_LOG("getxattr after close failed: %s", strerror(errno));
+ }
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+TEST_F(xattr_socket, update_user_xattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0);
+ ASSERT_EQ(ret, 0);
+
+ memset(buf, 0, sizeof(buf));
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE2);
+}
+
+TEST_F(xattr_socket, xattr_create_flag)
+{
+ int ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), XATTR_CREATE);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EEXIST);
+}
+
+TEST_F(xattr_socket, xattr_replace_flag)
+{
+ int ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), XATTR_REPLACE);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_socket, multiple_xattrs)
+{
+ char buf[256];
+ ssize_t ret;
+ int i;
+ char name[64], value[64];
+ const int num_xattrs = 5;
+
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(name, sizeof(name), "user.test%d", i);
+ snprintf(value, sizeof(value), "value%d", i);
+ ret = setxattr(self->socket_path, name, value, strlen(value), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr %s failed: %s", name, strerror(errno));
+ }
+ }
+
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(name, sizeof(name), "user.test%d", i);
+ snprintf(value, sizeof(value), "value%d", i);
+ memset(buf, 0, sizeof(buf));
+ ret = getxattr(self->socket_path, name, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(value));
+ ASSERT_STREQ(buf, value);
+ }
+}
+
+TEST_F(xattr_socket, xattr_empty_value)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME, "", 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(xattr_socket, xattr_get_size)
+{
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, NULL, 0);
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+}
+
+TEST_F(xattr_socket, xattr_buffer_too_small)
+{
+ char buf[2];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ERANGE);
+}
+
+TEST_F(xattr_socket, xattr_nonexistent)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = getxattr(self->socket_path, "user.nonexistent", buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_socket, remove_nonexistent_xattr)
+{
+ int ret;
+
+ ret = removexattr(self->socket_path, "user.nonexistent");
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_socket, large_xattr_value)
+{
+ char large_value[4096];
+ char read_buf[4096];
+ ssize_t ret;
+
+ memset(large_value, 'A', sizeof(large_value));
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ large_value, sizeof(large_value), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr with large value failed: %s", strerror(errno));
+ }
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME,
+ read_buf, sizeof(read_buf));
+ ASSERT_EQ(ret, (ssize_t)sizeof(large_value));
+ ASSERT_EQ(memcmp(large_value, read_buf, sizeof(large_value)), 0);
+}
+
+/*
+ * Test lsetxattr/lgetxattr (don't follow symlinks).
+ * Socket files aren't symlinks, so this should work the same.
+ */
+TEST_F(xattr_socket, lsetxattr_lgetxattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = lsetxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("lsetxattr failed: %s", strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ ret = lgetxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+/*
+ * Fixture for trusted.* xattr tests.
+ * These require CAP_SYS_ADMIN.
+ */
+FIXTURE(xattr_socket_trusted)
+{
+ char socket_path[PATH_MAX];
+ int sockfd;
+};
+
+FIXTURE_VARIANT(xattr_socket_trusted)
+{
+ int sock_type;
+ const char *name;
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_trusted, stream) {
+ .sock_type = SOCK_STREAM,
+ .name = "stream",
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_trusted, dgram) {
+ .sock_type = SOCK_DGRAM,
+ .name = "dgram",
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_trusted, seqpacket) {
+ .sock_type = SOCK_SEQPACKET,
+ .name = "seqpacket",
+};
+
+FIXTURE_SETUP(xattr_socket_trusted)
+{
+ struct sockaddr_un addr;
+ int ret;
+
+ self->sockfd = -1;
+
+ snprintf(self->socket_path, sizeof(self->socket_path),
+ "/tmp/xattr_socket_trusted_%s.%d", variant->name, getpid());
+ unlink(self->socket_path);
+
+ self->sockfd = socket(AF_UNIX, variant->sock_type, 0);
+ ASSERT_GE(self->sockfd, 0);
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ strncpy(addr.sun_path, self->socket_path, sizeof(addr.sun_path) - 1);
+
+ ret = bind(self->sockfd, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(xattr_socket_trusted)
+{
+ if (self->sockfd >= 0)
+ close(self->sockfd);
+ unlink(self->socket_path);
+}
+
+TEST_F(xattr_socket_trusted, set_trusted_xattr)
+{
+ char buf[256];
+ ssize_t len;
+ int ret;
+
+ ret = setxattr(self->socket_path, "trusted.testattr",
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ if (ret == -1 && errno == EPERM)
+ SKIP(return, "Need CAP_SYS_ADMIN for trusted.* xattrs");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr trusted.testattr failed: %s", strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ len = getxattr(self->socket_path, "trusted.testattr",
+ buf, sizeof(buf));
+ ASSERT_EQ(len, (ssize_t)strlen(TEST_XATTR_VALUE));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+TEST_F(xattr_socket_trusted, get_trusted_xattr_unprivileged)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = getxattr(self->socket_path, "trusted.testattr", buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_TRUE(errno == ENODATA || errno == EPERM) {
+ TH_LOG("Expected ENODATA or EPERM, got %s", strerror(errno));
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c b/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c
new file mode 100644
index 000000000000..bfabe91b2ed1
--- /dev/null
+++ b/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2026 Christian Brauner <brauner@kernel.org>
+/*
+ * Test user.* xattrs on various socket families.
+ *
+ * All socket types use sockfs for their inodes, so user.* xattrs should
+ * work on any socket regardless of address family. This tests AF_INET,
+ * AF_INET6, AF_NETLINK, AF_PACKET, and abstract namespace AF_UNIX sockets.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/xattr.h>
+#include <linux/netlink.h>
+#include <unistd.h>
+
+#include "../../kselftest_harness.h"
+
+#define TEST_XATTR_NAME "user.testattr"
+#define TEST_XATTR_VALUE "testvalue"
+
+FIXTURE(xattr_socket_types)
+{
+ int sockfd;
+};
+
+FIXTURE_VARIANT(xattr_socket_types)
+{
+ int family;
+ int type;
+ int protocol;
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_types, inet) {
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = 0,
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_types, inet6) {
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = 0,
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_types, netlink) {
+ .family = AF_NETLINK,
+ .type = SOCK_RAW,
+ .protocol = NETLINK_USERSOCK,
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_types, packet) {
+ .family = AF_PACKET,
+ .type = SOCK_DGRAM,
+ .protocol = 0,
+};
+
+FIXTURE_SETUP(xattr_socket_types)
+{
+ self->sockfd = socket(variant->family, variant->type,
+ variant->protocol);
+ if (self->sockfd < 0 &&
+ (errno == EAFNOSUPPORT || errno == EPERM || errno == EACCES))
+ SKIP(return, "socket(%d, %d, %d) not available: %s",
+ variant->family, variant->type, variant->protocol,
+ strerror(errno));
+ ASSERT_GE(self->sockfd, 0) {
+ TH_LOG("Failed to create socket(%d, %d, %d): %s",
+ variant->family, variant->type, variant->protocol,
+ strerror(errno));
+ }
+}
+
+FIXTURE_TEARDOWN(xattr_socket_types)
+{
+ if (self->sockfd >= 0)
+ close(self->sockfd);
+}
+
+TEST_F(xattr_socket_types, set_get_list_remove)
+{
+ char buf[256], list[4096], *ptr;
+ ssize_t ret;
+ bool found;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fsetxattr failed: %s", strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+
+ memset(list, 0, sizeof(list));
+ ret = flistxattr(self->sockfd, list, sizeof(list));
+ ASSERT_GT(ret, 0);
+ found = false;
+ for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) {
+ if (strcmp(ptr, TEST_XATTR_NAME) == 0)
+ found = true;
+ }
+ ASSERT_TRUE(found);
+
+ ret = fremovexattr(self->sockfd, TEST_XATTR_NAME);
+ ASSERT_EQ(ret, 0);
+
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+/*
+ * Test abstract namespace AF_UNIX socket.
+ * Abstract sockets don't have a filesystem path; their inodes live in
+ * sockfs so user.* xattrs should work via fsetxattr/fgetxattr.
+ */
+FIXTURE(xattr_abstract)
+{
+ int sockfd;
+};
+
+FIXTURE_SETUP(xattr_abstract)
+{
+ struct sockaddr_un addr;
+ char name[64];
+ int ret, len;
+
+ self->sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(self->sockfd, 0);
+
+ len = snprintf(name, sizeof(name), "xattr_test_abstract_%d", getpid());
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ addr.sun_path[0] = '\0';
+ memcpy(&addr.sun_path[1], name, len);
+
+ ret = bind(self->sockfd, (struct sockaddr *)&addr,
+ offsetof(struct sockaddr_un, sun_path) + 1 + len);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(xattr_abstract)
+{
+ if (self->sockfd >= 0)
+ close(self->sockfd);
+}
+
+TEST_F(xattr_abstract, set_get)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fsetxattr on abstract socket failed: %s",
+ strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c b/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c
new file mode 100644
index 000000000000..b4824b01a86d
--- /dev/null
+++ b/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2026 Christian Brauner <brauner@kernel.org>
+/*
+ * Test extended attributes on sockfs sockets.
+ *
+ * Sockets created via socket() have their inodes in sockfs, which supports
+ * user.* xattrs with per-inode limits: up to 128 xattrs and 128KB total
+ * value size. These tests verify xattr operations via fsetxattr/fgetxattr/
+ * flistxattr/fremovexattr on the socket fd, as well as limit enforcement.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "../../kselftest_harness.h"
+
+#define TEST_XATTR_NAME "user.testattr"
+#define TEST_XATTR_VALUE "testvalue"
+#define TEST_XATTR_VALUE2 "newvalue"
+
+/* Per-inode limits for user.* xattrs on sockfs (from include/linux/xattr.h) */
+#define SIMPLE_XATTR_MAX_NR 128
+#define SIMPLE_XATTR_MAX_SIZE (128 << 10) /* 128 KB */
+
+#ifndef XATTR_SIZE_MAX
+#define XATTR_SIZE_MAX 65536
+#endif
+
+/*
+ * Fixture for sockfs socket xattr tests.
+ * Creates an AF_UNIX socket (lives in sockfs, not bound to any path).
+ */
+FIXTURE(xattr_sockfs)
+{
+ int sockfd;
+};
+
+FIXTURE_SETUP(xattr_sockfs)
+{
+ self->sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(self->sockfd, 0) {
+ TH_LOG("Failed to create socket: %s", strerror(errno));
+ }
+}
+
+FIXTURE_TEARDOWN(xattr_sockfs)
+{
+ if (self->sockfd >= 0)
+ close(self->sockfd);
+}
+
+TEST_F(xattr_sockfs, set_get_user_xattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fsetxattr failed: %s", strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) {
+ TH_LOG("fgetxattr returned %zd: %s", ret, strerror(errno));
+ }
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+/*
+ * Test listing xattrs on a sockfs socket.
+ * Should include user.* xattrs and system.sockprotoname.
+ */
+TEST_F(xattr_sockfs, list_user_xattr)
+{
+ char list[4096];
+ ssize_t ret;
+ char *ptr;
+ bool found_user = false;
+ bool found_proto = false;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fsetxattr failed: %s", strerror(errno));
+ }
+
+ memset(list, 0, sizeof(list));
+ ret = flistxattr(self->sockfd, list, sizeof(list));
+ ASSERT_GT(ret, 0) {
+ TH_LOG("flistxattr failed: %s", strerror(errno));
+ }
+
+ for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) {
+ if (strcmp(ptr, TEST_XATTR_NAME) == 0)
+ found_user = true;
+ if (strcmp(ptr, "system.sockprotoname") == 0)
+ found_proto = true;
+ }
+ ASSERT_TRUE(found_user) {
+ TH_LOG("user xattr not found in list");
+ }
+ ASSERT_TRUE(found_proto) {
+ TH_LOG("system.sockprotoname not found in list");
+ }
+}
+
+TEST_F(xattr_sockfs, remove_user_xattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = fremovexattr(self->sockfd, TEST_XATTR_NAME);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fremovexattr failed: %s", strerror(errno));
+ }
+
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_sockfs, update_user_xattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0);
+ ASSERT_EQ(ret, 0);
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE2);
+}
+
+TEST_F(xattr_sockfs, xattr_create_flag)
+{
+ int ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2),
+ XATTR_CREATE);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EEXIST);
+}
+
+TEST_F(xattr_sockfs, xattr_replace_flag)
+{
+ int ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE),
+ XATTR_REPLACE);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_sockfs, get_nonexistent)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = fgetxattr(self->sockfd, "user.nonexistent", buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_sockfs, empty_value)
+{
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, "", 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, NULL, 0);
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(xattr_sockfs, get_size)
+{
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, NULL, 0);
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+}
+
+TEST_F(xattr_sockfs, buffer_too_small)
+{
+ char buf[2];
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ERANGE);
+}
+
+/*
+ * Test maximum number of user.* xattrs per socket.
+ * The kernel enforces SIMPLE_XATTR_MAX_NR (128), so the 129th should
+ * fail with ENOSPC.
+ */
+TEST_F(xattr_sockfs, max_nr_xattrs)
+{
+ char name[32];
+ int i, ret;
+
+ for (i = 0; i < SIMPLE_XATTR_MAX_NR; i++) {
+ snprintf(name, sizeof(name), "user.test%03d", i);
+ ret = fsetxattr(self->sockfd, name, "v", 1, 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fsetxattr %s failed at i=%d: %s",
+ name, i, strerror(errno));
+ }
+ }
+
+ ret = fsetxattr(self->sockfd, "user.overflow", "v", 1, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENOSPC) {
+ TH_LOG("Expected ENOSPC for xattr %d, got %s",
+ SIMPLE_XATTR_MAX_NR + 1, strerror(errno));
+ }
+}
+
+/*
+ * Test maximum total value size for user.* xattrs.
+ * The kernel enforces SIMPLE_XATTR_MAX_SIZE (128KB). Individual xattr
+ * values are limited to XATTR_SIZE_MAX (64KB) by the VFS, so we need
+ * at least two xattrs to hit the total limit.
+ */
+TEST_F(xattr_sockfs, max_xattr_size)
+{
+ char *value;
+ int ret;
+
+ value = malloc(XATTR_SIZE_MAX);
+ ASSERT_NE(value, NULL);
+ memset(value, 'A', XATTR_SIZE_MAX);
+
+ /* First 64KB xattr - total = 64KB */
+ ret = fsetxattr(self->sockfd, "user.big1", value, XATTR_SIZE_MAX, 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("first large xattr failed: %s", strerror(errno));
+ }
+
+ /* Second 64KB xattr - total = 128KB (exactly at limit) */
+ ret = fsetxattr(self->sockfd, "user.big2", value, XATTR_SIZE_MAX, 0);
+ free(value);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("second large xattr failed: %s", strerror(errno));
+ }
+
+ /* Third xattr with 1 byte - total > 128KB, should fail */
+ ret = fsetxattr(self->sockfd, "user.big3", "v", 1, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENOSPC) {
+ TH_LOG("Expected ENOSPC when exceeding size limit, got %s",
+ strerror(errno));
+ }
+}
+
+/*
+ * Test that removing an xattr frees limit space, allowing re-addition.
+ */
+TEST_F(xattr_sockfs, limit_remove_readd)
+{
+ char name[32];
+ int i, ret;
+
+ /* Fill up to the maximum count */
+ for (i = 0; i < SIMPLE_XATTR_MAX_NR; i++) {
+ snprintf(name, sizeof(name), "user.test%03d", i);
+ ret = fsetxattr(self->sockfd, name, "v", 1, 0);
+ ASSERT_EQ(ret, 0);
+ }
+
+ /* Verify we're at the limit */
+ ret = fsetxattr(self->sockfd, "user.overflow", "v", 1, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENOSPC);
+
+ /* Remove one xattr */
+ ret = fremovexattr(self->sockfd, "user.test000");
+ ASSERT_EQ(ret, 0);
+
+ /* Now we should be able to add one more */
+ ret = fsetxattr(self->sockfd, "user.newattr", "v", 1, 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("re-add after remove failed: %s", strerror(errno));
+ }
+}
+
+/*
+ * Test that two different sockets have independent xattr limits.
+ */
+TEST_F(xattr_sockfs, limits_per_inode)
+{
+ char buf[256];
+ int sock2;
+ ssize_t ret;
+
+ sock2 = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(sock2, 0);
+
+ /* Set xattr on first socket */
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ /* First socket's xattr should not be visible on second socket */
+ ret = fgetxattr(sock2, TEST_XATTR_NAME, NULL, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+
+ /* Second socket should independently accept xattrs */
+ ret = fsetxattr(sock2, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0);
+ ASSERT_EQ(ret, 0);
+
+ /* Verify each socket has its own value */
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(sock2, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE2);
+
+ close(sock2);
+}
+
+TEST_HARNESS_MAIN