diff options
| -rw-r--r-- | fs/kernfs/dir.c | 15 | ||||
| -rw-r--r-- | fs/kernfs/inode.c | 99 | ||||
| -rw-r--r-- | fs/kernfs/kernfs-internal.h | 5 | ||||
| -rw-r--r-- | fs/pidfs.c | 64 | ||||
| -rw-r--r-- | fs/xattr.c | 423 | ||||
| -rw-r--r-- | include/linux/kernfs.h | 2 | ||||
| -rw-r--r-- | include/linux/shmem_fs.h | 2 | ||||
| -rw-r--r-- | include/linux/xattr.h | 47 | ||||
| -rw-r--r-- | mm/shmem.c | 46 | ||||
| -rw-r--r-- | net/socket.c | 119 | ||||
| -rw-r--r-- | tools/testing/selftests/filesystems/xattr/.gitignore | 3 | ||||
| -rw-r--r-- | tools/testing/selftests/filesystems/xattr/Makefile | 6 | ||||
| -rw-r--r-- | tools/testing/selftests/filesystems/xattr/xattr_socket_test.c | 470 | ||||
| -rw-r--r-- | tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c | 177 | ||||
| -rw-r--r-- | tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c | 363 |
15 files changed, 1546 insertions, 295 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 22a4dff2a3af..cdacdc1dcdd9 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -564,10 +564,8 @@ static void kernfs_free_rcu(struct rcu_head *rcu) /* If the whole node goes away, then name can't be used outside */ kfree_const(rcu_access_pointer(kn->name)); - if (kn->iattr) { - simple_xattrs_free(&kn->iattr->xattrs, NULL); + if (kn->iattr) kmem_cache_free(kernfs_iattrs_cache, kn->iattr); - } kmem_cache_free(kernfs_node_cache, kn); } @@ -601,6 +599,12 @@ void kernfs_put(struct kernfs_node *kn) if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); + if (kn->iattr && kn->iattr->xattrs) { + simple_xattrs_free(kn->iattr->xattrs, NULL); + kfree(kn->iattr->xattrs); + kn->iattr->xattrs = NULL; + } + spin_lock(&root->kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); spin_unlock(&root->kernfs_idr_lock); @@ -699,7 +703,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, err_out4: if (kn->iattr) { - simple_xattrs_free(&kn->iattr->xattrs, NULL); + if (kn->iattr->xattrs) { + simple_xattrs_free(kn->iattr->xattrs, NULL); + kfree(kn->iattr->xattrs); + } kmem_cache_free(kernfs_iattrs_cache, kn->iattr); } err_out3: diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index a36aaee98dce..1de10500842d 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -45,9 +45,7 @@ static struct kernfs_iattrs *__kernfs_iattrs(struct kernfs_node *kn, bool alloc) ret->ia_mtime = ret->ia_atime; ret->ia_ctime = ret->ia_atime; - simple_xattrs_init(&ret->xattrs); - atomic_set(&ret->nr_user_xattrs, 0); - atomic_set(&ret->user_xattr_size, 0); + simple_xattr_limits_init(&ret->xattr_limits); /* If someone raced us, recognize it. 
*/ if (!try_cmpxchg(&kn->iattr, &attr, ret)) @@ -146,7 +144,8 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size) if (!attrs) return -ENOMEM; - return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size); + return simple_xattr_list(d_inode(dentry), READ_ONCE(attrs->xattrs), + buf, size); } static inline void set_default_inode_attr(struct inode *inode, umode_t mode) @@ -298,27 +297,38 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name, void *value, size_t size) { struct kernfs_iattrs *attrs = kernfs_iattrs_noalloc(kn); + struct simple_xattrs *xattrs; + if (!attrs) return -ENODATA; - return simple_xattr_get(&attrs->xattrs, name, value, size); + xattrs = READ_ONCE(attrs->xattrs); + if (!xattrs) + return -ENODATA; + + return simple_xattr_get(xattrs, name, value, size); } int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags) { struct simple_xattr *old_xattr; + struct simple_xattrs *xattrs; struct kernfs_iattrs *attrs; attrs = kernfs_iattrs(kn); if (!attrs) return -ENOMEM; - old_xattr = simple_xattr_set(&attrs->xattrs, name, value, size, flags); + xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags); + if (IS_ERR_OR_NULL(xattrs)) + return PTR_ERR(xattrs); + + old_xattr = simple_xattr_set(xattrs, name, value, size, flags); if (IS_ERR(old_xattr)) return PTR_ERR(old_xattr); - simple_xattr_free(old_xattr); + simple_xattr_free_rcu(old_xattr); return 0; } @@ -344,69 +354,6 @@ static int kernfs_vfs_xattr_set(const struct xattr_handler *handler, return kernfs_xattr_set(kn, name, value, size, flags); } -static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn, - const char *full_name, - struct simple_xattrs *xattrs, - const void *value, size_t size, int flags) -{ - struct kernfs_iattrs *attr = kernfs_iattrs_noalloc(kn); - atomic_t *sz = &attr->user_xattr_size; - atomic_t *nr = &attr->nr_user_xattrs; - struct simple_xattr *old_xattr; - int ret; - - if 
(atomic_inc_return(nr) > KERNFS_MAX_USER_XATTRS) { - ret = -ENOSPC; - goto dec_count_out; - } - - if (atomic_add_return(size, sz) > KERNFS_USER_XATTR_SIZE_LIMIT) { - ret = -ENOSPC; - goto dec_size_out; - } - - old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags); - if (!old_xattr) - return 0; - - if (IS_ERR(old_xattr)) { - ret = PTR_ERR(old_xattr); - goto dec_size_out; - } - - ret = 0; - size = old_xattr->size; - simple_xattr_free(old_xattr); -dec_size_out: - atomic_sub(size, sz); -dec_count_out: - atomic_dec(nr); - return ret; -} - -static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn, - const char *full_name, - struct simple_xattrs *xattrs, - const void *value, size_t size, int flags) -{ - struct kernfs_iattrs *attr = kernfs_iattrs_noalloc(kn); - atomic_t *sz = &attr->user_xattr_size; - atomic_t *nr = &attr->nr_user_xattrs; - struct simple_xattr *old_xattr; - - old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags); - if (!old_xattr) - return 0; - - if (IS_ERR(old_xattr)) - return PTR_ERR(old_xattr); - - atomic_sub(old_xattr->size, sz); - atomic_dec(nr); - simple_xattr_free(old_xattr); - return 0; -} - static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, @@ -415,6 +362,7 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler, { const char *full_name = xattr_full_name(handler, suffix); struct kernfs_node *kn = inode->i_private; + struct simple_xattrs *xattrs; struct kernfs_iattrs *attrs; if (!(kernfs_root(kn)->flags & KERNFS_ROOT_SUPPORT_USER_XATTR)) @@ -424,13 +372,12 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler, if (!attrs) return -ENOMEM; - if (value) - return kernfs_vfs_user_xattr_add(kn, full_name, &attrs->xattrs, - value, size, flags); - else - return kernfs_vfs_user_xattr_rm(kn, full_name, &attrs->xattrs, - value, size, flags); + xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, 
value, flags); + if (IS_ERR_OR_NULL(xattrs)) + return PTR_ERR(xattrs); + return simple_xattr_set_limited(xattrs, &attrs->xattr_limits, + full_name, value, size, flags); } static const struct xattr_handler kernfs_trusted_xattr_handler = { diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index b1fd9622a5e3..8d8912f50b05 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -26,9 +26,8 @@ struct kernfs_iattrs { struct timespec64 ia_mtime; struct timespec64 ia_ctime; - struct simple_xattrs xattrs; - atomic_t nr_user_xattrs; - atomic_t user_xattr_size; + struct simple_xattrs *xattrs; + struct simple_xattr_limits xattr_limits; }; struct kernfs_root { diff --git a/fs/pidfs.c b/fs/pidfs.c index e3825ee246be..dc82741becb1 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -22,6 +22,7 @@ #include <net/net_namespace.h> #include <linux/coredump.h> #include <linux/rhashtable.h> +#include <linux/llist.h> #include <linux/xattr.h> #include <linux/cookie.h> @@ -31,7 +32,6 @@ #define PIDFS_PID_DEAD ERR_PTR(-ESRCH) static struct kmem_cache *pidfs_attr_cachep __ro_after_init; -static struct kmem_cache *pidfs_xattr_cachep __ro_after_init; static struct path pidfs_root_path = {}; @@ -46,9 +46,8 @@ enum pidfs_attr_mask_bits { PIDFS_ATTR_BIT_COREDUMP = 1, }; -struct pidfs_attr { +struct pidfs_anon_attr { unsigned long attr_mask; - struct simple_xattrs *xattrs; struct /* exit info */ { __u64 cgroupid; __s32 exit_code; @@ -93,6 +92,13 @@ static const struct rhashtable_params pidfs_ino_ht_params = { * inode number and the inode generation number to compare or * use file handles. 
*/ +struct pidfs_attr { + struct simple_xattrs *xattrs; + union { + struct pidfs_anon_attr; + struct llist_node pidfs_llist; + }; +}; #if BITS_PER_LONG == 32 @@ -178,10 +184,30 @@ void pidfs_remove_pid(struct pid *pid) pidfs_ino_ht_params); } +static LLIST_HEAD(pidfs_free_list); + +static void pidfs_free_attr_work(struct work_struct *work) +{ + struct pidfs_attr *attr, *next; + struct llist_node *head; + + head = llist_del_all(&pidfs_free_list); + llist_for_each_entry_safe(attr, next, head, pidfs_llist) { + struct simple_xattrs *xattrs = attr->xattrs; + + if (xattrs) { + simple_xattrs_free(xattrs, NULL); + kfree(xattrs); + } + kfree(attr); + } +} + +static DECLARE_WORK(pidfs_free_work, pidfs_free_attr_work); + void pidfs_free_pid(struct pid *pid) { - struct pidfs_attr *attr __free(kfree) = no_free_ptr(pid->attr); - struct simple_xattrs *xattrs __free(kfree) = NULL; + struct pidfs_attr *attr = pid->attr; /* * Any dentry must've been wiped from the pid by now. @@ -200,9 +226,10 @@ void pidfs_free_pid(struct pid *pid) if (IS_ERR(attr)) return; - xattrs = no_free_ptr(attr->xattrs); - if (xattrs) - simple_xattrs_free(xattrs, NULL); + if (likely(!attr->xattrs)) + kfree(attr); + else if (llist_add(&attr->pidfs_llist, &pidfs_free_list)) + schedule_work(&pidfs_free_work); } #ifdef CONFIG_PROC_FS @@ -1009,7 +1036,7 @@ static int pidfs_xattr_get(const struct xattr_handler *handler, xattrs = READ_ONCE(attr->xattrs); if (!xattrs) - return 0; + return -ENODATA; name = xattr_full_name(handler, suffix); return simple_xattr_get(xattrs, name, value, size); @@ -1029,22 +1056,16 @@ static int pidfs_xattr_set(const struct xattr_handler *handler, /* Ensure we're the only one to set @attr->xattrs. 
*/ WARN_ON_ONCE(!inode_is_locked(inode)); - xattrs = READ_ONCE(attr->xattrs); - if (!xattrs) { - xattrs = kmem_cache_zalloc(pidfs_xattr_cachep, GFP_KERNEL); - if (!xattrs) - return -ENOMEM; - - simple_xattrs_init(xattrs); - smp_store_release(&pid->attr->xattrs, xattrs); - } + xattrs = simple_xattrs_lazy_alloc(&attr->xattrs, value, flags); + if (IS_ERR_OR_NULL(xattrs)) + return PTR_ERR(xattrs); name = xattr_full_name(handler, suffix); old_xattr = simple_xattr_set(xattrs, name, value, size, flags); if (IS_ERR(old_xattr)) return PTR_ERR(old_xattr); - simple_xattr_free(old_xattr); + simple_xattr_free_rcu(old_xattr); return 0; } @@ -1122,11 +1143,6 @@ void __init pidfs_init(void) (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT | SLAB_PANIC), NULL); - pidfs_xattr_cachep = kmem_cache_create("pidfs_xattr_cache", - sizeof(struct simple_xattrs), 0, - (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | - SLAB_ACCOUNT | SLAB_PANIC), NULL); - pidfs_mnt = kern_mount(&pidfs_type); if (IS_ERR(pidfs_mnt)) panic("Failed to mount pidfs pseudo filesystem"); diff --git a/fs/xattr.c b/fs/xattr.c index 3e49e612e1ba..09ecbaaa1660 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -22,6 +22,7 @@ #include <linux/audit.h> #include <linux/vmalloc.h> #include <linux/posix_acl_xattr.h> +#include <linux/rhashtable.h> #include <linux/uaccess.h> @@ -105,6 +106,13 @@ int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode) return 0; } +static inline int xattr_permission_error(int mask) +{ + if (mask & MAY_WRITE) + return -EPERM; + return -ENODATA; +} + /* * Check permissions for extended attribute access. This is a bit complicated * because different namespaces have very different rules. @@ -134,7 +142,7 @@ xattr_permission(struct mnt_idmap *idmap, struct inode *inode, */ if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { if (!capable(CAP_SYS_ADMIN)) - return (mask & MAY_WRITE) ? 
-EPERM : -ENODATA; + return xattr_permission_error(mask); return 0; } @@ -144,12 +152,22 @@ xattr_permission(struct mnt_idmap *idmap, struct inode *inode, * privileged users can write attributes. */ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) - return (mask & MAY_WRITE) ? -EPERM : -ENODATA; - if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && - (mask & MAY_WRITE) && - !inode_owner_or_capable(idmap, inode)) + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + break; + case S_IFDIR: + if (!(inode->i_mode & S_ISVTX)) + break; + if (!(mask & MAY_WRITE)) + break; + if (inode_owner_or_capable(idmap, inode)) + break; return -EPERM; + case S_IFSOCK: + break; + default: + return xattr_permission_error(mask); + } } return inode_permission(idmap, inode, mask); @@ -1197,6 +1215,27 @@ void simple_xattr_free(struct simple_xattr *xattr) kvfree(xattr); } +static void simple_xattr_rcu_free(struct rcu_head *head) +{ + struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu); + + simple_xattr_free(xattr); +} + +/** + * simple_xattr_free_rcu - free an xattr object with RCU delay + * @xattr: the xattr object + * + * Free the xattr object after an RCU grace period. This must be used when + * the xattr was removed from a data structure that concurrent RCU readers + * may still be traversing. Can handle @xattr being NULL. + */ +void simple_xattr_free_rcu(struct simple_xattr *xattr) +{ + if (xattr) + call_rcu(&xattr->rcu, simple_xattr_rcu_free); +} + /** * simple_xattr_alloc - allocate new xattr object * @value: value of the xattr object @@ -1205,65 +1244,58 @@ void simple_xattr_free(struct simple_xattr *xattr) * Allocate a new xattr object and initialize respective members. The caller is * responsible for handling the name of the xattr. * - * Return: On success a new xattr object is returned. On failure NULL is - * returned. 
+ * Return: New xattr object on success, NULL if @value is NULL, ERR_PTR on + * failure. */ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) { struct simple_xattr *new_xattr; size_t len; + if (!value) + return NULL; + /* wrap around? */ len = sizeof(*new_xattr) + size; if (len < sizeof(*new_xattr)) - return NULL; + return ERR_PTR(-ENOMEM); new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT); if (!new_xattr) - return NULL; + return ERR_PTR(-ENOMEM); new_xattr->size = size; memcpy(new_xattr->value, value, size); return new_xattr; } -/** - * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry - * @key: xattr name - * @node: current node - * - * Compare the xattr name with the xattr name attached to @node in the rbtree. - * - * Return: Negative value if continuing left, positive if continuing right, 0 - * if the xattr attached to @node matches @key. - */ -static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node) +static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed) { - const char *xattr_name = key; - const struct simple_xattr *xattr; + const char *name = data; + return jhash(name, strlen(name), seed); +} - xattr = rb_entry(node, struct simple_xattr, rb_node); - return strcmp(xattr->name, xattr_name); +static u32 simple_xattr_obj_hashfn(const void *obj, u32 len, u32 seed) +{ + const struct simple_xattr *xattr = obj; + return jhash(xattr->name, strlen(xattr->name), seed); } -/** - * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes - * @new_node: new node - * @node: current node - * - * Compare the xattr attached to @new_node with the xattr attached to @node. - * - * Return: Negative value if continuing left, positive if continuing right, 0 - * if the xattr attached to @new_node matches the xattr attached to @node. 
- */ -static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node, - const struct rb_node *node) +static int simple_xattr_obj_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) { - struct simple_xattr *xattr; - xattr = rb_entry(new_node, struct simple_xattr, rb_node); - return rbtree_simple_xattr_cmp(xattr->name, node); + const struct simple_xattr *xattr = obj; + return strcmp(xattr->name, arg->key); } +static const struct rhashtable_params simple_xattr_params = { + .head_offset = offsetof(struct simple_xattr, hash_node), + .hashfn = simple_xattr_hashfn, + .obj_hashfn = simple_xattr_obj_hashfn, + .obj_cmpfn = simple_xattr_obj_cmpfn, + .automatic_shrinking = true, +}; + /** * simple_xattr_get - get an xattr object * @xattrs: the header of the xattr object @@ -1282,14 +1314,12 @@ static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node, int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size) { - struct simple_xattr *xattr = NULL; - struct rb_node *rbp; + struct simple_xattr *xattr; int ret = -ENODATA; - read_lock(&xattrs->lock); - rbp = rb_find(name, &xattrs->rb_root, rbtree_simple_xattr_cmp); - if (rbp) { - xattr = rb_entry(rbp, struct simple_xattr, rb_node); + guard(rcu)(); + xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params); + if (xattr) { ret = xattr->size; if (buffer) { if (size < xattr->size) @@ -1298,7 +1328,6 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, memcpy(buffer, xattr->value, xattr->size); } } - read_unlock(&xattrs->lock); return ret; } @@ -1325,6 +1354,11 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For * XATTR_REPLACE we fail as mentioned above. * + * Note: Callers must externally serialize writes. All current callers hold + * the inode lock for write operations. 
The lookup->replace/remove sequence + * is not atomic with respect to the rhashtable's per-bucket locking, but + * is safe because writes are serialized by the caller. + * * Return: On success, the removed or replaced xattr is returned, to be freed * by the caller; or NULL if none. On failure a negative error code is returned. */ @@ -1332,64 +1366,57 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags) { - struct simple_xattr *old_xattr = NULL, *new_xattr = NULL; - struct rb_node *parent = NULL, **rbp; - int err = 0, ret; + struct simple_xattr *old_xattr = NULL; + int err; - /* value == NULL means remove */ - if (value) { - new_xattr = simple_xattr_alloc(value, size); - if (!new_xattr) - return ERR_PTR(-ENOMEM); + CLASS(simple_xattr, new_xattr)(value, size); + if (IS_ERR(new_xattr)) + return new_xattr; + if (new_xattr) { new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT); - if (!new_xattr->name) { - simple_xattr_free(new_xattr); + if (!new_xattr->name) return ERR_PTR(-ENOMEM); - } } - write_lock(&xattrs->lock); - rbp = &xattrs->rb_root.rb_node; - while (*rbp) { - parent = *rbp; - ret = rbtree_simple_xattr_cmp(name, *rbp); - if (ret < 0) - rbp = &(*rbp)->rb_left; - else if (ret > 0) - rbp = &(*rbp)->rb_right; - else - old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node); - if (old_xattr) - break; - } + /* Lookup is safe without RCU here since writes are serialized. */ + old_xattr = rhashtable_lookup_fast(&xattrs->ht, name, + simple_xattr_params); if (old_xattr) { /* Fail if XATTR_CREATE is requested and the xattr exists. 
*/ - if (flags & XATTR_CREATE) { - err = -EEXIST; - goto out_unlock; - } + if (flags & XATTR_CREATE) + return ERR_PTR(-EEXIST); - if (new_xattr) - rb_replace_node(&old_xattr->rb_node, - &new_xattr->rb_node, &xattrs->rb_root); - else - rb_erase(&old_xattr->rb_node, &xattrs->rb_root); + if (new_xattr) { + err = rhashtable_replace_fast(&xattrs->ht, + &old_xattr->hash_node, + &new_xattr->hash_node, + simple_xattr_params); + if (err) + return ERR_PTR(err); + } else { + err = rhashtable_remove_fast(&xattrs->ht, + &old_xattr->hash_node, + simple_xattr_params); + if (err) + return ERR_PTR(err); + } } else { /* Fail if XATTR_REPLACE is requested but no xattr is found. */ - if (flags & XATTR_REPLACE) { - err = -ENODATA; - goto out_unlock; - } + if (flags & XATTR_REPLACE) + return ERR_PTR(-ENODATA); /* * If XATTR_CREATE or no flags are specified together with a * new value simply insert it. */ if (new_xattr) { - rb_link_node(&new_xattr->rb_node, parent, rbp); - rb_insert_color(&new_xattr->rb_node, &xattrs->rb_root); + err = rhashtable_insert_fast(&xattrs->ht, + &new_xattr->hash_node, + simple_xattr_params); + if (err) + return ERR_PTR(err); } /* @@ -1398,12 +1425,73 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, */ } -out_unlock: - write_unlock(&xattrs->lock); - if (!err) - return old_xattr; - simple_xattr_free(new_xattr); - return ERR_PTR(err); + retain_and_null_ptr(new_xattr); + return old_xattr; +} + +static inline void simple_xattr_limits_dec(struct simple_xattr_limits *limits, + size_t size) +{ + atomic_sub(size, &limits->xattr_size); + atomic_dec(&limits->nr_xattrs); +} + +static inline int simple_xattr_limits_inc(struct simple_xattr_limits *limits, + size_t size) +{ + if (atomic_inc_return(&limits->nr_xattrs) > SIMPLE_XATTR_MAX_NR) { + atomic_dec(&limits->nr_xattrs); + return -ENOSPC; + } + + if (atomic_add_return(size, &limits->xattr_size) <= SIMPLE_XATTR_MAX_SIZE) + return 0; + + simple_xattr_limits_dec(limits, size); + return -ENOSPC; +} + 
+/** + * simple_xattr_set_limited - set an xattr with per-inode user.* limits + * @xattrs: the header of the xattr object + * @limits: per-inode limit counters for user.* xattrs + * @name: the name of the xattr to set or remove + * @value: the value to store (NULL to remove) + * @size: the size of @value + * @flags: XATTR_CREATE, XATTR_REPLACE, or 0 + * + * Like simple_xattr_set(), but enforces per-inode count and total value size + * limits for user.* xattrs. Uses speculative pre-increment of the atomic + * counters to avoid races without requiring external locks. + * + * Return: On success zero is returned. On failure a negative error code is + * returned. + */ +int simple_xattr_set_limited(struct simple_xattrs *xattrs, + struct simple_xattr_limits *limits, + const char *name, const void *value, + size_t size, int flags) +{ + struct simple_xattr *old_xattr; + int ret; + + if (value) { + ret = simple_xattr_limits_inc(limits, size); + if (ret) + return ret; + } + + old_xattr = simple_xattr_set(xattrs, name, value, size, flags); + if (IS_ERR(old_xattr)) { + if (value) + simple_xattr_limits_dec(limits, size); + return PTR_ERR(old_xattr); + } + if (old_xattr) { + simple_xattr_limits_dec(limits, old_xattr->size); + simple_xattr_free_rcu(old_xattr); + } + return 0; } static bool xattr_is_trusted(const char *name) @@ -1443,8 +1531,8 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size) { bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); + struct rhashtable_iter iter; struct simple_xattr *xattr; - struct rb_node *rbp; ssize_t remaining_size = size; int err = 0; @@ -1464,9 +1552,19 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, remaining_size -= err; err = 0; - read_lock(&xattrs->lock); - for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) { - xattr = rb_entry(rbp, struct simple_xattr, rb_node); + if (!xattrs) + return size - remaining_size; + + 
rhashtable_walk_enter(&xattrs->ht, &iter); + rhashtable_walk_start(&iter); + + while ((xattr = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(xattr)) { + if (PTR_ERR(xattr) == -EAGAIN) + continue; + err = PTR_ERR(xattr); + break; + } /* skip "trusted." attributes for unprivileged callers */ if (!trusted && xattr_is_trusted(xattr->name)) @@ -1480,25 +1578,11 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, if (err) break; } - read_unlock(&xattrs->lock); - return err ? err : size - remaining_size; -} + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); -/** - * rbtree_simple_xattr_less - compare two xattr rbtree nodes - * @new_node: new node - * @node: current node - * - * Compare the xattr attached to @new_node with the xattr attached to @node. - * Note that this function technically tolerates duplicate entries. - * - * Return: True if insertion point in the rbtree is found. - */ -static bool rbtree_simple_xattr_less(struct rb_node *new_node, - const struct rb_node *node) -{ - return rbtree_simple_xattr_node_cmp(new_node, node) < 0; + return err ? err : size - remaining_size; } /** @@ -1509,25 +1593,100 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node, * Add an xattr object to @xattrs. This assumes no replacement or removal * of matching xattrs is wanted. Should only be called during inode * initialization when a few distinct initial xattrs are supposed to be set. + * + * Return: On success zero is returned. On failure a negative error code is + * returned. 
*/ -void simple_xattr_add(struct simple_xattrs *xattrs, - struct simple_xattr *new_xattr) +int simple_xattr_add(struct simple_xattrs *xattrs, + struct simple_xattr *new_xattr) { - write_lock(&xattrs->lock); - rb_add(&new_xattr->rb_node, &xattrs->rb_root, rbtree_simple_xattr_less); - write_unlock(&xattrs->lock); + return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node, + simple_xattr_params); } /** * simple_xattrs_init - initialize new xattr header * @xattrs: header to initialize * - * Initialize relevant fields of a an xattr header. + * Initialize the rhashtable used to store xattr objects. + * + * Return: On success zero is returned. On failure a negative error code is + * returned. + */ +int simple_xattrs_init(struct simple_xattrs *xattrs) +{ + return rhashtable_init(&xattrs->ht, &simple_xattr_params); +} + +/** + * simple_xattrs_alloc - allocate and initialize a new xattr header + * + * Dynamically allocate a simple_xattrs header and initialize the + * underlying rhashtable. This is intended for consumers that want + * to lazily allocate xattr storage only when the first xattr is set, + * avoiding the per-inode rhashtable overhead when no xattrs are used. + * + * Return: On success a new simple_xattrs is returned. On failure an + * ERR_PTR is returned. + */ +struct simple_xattrs *simple_xattrs_alloc(void) +{ + struct simple_xattrs *xattrs __free(kfree) = NULL; + int ret; + + xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL); + if (!xattrs) + return ERR_PTR(-ENOMEM); + + ret = simple_xattrs_init(xattrs); + if (ret) + return ERR_PTR(ret); + + return no_free_ptr(xattrs); +} + +/** + * simple_xattrs_lazy_alloc - get or allocate xattrs for a set operation + * @xattrsp: pointer to the xattrs pointer (may point to NULL) + * @value: value being set (NULL means remove) + * @flags: xattr set flags + * + * For lazily-allocated xattrs on the write path. If no xattrs exist yet + * and this is a remove operation, returns the appropriate result without + * allocating. 
Otherwise ensures xattrs is allocated and published with + * store-release semantics. + * + * Return: On success a valid pointer to the xattrs is returned. On + * failure or early-exit an ERR_PTR or NULL is returned. Callers should + * check with IS_ERR_OR_NULL() and propagate with PTR_ERR() which + * correctly returns 0 for the NULL no-op case. */ -void simple_xattrs_init(struct simple_xattrs *xattrs) +struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp, + const void *value, int flags) { - xattrs->rb_root = RB_ROOT; - rwlock_init(&xattrs->lock); + struct simple_xattrs *xattrs; + + xattrs = READ_ONCE(*xattrsp); + if (xattrs) + return xattrs; + + if (!value) + return (flags & XATTR_REPLACE) ? ERR_PTR(-ENODATA) : NULL; + + xattrs = simple_xattrs_alloc(); + if (!IS_ERR(xattrs)) + smp_store_release(xattrsp, xattrs); + return xattrs; +} + +static void simple_xattr_ht_free(void *ptr, void *arg) +{ + struct simple_xattr *xattr = ptr; + size_t *freed_space = arg; + + if (freed_space) + *freed_space += simple_xattr_space(xattr->name, xattr->size); + simple_xattr_free(xattr); } /** @@ -1540,22 +1699,10 @@ void simple_xattrs_init(struct simple_xattrs *xattrs) */ void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space) { - struct rb_node *rbp; + might_sleep(); if (freed_space) *freed_space = 0; - rbp = rb_first(&xattrs->rb_root); - while (rbp) { - struct simple_xattr *xattr; - struct rb_node *rbp_next; - - rbp_next = rb_next(rbp); - xattr = rb_entry(rbp, struct simple_xattr, rb_node); - rb_erase(&xattr->rb_node, &xattrs->rb_root); - if (freed_space) - *freed_space += simple_xattr_space(xattr->name, - xattr->size); - simple_xattr_free(xattr); - rbp = rbp_next; - } + rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free, + freed_space); } diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 4f0ab88a1b31..e21b2f7f4159 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -100,8 +100,6 @@ enum 
kernfs_node_type { #define KERNFS_TYPE_MASK 0x000f #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK -#define KERNFS_MAX_USER_XATTRS 128 -#define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10) enum kernfs_node_flag { KERNFS_ACTIVATED = 0x0010, diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index a8273b32e041..f6a2d3402d76 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -48,7 +48,7 @@ struct shmem_inode_info { }; struct timespec64 i_crtime; /* file creation time */ struct shared_policy policy; /* NUMA memory alloc policy */ - struct simple_xattrs xattrs; /* list of xattrs */ + struct simple_xattrs *xattrs; /* list of xattrs */ pgoff_t fallocend; /* highest fallocate endindex */ unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */ atomic_t stop_eviction; /* hold when working on inode */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 296b5ee5c979..8b6601367eae 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/mm.h> +#include <linux/rhashtable-types.h> #include <linux/user_namespace.h> #include <uapi/linux/xattr.h> @@ -106,31 +107,65 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler) } struct simple_xattrs { - struct rb_root rb_root; - rwlock_t lock; + struct rhashtable ht; }; struct simple_xattr { - struct rb_node rb_node; + struct rhash_head hash_node; + struct rcu_head rcu; char *name; size_t size; char value[] __counted_by(size); }; -void simple_xattrs_init(struct simple_xattrs *xattrs); +#define SIMPLE_XATTR_MAX_NR 128 +#define SIMPLE_XATTR_MAX_SIZE (128 << 10) + +struct simple_xattr_limits { + atomic_t nr_xattrs; /* current user.* xattr count */ + atomic_t xattr_size; /* current total user.* value bytes */ +}; + +static inline void simple_xattr_limits_init(struct simple_xattr_limits *limits) +{ + atomic_set(&limits->nr_xattrs, 0); + atomic_set(&limits->xattr_size, 0); +} + +int 
simple_xattrs_init(struct simple_xattrs *xattrs); +struct simple_xattrs *simple_xattrs_alloc(void); +struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp, + const void *value, int flags); void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space); size_t simple_xattr_space(const char *name, size_t size); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); void simple_xattr_free(struct simple_xattr *xattr); +void simple_xattr_free_rcu(struct simple_xattr *xattr); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags); +int simple_xattr_set_limited(struct simple_xattrs *xattrs, + struct simple_xattr_limits *limits, + const char *name, const void *value, + size_t size, int flags); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); -void simple_xattr_add(struct simple_xattrs *xattrs, - struct simple_xattr *new_xattr); +int simple_xattr_add(struct simple_xattrs *xattrs, + struct simple_xattr *new_xattr); int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name); +DEFINE_CLASS(simple_xattr, + struct simple_xattr *, + if (!IS_ERR_OR_NULL(_T)) simple_xattr_free(_T), + simple_xattr_alloc(value, size), + const void *value, size_t size) + +DEFINE_CLASS(simple_xattrs, + struct simple_xattrs *, + if (!IS_ERR_OR_NULL(_T)) { simple_xattrs_free(_T, NULL); kfree(_T); }, + simple_xattrs_alloc(), + void) + #endif /* _LINUX_XATTR_H */ diff --git a/mm/shmem.c b/mm/shmem.c index b40f3cd48961..0b0e577e880a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1425,7 +1425,10 @@ static void shmem_evict_inode(struct inode *inode) } } - simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL); + if (info->xattrs) { + simple_xattrs_free(info->xattrs, sbinfo->max_inodes ? 
&freed : NULL); + kfree(info->xattrs); + } shmem_free_inode(inode->i_sb, freed); WARN_ON(inode->i_blocks); clear_inode(inode); @@ -3101,7 +3104,6 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap, shmem_set_inode_flags(inode, info->fsflags, NULL); INIT_LIST_HEAD(&info->shrinklist); INIT_LIST_HEAD(&info->swaplist); - simple_xattrs_init(&info->xattrs); cache_no_acl(inode); if (sbinfo->noswap) mapping_set_unevictable(inode->i_mapping); @@ -4255,10 +4257,13 @@ static int shmem_initxattrs(struct inode *inode, struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); const struct xattr *xattr; - struct simple_xattr *new_xattr; size_t ispace = 0; size_t len; + CLASS(simple_xattrs, xattrs)(); + if (IS_ERR(xattrs)) + return PTR_ERR(xattrs); + if (sbinfo->max_inodes) { for (xattr = xattr_array; xattr->name != NULL; xattr++) { ispace += simple_xattr_space(xattr->name, @@ -4277,24 +4282,24 @@ static int shmem_initxattrs(struct inode *inode, } for (xattr = xattr_array; xattr->name != NULL; xattr++) { - new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); - if (!new_xattr) + CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len); + if (IS_ERR(new_xattr)) break; len = strlen(xattr->name) + 1; new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, GFP_KERNEL_ACCOUNT); - if (!new_xattr->name) { - kvfree(new_xattr); + if (!new_xattr->name) break; - } memcpy(new_xattr->name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN, xattr->name, len); - simple_xattr_add(&info->xattrs, new_xattr); + if (simple_xattr_add(xattrs, new_xattr)) + break; + retain_and_null_ptr(new_xattr); } if (xattr->name != NULL) { @@ -4303,10 +4308,10 @@ static int shmem_initxattrs(struct inode *inode, sbinfo->free_ispace += ispace; raw_spin_unlock(&sbinfo->stat_lock); } - simple_xattrs_free(&info->xattrs, NULL); return -ENOMEM; } + smp_store_release(&info->xattrs, 
no_free_ptr(xattrs)); return 0; } @@ -4315,9 +4320,14 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler, const char *name, void *buffer, size_t size) { struct shmem_inode_info *info = SHMEM_I(inode); + struct simple_xattrs *xattrs; + + xattrs = READ_ONCE(info->xattrs); + if (!xattrs) + return -ENODATA; name = xattr_full_name(handler, name); - return simple_xattr_get(&info->xattrs, name, buffer, size); + return simple_xattr_get(xattrs, name, buffer, size); } static int shmem_xattr_handler_set(const struct xattr_handler *handler, @@ -4328,10 +4338,16 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler, { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct simple_xattrs *xattrs; struct simple_xattr *old_xattr; size_t ispace = 0; name = xattr_full_name(handler, name); + + xattrs = simple_xattrs_lazy_alloc(&info->xattrs, value, flags); + if (IS_ERR_OR_NULL(xattrs)) + return PTR_ERR(xattrs); + if (value && sbinfo->max_inodes) { ispace = simple_xattr_space(name, size); raw_spin_lock(&sbinfo->stat_lock); @@ -4344,13 +4360,13 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler, return -ENOSPC; } - old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags); + old_xattr = simple_xattr_set(xattrs, name, value, size, flags); if (!IS_ERR(old_xattr)) { ispace = 0; if (old_xattr && sbinfo->max_inodes) ispace = simple_xattr_space(old_xattr->name, old_xattr->size); - simple_xattr_free(old_xattr); + simple_xattr_free_rcu(old_xattr); old_xattr = NULL; inode_set_ctime_current(inode); inode_inc_iversion(inode); @@ -4391,7 +4407,9 @@ static const struct xattr_handler * const shmem_xattr_handlers[] = { static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) { struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); - return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size); + + return 
simple_xattr_list(d_inode(dentry), READ_ONCE(info->xattrs), + buffer, size); } #endif /* CONFIG_TMPFS_XATTR */ diff --git a/net/socket.c b/net/socket.c index 05952188127f..d25be67e4b84 100644 --- a/net/socket.c +++ b/net/socket.c @@ -315,45 +315,70 @@ efault_end: static struct kmem_cache *sock_inode_cachep __ro_after_init; +struct sockfs_inode { + struct simple_xattrs *xattrs; + struct simple_xattr_limits xattr_limits; + struct socket_alloc; +}; + +static struct sockfs_inode *SOCKFS_I(struct inode *inode) +{ + return container_of(inode, struct sockfs_inode, vfs_inode); +} + static struct inode *sock_alloc_inode(struct super_block *sb) { - struct socket_alloc *ei; + struct sockfs_inode *si; - ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL); - if (!ei) + si = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL); + if (!si) return NULL; - init_waitqueue_head(&ei->socket.wq.wait); - ei->socket.wq.fasync_list = NULL; - ei->socket.wq.flags = 0; + si->xattrs = NULL; + simple_xattr_limits_init(&si->xattr_limits); + + init_waitqueue_head(&si->socket.wq.wait); + si->socket.wq.fasync_list = NULL; + si->socket.wq.flags = 0; + + si->socket.state = SS_UNCONNECTED; + si->socket.flags = 0; + si->socket.ops = NULL; + si->socket.sk = NULL; + si->socket.file = NULL; - ei->socket.state = SS_UNCONNECTED; - ei->socket.flags = 0; - ei->socket.ops = NULL; - ei->socket.sk = NULL; - ei->socket.file = NULL; + return &si->vfs_inode; +} + +static void sock_evict_inode(struct inode *inode) +{ + struct sockfs_inode *si = SOCKFS_I(inode); + struct simple_xattrs *xattrs = si->xattrs; - return &ei->vfs_inode; + if (xattrs) { + simple_xattrs_free(xattrs, NULL); + kfree(xattrs); + } + clear_inode(inode); } static void sock_free_inode(struct inode *inode) { - struct socket_alloc *ei; + struct sockfs_inode *si = SOCKFS_I(inode); - ei = container_of(inode, struct socket_alloc, vfs_inode); - kmem_cache_free(sock_inode_cachep, ei); + kmem_cache_free(sock_inode_cachep, si); } static void init_once(void 
*foo) { - struct socket_alloc *ei = (struct socket_alloc *)foo; + struct sockfs_inode *si = (struct sockfs_inode *)foo; - inode_init_once(&ei->vfs_inode); + inode_init_once(&si->vfs_inode); } static void init_inodecache(void) { sock_inode_cachep = kmem_cache_create("sock_inode_cache", - sizeof(struct socket_alloc), + sizeof(struct sockfs_inode), 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | @@ -365,6 +390,7 @@ static void init_inodecache(void) static const struct super_operations sockfs_ops = { .alloc_inode = sock_alloc_inode, .free_inode = sock_free_inode, + .evict_inode = sock_evict_inode, .statfs = simple_statfs, }; @@ -417,9 +443,48 @@ static const struct xattr_handler sockfs_security_xattr_handler = { .set = sockfs_security_xattr_set, }; +static int sockfs_user_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *suffix, void *value, size_t size) +{ + const char *name = xattr_full_name(handler, suffix); + struct simple_xattrs *xattrs; + + xattrs = READ_ONCE(SOCKFS_I(inode)->xattrs); + if (!xattrs) + return -ENODATA; + + return simple_xattr_get(xattrs, name, value, size); +} + +static int sockfs_user_xattr_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *inode, + const char *suffix, const void *value, + size_t size, int flags) +{ + const char *name = xattr_full_name(handler, suffix); + struct sockfs_inode *si = SOCKFS_I(inode); + struct simple_xattrs *xattrs; + + xattrs = simple_xattrs_lazy_alloc(&si->xattrs, value, flags); + if (IS_ERR_OR_NULL(xattrs)) + return PTR_ERR(xattrs); + + return simple_xattr_set_limited(xattrs, &si->xattr_limits, + name, value, size, flags); +} + +static const struct xattr_handler sockfs_user_xattr_handler = { + .prefix = XATTR_USER_PREFIX, + .get = sockfs_user_xattr_get, + .set = sockfs_user_xattr_set, +}; + static const struct xattr_handler * const sockfs_xattr_handlers[] = { &sockfs_xattr_handler, 
&sockfs_security_xattr_handler, + &sockfs_user_xattr_handler, NULL }; @@ -572,26 +637,26 @@ EXPORT_SYMBOL(sockfd_lookup); static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, size_t size) { - ssize_t len; - ssize_t used = 0; + struct sockfs_inode *si = SOCKFS_I(d_inode(dentry)); + ssize_t len, used; - len = security_inode_listsecurity(d_inode(dentry), buffer, size); + len = simple_xattr_list(d_inode(dentry), READ_ONCE(si->xattrs), + buffer, size); if (len < 0) return len; - used += len; + + used = len; if (buffer) { - if (size < used) - return -ERANGE; buffer += len; + size -= len; } - len = (XATTR_NAME_SOCKPROTONAME_LEN + 1); + len = XATTR_NAME_SOCKPROTONAME_LEN + 1; used += len; if (buffer) { - if (size < used) + if (size < len) return -ERANGE; memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len); - buffer += len; } return used; diff --git a/tools/testing/selftests/filesystems/xattr/.gitignore b/tools/testing/selftests/filesystems/xattr/.gitignore new file mode 100644 index 000000000000..092d14094c0f --- /dev/null +++ b/tools/testing/selftests/filesystems/xattr/.gitignore @@ -0,0 +1,3 @@ +xattr_socket_test +xattr_sockfs_test +xattr_socket_types_test diff --git a/tools/testing/selftests/filesystems/xattr/Makefile b/tools/testing/selftests/filesystems/xattr/Makefile new file mode 100644 index 000000000000..95364ffb10e9 --- /dev/null +++ b/tools/testing/selftests/filesystems/xattr/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += $(KHDR_INCLUDES) +TEST_GEN_PROGS := xattr_socket_test xattr_sockfs_test xattr_socket_types_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c b/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c new file mode 100644 index 000000000000..fac0a4c6bc05 --- /dev/null +++ b/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c @@ -0,0 +1,470 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2026 Christian Brauner <brauner@kernel.org> 
+/* + * Test extended attributes on path-based Unix domain sockets. + * + * Path-based Unix domain sockets are bound to a filesystem path and their + * inodes live on the underlying filesystem (e.g. tmpfs). These tests verify + * that user.* and trusted.* xattr operations work correctly on them using + * path-based syscalls (setxattr, getxattr, etc.). + * + * Covers SOCK_STREAM, SOCK_DGRAM, and SOCK_SEQPACKET socket types. + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/xattr.h> +#include <unistd.h> + +#include "../../kselftest_harness.h" + +#define TEST_XATTR_NAME "user.testattr" +#define TEST_XATTR_VALUE "testvalue" +#define TEST_XATTR_VALUE2 "newvalue" + +/* + * Fixture for path-based Unix domain socket tests. + * Creates a socket of the variant's type bound to a path in /tmp (typically tmpfs). + */ +FIXTURE(xattr_socket) +{ + char socket_path[PATH_MAX]; + int sockfd; +}; + +FIXTURE_VARIANT(xattr_socket) +{ + int sock_type; + const char *name; +}; + +FIXTURE_VARIANT_ADD(xattr_socket, stream) { + .sock_type = SOCK_STREAM, + .name = "stream", +}; + +FIXTURE_VARIANT_ADD(xattr_socket, dgram) { + .sock_type = SOCK_DGRAM, + .name = "dgram", +}; + +FIXTURE_VARIANT_ADD(xattr_socket, seqpacket) { + .sock_type = SOCK_SEQPACKET, + .name = "seqpacket", +}; + +FIXTURE_SETUP(xattr_socket) +{ + struct sockaddr_un addr; + int ret; + + self->sockfd = -1; + + snprintf(self->socket_path, sizeof(self->socket_path), + "/tmp/xattr_socket_test_%s.%d", variant->name, getpid()); + unlink(self->socket_path); + + self->sockfd = socket(AF_UNIX, variant->sock_type, 0); + ASSERT_GE(self->sockfd, 0) { + TH_LOG("Failed to create socket: %s", strerror(errno)); + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, self->socket_path, sizeof(addr.sun_path) - 1); + + ret =
bind(self->sockfd, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(ret, 0) { + TH_LOG("Failed to bind socket to %s: %s", + self->socket_path, strerror(errno)); + } +} + +FIXTURE_TEARDOWN(xattr_socket) +{ + if (self->sockfd >= 0) + close(self->sockfd); + unlink(self->socket_path); +} + +TEST_F(xattr_socket, set_user_xattr) +{ + int ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr failed: %s (errno=%d)", strerror(errno), errno); + } +} + +TEST_F(xattr_socket, get_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr failed: %s", strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) { + TH_LOG("getxattr returned %zd, expected %zu: %s", + ret, strlen(TEST_XATTR_VALUE), strerror(errno)); + } + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +TEST_F(xattr_socket, list_user_xattr) +{ + char list[1024]; + ssize_t ret; + bool found = false; + char *ptr; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr failed: %s", strerror(errno)); + } + + memset(list, 0, sizeof(list)); + ret = listxattr(self->socket_path, list, sizeof(list)); + ASSERT_GT(ret, 0) { + TH_LOG("listxattr failed: %s", strerror(errno)); + } + + for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) { + if (strcmp(ptr, TEST_XATTR_NAME) == 0) { + found = true; + break; + } + } + ASSERT_TRUE(found) { + TH_LOG("xattr %s not found in list", TEST_XATTR_NAME); + } +} + +TEST_F(xattr_socket, remove_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + 
ASSERT_EQ(ret, 0) { + TH_LOG("setxattr failed: %s", strerror(errno)); + } + + ret = removexattr(self->socket_path, TEST_XATTR_NAME); + ASSERT_EQ(ret, 0) { + TH_LOG("removexattr failed: %s", strerror(errno)); + } + + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA) { + TH_LOG("Expected ENODATA, got %s", strerror(errno)); + } +} + +/* + * Test that xattrs persist after the socket fd is closed. + * The xattr is on the filesystem inode, not the socket fd. + */ +TEST_F(xattr_socket, xattr_persistence) +{ + char buf[256]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr failed: %s", strerror(errno)); + } + + close(self->sockfd); + self->sockfd = -1; + + memset(buf, 0, sizeof(buf)); + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) { + TH_LOG("getxattr after close failed: %s", strerror(errno)); + } + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +TEST_F(xattr_socket, update_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0); + ASSERT_EQ(ret, 0); + + memset(buf, 0, sizeof(buf)); + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE2); +} + +TEST_F(xattr_socket, xattr_create_flag) +{ + int ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), XATTR_CREATE); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EEXIST); +} +
+TEST_F(xattr_socket, xattr_replace_flag) +{ + int ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), XATTR_REPLACE); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_socket, multiple_xattrs) +{ + char buf[256]; + ssize_t ret; + int i; + char name[64], value[64]; + const int num_xattrs = 5; + + for (i = 0; i < num_xattrs; i++) { + snprintf(name, sizeof(name), "user.test%d", i); + snprintf(value, sizeof(value), "value%d", i); + ret = setxattr(self->socket_path, name, value, strlen(value), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr %s failed: %s", name, strerror(errno)); + } + } + + for (i = 0; i < num_xattrs; i++) { + snprintf(name, sizeof(name), "user.test%d", i); + snprintf(value, sizeof(value), "value%d", i); + memset(buf, 0, sizeof(buf)); + ret = getxattr(self->socket_path, name, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(value)); + ASSERT_STREQ(buf, value); + } +} + +TEST_F(xattr_socket, xattr_empty_value) +{ + char buf[256]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, "", 0, 0); + ASSERT_EQ(ret, 0); + + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, 0); +} + +TEST_F(xattr_socket, xattr_get_size) +{ + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = getxattr(self->socket_path, TEST_XATTR_NAME, NULL, 0); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); +} + +TEST_F(xattr_socket, xattr_buffer_too_small) +{ + char buf[2]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ERANGE); +} + +TEST_F(xattr_socket, xattr_nonexistent) +{ + char buf[256]; + ssize_t ret; + + ret = getxattr(self->socket_path, 
"user.nonexistent", buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_socket, remove_nonexistent_xattr) +{ + int ret; + + ret = removexattr(self->socket_path, "user.nonexistent"); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_socket, large_xattr_value) +{ + char large_value[4096]; + char read_buf[4096]; + ssize_t ret; + + memset(large_value, 'A', sizeof(large_value)); + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + large_value, sizeof(large_value), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr with large value failed: %s", strerror(errno)); + } + + memset(read_buf, 0, sizeof(read_buf)); + ret = getxattr(self->socket_path, TEST_XATTR_NAME, + read_buf, sizeof(read_buf)); + ASSERT_EQ(ret, (ssize_t)sizeof(large_value)); + ASSERT_EQ(memcmp(large_value, read_buf, sizeof(large_value)), 0); +} + +/* + * Test lsetxattr/lgetxattr (don't follow symlinks). + * Socket files aren't symlinks, so this should work the same. + */ +TEST_F(xattr_socket, lsetxattr_lgetxattr) +{ + char buf[256]; + ssize_t ret; + + ret = lsetxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("lsetxattr failed: %s", strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + ret = lgetxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +/* + * Fixture for trusted.* xattr tests. + * These require CAP_SYS_ADMIN. 
+ */ +FIXTURE(xattr_socket_trusted) +{ + char socket_path[PATH_MAX]; + int sockfd; +}; + +FIXTURE_VARIANT(xattr_socket_trusted) +{ + int sock_type; + const char *name; +}; + +FIXTURE_VARIANT_ADD(xattr_socket_trusted, stream) { + .sock_type = SOCK_STREAM, + .name = "stream", +}; + +FIXTURE_VARIANT_ADD(xattr_socket_trusted, dgram) { + .sock_type = SOCK_DGRAM, + .name = "dgram", +}; + +FIXTURE_VARIANT_ADD(xattr_socket_trusted, seqpacket) { + .sock_type = SOCK_SEQPACKET, + .name = "seqpacket", +}; + +FIXTURE_SETUP(xattr_socket_trusted) +{ + struct sockaddr_un addr; + int ret; + + self->sockfd = -1; + + snprintf(self->socket_path, sizeof(self->socket_path), + "/tmp/xattr_socket_trusted_%s.%d", variant->name, getpid()); + unlink(self->socket_path); + + self->sockfd = socket(AF_UNIX, variant->sock_type, 0); + ASSERT_GE(self->sockfd, 0); + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, self->socket_path, sizeof(addr.sun_path) - 1); + + ret = bind(self->sockfd, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(xattr_socket_trusted) +{ + if (self->sockfd >= 0) + close(self->sockfd); + unlink(self->socket_path); +} + +TEST_F(xattr_socket_trusted, set_trusted_xattr) +{ + char buf[256]; + ssize_t len; + int ret; + + ret = setxattr(self->socket_path, "trusted.testattr", + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + if (ret == -1 && errno == EPERM) + SKIP(return, "Need CAP_SYS_ADMIN for trusted.* xattrs"); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr trusted.testattr failed: %s", strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + len = getxattr(self->socket_path, "trusted.testattr", + buf, sizeof(buf)); + ASSERT_EQ(len, (ssize_t)strlen(TEST_XATTR_VALUE)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +TEST_F(xattr_socket_trusted, get_trusted_xattr_unprivileged) +{ + char buf[256]; + ssize_t ret; + + ret = getxattr(self->socket_path, "trusted.testattr", buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + 
ASSERT_TRUE(errno == ENODATA || errno == EPERM) { + TH_LOG("Expected ENODATA or EPERM, got %s", strerror(errno)); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c b/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c new file mode 100644 index 000000000000..bfabe91b2ed1 --- /dev/null +++ b/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2026 Christian Brauner <brauner@kernel.org> +/* + * Test user.* xattrs on various socket families. + * + * All socket types use sockfs for their inodes, so user.* xattrs should + * work on any socket regardless of address family. This tests AF_INET, + * AF_INET6, AF_NETLINK, AF_PACKET, and abstract namespace AF_UNIX sockets. + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/xattr.h> +#include <linux/netlink.h> +#include <unistd.h> + +#include "../../kselftest_harness.h" + +#define TEST_XATTR_NAME "user.testattr" +#define TEST_XATTR_VALUE "testvalue" + +FIXTURE(xattr_socket_types) +{ + int sockfd; +}; + +FIXTURE_VARIANT(xattr_socket_types) +{ + int family; + int type; + int protocol; +}; + +FIXTURE_VARIANT_ADD(xattr_socket_types, inet) { + .family = AF_INET, + .type = SOCK_STREAM, + .protocol = 0, +}; + +FIXTURE_VARIANT_ADD(xattr_socket_types, inet6) { + .family = AF_INET6, + .type = SOCK_STREAM, + .protocol = 0, +}; + +FIXTURE_VARIANT_ADD(xattr_socket_types, netlink) { + .family = AF_NETLINK, + .type = SOCK_RAW, + .protocol = NETLINK_USERSOCK, +}; + +FIXTURE_VARIANT_ADD(xattr_socket_types, packet) { + .family = AF_PACKET, + .type = SOCK_DGRAM, + .protocol = 0, +}; + +FIXTURE_SETUP(xattr_socket_types) +{ + self->sockfd = socket(variant->family, variant->type, + variant->protocol); + if (self->sockfd < 0 
&& + (errno == EAFNOSUPPORT || errno == EPERM || errno == EACCES)) + SKIP(return, "socket(%d, %d, %d) not available: %s", + variant->family, variant->type, variant->protocol, + strerror(errno)); + ASSERT_GE(self->sockfd, 0) { + TH_LOG("Failed to create socket(%d, %d, %d): %s", + variant->family, variant->type, variant->protocol, + strerror(errno)); + } +} + +FIXTURE_TEARDOWN(xattr_socket_types) +{ + if (self->sockfd >= 0) + close(self->sockfd); +} + +TEST_F(xattr_socket_types, set_get_list_remove) +{ + char buf[256], list[4096], *ptr; + ssize_t ret; + bool found; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("fsetxattr failed: %s", strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE); + + memset(list, 0, sizeof(list)); + ret = flistxattr(self->sockfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + found = false; + for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) { + if (strcmp(ptr, TEST_XATTR_NAME) == 0) + found = true; + } + ASSERT_TRUE(found); + + ret = fremovexattr(self->sockfd, TEST_XATTR_NAME); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +/* + * Test abstract namespace AF_UNIX socket. + * Abstract sockets don't have a filesystem path; their inodes live in + * sockfs so user.* xattrs should work via fsetxattr/fgetxattr. 
+ */ +FIXTURE(xattr_abstract) +{ + int sockfd; +}; + +FIXTURE_SETUP(xattr_abstract) +{ + struct sockaddr_un addr; + char name[64]; + int ret, len; + + self->sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(self->sockfd, 0); + + len = snprintf(name, sizeof(name), "xattr_test_abstract_%d", getpid()); + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + addr.sun_path[0] = '\0'; + memcpy(&addr.sun_path[1], name, len); + + ret = bind(self->sockfd, (struct sockaddr *)&addr, + offsetof(struct sockaddr_un, sun_path) + 1 + len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(xattr_abstract) +{ + if (self->sockfd >= 0) + close(self->sockfd); +} + +TEST_F(xattr_abstract, set_get) +{ + char buf[256]; + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("fsetxattr on abstract socket failed: %s", + strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c b/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c new file mode 100644 index 000000000000..b4824b01a86d --- /dev/null +++ b/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2026 Christian Brauner <brauner@kernel.org> +/* + * Test extended attributes on sockfs sockets. + * + * Sockets created via socket() have their inodes in sockfs, which supports + * user.* xattrs with per-inode limits: up to 128 xattrs and 128KB total + * value size. These tests verify xattr operations via fsetxattr/fgetxattr/ + * flistxattr/fremovexattr on the socket fd, as well as limit enforcement. 
+ */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/xattr.h> +#include <unistd.h> + +#include "../../kselftest_harness.h" + +#define TEST_XATTR_NAME "user.testattr" +#define TEST_XATTR_VALUE "testvalue" +#define TEST_XATTR_VALUE2 "newvalue" + +/* Per-inode limits for user.* xattrs on sockfs (from include/linux/xattr.h) */ +#define SIMPLE_XATTR_MAX_NR 128 +#define SIMPLE_XATTR_MAX_SIZE (128 << 10) /* 128 KB */ + +#ifndef XATTR_SIZE_MAX +#define XATTR_SIZE_MAX 65536 +#endif + +/* + * Fixture for sockfs socket xattr tests. + * Creates an AF_UNIX socket (lives in sockfs, not bound to any path). + */ +FIXTURE(xattr_sockfs) +{ + int sockfd; +}; + +FIXTURE_SETUP(xattr_sockfs) +{ + self->sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(self->sockfd, 0) { + TH_LOG("Failed to create socket: %s", strerror(errno)); + } +} + +FIXTURE_TEARDOWN(xattr_sockfs) +{ + if (self->sockfd >= 0) + close(self->sockfd); +} + +TEST_F(xattr_sockfs, set_get_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("fsetxattr failed: %s", strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) { + TH_LOG("fgetxattr returned %zd: %s", ret, strerror(errno)); + } + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +/* + * Test listing xattrs on a sockfs socket. + * Should include user.* xattrs and system.sockprotoname. 
+ */ +TEST_F(xattr_sockfs, list_user_xattr) +{ + char list[4096]; + ssize_t ret; + char *ptr; + bool found_user = false; + bool found_proto = false; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("fsetxattr failed: %s", strerror(errno)); + } + + memset(list, 0, sizeof(list)); + ret = flistxattr(self->sockfd, list, sizeof(list)); + ASSERT_GT(ret, 0) { + TH_LOG("flistxattr failed: %s", strerror(errno)); + } + + for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) { + if (strcmp(ptr, TEST_XATTR_NAME) == 0) + found_user = true; + if (strcmp(ptr, "system.sockprotoname") == 0) + found_proto = true; + } + ASSERT_TRUE(found_user) { + TH_LOG("user xattr not found in list"); + } + ASSERT_TRUE(found_proto) { + TH_LOG("system.sockprotoname not found in list"); + } +} + +TEST_F(xattr_sockfs, remove_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = fremovexattr(self->sockfd, TEST_XATTR_NAME); + ASSERT_EQ(ret, 0) { + TH_LOG("fremovexattr failed: %s", strerror(errno)); + } + + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_sockfs, update_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0); + ASSERT_EQ(ret, 0); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE2); +} + +TEST_F(xattr_sockfs, xattr_create_flag) +{ + int ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + 
ASSERT_EQ(ret, 0); + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), + XATTR_CREATE); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EEXIST); +} + +TEST_F(xattr_sockfs, xattr_replace_flag) +{ + int ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), + XATTR_REPLACE); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_sockfs, get_nonexistent) +{ + char buf[256]; + ssize_t ret; + + ret = fgetxattr(self->sockfd, "user.nonexistent", buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_sockfs, empty_value) +{ + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, "", 0, 0); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, NULL, 0); + ASSERT_EQ(ret, 0); +} + +TEST_F(xattr_sockfs, get_size) +{ + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, NULL, 0); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); +} + +TEST_F(xattr_sockfs, buffer_too_small) +{ + char buf[2]; + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ERANGE); +} + +/* + * Test maximum number of user.* xattrs per socket. + * The kernel enforces SIMPLE_XATTR_MAX_NR (128), so the 129th should + * fail with ENOSPC. 
+ */ +TEST_F(xattr_sockfs, max_nr_xattrs) +{ + char name[32]; + int i, ret; + + for (i = 0; i < SIMPLE_XATTR_MAX_NR; i++) { + snprintf(name, sizeof(name), "user.test%03d", i); + ret = fsetxattr(self->sockfd, name, "v", 1, 0); + ASSERT_EQ(ret, 0) { + TH_LOG("fsetxattr %s failed at i=%d: %s", + name, i, strerror(errno)); + } + } + + ret = fsetxattr(self->sockfd, "user.overflow", "v", 1, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENOSPC) { + TH_LOG("Expected ENOSPC for xattr %d, got %s", + SIMPLE_XATTR_MAX_NR + 1, strerror(errno)); + } +} + +/* + * Test maximum total value size for user.* xattrs. + * The kernel enforces SIMPLE_XATTR_MAX_SIZE (128KB). Individual xattr + * values are limited to XATTR_SIZE_MAX (64KB) by the VFS, so we need + * at least two xattrs to hit the total limit. + */ +TEST_F(xattr_sockfs, max_xattr_size) +{ + char *value; + int ret; + + value = malloc(XATTR_SIZE_MAX); + ASSERT_NE(value, NULL); + memset(value, 'A', XATTR_SIZE_MAX); + + /* First 64KB xattr - total = 64KB */ + ret = fsetxattr(self->sockfd, "user.big1", value, XATTR_SIZE_MAX, 0); + ASSERT_EQ(ret, 0) { + TH_LOG("first large xattr failed: %s", strerror(errno)); + } + + /* Second 64KB xattr - total = 128KB (exactly at limit) */ + ret = fsetxattr(self->sockfd, "user.big2", value, XATTR_SIZE_MAX, 0); + free(value); + ASSERT_EQ(ret, 0) { + TH_LOG("second large xattr failed: %s", strerror(errno)); + } + + /* Third xattr with 1 byte - total > 128KB, should fail */ + ret = fsetxattr(self->sockfd, "user.big3", "v", 1, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENOSPC) { + TH_LOG("Expected ENOSPC when exceeding size limit, got %s", + strerror(errno)); + } +} + +/* + * Test that removing an xattr frees limit space, allowing re-addition. 
+ */ +TEST_F(xattr_sockfs, limit_remove_readd) +{ + char name[32]; + int i, ret; + + /* Fill up to the maximum count */ + for (i = 0; i < SIMPLE_XATTR_MAX_NR; i++) { + snprintf(name, sizeof(name), "user.test%03d", i); + ret = fsetxattr(self->sockfd, name, "v", 1, 0); + ASSERT_EQ(ret, 0); + } + + /* Verify we're at the limit */ + ret = fsetxattr(self->sockfd, "user.overflow", "v", 1, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENOSPC); + + /* Remove one xattr */ + ret = fremovexattr(self->sockfd, "user.test000"); + ASSERT_EQ(ret, 0); + + /* Now we should be able to add one more */ + ret = fsetxattr(self->sockfd, "user.newattr", "v", 1, 0); + ASSERT_EQ(ret, 0) { + TH_LOG("re-add after remove failed: %s", strerror(errno)); + } +} + +/* + * Test that two different sockets have independent xattr limits. + */ +TEST_F(xattr_sockfs, limits_per_inode) +{ + char buf[256]; + int sock2; + ssize_t ret; + + sock2 = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(sock2, 0); + + /* Set xattr on first socket */ + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + /* First socket's xattr should not be visible on second socket */ + ret = fgetxattr(sock2, TEST_XATTR_NAME, NULL, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); + + /* Second socket should independently accept xattrs */ + ret = fsetxattr(sock2, TEST_XATTR_NAME, + TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0); + ASSERT_EQ(ret, 0); + + /* Verify each socket has its own value */ + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(sock2, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE2); + + close(sock2); +} + +TEST_HARNESS_MAIN |
