diff options
Diffstat (limited to 'fs/kernfs/dir.c')
| -rw-r--r-- | fs/kernfs/dir.c | 392 |
1 files changed, 260 insertions, 132 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 5f0f8b95f44c..4f9ade82b08a 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -14,10 +14,10 @@ #include <linux/slab.h> #include <linux/security.h> #include <linux/hash.h> +#include <linux/ns_common.h> #include "kernfs-internal.h" -static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ /* * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to * call pr_cont() while holding rename_lock. Because sometimes pr_cont() @@ -27,7 +27,6 @@ static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ */ static DEFINE_SPINLOCK(kernfs_pr_cont_lock); static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ -static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) @@ -51,22 +50,14 @@ static bool kernfs_lockdep(struct kernfs_node *kn) #endif } -static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) -{ - if (!kn) - return strscpy(buf, "(null)", buflen); - - return strscpy(buf, kn->parent ? kn->name : "/", buflen); -} - /* kernfs_node_depth - compute depth from @from to @to */ static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) { size_t depth = 0; - while (to->parent && to != from) { + while (rcu_dereference(to->__parent) && to != from) { depth++; - to = to->parent; + to = rcu_dereference(to->__parent); } return depth; } @@ -84,18 +75,18 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, db = kernfs_depth(rb->kn, b); while (da > db) { - a = a->parent; + a = rcu_dereference(a->__parent); da--; } while (db > da) { - b = b->parent; + b = rcu_dereference(b->__parent); db--; } /* worst case b and a will be the same at root */ while (b != a) { - b = b->parent; - a = a->parent; + b = rcu_dereference(b->__parent); + a = rcu_dereference(a->__parent); } return a; @@ -168,10 +159,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, /* Calculate how many bytes we need for the rest */ for (i = depth_to - 1; i >= 0; i--) { + const char *name; + for (kn = kn_to, j = 0; j < i; j++) - kn = kn->parent; + kn = rcu_dereference(kn->__parent); - len += scnprintf(buf + len, buflen - len, "/%s", kn->name); + name = rcu_dereference(kn->name); + len += scnprintf(buf + len, buflen - len, "/%s", name); } return len; @@ -195,13 +189,18 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, */ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { - unsigned long flags; - int ret; + struct kernfs_node *kn_parent; - read_lock_irqsave(&kernfs_rename_lock, flags); - ret = kernfs_name_locked(kn, buf, buflen); - read_unlock_irqrestore(&kernfs_rename_lock, flags); - return ret; + if (!kn) + return strscpy(buf, "(null)", buflen); + + guard(rcu)(); + /* + * KERNFS_ROOT_INVARIANT_PARENT is ignored here. The name is RCU freed and + * the parent is either existing or not. + */ + kn_parent = rcu_dereference(kn->__parent); + return strscpy(buf, kn_parent ? rcu_dereference(kn->name) : "/", buflen); } /** @@ -223,13 +222,17 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, char *buf, size_t buflen) { - unsigned long flags; - int ret; + struct kernfs_root *root; - read_lock_irqsave(&kernfs_rename_lock, flags); - ret = kernfs_path_from_node_locked(to, from, buf, buflen); - read_unlock_irqrestore(&kernfs_rename_lock, flags); - return ret; + guard(rcu)(); + if (to) { + root = kernfs_root(to); + if (!(root->flags & KERNFS_ROOT_INVARIANT_PARENT)) { + guard(read_lock_irqsave)(&root->kernfs_rename_lock); + return kernfs_path_from_node_locked(to, from, buf, buflen); + } + } + return kernfs_path_from_node_locked(to, from, buf, buflen); } EXPORT_SYMBOL_GPL(kernfs_path_from_node); @@ -292,16 +295,30 @@ out: struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) { struct kernfs_node *parent; + struct kernfs_root *root; unsigned long flags; - read_lock_irqsave(&kernfs_rename_lock, flags); - parent = kn->parent; + root = kernfs_root(kn); + read_lock_irqsave(&root->kernfs_rename_lock, flags); + parent = kernfs_parent(kn); kernfs_get(parent); - read_unlock_irqrestore(&kernfs_rename_lock, flags); + read_unlock_irqrestore(&root->kernfs_rename_lock, flags); return parent; } +/* + * kernfs_ns_id - return the namespace id for a given namespace + * @ns: namespace tag (may be NULL) + * + * Use the 64-bit namespace id instead of raw pointers for hashing + * and comparison to avoid leaking kernel addresses to userspace. + */ +static u64 kernfs_ns_id(const struct ns_common *ns) +{ + return ns ? ns->ns_id : 0; +} + /** * kernfs_name_hash - calculate hash of @ns + @name * @name: Null terminated string to hash @@ -309,9 +326,10 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) * * Return: 31-bit hash of ns + name (so it fits in an off_t) */ -static unsigned int kernfs_name_hash(const char *name, const void *ns) +static unsigned int kernfs_name_hash(const char *name, + const struct ns_common *ns) { - unsigned long hash = init_name_hash(ns); + unsigned long hash = init_name_hash(kernfs_ns_id(ns)); unsigned int len = strlen(name); while (len--) hash = partial_name_hash(*name++, hash); @@ -326,23 +344,26 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns) } static int kernfs_name_compare(unsigned int hash, const char *name, - const void *ns, const struct kernfs_node *kn) + const struct ns_common *ns, const struct kernfs_node *kn) { + u64 ns_id = kernfs_ns_id(ns); + u64 kn_ns_id = kernfs_ns_id(kn->ns); + if (hash < kn->hash) return -1; if (hash > kn->hash) return 1; - if (ns < kn->ns) + if (ns_id < kn_ns_id) return -1; - if (ns > kn->ns) + if (ns_id > kn_ns_id) return 1; - return strcmp(name, kn->name); + return strcmp(name, kernfs_rcu_name(kn)); } static int kernfs_sd_compare(const struct kernfs_node *left, const struct kernfs_node *right) { - return kernfs_name_compare(left->hash, left->name, left->ns, right); + return kernfs_name_compare(left->hash, kernfs_rcu_name(left), left->ns, right); } /** @@ -360,8 +381,12 @@ static int kernfs_sd_compare(const struct kernfs_node *left, */ static int kernfs_link_sibling(struct kernfs_node *kn) { - struct rb_node **node = &kn->parent->dir.children.rb_node; struct rb_node *parent = NULL; + struct kernfs_node *kn_parent; + struct rb_node **node; + + kn_parent = kernfs_parent(kn); + node = &kn_parent->dir.children.rb_node; while (*node) { struct kernfs_node *pos; @@ -380,13 +405,13 @@ static int kernfs_link_sibling(struct kernfs_node *kn) /* add new node and rebalance the tree */ rb_link_node(&kn->rb, parent, node); - rb_insert_color(&kn->rb, &kn->parent->dir.children); + rb_insert_color(&kn->rb, &kn_parent->dir.children); /* successfully added, account subdir number */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (kernfs_type(kn) == KERNFS_DIR) - kn->parent->dir.subdirs++; - kernfs_inc_rev(kn->parent); + kn_parent->dir.subdirs++; + kernfs_inc_rev(kn_parent); up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); return 0; @@ -407,16 +432,19 @@ static int kernfs_link_sibling(struct kernfs_node *kn) */ static bool kernfs_unlink_sibling(struct kernfs_node *kn) { + struct kernfs_node *kn_parent; + if (RB_EMPTY_NODE(&kn->rb)) return false; + kn_parent = kernfs_parent(kn); down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (kernfs_type(kn) == KERNFS_DIR) - kn->parent->dir.subdirs--; - kernfs_inc_rev(kn->parent); + kn_parent->dir.subdirs--; + kernfs_inc_rev(kn_parent); up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); - rb_erase(&kn->rb, &kn->parent->dir.children); + rb_erase(&kn->rb, &kn_parent->dir.children); RB_CLEAR_NODE(&kn->rb); return true; } @@ -470,12 +498,14 @@ void kernfs_put_active(struct kernfs_node *kn) /** * kernfs_drain - drain kernfs_node * @kn: kernfs_node to drain + * @drop_supers: Set to true if this function is called with the + * kernfs_supers_rwsem locked. * * Drain existing usages and nuke all existing mmaps of @kn. Multiple * removers may invoke this function concurrently on @kn and all will * return after draining is complete. */ -static void kernfs_drain(struct kernfs_node *kn) +static void kernfs_drain(struct kernfs_node *kn, bool drop_supers) __releases(&kernfs_root(kn)->kernfs_rwsem) __acquires(&kernfs_root(kn)->kernfs_rwsem) { @@ -495,6 +525,8 @@ static void kernfs_drain(struct kernfs_node *kn) return; up_write(&root->kernfs_rwsem); + if (drop_supers) + up_read(&root->kernfs_supers_rwsem); if (kernfs_lockdep(kn)) { rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); @@ -513,6 +545,8 @@ static void kernfs_drain(struct kernfs_node *kn) if (kernfs_should_drain_open_files(kn)) kernfs_drain_open_files(kn); + if (drop_supers) + down_read(&root->kernfs_supers_rwsem); down_write(&root->kernfs_rwsem); } @@ -533,12 +567,11 @@ static void kernfs_free_rcu(struct rcu_head *rcu) { struct kernfs_node *kn = container_of(rcu, struct kernfs_node, rcu); - kfree_const(kn->name); + /* If the whole node goes away, then name can't be used outside */ + kfree_const(rcu_access_pointer(kn->name)); - if (kn->iattr) { - simple_xattrs_free(&kn->iattr->xattrs, NULL); + if (kn->iattr) kmem_cache_free(kernfs_iattrs_cache, kn->iattr); - } kmem_cache_free(kernfs_node_cache, kn); } @@ -562,18 +595,25 @@ void kernfs_put(struct kernfs_node *kn) * Moving/renaming is always done while holding reference. * kn->parent won't change beneath us. */ - parent = kn->parent; + parent = kernfs_parent(kn); WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, "kernfs_put: %s/%s: released with incorrect active_ref %d\n", - parent ? parent->name : "", kn->name, atomic_read(&kn->active)); + parent ? rcu_dereference(parent->name) : "", + rcu_dereference(kn->name), atomic_read(&kn->active)); if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); - spin_lock(&kernfs_idr_lock); + if (kn->iattr && kn->iattr->xattrs) { + simple_xattrs_free(kn->iattr->xattrs, NULL); + kfree(kn->iattr->xattrs); + kn->iattr->xattrs = NULL; + } + + spin_lock(&root->kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); - spin_unlock(&kernfs_idr_lock); + spin_unlock(&root->kernfs_idr_lock); call_rcu(&kn->rcu, kernfs_free_rcu); @@ -626,13 +666,13 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, goto err_out1; idr_preload(GFP_KERNEL); - spin_lock(&kernfs_idr_lock); + spin_lock(&root->kernfs_idr_lock); ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC); if (ret >= 0 && ret < root->last_id_lowbits) root->id_highbits++; id_highbits = root->id_highbits; root->last_id_lowbits = ret; - spin_unlock(&kernfs_idr_lock); + spin_unlock(&root->kernfs_idr_lock); idr_preload_end(); if (ret < 0) goto err_out2; @@ -643,7 +683,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, atomic_set(&kn->active, KN_DEACTIVATED_BIAS); RB_CLEAR_NODE(&kn->rb); - kn->name = name; + rcu_assign_pointer(kn->name, name); kn->mode = mode; kn->flags = flags; @@ -662,15 +702,23 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, if (parent) { ret = security_kernfs_init_security(parent, kn); if (ret) - goto err_out3; + goto err_out4; } return kn; + err_out4: + if (kn->iattr) { + if (kn->iattr->xattrs) { + simple_xattrs_free(kn->iattr->xattrs, NULL); + kfree(kn->iattr->xattrs); + } + kmem_cache_free(kernfs_iattrs_cache, kn->iattr); + } err_out3: - spin_lock(&kernfs_idr_lock); + spin_lock(&root->kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); - spin_unlock(&kernfs_idr_lock); + spin_unlock(&root->kernfs_idr_lock); err_out2: kmem_cache_free(kernfs_node_cache, kn); err_out1: @@ -701,7 +749,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, name, mode, uid, gid, flags); if (kn) { kernfs_get(parent); - kn->parent = parent; + rcu_assign_pointer(kn->__parent, parent); } return kn; } @@ -769,18 +817,20 @@ err_unlock: */ int kernfs_add_one(struct kernfs_node *kn) { - struct kernfs_node *parent = kn->parent; - struct kernfs_root *root = kernfs_root(parent); + struct kernfs_root *root = kernfs_root(kn); struct kernfs_iattrs *ps_iattr; + struct kernfs_node *parent; bool has_ns; int ret; down_write(&root->kernfs_rwsem); + parent = kernfs_parent(kn); ret = -EINVAL; has_ns = kernfs_ns_enabled(parent); if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", - has_ns ? "required" : "invalid", parent->name, kn->name)) + has_ns ? "required" : "invalid", + kernfs_rcu_name(parent), kernfs_rcu_name(kn))) goto out_unlock; if (kernfs_type(parent) != KERNFS_DIR) @@ -790,7 +840,7 @@ int kernfs_add_one(struct kernfs_node *kn) if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR)) goto out_unlock; - kn->hash = kernfs_name_hash(kn->name, kn->ns); + kn->hash = kernfs_name_hash(kernfs_rcu_name(kn), kn->ns); ret = kernfs_link_sibling(kn); if (ret) @@ -836,7 +886,7 @@ out_unlock: */ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, const unsigned char *name, - const void *ns) + const struct ns_common *ns) { struct rb_node *node = parent->dir.children.rb_node; bool has_ns = kernfs_ns_enabled(parent); @@ -846,7 +896,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, if (has_ns != (bool)ns) { WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", - has_ns ? "required" : "invalid", parent->name, name); + has_ns ? "required" : "invalid", kernfs_rcu_name(parent), name); return NULL; } @@ -869,7 +919,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, const unsigned char *path, - const void *ns) + const struct ns_common *ns) { ssize_t len; char *p, *name; @@ -910,7 +960,8 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, - const char *name, const void *ns) + const char *name, + const struct ns_common *ns) { struct kernfs_node *kn; struct kernfs_root *root = kernfs_root(parent); @@ -936,7 +987,8 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, - const char *path, const void *ns) + const char *path, + const struct ns_common *ns) { struct kernfs_node *kn; struct kernfs_root *root = kernfs_root(parent); @@ -949,6 +1001,11 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, return kn; } +unsigned int kernfs_root_flags(struct kernfs_node *kn) +{ + return kernfs_root(kn)->flags; +} + /** * kernfs_create_root - create a new kernfs hierarchy * @scops: optional syscall operations for the hierarchy @@ -964,15 +1021,17 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, struct kernfs_root *root; struct kernfs_node *kn; - root = kzalloc(sizeof(*root), GFP_KERNEL); + root = kzalloc_obj(*root); if (!root) return ERR_PTR(-ENOMEM); idr_init(&root->ino_idr); + spin_lock_init(&root->kernfs_idr_lock); init_rwsem(&root->kernfs_rwsem); init_rwsem(&root->kernfs_iattr_rwsem); init_rwsem(&root->kernfs_supers_rwsem); INIT_LIST_HEAD(&root->supers); + rwlock_init(&root->kernfs_rename_lock); /* * On 64bit ino setups, id is ino. On 32bit, low 32bits are ino. @@ -1052,7 +1111,8 @@ struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root) struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, - void *priv, const void *ns) + void *priv, + const struct ns_common *ns) { struct kernfs_node *kn; int rc; @@ -1112,7 +1172,7 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { - struct kernfs_node *kn; + struct kernfs_node *kn, *parent; struct kernfs_root *root; if (flags & LOOKUP_RCU) @@ -1120,8 +1180,6 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, /* Negative hashed dentry? */ if (d_really_is_negative(dentry)) { - struct kernfs_node *parent; - /* If the kernfs parent node has changed discard and * proceed to ->lookup. * @@ -1163,17 +1221,18 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, if (!kernfs_active(kn)) goto out_bad; + parent = kernfs_parent(kn); /* The kernfs node has been moved? */ - if (kernfs_dentry_node(dentry->d_parent) != kn->parent) + if (kernfs_dentry_node(dentry->d_parent) != parent) goto out_bad; /* The kernfs node has been renamed */ - if (strcmp(dentry->d_name.name, kn->name) != 0) + if (strcmp(dentry->d_name.name, kernfs_rcu_name(kn)) != 0) goto out_bad; /* The kernfs node has been moved to a different namespace */ - if (kn->parent && kernfs_ns_enabled(kn->parent) && - kernfs_info(dentry->d_sb)->ns != kn->ns) + if (parent && kernfs_ns_enabled(parent) && + kernfs_ns_id(kernfs_info(dentry->d_sb)->ns) != kernfs_ns_id(kn->ns)) goto out_bad; up_read(&root->kernfs_rwsem); @@ -1195,7 +1254,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, struct kernfs_node *kn; struct kernfs_root *root; struct inode *inode = NULL; - const void *ns = NULL; + const struct ns_common *ns = NULL; root = kernfs_root(parent); down_read(&root->kernfs_rwsem); @@ -1230,24 +1289,24 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, return d_splice_alias(inode, dentry); } -static int kernfs_iop_mkdir(struct mnt_idmap *idmap, - struct inode *dir, struct dentry *dentry, - umode_t mode) +static struct dentry *kernfs_iop_mkdir(struct mnt_idmap *idmap, + struct inode *dir, struct dentry *dentry, + umode_t mode) { struct kernfs_node *parent = dir->i_private; struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; int ret; if (!scops || !scops->mkdir) - return -EPERM; + return ERR_PTR(-EPERM); if (!kernfs_get_active(parent)) - return -ENODEV; + return ERR_PTR(-ENODEV); ret = scops->mkdir(parent, dentry->d_name.name, mode); kernfs_put_active(parent); - return ret; + return ERR_PTR(ret); } static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) @@ -1365,7 +1424,7 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, return kernfs_leftmost_descendant(rb_to_kn(rbn)); /* no sibling left, visit parent */ - return pos->parent; + return kernfs_parent(pos); } static void kernfs_activate_one(struct kernfs_node *kn) @@ -1377,7 +1436,7 @@ static void kernfs_activate_one(struct kernfs_node *kn) if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING))) return; - WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb)); + WARN_ON_ONCE(rcu_access_pointer(kn->__parent) && RB_EMPTY_NODE(&kn->rb)); WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); @@ -1439,38 +1498,72 @@ void kernfs_show(struct kernfs_node *kn, bool show) kn->flags |= KERNFS_HIDDEN; if (kernfs_active(kn)) atomic_add(KN_DEACTIVATED_BIAS, &kn->active); - kernfs_drain(kn); + kernfs_drain(kn, false); } up_write(&root->kernfs_rwsem); } +/* + * This function enables VFS to send fsnotify events for deletions. + * There is gap in this implementation for certain file removals due their + * unique nature in kernfs. Directory removals that trigger file removals occur + * through vfs_rmdir, which shrinks the dcache and emits fsnotify events after + * the rmdir operation; there is no issue here. However kernfs writes to + * particular files (e.g. cgroup.subtree_control) can also cause file removal, + * but vfs_write does not attempt to emit fsnotify events after the write + * operation, even if i_nlink counts are 0. As a usecase for monitoring this + * category of file removals is not known, they are left without having + * IN_DELETE or IN_DELETE_SELF events generated. + * Fanotify recursive monitoring also does not work for kernfs nodes that do not + * have inodes attached, as they are created on-demand in kernfs. + */ +static void kernfs_clear_inode_nlink(struct kernfs_node *kn) +{ + struct kernfs_root *root = kernfs_root(kn); + struct kernfs_super_info *info; + + lockdep_assert_held_read(&root->kernfs_supers_rwsem); + + list_for_each_entry(info, &root->supers, node) { + struct inode *inode = ilookup(info->sb, kernfs_ino(kn)); + + if (inode) { + clear_nlink(inode); + iput(inode); + } + } +} + static void __kernfs_remove(struct kernfs_node *kn) { - struct kernfs_node *pos; + struct kernfs_node *pos, *parent; /* Short-circuit if non-root @kn has already finished removal. */ if (!kn) return; + lockdep_assert_held_read(&kernfs_root(kn)->kernfs_supers_rwsem); lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem); /* * This is for kernfs_remove_self() which plays with active ref * after removal. */ - if (kn->parent && RB_EMPTY_NODE(&kn->rb)) + if (kernfs_parent(kn) && RB_EMPTY_NODE(&kn->rb)) return; - pr_debug("kernfs %s: removing\n", kn->name); + pr_debug("kernfs %s: removing\n", kernfs_rcu_name(kn)); /* prevent new usage by marking all nodes removing and deactivating */ + down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); pos = NULL; while ((pos = kernfs_next_descendant_post(pos, kn))) { pos->flags |= KERNFS_REMOVING; if (kernfs_active(pos)) atomic_add(KN_DEACTIVATED_BIAS, &pos->active); } + up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); /* deactivate and unlink the subtree node-by-node */ do { @@ -1484,19 +1577,21 @@ static void __kernfs_remove(struct kernfs_node *kn) */ kernfs_get(pos); - kernfs_drain(pos); - + kernfs_drain(pos, true); + parent = kernfs_parent(pos); /* * kernfs_unlink_sibling() succeeds once per node. Use it * to decide who's responsible for cleanups. */ - if (!pos->parent || kernfs_unlink_sibling(pos)) { + if (!parent || kernfs_unlink_sibling(pos)) { struct kernfs_iattrs *ps_iattr = - pos->parent ? pos->parent->iattr : NULL; + parent ? parent->iattr : NULL; - /* update timestamps on the parent */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); + kernfs_clear_inode_nlink(pos); + + /* update timestamps on the parent */ if (ps_iattr) { ktime_get_real_ts64(&ps_iattr->ia_ctime); ps_iattr->ia_mtime = ps_iattr->ia_ctime; @@ -1525,9 +1620,11 @@ void kernfs_remove(struct kernfs_node *kn) root = kernfs_root(kn); + down_read(&root->kernfs_supers_rwsem); down_write(&root->kernfs_rwsem); __kernfs_remove(kn); up_write(&root->kernfs_rwsem); + up_read(&root->kernfs_supers_rwsem); } /** @@ -1561,8 +1658,9 @@ void kernfs_break_active_protection(struct kernfs_node *kn) * invoked before finishing the kernfs operation. Note that while this * function restores the active reference, it doesn't and can't actually * restore the active protection - @kn may already or be in the process of - * being removed. Once kernfs_break_active_protection() is invoked, that - * protection is irreversibly gone for the kernfs operation instance. + * being drained and removed. Once kernfs_break_active_protection() is + * invoked, that protection is irreversibly gone for the kernfs operation + * instance. * * While this function may be called at any point after * kernfs_break_active_protection() is invoked, its most useful location @@ -1617,6 +1715,7 @@ bool kernfs_remove_self(struct kernfs_node *kn) bool ret; struct kernfs_root *root = kernfs_root(kn); + down_read(&root->kernfs_supers_rwsem); down_write(&root->kernfs_rwsem); kernfs_break_active_protection(kn); @@ -1646,7 +1745,9 @@ bool kernfs_remove_self(struct kernfs_node *kn) break; up_write(&root->kernfs_rwsem); + up_read(&root->kernfs_supers_rwsem); schedule(); + down_read(&root->kernfs_supers_rwsem); down_write(&root->kernfs_rwsem); } finish_wait(waitq, &wait); @@ -1661,6 +1762,7 @@ bool kernfs_remove_self(struct kernfs_node *kn) kernfs_unbreak_active_protection(kn); up_write(&root->kernfs_rwsem); + up_read(&root->kernfs_supers_rwsem); return ret; } @@ -1675,7 +1777,7 @@ bool kernfs_remove_self(struct kernfs_node *kn) * Return: %0 on success, -ENOENT if such entry doesn't exist. */ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, - const void *ns) + const struct ns_common *ns) { struct kernfs_node *kn; struct kernfs_root *root; @@ -1687,6 +1789,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, } root = kernfs_root(parent); + down_read(&root->kernfs_supers_rwsem); down_write(&root->kernfs_rwsem); kn = kernfs_find_ns(parent, name, ns); @@ -1697,6 +1800,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, } up_write(&root->kernfs_rwsem); + up_read(&root->kernfs_supers_rwsem); if (kn) return 0; @@ -1714,15 +1818,15 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, * Return: %0 on success, -errno on failure. */ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, - const char *new_name, const void *new_ns) + const char *new_name, const struct ns_common *new_ns) { struct kernfs_node *old_parent; struct kernfs_root *root; - const char *old_name = NULL; + const char *old_name; int error; /* can't move or rename root */ - if (!kn->parent) + if (!rcu_access_pointer(kn->__parent)) return -EINVAL; root = kernfs_root(kn); @@ -1733,9 +1837,20 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, (new_parent->flags & KERNFS_EMPTY_DIR)) goto out; + old_parent = kernfs_parent(kn); + if (root->flags & KERNFS_ROOT_INVARIANT_PARENT) { + error = -EINVAL; + if (WARN_ON_ONCE(old_parent != new_parent)) + goto out; + } + error = 0; - if ((kn->parent == new_parent) && (kn->ns == new_ns) && - (strcmp(kn->name, new_name) == 0)) + old_name = kernfs_rcu_name(kn); + if (!new_name) + new_name = old_name; + if ((old_parent == new_parent) && + (kernfs_ns_id(kn->ns) == kernfs_ns_id(new_ns)) && + (strcmp(old_name, new_name) == 0)) goto out; /* nothing to rename */ error = -EEXIST; @@ -1743,7 +1858,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, goto out; /* rename kernfs_node */ - if (strcmp(kn->name, new_name) != 0) { + if (strcmp(old_name, new_name) != 0) { error = -ENOMEM; new_name = kstrdup_const(new_name, GFP_KERNEL); if (!new_name) @@ -1756,27 +1871,32 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, * Move to the appropriate place in the appropriate directories rbtree. */ kernfs_unlink_sibling(kn); - kernfs_get(new_parent); - /* rename_lock protects ->parent and ->name accessors */ - write_lock_irq(&kernfs_rename_lock); + /* rename_lock protects ->parent accessors */ + if (old_parent != new_parent) { + kernfs_get(new_parent); + write_lock_irq(&root->kernfs_rename_lock); - old_parent = kn->parent; - kn->parent = new_parent; + rcu_assign_pointer(kn->__parent, new_parent); - kn->ns = new_ns; - if (new_name) { - old_name = kn->name; - kn->name = new_name; - } + kn->ns = new_ns; + if (new_name) + rcu_assign_pointer(kn->name, new_name); - write_unlock_irq(&kernfs_rename_lock); + write_unlock_irq(&root->kernfs_rename_lock); + kernfs_put(old_parent); + } else { + /* name assignment is RCU protected, parent is the same */ + kn->ns = new_ns; + if (new_name) + rcu_assign_pointer(kn->name, new_name); + } - kn->hash = kernfs_name_hash(kn->name, kn->ns); + kn->hash = kernfs_name_hash(new_name ?: old_name, kn->ns); kernfs_link_sibling(kn); - kernfs_put(old_parent); - kfree_const(old_name); + if (new_name && !is_kernel_rodata((unsigned long)old_name)) + kfree_rcu_mightsleep(old_name); error = 0; out: @@ -1790,18 +1910,20 @@ static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) return 0; } -static struct kernfs_node *kernfs_dir_pos(const void *ns, +static struct kernfs_node *kernfs_dir_pos(const struct ns_common *ns, struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) { if (pos) { int valid = kernfs_active(pos) && - pos->parent == parent && hash == pos->hash; + rcu_access_pointer(pos->__parent) == parent && + hash == pos->hash; kernfs_put(pos); if (!valid) pos = NULL; } if (!pos && (hash > 1) && (hash < INT_MAX)) { struct rb_node *node = parent->dir.children.rb_node; + u64 ns_id = kernfs_ns_id(ns); while (node) { pos = rb_to_kn(node); @@ -1809,12 +1931,17 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, node = node->rb_left; else if (hash > pos->hash) node = node->rb_right; + else if (ns_id < kernfs_ns_id(pos->ns)) + node = node->rb_left; + else if (ns_id > kernfs_ns_id(pos->ns)) + node = node->rb_right; else break; } } /* Skip over entries which are dying/dead or in the wrong namespace */ - while (pos && (!kernfs_active(pos) || pos->ns != ns)) { + while (pos && (!kernfs_active(pos) || + kernfs_ns_id(pos->ns) != kernfs_ns_id(ns))) { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; @@ -1824,7 +1951,7 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, return pos; } -static struct kernfs_node *kernfs_dir_next_pos(const void *ns, +static struct kernfs_node *kernfs_dir_next_pos(const struct ns_common *ns, struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) { pos = kernfs_dir_pos(ns, parent, ino, pos); @@ -1835,7 +1962,8 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns, pos = NULL; else pos = rb_to_kn(node); - } while (pos && (!kernfs_active(pos) || pos->ns != ns)); + } while (pos && (!kernfs_active(pos) || + kernfs_ns_id(pos->ns) != kernfs_ns_id(ns))); } return pos; } @@ -1846,7 +1974,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) struct kernfs_node *parent = kernfs_dentry_node(dentry); struct kernfs_node *pos = file->private_data; struct kernfs_root *root; - const void *ns = NULL; + const struct ns_common *ns = NULL; if (!dir_emit_dots(file, ctx)) return 0; @@ -1860,7 +1988,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos); pos; pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) { - const char *name = pos->name; + const char *name = kernfs_rcu_name(pos); unsigned int type = fs_umode_to_dtype(pos->mode); int len = strlen(name); ino_t ino = kernfs_ino(pos); @@ -1869,10 +1997,10 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) file->private_data = pos; kernfs_get(pos); - up_read(&root->kernfs_rwsem); - if (!dir_emit(ctx, name, len, ino, type)) + if (!dir_emit(ctx, name, len, ino, type)) { + up_read(&root->kernfs_rwsem); return 0; - down_read(&root->kernfs_rwsem); + } } up_read(&root->kernfs_rwsem); file->private_data = NULL; |
