diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-03 11:31:42 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-03 11:31:42 -0700 |
commit | d9395512c5bd326924ba0b36ee0d5d51d763a8d6 (patch) | |
tree | 678718d5ce35e7c56f2f1da51e21b759b4500638 | |
parent | f00654007fe1c154dafbdc1f5953c132e8c27c38 (diff) | |
parent | 3bd8bc897161730042051cd5f9c6ed1e94cb5453 (diff) | |
download | lwn-d9395512c5bd326924ba0b36ee0d5d51d763a8d6.tar.gz lwn-d9395512c5bd326924ba0b36ee0d5d51d763a8d6.zip |
Merge tag 'pull-work.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs namei updates from Al Viro:
"RCU pathwalk cleanups.
Storing sampled ->d_seq of the next dentry in nameidata simplifies
life considerably, especially if we delay fetching ->d_inode until
step_into()"
* tag 'pull-work.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
step_into(): move fetching ->d_inode past handle_mounts()
lookup_fast(): don't bother with inode
follow_dotdot{,_rcu}(): don't bother with inode
step_into(): lose inode argument
namei: stash the sampled ->d_seq into nameidata
namei: move clearing LOOKUP_RCU towards rcu_read_unlock()
switch try_to_unlazy_next() to __legitimize_mnt()
follow_dotdot{,_rcu}(): change calling conventions
namei: get rid of pointless unlikely(read_seqcount_retry(...))
__follow_mount_rcu(): verify that mount_lock remains unchanged
-rw-r--r-- | fs/mount.h | 1 | ||||
-rw-r--r-- | fs/namei.c | 191 | ||||
-rw-r--r-- | fs/namespace.c | 2 |
3 files changed, 86 insertions, 108 deletions
diff --git a/fs/mount.h b/fs/mount.h index 0b6e08cf8afb..130c07c2f8d2 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -100,7 +100,6 @@ static inline int is_mounted(struct vfsmount *mnt) extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); extern int __legitimize_mnt(struct vfsmount *, unsigned); -extern bool legitimize_mnt(struct vfsmount *, unsigned); static inline bool __path_is_mountpoint(const struct path *path) { diff --git a/fs/namei.c b/fs/namei.c index 1f28d3f463c3..ed3ffd9b22a3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -567,7 +567,7 @@ struct nameidata { struct path root; struct inode *inode; /* path.dentry.d_inode */ unsigned int flags, state; - unsigned seq, m_seq, r_seq; + unsigned seq, next_seq, m_seq, r_seq; int last_type; unsigned depth; int total_link_count; @@ -665,6 +665,13 @@ static void drop_links(struct nameidata *nd) } } +static void leave_rcu(struct nameidata *nd) +{ + nd->flags &= ~LOOKUP_RCU; + nd->seq = nd->next_seq = 0; + rcu_read_unlock(); +} + static void terminate_walk(struct nameidata *nd) { drop_links(nd); @@ -678,8 +685,7 @@ static void terminate_walk(struct nameidata *nd) nd->state &= ~ND_ROOT_GRABBED; } } else { - nd->flags &= ~LOOKUP_RCU; - rcu_read_unlock(); + leave_rcu(nd); } nd->depth = 0; nd->path.mnt = NULL; @@ -765,14 +771,13 @@ static bool try_to_unlazy(struct nameidata *nd) BUG_ON(!(nd->flags & LOOKUP_RCU)); - nd->flags &= ~LOOKUP_RCU; if (unlikely(!legitimize_links(nd))) goto out1; if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) goto out; if (unlikely(!legitimize_root(nd))) goto out; - rcu_read_unlock(); + leave_rcu(nd); BUG_ON(nd->inode != parent->d_inode); return true; @@ -780,7 +785,7 @@ out1: nd->path.mnt = NULL; nd->path.dentry = NULL; out: - rcu_read_unlock(); + leave_rcu(nd); return false; } @@ -788,7 +793,6 @@ out: * try_to_unlazy_next - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * @dentry: next dentry to step into - * @seq: seq number to check @dentry against * Returns: true on success, false on failure * * Similar to try_to_unlazy(), but here we have the next dentry already @@ -797,15 +801,19 @@ out: * Nothing should touch nameidata between try_to_unlazy_next() failure and * terminate_walk(). */ -static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq) +static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry) { + int res; BUG_ON(!(nd->flags & LOOKUP_RCU)); - nd->flags &= ~LOOKUP_RCU; if (unlikely(!legitimize_links(nd))) goto out2; - if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq))) - goto out2; + res = __legitimize_mnt(nd->path.mnt, nd->m_seq); + if (unlikely(res)) { + if (res > 0) + goto out2; + goto out1; + } if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref))) goto out1; @@ -818,7 +826,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi */ if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) goto out; - if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) + if (read_seqcount_retry(&dentry->d_seq, nd->next_seq)) goto out_dput; /* * Sequence counts matched. Now make sure that the root is @@ -826,7 +834,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi */ if (unlikely(!legitimize_root(nd))) goto out_dput; - rcu_read_unlock(); + leave_rcu(nd); return true; out2: @@ -834,10 +842,10 @@ out2: out1: nd->path.dentry = NULL; out: - rcu_read_unlock(); + leave_rcu(nd); return false; out_dput: - rcu_read_unlock(); + leave_rcu(nd); dput(dentry); return false; } @@ -962,7 +970,7 @@ static int nd_jump_root(struct nameidata *nd) d = nd->path.dentry; nd->inode = d->d_inode; nd->seq = nd->root_seq; - if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq))) + if (read_seqcount_retry(&d->d_seq, nd->seq)) return -ECHILD; } else { path_put(&nd->path); @@ -1466,8 +1474,7 @@ EXPORT_SYMBOL(follow_down); * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if * we meet a managed dentry that would need blocking. */ -static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, - struct inode **inode, unsigned *seqp) +static bool __follow_mount_rcu(struct nameidata *nd, struct path *path) { struct dentry *dentry = path->dentry; unsigned int flags = dentry->d_flags; @@ -1496,15 +1503,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, path->mnt = &mounted->mnt; dentry = path->dentry = mounted->mnt.mnt_root; nd->state |= ND_JUMPED; - *seqp = read_seqcount_begin(&dentry->d_seq); - *inode = dentry->d_inode; - /* - * We don't need to re-check ->d_seq after this - * ->d_inode read - there will be an RCU delay - * between mount hash removal and ->mnt_root - * becoming unpinned. - */ + nd->next_seq = read_seqcount_begin(&dentry->d_seq); flags = dentry->d_flags; + // makes sure that non-RCU pathwalk could reach + // this state. + if (read_seqretry(&mount_lock, nd->m_seq)) + return false; continue; } if (read_seqretry(&mount_lock, nd->m_seq)) @@ -1515,8 +1519,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, } static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, - struct path *path, struct inode **inode, - unsigned int *seqp) + struct path *path) { bool jumped; int ret; @@ -1524,16 +1527,15 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, path->mnt = nd->path.mnt; path->dentry = dentry; if (nd->flags & LOOKUP_RCU) { - unsigned int seq = *seqp; - if (unlikely(!*inode)) - return -ENOENT; - if (likely(__follow_mount_rcu(nd, path, inode, seqp))) + unsigned int seq = nd->next_seq; + if (likely(__follow_mount_rcu(nd, path))) return 0; - if (!try_to_unlazy_next(nd, dentry, seq)) - return -ECHILD; - // *path might've been clobbered by __follow_mount_rcu() + // *path and nd->next_seq might've been clobbered path->mnt = nd->path.mnt; path->dentry = dentry; + nd->next_seq = seq; + if (!try_to_unlazy_next(nd, dentry)) + return -ECHILD; } ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags); if (jumped) { @@ -1546,9 +1548,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, dput(path->dentry); if (path->mnt != nd->path.mnt) mntput(path->mnt); - } else { - *inode = d_backing_inode(path->dentry); - *seqp = 0; /* out of RCU mode, so the value doesn't matter */ } return ret; } @@ -1607,9 +1606,7 @@ static struct dentry *__lookup_hash(const struct qstr *name, return dentry; } -static struct dentry *lookup_fast(struct nameidata *nd, - struct inode **inode, - unsigned *seqp) +static struct dentry *lookup_fast(struct nameidata *nd) { struct dentry *dentry, *parent = nd->path.dentry; int status = 1; @@ -1620,8 +1617,7 @@ static struct dentry *lookup_fast(struct nameidata *nd, * going to fall back to non-racy lookup. */ if (nd->flags & LOOKUP_RCU) { - unsigned seq; - dentry = __d_lookup_rcu(parent, &nd->last, &seq); + dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq); if (unlikely(!dentry)) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); @@ -1629,28 +1625,16 @@ static struct dentry *lookup_fast(struct nameidata *nd, } /* - * This sequence count validates that the inode matches - * the dentry name information from lookup. - */ - *inode = d_backing_inode(dentry); - if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) - return ERR_PTR(-ECHILD); - - /* * This sequence count validates that the parent had no * changes while we did the lookup of the dentry above. - * - * The memory barrier in read_seqcount_begin of child is - * enough, we can use __read_seqcount_retry here. */ - if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq))) + if (read_seqcount_retry(&parent->d_seq, nd->seq)) return ERR_PTR(-ECHILD); - *seqp = seq; status = d_revalidate(dentry, nd->flags); if (likely(status > 0)) return dentry; - if (!try_to_unlazy_next(nd, dentry, seq)) + if (!try_to_unlazy_next(nd, dentry)) return ERR_PTR(-ECHILD); if (status == -ECHILD) /* we'd been told to redo it in non-rcu mode */ @@ -1731,7 +1715,7 @@ static inline int may_lookup(struct user_namespace *mnt_userns, return inode_permission(mnt_userns, nd->inode, MAY_EXEC); } -static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq) +static int reserve_stack(struct nameidata *nd, struct path *link) { if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) return -ELOOP; @@ -1746,7 +1730,7 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq) if (nd->flags & LOOKUP_RCU) { // we need to grab link before we do unlazy. And we can't skip // unlazy even if we fail to grab the link - cleanup needs it - bool grabbed_link = legitimize_path(nd, link, seq); + bool grabbed_link = legitimize_path(nd, link, nd->next_seq); if (!try_to_unlazy(nd) || !grabbed_link) return -ECHILD; @@ -1760,11 +1744,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq) enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4}; static const char *pick_link(struct nameidata *nd, struct path *link, - struct inode *inode, unsigned seq, int flags) + struct inode *inode, int flags) { struct saved *last; const char *res; - int error = reserve_stack(nd, link, seq); + int error = reserve_stack(nd, link); if (unlikely(error)) { if (!(nd->flags & LOOKUP_RCU)) @@ -1774,7 +1758,7 @@ static const char *pick_link(struct nameidata *nd, struct path *link, last = nd->stack + nd->depth++; last->link = *link; clear_delayed_call(&last->done); - last->seq = seq; + last->seq = nd->next_seq; if (flags & WALK_TRAILING) { error = may_follow_link(nd, inode); @@ -1836,43 +1820,50 @@ all_done: // pure jump * to do this check without having to look at inode->i_op, * so we keep a cache of "no, this doesn't need follow_link" * for the common case. + * + * NOTE: dentry must be what nd->next_seq had been sampled from. */ static const char *step_into(struct nameidata *nd, int flags, - struct dentry *dentry, struct inode *inode, unsigned seq) + struct dentry *dentry) { struct path path; - int err = handle_mounts(nd, dentry, &path, &inode, &seq); + struct inode *inode; + int err = handle_mounts(nd, dentry, &path); if (err < 0) return ERR_PTR(err); + inode = path.dentry->d_inode; if (likely(!d_is_symlink(path.dentry)) || ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) || (flags & WALK_NOFOLLOW)) { /* not a symlink or should not follow */ - if (!(nd->flags & LOOKUP_RCU)) { + if (nd->flags & LOOKUP_RCU) { + if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq)) + return ERR_PTR(-ECHILD); + if (unlikely(!inode)) + return ERR_PTR(-ENOENT); + } else { dput(nd->path.dentry); if (nd->path.mnt != path.mnt) mntput(nd->path.mnt); } nd->path = path; nd->inode = inode; - nd->seq = seq; + nd->seq = nd->next_seq; return NULL; } if (nd->flags & LOOKUP_RCU) { /* make sure that d_is_symlink above matches inode */ - if (read_seqcount_retry(&path.dentry->d_seq, seq)) + if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq)) return ERR_PTR(-ECHILD); } else { if (path.mnt == nd->path.mnt) mntget(path.mnt); } - return pick_link(nd, &path, inode, seq, flags); + return pick_link(nd, &path, inode, flags); } -static struct dentry *follow_dotdot_rcu(struct nameidata *nd, - struct inode **inodep, - unsigned *seqp) +static struct dentry *follow_dotdot_rcu(struct nameidata *nd) { struct dentry *parent, *old; @@ -1889,30 +1880,30 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd, nd->path = path; nd->inode = path.dentry->d_inode; nd->seq = seq; - if (unlikely(read_seqretry(&mount_lock, nd->m_seq))) + // makes sure that non-RCU pathwalk could reach this state + if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); /* we know that mountpoint was pinned */ } old = nd->path.dentry; parent = old->d_parent; - *inodep = parent->d_inode; - *seqp = read_seqcount_begin(&parent->d_seq); - if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq))) + nd->next_seq = read_seqcount_begin(&parent->d_seq); + // makes sure that non-RCU pathwalk could reach this state + if (read_seqcount_retry(&old->d_seq, nd->seq)) return ERR_PTR(-ECHILD); if (unlikely(!path_connected(nd->path.mnt, parent))) return ERR_PTR(-ECHILD); return parent; in_root: - if (unlikely(read_seqretry(&mount_lock, nd->m_seq))) + if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-ECHILD); - return NULL; + nd->next_seq = nd->seq; + return nd->path.dentry; } -static struct dentry *follow_dotdot(struct nameidata *nd, - struct inode **inodep, - unsigned *seqp) +static struct dentry *follow_dotdot(struct nameidata *nd) { struct dentry *parent; @@ -1936,15 +1927,12 @@ static struct dentry *follow_dotdot(struct nameidata *nd, dput(parent); return ERR_PTR(-ENOENT); } - *seqp = 0; - *inodep = parent->d_inode; return parent; in_root: if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-EXDEV); - dget(nd->path.dentry); - return NULL; + return dget(nd->path.dentry); } static const char *handle_dots(struct nameidata *nd, int type) @@ -1952,8 +1940,6 @@ static const char *handle_dots(struct nameidata *nd, int type) if (type == LAST_DOTDOT) { const char *error = NULL; struct dentry *parent; - struct inode *inode; - unsigned seq; if (!nd->root.mnt) { error = ERR_PTR(set_root(nd)); @@ -1961,17 +1947,12 @@ static const char *handle_dots(struct nameidata *nd, int type) return error; } if (nd->flags & LOOKUP_RCU) - parent = follow_dotdot_rcu(nd, &inode, &seq); + parent = follow_dotdot_rcu(nd); else - parent = follow_dotdot(nd, &inode, &seq); + parent = follow_dotdot(nd); if (IS_ERR(parent)) return ERR_CAST(parent); - if (unlikely(!parent)) - error = step_into(nd, WALK_NOFOLLOW, - nd->path.dentry, nd->inode, nd->seq); - else - error = step_into(nd, WALK_NOFOLLOW, - parent, inode, seq); + error = step_into(nd, WALK_NOFOLLOW, parent); if (unlikely(error)) return error; @@ -1983,9 +1964,9 @@ static const char *handle_dots(struct nameidata *nd, int type) * some fallback). */ smp_rmb(); - if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq))) + if (__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)) return ERR_PTR(-EAGAIN); - if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq))) + if (__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)) return ERR_PTR(-EAGAIN); } } @@ -1995,8 +1976,6 @@ static const char *handle_dots(struct nameidata *nd, int type) static const char *walk_component(struct nameidata *nd, int flags) { struct dentry *dentry; - struct inode *inode; - unsigned seq; /* * "." and ".." are special - ".." especially so because it has * to be able to know about the current root directory and @@ -2007,7 +1986,7 @@ static const char *walk_component(struct nameidata *nd, int flags) put_link(nd); return handle_dots(nd, nd->last_type); } - dentry = lookup_fast(nd, &inode, &seq); + dentry = lookup_fast(nd); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (unlikely(!dentry)) { @@ -2017,7 +1996,7 @@ static const char *walk_component(struct nameidata *nd, int flags) } if (!(flags & WALK_MORE) && nd->depth) put_link(nd); - return step_into(nd, flags, dentry, inode, seq); + return step_into(nd, flags, dentry); } /* @@ -2372,6 +2351,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags) flags &= ~LOOKUP_RCU; if (flags & LOOKUP_RCU) rcu_read_lock(); + else + nd->seq = nd->next_seq = 0; nd->flags = flags; nd->state |= ND_JUMPED; @@ -2473,8 +2454,8 @@ static int handle_lookup_down(struct nameidata *nd) { if (!(nd->flags & LOOKUP_RCU)) dget(nd->path.dentry); - return PTR_ERR(step_into(nd, WALK_NOFOLLOW, - nd->path.dentry, nd->inode, nd->seq)); + nd->next_seq = nd->seq; + return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry)); } /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ @@ -3393,8 +3374,6 @@ static const char *open_last_lookups(struct nameidata *nd, struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool got_write = false; - unsigned seq; - struct inode *inode; struct dentry *dentry; const char *res; @@ -3410,7 +3389,7 @@ static const char *open_last_lookups(struct nameidata *nd, if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; /* we _can_ be in RCU mode here */ - dentry = lookup_fast(nd, &inode, &seq); + dentry = lookup_fast(nd); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (likely(dentry)) @@ -3464,7 +3443,7 @@ static const char *open_last_lookups(struct nameidata *nd, finish_lookup: if (nd->depth) put_link(nd); - res = step_into(nd, WALK_TRAILING, dentry, inode, seq); + res = step_into(nd, WALK_TRAILING, dentry); if (unlikely(res)) nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); return res; diff --git a/fs/namespace.c b/fs/namespace.c index e6a7e769d25d..68789f896f08 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -648,7 +648,7 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) } /* call under rcu_read_lock */ -bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) +static bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) { int res = __legitimize_mnt(bastard, seq); if (likely(!res)) |