summaryrefslogtreecommitdiff
path: root/fs/namei.c
diff options
context:
space:
mode:
authorDave Hansen <haveblue@us.ibm.com>2008-02-15 14:37:48 -0800
committerAl Viro <viro@zeniv.linux.org.uk>2008-04-19 00:29:25 -0400
commit4a3fd211ccfc08a88edc824300e25a87785c6a5f (patch)
tree99f1a76a99fa78464b8de731f7fdb5bcc9667a5e /fs/namei.c
parent42a74f206b914db13ee1f5ae932dcd91a77c8579 (diff)
downloadlwn-4a3fd211ccfc08a88edc824300e25a87785c6a5f.tar.gz
lwn-4a3fd211ccfc08a88edc824300e25a87785c6a5f.zip
[PATCH] r/o bind mounts: elevate write count for open()s
This is the first really tricky patch in the series. It elevates the writer count on a mount each time a non-special file is opened for write. We used to do this in may_open(), but Miklos pointed out that __dentry_open() is used as well to create filps. This will cover even those cases, while a call in may_open() would not have. There is also an elevated count around the vfs_create() call in open_namei(). See the comments for more details, but we need this to fix a 'create, remount, fail r/w open()' race. Some filesystems forego the use of normal vfs calls to create struct files. Make sure that these users elevate the mnt writer count because they will get __fput(), and we need to make sure they're balanced. Acked-by: Al Viro <viro@ZenIV.linux.org.uk> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Hansen <haveblue@us.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c75
1 files changed, 65 insertions, 10 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 83c843b3fea3..e179f71bfcb0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
return -EACCES;
flag &= ~O_TRUNC;
- } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE))
- return -EROFS;
+ }
error = vfs_permission(nd, acc_mode);
if (error)
@@ -1724,18 +1723,32 @@ static inline int open_to_namei_flags(int flag)
return flag;
}
+static int open_will_write_to_fs(int flag, struct inode *inode)
+{
+ /*
+ * We'll never write to the fs underlying
+ * a device file.
+ */
+ if (special_file(inode->i_mode))
+ return 0;
+ return (flag & O_TRUNC);
+}
+
/*
- * Note that the low bits of "flag" aren't the same as in the open
- * system call. See open_to_namei_flags().
+ * Note that the low bits of the passed in "open_flag"
+ * are not the same as in the local variable "flag". See
+ * open_to_namei_flags() for more details.
*/
struct file *do_filp_open(int dfd, const char *pathname,
int open_flag, int mode)
{
+ struct file *filp;
struct nameidata nd;
int acc_mode, error;
struct path path;
struct dentry *dir;
int count = 0;
+ int will_write;
int flag = open_to_namei_flags(open_flag);
acc_mode = ACC_MODE(flag);
@@ -1791,17 +1804,30 @@ do_last:
}
if (IS_ERR(nd.intent.open.file)) {
- mutex_unlock(&dir->d_inode->i_mutex);
error = PTR_ERR(nd.intent.open.file);
- goto exit_dput;
+ goto exit_mutex_unlock;
}
/* Negative dentry, just create the file */
if (!path.dentry->d_inode) {
- error = __open_namei_create(&nd, &path, flag, mode);
+ /*
+ * This write is needed to ensure that a
+ * ro->rw transition does not occur between
+ * the time when the file is created and when
+ * a permanent write count is taken through
+ * the 'struct file' in nameidata_to_filp().
+ */
+ error = mnt_want_write(nd.path.mnt);
if (error)
+ goto exit_mutex_unlock;
+ error = __open_namei_create(&nd, &path, flag, mode);
+ if (error) {
+ mnt_drop_write(nd.path.mnt);
goto exit;
- return nameidata_to_filp(&nd, open_flag);
+ }
+ filp = nameidata_to_filp(&nd, open_flag);
+ mnt_drop_write(nd.path.mnt);
+ return filp;
}
/*
@@ -1831,11 +1857,40 @@ do_last:
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
goto exit;
ok:
+ /*
+ * Consider:
+ * 1. may_open() truncates a file
+ * 2. a rw->ro mount transition occurs
+ * 3. nameidata_to_filp() fails due to
+ * the ro mount.
+ * That would be inconsistent, and should
+ * be avoided. Taking this mnt write here
+ * ensures that (2) can not occur.
+ */
+ will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
+ if (will_write) {
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto exit;
+ }
error = may_open(&nd, acc_mode, flag);
- if (error)
+ if (error) {
+ if (will_write)
+ mnt_drop_write(nd.path.mnt);
goto exit;
- return nameidata_to_filp(&nd, open_flag);
+ }
+ filp = nameidata_to_filp(&nd, open_flag);
+ /*
+ * It is now safe to drop the mnt write
+ * because the filp has had a write taken
+ * on its behalf.
+ */
+ if (will_write)
+ mnt_drop_write(nd.path.mnt);
+ return filp;
+exit_mutex_unlock:
+ mutex_unlock(&dir->d_inode->i_mutex);
exit_dput:
path_put_conditional(&path, &nd);
exit: