From 312b90fbed0e07f61d2f060789440a83df6bba23 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Aug 2012 10:18:17 +0400 Subject: mqueue: lift mnt_want_write() outside ->i_mutex, clean up a bit the way it abuses ->d_fsdata still needs to be killed, but that's a separate story. Signed-off-by: Al Viro --- ipc/mqueue.c | 61 ++++++++++++++++++++++++++++-------------------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index f8e54f5b9080..9a08acc9e649 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -726,7 +726,6 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, struct mq_attr *attr) { const struct cred *cred = current_cred(); - struct file *result; int ret; if (attr) { @@ -748,21 +747,11 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, } mode &= ~current_umask(); - ret = mnt_want_write(path->mnt); - if (ret) - return ERR_PTR(ret); ret = vfs_create(dir, path->dentry, mode, true); path->dentry->d_fsdata = NULL; - if (!ret) - result = dentry_open(path, oflag, cred); - else - result = ERR_PTR(ret); - /* - * dentry_open() took a persistent mnt_want_write(), - * so we can now drop this one. - */ - mnt_drop_write(path->mnt); - return result; + if (ret) + return ERR_PTR(ret); + return dentry_open(path, oflag, cred); } /* Opens existing queue */ @@ -788,7 +777,9 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, struct mq_attr attr; int fd, error; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - struct dentry *root = ipc_ns->mq_mnt->mnt_root; + struct vfsmount *mnt = ipc_ns->mq_mnt; + struct dentry *root = mnt->mnt_root; + int ro; if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr))) return -EFAULT; @@ -802,6 +793,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, if (fd < 0) goto out_putname; + ro = mnt_want_write(mnt); /* we'll drop it in any case */ error = 0; mutex_lock(&root->d_inode->i_mutex); path.dentry = lookup_one_len(name, root, strlen(name)); @@ -809,7 +801,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, error = PTR_ERR(path.dentry); goto out_putfd; } - path.mnt = mntget(ipc_ns->mq_mnt); + path.mnt = mntget(mnt); if (oflag & O_CREAT) { if (path.dentry->d_inode) { /* entry already exists */ @@ -820,6 +812,10 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, } filp = do_open(&path, oflag); } else { + if (ro) { + error = ro; + goto out; + } filp = do_create(ipc_ns, root->d_inode, &path, oflag, mode, u_attr ? &attr : NULL); @@ -845,6 +841,7 @@ out_putfd: fd = error; } mutex_unlock(&root->d_inode->i_mutex); + mnt_drop_write(mnt); out_putname: putname(name); return fd; @@ -857,40 +854,38 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) struct dentry *dentry; struct inode *inode = NULL; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; + struct vfsmount *mnt = ipc_ns->mq_mnt; name = getname(u_name); if (IS_ERR(name)) return PTR_ERR(name); - mutex_lock_nested(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex, - I_MUTEX_PARENT); - dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name)); + err = mnt_want_write(mnt); + if (err) + goto out_name; + mutex_lock_nested(&mnt->mnt_root->d_inode->i_mutex, I_MUTEX_PARENT); + dentry = lookup_one_len(name, mnt->mnt_root, strlen(name)); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); goto out_unlock; } - if (!dentry->d_inode) { - err = -ENOENT; - goto out_err; - } - inode = dentry->d_inode; - if (inode) + if (!inode) { + err = -ENOENT; + } else { ihold(inode); - err = mnt_want_write(ipc_ns->mq_mnt); - if (err) - goto out_err; - err = vfs_unlink(dentry->d_parent->d_inode, dentry); - mnt_drop_write(ipc_ns->mq_mnt); -out_err: + err = vfs_unlink(dentry->d_parent->d_inode, dentry); + } dput(dentry); out_unlock: - mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); - putname(name); + mutex_unlock(&mnt->mnt_root->d_inode->i_mutex); if (inode) iput(inode); + mnt_drop_write(mnt); +out_name: + putname(name); return err; } -- cgit v1.2.3 From 934ad4c235f87dcb9206abdfa22922358999afab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Aug 2012 19:49:09 -0400 Subject: vfio: don't dereference after kfree... Acked-by: Alex Williamson Signed-off-by: Al Viro --- drivers/vfio/vfio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 9591e2b509d7..0b025d58de81 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -416,8 +416,9 @@ static void vfio_device_release(struct kref *kref) /* Device reference always implies a group reference */ static void vfio_device_put(struct vfio_device *device) { + struct vfio_group *group = device->group; kref_put(&device->kref, vfio_device_release); - vfio_group_put(device->group); + vfio_group_put(group); } static void vfio_device_get(struct vfio_device *device) -- cgit v1.2.3 From 8ad5db8a8ddbe3bd33078863a027011e28f1f4ee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Aug 2012 20:10:46 -0400 Subject: introduce kref_put_mutex() equivalent of mutex_lock(mutex); if (!kref_put(kref, release)) mutex_unlock(mutex); Signed-off-by: Al Viro --- include/linux/kref.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/linux/kref.h b/include/linux/kref.h index 9c07dcebded7..65af6887872f 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -18,6 +18,7 @@ #include #include #include +#include struct kref { atomic_t refcount; @@ -93,4 +94,21 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) { return kref_sub(kref, 1, release); } + +static inline int kref_put_mutex(struct kref *kref, + void (*release)(struct kref *kref), + struct mutex *lock) +{ + WARN_ON(release == NULL); + if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) { + mutex_lock(lock); + if (unlikely(!atomic_dec_and_test(&kref->refcount))) { + mutex_unlock(lock); + return 0; + } + release(kref); + return 1; + } + return 0; +} #endif /* _KREF_H_ */ -- cgit v1.2.3 From 6d2cd3ce815b302e885b44ca1bdbe3c7db321c7a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Aug 2012 21:27:32 -0400 Subject: vfio: get rid of open-coding kref_put_mutex Acked-by: Alex Williamson Signed-off-by: Al Viro --- drivers/vfio/vfio.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 0b025d58de81..92b85676e6be 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -264,6 +264,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) return group; } +/* called with vfio.group_lock held */ static void vfio_group_release(struct kref *kref) { struct vfio_group *group = container_of(kref, struct vfio_group, kref); @@ -287,13 +288,7 @@ static void vfio_group_release(struct kref *kref) static void vfio_group_put(struct vfio_group *group) { - mutex_lock(&vfio.group_lock); - /* - * Release needs to unlock to unregister the notifier, so only - * unlock if not released. - */ - if (!kref_put(&group->kref, vfio_group_release)) - mutex_unlock(&vfio.group_lock); + kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock); } /* Assume group_lock or group reference is held */ -- cgit v1.2.3 From 90b1253e4139776e8257914ae9e2292d0de2fecc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Aug 2012 21:29:06 -0400 Subject: vfio: get rid of vfio_device_put()/vfio_group_get_device* races we really need to make sure that dropping the last reference happens under the group->device_lock; otherwise a loop (under device_lock) might find vfio_device instance that is being freed right now, has already dropped the last reference and waits on device_lock to exclude the sucker from the list. Acked-by: Alex Williamson Signed-off-by: Al Viro --- drivers/vfio/vfio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 92b85676e6be..887ae43276bb 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -396,7 +396,6 @@ static void vfio_device_release(struct kref *kref) struct vfio_device, kref); struct vfio_group *group = device->group; - mutex_lock(&group->device_lock); list_del(&device->group_next); mutex_unlock(&group->device_lock); @@ -412,7 +411,7 @@ static void vfio_device_release(struct kref *kref) static void vfio_device_put(struct vfio_device *device) { struct vfio_group *group = device->group; - kref_put(&device->kref, vfio_device_release); + kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock); vfio_group_put(group); } -- cgit v1.2.3 From 31605debdf5459cc8aacabf192a911a803a81c26 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Aug 2012 21:32:56 -0400 Subject: vfio: grab vfio_device reference *before* exposing the sucker via fd_install() It's not critical (anymore) since another thread closing the file will block on ->device_lock before it gets to dropping the final reference, but it's definitely cleaner that way... Acked-by: Alex Williamson Signed-off-by: Al Viro --- drivers/vfio/vfio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 887ae43276bb..17830c9c7cc6 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1111,10 +1111,10 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) */ filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); - fd_install(ret, filep); - vfio_device_get(device); atomic_inc(&group->container_users); + + fd_install(ret, filep); break; } mutex_unlock(&group->device_lock); -- cgit v1.2.3 From 98022748f6c7bce85b9f123fd4d1a621219dd8d9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Aug 2012 22:42:36 -0400 Subject: eventpoll: use-after-possible-free in epoll_create1() As soon as we'd installed the file into descriptor table, it can get closed by another thread. Freeing ep in process... Signed-off-by: Al Viro --- fs/eventpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1c8b55670804..eedec84c1809 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1654,8 +1654,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) error = PTR_ERR(file); goto out_free_fd; } - fd_install(fd, file); ep->file = file; + fd_install(fd, file); return fd; out_free_fd: -- cgit v1.2.3 From 55852635a8e2803cbc22d0e143d727813f0fcdb5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Aug 2012 17:39:25 -0700 Subject: fs: fix fs/namei.c kernel-doc warnings Fix kernel-doc warnings in fs/namei.c: Warning(fs/namei.c:360): No description found for parameter 'inode' Warning(fs/namei.c:672): No description found for parameter 'nd' Signed-off-by: Randy Dunlap Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index db76b866a097..dd1ed1b8e98e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask) /** * sb_permission - Check superblock-level permissions * @sb: Superblock of inode to check permission on + * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. @@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1; /** * may_follow_link - Check symlink following for unsafe situations * @link: The path of the symlink + * @nd: nameidata pathwalk data * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is -- cgit v1.2.3 From 88ec2789d856056344161aa20420dd37e893b0fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Aug 2012 15:05:14 +0200 Subject: task_work: add a scheduling point in task_work_run() It seems commit 4a9d4b02 (switch fput to task_work_add) reintroduced the problem addressed in commit 944be0b2 (close_files(): add scheduling point) If a server process with a lot of files (say 2 million tcp sockets) is killed, we can spend a lot of time in task_work_run() and trigger a soft lockup. Signed-off-by: Eric Dumazet Signed-off-by: Al Viro --- kernel/task_work.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/task_work.c b/kernel/task_work.c index 91d4e1742a0c..d320d44903bd 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -75,6 +75,7 @@ void task_work_run(void) p = q->next; q->func(q); q = p; + cond_resched(); } } } -- cgit v1.2.3