summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-01-12 15:46:17 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-12 15:46:17 -0800
commit065019a38feab5f2659cbd44080d528f8dff0b00 (patch)
tree80e8fcd6fea433ba9ddd3a3ba9333446947be78f
parent4f31d774dd5239e563f22ffe1403292414e6f779 (diff)
parentb4d629a39e104a8326d5b281ce07c21240c130c9 (diff)
downloadlwn-065019a38feab5f2659cbd44080d528f8dff0b00.tar.gz
lwn-065019a38feab5f2659cbd44080d528f8dff0b00.zip
Merge tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux
Pull file locking updates from Jeff Layton: "File locking related changes for v4.5 (pile #1) Highlights: - new Kconfig option to allow disabling mandatory locking (which is racy anyway) - new tracepoints for setlk and close codepaths - fix for a long-standing bug in code that handles races between setting a POSIX lock and close()" * tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux: locks: rename __posix_lock_file to posix_lock_inode locks: prink more detail when there are leaked locks locks: pass inode pointer to locks_free_lock_context locks: sprinkle some tracepoints around the file locking code locks: don't check for race with close when setting OFD lock locks: fix unlock when fcntl_setlk races with a close fs: make locks.c explicitly non-modular locks: use list_first_entry_or_null() locks: Don't allow mounts in user namespaces to enable mandatory locking locks: Allow disabling mandatory locking at compile time
-rw-r--r--fs/Kconfig10
-rw-r--r--fs/inode.c2
-rw-r--r--fs/locks.c138
-rw-r--r--fs/namespace.c10
-rw-r--r--include/linux/fs.h78
-rw-r--r--include/trace/events/filelock.h77
6 files changed, 232 insertions, 83 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 6ce72d8d1ee1..2bb1ef86c411 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -73,6 +73,16 @@ config FILE_LOCKING
for filesystems like NFS and for the flock() system
call. Disabling this option saves about 11k.
+config MANDATORY_FILE_LOCKING
+ bool "Enable Mandatory file locking"
+ depends on FILE_LOCKING
+ default y
+ help
+ This option enables files appropriately marked files on appropriely
+ mounted filesystems to support mandatory locking.
+
+ To the best of my knowledge this is dead code that no one cares about.
+
source "fs/notify/Kconfig"
source "fs/quota/Kconfig"
diff --git a/fs/inode.c b/fs/inode.c
index 5bb85a064ce7..4230f66b7410 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -225,7 +225,7 @@ void __destroy_inode(struct inode *inode)
inode_detach_wb(inode);
security_inode_free(inode);
fsnotify_inode_delete(inode);
- locks_free_lock_context(inode->i_flctx);
+ locks_free_lock_context(inode);
if (!inode->i_nlink) {
WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
atomic_long_dec(&inode->i_sb->s_remove_count);
diff --git a/fs/locks.c b/fs/locks.c
index 0d2b3267e2a3..a91f4ab00a90 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -119,7 +119,6 @@
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/init.h>
-#include <linux/module.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
@@ -230,16 +229,44 @@ locks_get_lock_context(struct inode *inode, int type)
ctx = smp_load_acquire(&inode->i_flctx);
}
out:
+ trace_locks_get_lock_context(inode, type, ctx);
return ctx;
}
+static void
+locks_dump_ctx_list(struct list_head *list, char *list_type)
+{
+ struct file_lock *fl;
+
+ list_for_each_entry(fl, list, fl_list) {
+ pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
+ }
+}
+
+static void
+locks_check_ctx_lists(struct inode *inode)
+{
+ struct file_lock_context *ctx = inode->i_flctx;
+
+ if (unlikely(!list_empty(&ctx->flc_flock) ||
+ !list_empty(&ctx->flc_posix) ||
+ !list_empty(&ctx->flc_lease))) {
+ pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
+ MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
+ inode->i_ino);
+ locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
+ locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
+ locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
+ }
+}
+
void
-locks_free_lock_context(struct file_lock_context *ctx)
+locks_free_lock_context(struct inode *inode)
{
- if (ctx) {
- WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
- WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
- WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
+ struct file_lock_context *ctx = inode->i_flctx;
+
+ if (unlikely(ctx)) {
+ locks_check_ctx_lists(inode);
kmem_cache_free(flctx_cache, ctx);
}
}
@@ -934,7 +961,8 @@ out:
return error;
}
-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
+static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+ struct file_lock *conflock)
{
struct file_lock *fl, *tmp;
struct file_lock *new_fl = NULL;
@@ -1142,6 +1170,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
+ trace_posix_lock_inode(inode, request, error);
+
return error;
}
@@ -1162,7 +1192,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
- return __posix_lock_file(file_inode(filp), fl, conflock);
+ return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
@@ -1178,7 +1208,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
int error;
might_sleep ();
for (;;) {
- error = __posix_lock_file(inode, fl, NULL);
+ error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1191,6 +1221,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
return error;
}
+#ifdef CONFIG_MANDATORY_FILE_LOCKING
/**
* locks_mandatory_locked - Check for an active lock
* @file: the file to check
@@ -1260,7 +1291,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (filp) {
fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (!error)
break;
}
@@ -1268,7 +1299,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (sleep)
fl.fl_flags |= FL_SLEEP;
fl.fl_owner = current->files;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
@@ -1289,6 +1320,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
}
EXPORT_SYMBOL(locks_mandatory_area);
+#endif /* CONFIG_MANDATORY_FILE_LOCKING */
static void lease_clear_pending(struct file_lock *fl, int arg)
{
@@ -1503,12 +1535,10 @@ void lease_get_mtime(struct inode *inode, struct timespec *time)
ctx = smp_load_acquire(&inode->i_flctx);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
- if (!list_empty(&ctx->flc_lease)) {
- fl = list_first_entry(&ctx->flc_lease,
- struct file_lock, fl_list);
- if (fl->fl_type == F_WRLCK)
- has_lease = true;
- }
+ fl = list_first_entry_or_null(&ctx->flc_lease,
+ struct file_lock, fl_list);
+ if (fl && (fl->fl_type == F_WRLCK))
+ has_lease = true;
spin_unlock(&ctx->flc_lock);
}
@@ -2165,6 +2195,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
+ inode = file_inode(filp);
+
/*
* This might block, so we do it before checking the inode.
*/
@@ -2172,8 +2204,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (copy_from_user(&flock, l, sizeof(flock)))
goto out;
- inode = file_inode(filp);
-
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
*/
@@ -2182,7 +2212,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
}
-again:
error = flock_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
@@ -2221,23 +2250,29 @@ again:
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by
- * releasing the lock that was just acquired.
- */
- /*
- * we need that spin_lock here - it prevents reordering between
- * update of i_flctx->flc_posix and check for it done in close().
- * rcu_read_lock() wouldn't do.
+ * Attempt to detect a close/fcntl race and recover by releasing the
+ * lock that was just acquired. There is no need to do that when we're
+ * unlocking though, or for OFD locks.
*/
- spin_lock(&current->files->file_lock);
- f = fcheck(fd);
- spin_unlock(&current->files->file_lock);
- if (!error && f != filp && flock.l_type != F_UNLCK) {
- flock.l_type = F_UNLCK;
- goto again;
+ if (!error && file_lock->fl_type != F_UNLCK &&
+ !(file_lock->fl_flags & FL_OFDLCK)) {
+ /*
+ * We need that spin_lock here - it prevents reordering between
+ * update of i_flctx->flc_posix and check for it done in
+ * close(). rcu_read_lock() wouldn't do.
+ */
+ spin_lock(&current->files->file_lock);
+ f = fcheck(fd);
+ spin_unlock(&current->files->file_lock);
+ if (f != filp) {
+ file_lock->fl_type = F_UNLCK;
+ error = do_lock_file_wait(filp, cmd, file_lock);
+ WARN_ON_ONCE(error);
+ error = -EBADF;
+ }
}
-
out:
+ trace_fcntl_setlk(inode, file_lock, error);
locks_free_lock(file_lock);
return error;
}
@@ -2322,7 +2357,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
goto out;
}
-again:
error = flock64_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
@@ -2361,17 +2395,27 @@ again:
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by
- * releasing the lock that was just acquired.
+ * Attempt to detect a close/fcntl race and recover by releasing the
+ * lock that was just acquired. There is no need to do that when we're
+ * unlocking though, or for OFD locks.
*/
- spin_lock(&current->files->file_lock);
- f = fcheck(fd);
- spin_unlock(&current->files->file_lock);
- if (!error && f != filp && flock.l_type != F_UNLCK) {
- flock.l_type = F_UNLCK;
- goto again;
+ if (!error && file_lock->fl_type != F_UNLCK &&
+ !(file_lock->fl_flags & FL_OFDLCK)) {
+ /*
+ * We need that spin_lock here - it prevents reordering between
+ * update of i_flctx->flc_posix and check for it done in
+ * close(). rcu_read_lock() wouldn't do.
+ */
+ spin_lock(&current->files->file_lock);
+ f = fcheck(fd);
+ spin_unlock(&current->files->file_lock);
+ if (f != filp) {
+ file_lock->fl_type = F_UNLCK;
+ error = do_lock_file_wait(filp, cmd, file_lock);
+ WARN_ON_ONCE(error);
+ error = -EBADF;
+ }
}
-
out:
locks_free_lock(file_lock);
return error;
@@ -2385,6 +2429,7 @@ out:
*/
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
+ int error;
struct file_lock lock;
struct file_lock_context *ctx;
@@ -2407,10 +2452,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_ops = NULL;
lock.fl_lmops = NULL;
- vfs_lock_file(filp, F_SETLK, &lock, NULL);
+ error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
if (lock.fl_ops && lock.fl_ops->fl_release_private)
lock.fl_ops->fl_release_private(&lock);
+ trace_locks_remove_posix(file_inode(filp), &lock, error);
}
EXPORT_SYMBOL(locks_remove_posix);
@@ -2706,7 +2752,7 @@ static int __init proc_locks_init(void)
proc_create("locks", 0, NULL, &proc_locks_operations);
return 0;
}
-module_init(proc_locks_init);
+fs_initcall(proc_locks_init);
#endif
static int __init filelock_init(void)
diff --git a/fs/namespace.c b/fs/namespace.c
index 0570729c87fd..4d2c8f64b7bf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1584,6 +1584,14 @@ static inline bool may_mount(void)
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
+static inline bool may_mandlock(void)
+{
+#ifndef CONFIG_MANDATORY_FILE_LOCKING
+ return false;
+#endif
+ return capable(CAP_SYS_ADMIN);
+}
+
/*
* Now umount can handle mount points as well as block devices.
* This is important for filesystems which use unnamed block devices.
@@ -2677,6 +2685,8 @@ long do_mount(const char *dev_name, const char __user *dir_name,
type_page, flags, data_page);
if (!retval && !may_mount())
retval = -EPERM;
+ if (!retval && (flags & MS_MANDLOCK) && !may_mandlock())
+ retval = -EPERM;
if (retval)
goto dput_out;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ef3cd36689f6..566f8e078ffc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1043,7 +1043,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
extern int fcntl_getlease(struct file *filp);
/* fs/locks.c */
-void locks_free_lock_context(struct file_lock_context *ctx);
+void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);
extern void locks_init_lock(struct file_lock *);
extern struct file_lock * locks_alloc_lock(void);
@@ -1104,7 +1104,7 @@ static inline int fcntl_getlease(struct file *filp)
}
static inline void
-locks_free_lock_context(struct file_lock_context *ctx)
+locks_free_lock_context(struct inode *inode)
{
}
@@ -2030,7 +2030,7 @@ extern struct kobject *fs_kobj;
#define FLOCK_VERIFY_READ 1
#define FLOCK_VERIFY_WRITE 2
-#ifdef CONFIG_FILE_LOCKING
+#ifdef CONFIG_MANDATORY_FILE_LOCKING
extern int locks_mandatory_locked(struct file *);
extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
@@ -2075,6 +2075,45 @@ static inline int locks_verify_truncate(struct inode *inode,
return 0;
}
+#else /* !CONFIG_MANDATORY_FILE_LOCKING */
+
+static inline int locks_mandatory_locked(struct file *file)
+{
+ return 0;
+}
+
+static inline int locks_mandatory_area(int rw, struct inode *inode,
+ struct file *filp, loff_t offset,
+ size_t count)
+{
+ return 0;
+}
+
+static inline int __mandatory_lock(struct inode *inode)
+{
+ return 0;
+}
+
+static inline int mandatory_lock(struct inode *inode)
+{
+ return 0;
+}
+
+static inline int locks_verify_locked(struct file *file)
+{
+ return 0;
+}
+
+static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
+ size_t size)
+{
+ return 0;
+}
+
+#endif /* CONFIG_MANDATORY_FILE_LOCKING */
+
+
+#ifdef CONFIG_FILE_LOCKING
static inline int break_lease(struct inode *inode, unsigned int mode)
{
/*
@@ -2136,39 +2175,6 @@ static inline int break_layout(struct inode *inode, bool wait)
}
#else /* !CONFIG_FILE_LOCKING */
-static inline int locks_mandatory_locked(struct file *file)
-{
- return 0;
-}
-
-static inline int locks_mandatory_area(int rw, struct inode *inode,
- struct file *filp, loff_t offset,
- size_t count)
-{
- return 0;
-}
-
-static inline int __mandatory_lock(struct inode *inode)
-{
- return 0;
-}
-
-static inline int mandatory_lock(struct inode *inode)
-{
- return 0;
-}
-
-static inline int locks_verify_locked(struct file *file)
-{
- return 0;
-}
-
-static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
- size_t size)
-{
- return 0;
-}
-
static inline int break_lease(struct inode *inode, unsigned int mode)
{
return 0;
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index c72f2dc01d0b..63a7680347cb 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -34,6 +34,83 @@
{ F_WRLCK, "F_WRLCK" }, \
{ F_UNLCK, "F_UNLCK" })
+TRACE_EVENT(locks_get_lock_context,
+ TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx),
+
+ TP_ARGS(inode, type, ctx),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(unsigned char, type)
+ __field(struct file_lock_context *, ctx)
+ ),
+
+ TP_fast_assign(
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->i_ino = inode->i_ino;
+ __entry->type = type;
+ __entry->ctx = ctx;
+ ),
+
+ TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p",
+ MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+ __entry->i_ino, show_fl_type(__entry->type), __entry->ctx)
+);
+
+DECLARE_EVENT_CLASS(filelock_lock,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+
+ TP_ARGS(inode, fl, ret),
+
+ TP_STRUCT__entry(
+ __field(struct file_lock *, fl)
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(struct file_lock *, fl_next)
+ __field(fl_owner_t, fl_owner)
+ __field(unsigned int, fl_pid)
+ __field(unsigned int, fl_flags)
+ __field(unsigned char, fl_type)
+ __field(loff_t, fl_start)
+ __field(loff_t, fl_end)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->fl = fl ? fl : NULL;
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->i_ino = inode->i_ino;
+ __entry->fl_next = fl ? fl->fl_next : NULL;
+ __entry->fl_owner = fl ? fl->fl_owner : NULL;
+ __entry->fl_pid = fl ? fl->fl_pid : 0;
+ __entry->fl_flags = fl ? fl->fl_flags : 0;
+ __entry->fl_type = fl ? fl->fl_type : 0;
+ __entry->fl_start = fl ? fl->fl_start : 0;
+ __entry->fl_end = fl ? fl->fl_end : 0;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d",
+ __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+ __entry->i_ino, __entry->fl_next, __entry->fl_owner,
+ __entry->fl_pid, show_fl_flags(__entry->fl_flags),
+ show_fl_type(__entry->fl_type),
+ __entry->fl_start, __entry->fl_end, __entry->ret)
+);
+
+DEFINE_EVENT(filelock_lock, posix_lock_inode,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, fcntl_setlk,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, locks_remove_posix,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
DECLARE_EVENT_CLASS(filelock_lease,
TP_PROTO(struct inode *inode, struct file_lock *fl),