summaryrefslogtreecommitdiff
path: root/fs/namespace.c
diff options
context:
space:
mode:
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>2021-11-25 13:07:11 +0100
committerChristian Brauner <christian.brauner@ubuntu.com>2021-11-26 12:09:09 +0100
commit0f8821da48458982cf379eb4432f23958f2e3a6c (patch)
tree95089eb557bdf7a7e67a1396819052d074ae1ec5 /fs/namespace.c
parent136057256686de39cc3a07c2e39ef6bc43003ff6 (diff)
downloadlwn-0f8821da48458982cf379eb4432f23958f2e3a6c.tar.gz
lwn-0f8821da48458982cf379eb4432f23958f2e3a6c.zip
fs/namespace: Boost the mount_lock.lock owner instead of spinning on PREEMPT_RT.
The MNT_WRITE_HOLD flag is used to hold back any new writers while the mount point is about to be made read-only. __mnt_want_write() then loops with disabled preemption until this flag disappears. Callers of mnt_hold_writers() (which sets the flag) hold the spinlock_t of mount_lock (seqlock_t) which disables preemption on !PREEMPT_RT and ensures the task is not scheduled away so that the spinning side spins for a long time. On PREEMPT_RT the spinlock_t does not disable preemption and so it is possible that the task setting MNT_WRITE_HOLD is preempted by task with higher priority which then spins infinitely waiting for MNT_WRITE_HOLD to get removed. Acquire mount_lock::lock which is held by setter of MNT_WRITE_HOLD. This will PI-boost the owner and wait until the lock is dropped and which means that MNT_WRITE_HOLD is cleared again. Link: https://lore.kernel.org/r/20211025152218.opvcqfku2lhqvp4o@linutronix.de Link: https://lore.kernel.org/r/20211125120711.dgbsienyrsxfzpoi@linutronix.de Acked-by: Christian Brauner <christian.brauner@ubuntu.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c20
1 files changed, 18 insertions, 2 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 659a8f39c61a..3ab45b47b286 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -343,8 +343,24 @@ int __mnt_want_write(struct vfsmount *m)
* incremented count after it has set MNT_WRITE_HOLD.
*/
smp_mb();
- while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
- cpu_relax();
+ might_lock(&mount_lock.lock);
+ while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ cpu_relax();
+ } else {
+ /*
+ * This prevents priority inversion, if the task
+ * setting MNT_WRITE_HOLD got preempted on a remote
+ * CPU, and it prevents life lock if the task setting
+ * MNT_WRITE_HOLD has a lower priority and is bound to
+ * the same CPU as the task that is spinning here.
+ */
+ preempt_enable();
+ lock_mount_hash();
+ unlock_mount_hash();
+ preempt_disable();
+ }
+ }
/*
* After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
* be set to match its requirements. So we must not load that until