summaryrefslogtreecommitdiff
path: root/fs/userfaultfd.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-02 10:42:31 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 11:25:16 -0700
commitf9bf352224d7d4612b55b8d0cd0eaa981a3246cf (patch)
tree7a4399f3ced4d752182a5fdc8a7a0d6d271647b6 /fs/userfaultfd.c
parent3208167a865e862fff5045d7910387941ff7e114 (diff)
downloadlwn-f9bf352224d7d4612b55b8d0cd0eaa981a3246cf.tar.gz
lwn-f9bf352224d7d4612b55b8d0cd0eaa981a3246cf.zip
userfaultfd: simplify fault handling
Instead of waiting in a loop for the userfaultfd condition to become true, just wait once and return VM_FAULT_RETRY. We've already dropped the mmap lock, we know we can't really successfully handle the fault at this point and the caller will have to retry anyway. So there's no point in making the wait any more complicated than it needs to be - just schedule away. And once you don't have that complexity with explicit looping, you can also just lose all the 'userfaultfd_signal_pending()' complexity, because once we've set the correct process sleeping state, and don't loop, the act of scheduling itself will be checking if there are any pending signals before going to sleep. We can also drop the VM_FAULT_MAJOR games, since we'll be treating all retried faults as major soon anyway (series to regularize and share more of fault handling across architectures in a separate series by Peter Xu, and in the meantime we won't worry about the possible minor - I'll be here all week, try the veal - accounting difference). Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Peter Xu <peterx@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/userfaultfd.c')
-rw-r--r--fs/userfaultfd.c39
1 files changed, 1 insertions, 38 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 52de29000c7e..6e264dded46e 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -339,7 +339,6 @@ out:
return ret;
}
-/* Should pair with userfaultfd_signal_pending() */
static inline long userfaultfd_get_blocking_state(unsigned int flags)
{
if (flags & FAULT_FLAG_INTERRUPTIBLE)
@@ -351,18 +350,6 @@ static inline long userfaultfd_get_blocking_state(unsigned int flags)
return TASK_UNINTERRUPTIBLE;
}
-/* Should pair with userfaultfd_get_blocking_state() */
-static inline bool userfaultfd_signal_pending(unsigned int flags)
-{
- if (flags & FAULT_FLAG_INTERRUPTIBLE)
- return signal_pending(current);
-
- if (flags & FAULT_FLAG_KILLABLE)
- return fatal_signal_pending(current);
-
- return false;
-}
-
/*
* The locking rules involved in returning VM_FAULT_RETRY depending on
* FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and
@@ -516,33 +503,9 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
vmf->flags, reason);
mmap_read_unlock(mm);
- if (likely(must_wait && !READ_ONCE(ctx->released) &&
- !userfaultfd_signal_pending(vmf->flags))) {
+ if (likely(must_wait && !READ_ONCE(ctx->released))) {
wake_up_poll(&ctx->fd_wqh, EPOLLIN);
schedule();
- ret |= VM_FAULT_MAJOR;
-
- /*
- * False wakeups can orginate even from rwsem before
- * up_read() however userfaults will wait either for a
- * targeted wakeup on the specific uwq waitqueue from
- * wake_userfault() or for signals or for uffd
- * release.
- */
- while (!READ_ONCE(uwq.waken)) {
- /*
- * This needs the full smp_store_mb()
- * guarantee as the state write must be
- * visible to other CPUs before reading
- * uwq.waken from other CPUs.
- */
- set_current_state(blocking_state);
- if (READ_ONCE(uwq.waken) ||
- READ_ONCE(ctx->released) ||
- userfaultfd_signal_pending(vmf->flags))
- break;
- schedule();
- }
}
__set_current_state(TASK_RUNNING);