summaryrefslogtreecommitdiff
path: root/fs/userfaultfd.c
diff options
context:
space:
mode:
authorLokesh Gidra <lokeshgidra@google.com>2020-12-14 19:13:54 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-15 12:13:46 -0800
commitd0d4730ac2e404a5b0da9a87ef38c73e51cb1664 (patch)
tree7e84be5f2ccf09cc2073880a584aa5ea9bd913c0 /fs/userfaultfd.c
parent37cd0575b8510159992d279c530c05f872990b02 (diff)
downloadlwn-d0d4730ac2e404a5b0da9a87ef38c73e51cb1664.tar.gz
lwn-d0d4730ac2e404a5b0da9a87ef38c73e51cb1664.zip
userfaultfd: add user-mode only option to unprivileged_userfaultfd sysctl knob
With this change, when the knob is set to 0, it allows unprivileged users to call userfaultfd, like when it is set to 1, but with the restriction that page faults from only user-mode can be handled. In this mode, an unprivileged user (without SYS_CAP_PTRACE capability) must pass UFFD_USER_MODE_ONLY to userfaultd or the API will fail with EPERM. This enables administrators to reduce the likelihood that an attacker with access to userfaultfd can delay faulting kernel code to widen timing windows for other exploits. The default value of this knob is changed to 0. This is required for correct functioning of pipe mutex. However, this will fail postcopy live migration, which will be unnoticeable to the VM guests. To avoid this, set 'vm.userfault = 1' in /sys/sysctl.conf. The main reason this change is desirable as in the short term is that the Android userland will behave as with the sysctl set to zero. So without this commit, any Linux binary using userfaultfd to manage its memory would behave differently if run within the Android userland. For more details, refer to Andrea's reply [1]. [1] https://lore.kernel.org/lkml/20200904033438.GI9411@redhat.com/ Link: https://lkml.kernel.org/r/20201120030411.2690816-3-lokeshgidra@google.com Signed-off-by: Lokesh Gidra <lokeshgidra@google.com> Reviewed-by: Andrea Arcangeli <aarcange@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Peter Xu <peterx@redhat.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Stephen Smalley <stephen.smalley.work@gmail.com> Cc: Eric Biggers <ebiggers@kernel.org> Cc: Daniel Colascione <dancol@dancol.org> Cc: "Joel Fernandes (Google)" <joel@joelfernandes.org> Cc: Kalesh Singh <kaleshsingh@google.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Jeff Vander Stoep <jeffv@google.com> Cc: <calin@google.com> Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Shaohua Li <shli@fb.com> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Nitin Gupta <nigupta@nvidia.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Iurii Zaikin <yzaikin@google.com> Cc: Luis Chamberlain <mcgrof@kernel.org> Cc: Daniel Colascione <dancol@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/userfaultfd.c')
-rw-r--r--fs/userfaultfd.c10
1 files changed, 8 insertions, 2 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 605599fde015..894cc28142e7 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -28,7 +28,7 @@
#include <linux/security.h>
#include <linux/hugetlb.h>
-int sysctl_unprivileged_userfaultfd __read_mostly = 1;
+int sysctl_unprivileged_userfaultfd __read_mostly;
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
@@ -1966,8 +1966,14 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
struct userfaultfd_ctx *ctx;
int fd;
- if (!sysctl_unprivileged_userfaultfd && !capable(CAP_SYS_PTRACE))
+ if (!sysctl_unprivileged_userfaultfd &&
+ (flags & UFFD_USER_MODE_ONLY) == 0 &&
+ !capable(CAP_SYS_PTRACE)) {
+ printk_once(KERN_WARNING "uffd: Set unprivileged_userfaultfd "
+ "sysctl knob to 1 if kernel faults must be handled "
+ "without obtaining CAP_SYS_PTRACE capability\n");
return -EPERM;
+ }
BUG_ON(!current->mm);