From 2865baf54077aa98fcdb478cefe6a42c417b9374 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 8 Apr 2024 20:04:58 -0700 Subject: x86: support user address masking instead of non-speculative conditional The Spectre-v1 mitigations made "access_ok()" much more expensive, since it has to serialize execution with the test for a valid user address. All the normal user copy routines avoid this by just masking the user address with a data-dependent mask instead, but the fast "unsafe_user_read()" kind of patterms that were supposed to be a fast case got slowed down. This introduces a notion of using src = masked_user_access_begin(src); to do the user address sanity using a data-dependent mask instead of the more traditional conditional if (user_read_access_begin(src, len)) { model. This model only works for dense accesses that start at 'src' and on architectures that have a guard region that is guaranteed to fault in between the user space and the kernel space area. With this, the user access doesn't need to be manually checked, because a bad address is guaranteed to fault (by some architecture masking trick: on x86-64 this involves just turning an invalid user address into all ones, since we don't map the top of address space). This only converts a couple of examples for now. Example x86-64 code generation for loading two words from user space: stac mov %rax,%rcx sar $0x3f,%rcx or %rax,%rcx mov (%rcx),%r13 mov 0x8(%rcx),%r14 clac where all the error handling and -EFAULT is now purely handled out of line by the exception path. Of course, if the micro-architecture does badly at 'clac' and 'stac', the above is still pitifully slow. But at least we did as well as we could. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess_64.h | 8 ++++++++ fs/select.c | 4 +++- include/linux/uaccess.h | 7 +++++++ lib/strncpy_from_user.c | 9 +++++++++ lib/strnlen_user.c | 9 +++++++++ 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 04789f45ab2b..a10149a96d9e 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -53,6 +53,14 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, */ #define valid_user_address(x) ((__force long)(x) >= 0) +/* + * Masking the user address is an alternative to a conditional + * user_access_begin that can avoid the fencing. This only works + * for dense accesses starting at the address. + */ +#define mask_user_address(x) ((typeof(x))((long)(x)|((long)(x)>>63))) +#define masked_user_access_begin(x) ({ __uaccess_begin(); mask_user_address(x); }) + /* * User pointers can have tag bits on x86-64. This scheme tolerates * arbitrary values in those bits rather then masking them off. diff --git a/fs/select.c b/fs/select.c index 9515c3fa1a03..bc185d111436 100644 --- a/fs/select.c +++ b/fs/select.c @@ -780,7 +780,9 @@ static inline int get_sigset_argpack(struct sigset_argpack *to, { // the path is hot enough for overhead of copy_from_user() to matter if (from) { - if (!user_read_access_begin(from, sizeof(*from))) + if (can_do_masked_user_access()) + from = masked_user_access_begin(from); + else if (!user_read_access_begin(from, sizeof(*from))) return -EFAULT; unsafe_get_user(to->p, &from->p, Efault); unsafe_get_user(to->size, &from->size, Efault); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 3064314f4832..f18371f6cf36 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -32,6 +32,13 @@ }) #endif +#ifdef masked_user_access_begin + #define can_do_masked_user_access() 1 +#else + #define can_do_masked_user_access() 0 + #define masked_user_access_begin(src) NULL +#endif + /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 6432b8c3e431..989a12a67872 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -120,6 +120,15 @@ long strncpy_from_user(char *dst, const char __user *src, long count) if (unlikely(count <= 0)) return 0; + if (can_do_masked_user_access()) { + long retval; + + src = masked_user_access_begin(src); + retval = do_strncpy_from_user(dst, src, count, count); + user_read_access_end(); + return retval; + } + max_addr = TASK_SIZE_MAX; src_addr = (unsigned long)untagged_addr(src); if (likely(src_addr < max_addr)) { diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index feeb935a2299..6e489f9e90f1 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -96,6 +96,15 @@ long strnlen_user(const char __user *str, long count) if (unlikely(count <= 0)) return 0; + if (can_do_masked_user_access()) { + long retval; + + str = masked_user_access_begin(str); + retval = do_strnlen_user(str, count, count); + user_read_access_end(); + return retval; + } + max_addr = TASK_SIZE_MAX; src_addr = (unsigned long)untagged_addr(str); if (likely(src_addr < max_addr)) { -- cgit v1.2.3 From 05f4216272c4b588c87551d3ba9bfb88b1bffaba Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 25 Jul 2024 13:45:10 -0700 Subject: x86: do the user address masking outside the user access area In any normal situation this really shouldn't matter, but in case the address passed in to masked_user_access_begin() were to be some complex expression, we should evaluate it fully before doing the 'stac' instruction. And even without that issue (which objdump would pick up on for any really bad case), just in general we should strive to minimize the amount of code we run with user accesses enabled. For example, even for the trivial pselect6() case, the code generation (obviously with a non-debug build) just diff with this ends up being - stac mov %rax,%rcx sar $0x3f,%rcx or %rax,%rcx + stac mov (%rcx),%r13 mov 0x8(%rcx),%r14 clac so the area delimeted by the 'stac / clac' pair is now literally just the two user access instructions, and the address generation has been moved out to before that code. This will be much more noticeable if we end up deciding that we can go back to just inlining "get_user()" using the new masked user access model. The get_user() pointers can often be more complex expressions involving kernel memory accesses or even function calls. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess_64.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index a10149a96d9e..92859c1ef59c 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -59,7 +59,9 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, * for dense accesses starting at the address. */ #define mask_user_address(x) ((typeof(x))((long)(x)|((long)(x)>>63))) -#define masked_user_access_begin(x) ({ __uaccess_begin(); mask_user_address(x); }) +#define masked_user_access_begin(x) ({ \ + __auto_type __masked_ptr = mask_user_address(x); \ + __uaccess_begin(); __masked_ptr; }) /* * User pointers can have tag bits on x86-64. This scheme tolerates -- cgit v1.2.3 From 533ab223aa1a036cfe5d6747fa3be92069f80988 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Sep 2024 10:55:42 -0700 Subject: x86: make the masked_user_access_begin() macro use its argument only once This doesn't actually matter for any of the current users, but before merging it mainline, make sure we don't have any surprising semantics. We don't actually want to use an inline function here, because we want to allow - but not require - const pointer arguments, and return them as such. But we already had a local auto-type variable, so let's just use it to avoid any possible double evaluation. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess_64.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 92859c1ef59c..afce8ee5d7b7 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -59,8 +59,9 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, * for dense accesses starting at the address. */ #define mask_user_address(x) ((typeof(x))((long)(x)|((long)(x)>>63))) -#define masked_user_access_begin(x) ({ \ - __auto_type __masked_ptr = mask_user_address(x); \ +#define masked_user_access_begin(x) ({ \ + __auto_type __masked_ptr = (x); \ + __masked_ptr = mask_user_address(__masked_ptr); \ __uaccess_begin(); __masked_ptr; }) /* -- cgit v1.2.3