From b394d468e7d75637e682a9be4a1181b27186c593 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jan 2018 14:22:38 -0800 Subject: usercopy: Enhance and rename report_usercopy() In preparation for refactoring the usercopy checks to pass offset to the hardened usercopy report, this renames report_usercopy() to the more accurate usercopy_abort(), marks it as noreturn because it is, adds a hopefully helpful comment for anyone investigating such reports, makes the function available to the slab allocators, and adds new "detail" and "offset" arguments. Signed-off-by: Kees Cook --- include/linux/uaccess.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 251e655d407f..38b6442dc569 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -273,4 +273,10 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0) #endif +#ifdef CONFIG_HARDENED_USERCOPY +void __noreturn usercopy_abort(const char *name, const char *detail, + bool to_user, unsigned long offset, + unsigned long len); +#endif + #endif /* __LINUX_UACCESS_H__ */ -- cgit v1.2.3 From f4e6e289cb9cf67885b6b18b9d56d2c3e1c714a1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jan 2018 14:48:22 -0800 Subject: usercopy: Include offset in hardened usercopy report This refactors the hardened usercopy code so that failure reporting can happen within the checking functions instead of at the top level. This simplifies the return value handling and allows more details and offsets to be included in the report. Having the offset can be much more helpful in understanding hardened usercopy bugs. Signed-off-by: Kees Cook --- include/linux/slab.h | 12 +++---- mm/slab.c | 8 ++--- mm/slub.c | 14 ++++---- mm/usercopy.c | 95 +++++++++++++++++++++++----------------------------- 4 files changed, 57 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 50697a1d6621..2dbeccdcb76b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -167,15 +167,11 @@ void kzfree(const void *); size_t ksize(const void *); #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR -const char *__check_heap_object(const void *ptr, unsigned long n, - struct page *page); +void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + bool to_user); #else -static inline const char *__check_heap_object(const void *ptr, - unsigned long n, - struct page *page) -{ - return NULL; -} +static inline void __check_heap_object(const void *ptr, unsigned long n, + struct page *page, bool to_user) { } #endif /* diff --git a/mm/slab.c b/mm/slab.c index 183e996dde5f..b2beb2cc15e2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4397,8 +4397,8 @@ module_init(slab_proc_init); * Returns NULL if check passes, otherwise const char * to name of cache * to indicate an error. */ -const char *__check_heap_object(const void *ptr, unsigned long n, - struct page *page) +void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + bool to_user) { struct kmem_cache *cachep; unsigned int objnr; @@ -4414,9 +4414,9 @@ const char *__check_heap_object(const void *ptr, unsigned long n, /* Allow address range falling entirely within object size. */ if (offset <= cachep->object_size && n <= cachep->object_size - offset) - return NULL; + return; - return cachep->name; + usercopy_abort("SLAB object", cachep->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ diff --git a/mm/slub.c b/mm/slub.c index cfd56e5a35fb..bcd22332300a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3818,8 +3818,8 @@ EXPORT_SYMBOL(__kmalloc_node); * Returns NULL if check passes, otherwise const char * to name of cache * to indicate an error. */ -const char *__check_heap_object(const void *ptr, unsigned long n, - struct page *page) +void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + bool to_user) { struct kmem_cache *s; unsigned long offset; @@ -3831,7 +3831,8 @@ const char *__check_heap_object(const void *ptr, unsigned long n, /* Reject impossible pointers. */ if (ptr < page_address(page)) - return s->name; + usercopy_abort("SLUB object not in SLUB page?!", NULL, + to_user, 0, n); /* Find offset within object. */ offset = (ptr - page_address(page)) % s->size; @@ -3839,15 +3840,16 @@ const char *__check_heap_object(const void *ptr, unsigned long n, /* Adjust for redzone and reject if within the redzone. */ if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) { if (offset < s->red_left_pad) - return s->name; + usercopy_abort("SLUB object in left red zone", + s->name, to_user, offset, n); offset -= s->red_left_pad; } /* Allow address range falling entirely within object size. */ if (offset <= object_size && n <= object_size - offset) - return NULL; + return; - return s->name; + usercopy_abort("SLUB object", s->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ diff --git a/mm/usercopy.c b/mm/usercopy.c index 8006baa4caac..a562dd094ace 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -86,10 +86,10 @@ void __noreturn usercopy_abort(const char *name, const char *detail, } /* Returns true if any portion of [ptr,ptr+n) over laps with [low,high). */ -static bool overlaps(const void *ptr, unsigned long n, unsigned long low, - unsigned long high) +static bool overlaps(const unsigned long ptr, unsigned long n, + unsigned long low, unsigned long high) { - unsigned long check_low = (uintptr_t)ptr; + const unsigned long check_low = ptr; unsigned long check_high = check_low + n; /* Does not overlap if entirely above or entirely below. */ @@ -100,15 +100,15 @@ static bool overlaps(const void *ptr, unsigned long n, unsigned long low, } /* Is this address range in the kernel text area? */ -static inline const char *check_kernel_text_object(const void *ptr, - unsigned long n) +static inline void check_kernel_text_object(const unsigned long ptr, + unsigned long n, bool to_user) { unsigned long textlow = (unsigned long)_stext; unsigned long texthigh = (unsigned long)_etext; unsigned long textlow_linear, texthigh_linear; if (overlaps(ptr, n, textlow, texthigh)) - return ""; + usercopy_abort("kernel text", NULL, to_user, ptr - textlow, n); /* * Some architectures have virtual memory mappings with a secondary @@ -121,32 +121,30 @@ static inline const char *check_kernel_text_object(const void *ptr, textlow_linear = (unsigned long)lm_alias(textlow); /* No different mapping: we're done. */ if (textlow_linear == textlow) - return NULL; + return; /* Check the secondary mapping... */ texthigh_linear = (unsigned long)lm_alias(texthigh); if (overlaps(ptr, n, textlow_linear, texthigh_linear)) - return ""; - - return NULL; + usercopy_abort("linear kernel text", NULL, to_user, + ptr - textlow_linear, n); } -static inline const char *check_bogus_address(const void *ptr, unsigned long n) +static inline void check_bogus_address(const unsigned long ptr, unsigned long n, + bool to_user) { /* Reject if object wraps past end of memory. */ - if ((unsigned long)ptr + n < (unsigned long)ptr) - return ""; + if (ptr + n < ptr) + usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n); /* Reject if NULL or ZERO-allocation. */ if (ZERO_OR_NULL_PTR(ptr)) - return ""; - - return NULL; + usercopy_abort("null address", NULL, to_user, ptr, n); } /* Checks for allocs that are marked in some way as spanning multiple pages. */ -static inline const char *check_page_span(const void *ptr, unsigned long n, - struct page *page, bool to_user) +static inline void check_page_span(const void *ptr, unsigned long n, + struct page *page, bool to_user) { #ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN const void *end = ptr + n - 1; @@ -163,28 +161,28 @@ static inline const char *check_page_span(const void *ptr, unsigned long n, if (ptr >= (const void *)__start_rodata && end <= (const void *)__end_rodata) { if (!to_user) - return ""; - return NULL; + usercopy_abort("rodata", NULL, to_user, 0, n); + return; } /* Allow kernel data region (if not marked as Reserved). */ if (ptr >= (const void *)_sdata && end <= (const void *)_edata) - return NULL; + return; /* Allow kernel bss region (if not marked as Reserved). */ if (ptr >= (const void *)__bss_start && end <= (const void *)__bss_stop) - return NULL; + return; /* Is the object wholly within one base page? */ if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) == ((unsigned long)end & (unsigned long)PAGE_MASK))) - return NULL; + return; /* Allow if fully inside the same compound (__GFP_COMP) page. */ endpage = virt_to_head_page(end); if (likely(endpage == page)) - return NULL; + return; /* * Reject if range is entirely either Reserved (i.e. special or @@ -194,36 +192,37 @@ static inline const char *check_page_span(const void *ptr, unsigned long n, is_reserved = PageReserved(page); is_cma = is_migrate_cma_page(page); if (!is_reserved && !is_cma) - return ""; + usercopy_abort("spans multiple pages", NULL, to_user, 0, n); for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) { page = virt_to_head_page(ptr); if (is_reserved && !PageReserved(page)) - return ""; + usercopy_abort("spans Reserved and non-Reserved pages", + NULL, to_user, 0, n); if (is_cma && !is_migrate_cma_page(page)) - return ""; + usercopy_abort("spans CMA and non-CMA pages", NULL, + to_user, 0, n); } #endif - - return NULL; } -static inline const char *check_heap_object(const void *ptr, unsigned long n, - bool to_user) +static inline void check_heap_object(const void *ptr, unsigned long n, + bool to_user) { struct page *page; if (!virt_addr_valid(ptr)) - return NULL; + return; page = virt_to_head_page(ptr); - /* Check slab allocator for flags and size. */ - if (PageSlab(page)) - return __check_heap_object(ptr, n, page); - - /* Verify object does not incorrectly span multiple pages. */ - return check_page_span(ptr, n, page, to_user); + if (PageSlab(page)) { + /* Check slab allocator for flags and size. */ + __check_heap_object(ptr, n, page, to_user); + } else { + /* Verify object does not incorrectly span multiple pages. */ + check_page_span(ptr, n, page, to_user); + } } /* @@ -234,21 +233,15 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, */ void __check_object_size(const void *ptr, unsigned long n, bool to_user) { - const char *err; - /* Skip all tests if size is zero. */ if (!n) return; /* Check for invalid addresses. */ - err = check_bogus_address(ptr, n); - if (err) - goto report; + check_bogus_address((const unsigned long)ptr, n, to_user); /* Check for bad heap object. */ - err = check_heap_object(ptr, n, to_user); - if (err) - goto report; + check_heap_object(ptr, n, to_user); /* Check for bad stack object. */ switch (check_stack_object(ptr, n)) { @@ -264,16 +257,10 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user) */ return; default: - err = ""; - goto report; + usercopy_abort("process stack", NULL, to_user, 0, n); } /* Check for object in kernel to avoid text exposure. */ - err = check_kernel_text_object(ptr, n); - if (!err) - return; - -report: - usercopy_abort(err, NULL, to_user, 0, n); + check_kernel_text_object((const unsigned long)ptr, n, to_user); } EXPORT_SYMBOL(__check_object_size); -- cgit v1.2.3 From 4229a470175be14e1d2648713be8a5e8e8fbea02 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jan 2018 12:53:20 -0800 Subject: stddef.h: Introduce sizeof_field() The size of fields within a structure is needed in a few places in the kernel already, and will be needed for the usercopy whitelisting when declaring whitelist regions within structures. This creates a dedicated macro and redefines offsetofend() to use it. Existing usage, ignoring the 1200+ lustre assert uses: $ git grep -E 'sizeof\(\(\((struct )?[a-zA-Z_]+ \*\)0\)->' | \ grep -v staging/lustre | wc -l 65 Signed-off-by: Kees Cook --- include/linux/stddef.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 2181719fd907..998a4ba28eba 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -19,6 +19,14 @@ enum { #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) #endif +/** + * sizeof_field(TYPE, MEMBER) + * + * @TYPE: The structure containing the field of interest + * @MEMBER: The field to return the size of + */ +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) + /** * offsetofend(TYPE, MEMBER) * @@ -26,6 +34,6 @@ enum { * @MEMBER: The member within the structure to get the end offset of */ #define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) #endif -- cgit v1.2.3 From 8eb8284b412906181357c2b0110d879d5af95e52 Mon Sep 17 00:00:00 2001 From: David Windsor Date: Sat, 10 Jun 2017 22:50:28 -0400 Subject: usercopy: Prepare for usercopy whitelisting This patch prepares the slab allocator to handle caches having annotations (useroffset and usersize) defining usercopy regions. This patch is modified from Brad Spengler/PaX Team's PAX_USERCOPY whitelisting code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. Currently, hardened usercopy performs dynamic bounds checking on slab cache objects. This is good, but still leaves a lot of kernel memory available to be copied to/from userspace in the face of bugs. To further restrict what memory is available for copying, this creates a way to whitelist specific areas of a given slab cache object for copying to/from userspace, allowing much finer granularity of access control. Slab caches that are never exposed to userspace can declare no whitelist for their objects, thereby keeping them unavailable to userspace via dynamic copy operations. (Note, an implicit form of whitelisting is the use of constant sizes in usercopy operations and get_user()/put_user(); these bypass hardened usercopy checks since these sizes cannot change at runtime.) To support this whitelist annotation, usercopy region offset and size members are added to struct kmem_cache. The slab allocator receives a new function, kmem_cache_create_usercopy(), that creates a new cache with a usercopy region defined, suitable for declaring spans of fields within the objects that get copied to/from userspace. In this patch, the default kmem_cache_create() marks the entire allocation as whitelisted, leaving it semantically unchanged. Once all fine-grained whitelists have been added (in subsequent patches), this will be changed to a usersize of 0, making caches created with kmem_cache_create() not copyable to/from userspace. After the entire usercopy whitelist series is applied, less than 15% of the slab cache memory remains exposed to potential usercopy bugs after a fresh boot: Total Slab Memory: 48074720 Usercopyable Memory: 6367532 13.2% task_struct 0.2% 4480/1630720 RAW 0.3% 300/96000 RAWv6 2.1% 1408/64768 ext4_inode_cache 3.0% 269760/8740224 dentry 11.1% 585984/5273856 mm_struct 29.1% 54912/188448 kmalloc-8 100.0% 24576/24576 kmalloc-16 100.0% 28672/28672 kmalloc-32 100.0% 81920/81920 kmalloc-192 100.0% 96768/96768 kmalloc-128 100.0% 143360/143360 names_cache 100.0% 163840/163840 kmalloc-64 100.0% 167936/167936 kmalloc-256 100.0% 339968/339968 kmalloc-512 100.0% 350720/350720 kmalloc-96 100.0% 455616/455616 kmalloc-8192 100.0% 655360/655360 kmalloc-1024 100.0% 812032/812032 kmalloc-4096 100.0% 819200/819200 kmalloc-2048 100.0% 1310720/1310720 After some kernel build workloads, the percentage (mainly driven by dentry and inode caches expanding) drops under 10%: Total Slab Memory: 95516184 Usercopyable Memory: 8497452 8.8% task_struct 0.2% 4000/1456000 RAW 0.3% 300/96000 RAWv6 2.1% 1408/64768 ext4_inode_cache 3.0% 1217280/39439872 dentry 11.1% 1623200/14608800 mm_struct 29.1% 73216/251264 kmalloc-8 100.0% 24576/24576 kmalloc-16 100.0% 28672/28672 kmalloc-32 100.0% 94208/94208 kmalloc-192 100.0% 96768/96768 kmalloc-128 100.0% 143360/143360 names_cache 100.0% 163840/163840 kmalloc-64 100.0% 245760/245760 kmalloc-256 100.0% 339968/339968 kmalloc-512 100.0% 350720/350720 kmalloc-96 100.0% 563520/563520 kmalloc-8192 100.0% 655360/655360 kmalloc-1024 100.0% 794624/794624 kmalloc-4096 100.0% 819200/819200 kmalloc-2048 100.0% 1257472/1257472 Signed-off-by: David Windsor [kees: adjust commit log, split out a few extra kmalloc hunks] [kees: add field names to function declarations] [kees: convert BUGs to WARNs and fail closed] [kees: add attack surface reduction analysis to commit log] Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrew Morton Cc: linux-mm@kvack.org Cc: linux-xfs@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Christoph Lameter --- include/linux/slab.h | 27 +++++++++++++++++++++------ include/linux/slab_def.h | 3 +++ include/linux/slub_def.h | 3 +++ mm/slab.c | 2 +- mm/slab.h | 5 ++++- mm/slab_common.c | 46 ++++++++++++++++++++++++++++++++++++++-------- mm/slub.c | 11 +++++++++-- 7 files changed, 79 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 2dbeccdcb76b..8bf14d9762ec 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -135,9 +135,13 @@ struct mem_cgroup; void __init kmem_cache_init(void); bool slab_is_available(void); -struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, - slab_flags_t, - void (*)(void *)); +struct kmem_cache *kmem_cache_create(const char *name, size_t size, + size_t align, slab_flags_t flags, + void (*ctor)(void *)); +struct kmem_cache *kmem_cache_create_usercopy(const char *name, + size_t size, size_t align, slab_flags_t flags, + size_t useroffset, size_t usersize, + void (*ctor)(void *)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); @@ -153,9 +157,20 @@ void memcg_destroy_kmem_caches(struct mem_cgroup *); * f.e. add ____cacheline_aligned_in_smp to the struct declaration * then the objects will be properly aligned in SMP configurations. */ -#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\ - sizeof(struct __struct), __alignof__(struct __struct),\ - (__flags), NULL) +#define KMEM_CACHE(__struct, __flags) \ + kmem_cache_create(#__struct, sizeof(struct __struct), \ + __alignof__(struct __struct), (__flags), NULL) + +/* + * To whitelist a single field for copying to/from usercopy, use this + * macro instead for KMEM_CACHE() above. + */ +#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ + kmem_cache_create_usercopy(#__struct, \ + sizeof(struct __struct), \ + __alignof__(struct __struct), (__flags), \ + offsetof(struct __struct, __field), \ + sizeof_field(struct __struct, __field), NULL) /* * Common kmalloc functions provided by all allocators diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 072e46e9e1d5..7385547c04b1 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -85,6 +85,9 @@ struct kmem_cache { unsigned int *random_seq; #endif + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ + struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 0adae162dc8f..8ad99c47b19c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -135,6 +135,9 @@ struct kmem_cache { struct kasan_cache kasan_info; #endif + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ + struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/mm/slab.c b/mm/slab.c index b2beb2cc15e2..47acfe54e1ae 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1281,7 +1281,7 @@ void __init kmem_cache_init(void) create_boot_cache(kmem_cache, "kmem_cache", offsetof(struct kmem_cache, node) + nr_node_ids * sizeof(struct kmem_cache_node *), - SLAB_HWCACHE_ALIGN); + SLAB_HWCACHE_ALIGN, 0, 0); list_add(&kmem_cache->list, &slab_caches); slab_state = PARTIAL; diff --git a/mm/slab.h b/mm/slab.h index ad657ffa44e5..8f3030788e01 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -22,6 +22,8 @@ struct kmem_cache { unsigned int size; /* The aligned/padded/added on size */ unsigned int align; /* Alignment as calculated */ slab_flags_t flags; /* Active flags on the slab */ + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ const char *name; /* Slab name for sysfs */ int refcount; /* Use counter */ void (*ctor)(void *); /* Called on object slot creation */ @@ -97,7 +99,8 @@ int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags); extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size, slab_flags_t flags); extern void create_boot_cache(struct kmem_cache *, const char *name, - size_t size, slab_flags_t flags); + size_t size, slab_flags_t flags, size_t useroffset, + size_t usersize); int slab_unmergeable(struct kmem_cache *s); struct kmem_cache *find_mergeable(size_t size, size_t align, diff --git a/mm/slab_common.c b/mm/slab_common.c index c8cb36774ba1..fc3e66bdce75 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -281,6 +281,9 @@ int slab_unmergeable(struct kmem_cache *s) if (s->ctor) return 1; + if (s->usersize) + return 1; + /* * We may have set a slab to be unmergeable during bootstrap. */ @@ -366,12 +369,16 @@ unsigned long calculate_alignment(slab_flags_t flags, static struct kmem_cache *create_cache(const char *name, size_t object_size, size_t size, size_t align, - slab_flags_t flags, void (*ctor)(void *), + slab_flags_t flags, size_t useroffset, + size_t usersize, void (*ctor)(void *), struct mem_cgroup *memcg, struct kmem_cache *root_cache) { struct kmem_cache *s; int err; + if (WARN_ON(useroffset + usersize > object_size)) + useroffset = usersize = 0; + err = -ENOMEM; s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); if (!s) @@ -382,6 +389,8 @@ static struct kmem_cache *create_cache(const char *name, s->size = size; s->align = align; s->ctor = ctor; + s->useroffset = useroffset; + s->usersize = usersize; err = init_memcg_params(s, memcg, root_cache); if (err) @@ -406,11 +415,13 @@ out_free_cache: } /* - * kmem_cache_create - Create a cache. + * kmem_cache_create_usercopy - Create a cache. * @name: A string which is used in /proc/slabinfo to identify this cache. * @size: The size of objects to be created in this cache. * @align: The required alignment for the objects. * @flags: SLAB flags + * @useroffset: Usercopy region offset + * @usersize: Usercopy region size * @ctor: A constructor for the objects. * * Returns a ptr to the cache on success, NULL on failure. @@ -430,8 +441,9 @@ out_free_cache: * as davem. */ struct kmem_cache * -kmem_cache_create(const char *name, size_t size, size_t align, - slab_flags_t flags, void (*ctor)(void *)) +kmem_cache_create_usercopy(const char *name, size_t size, size_t align, + slab_flags_t flags, size_t useroffset, size_t usersize, + void (*ctor)(void *)) { struct kmem_cache *s = NULL; const char *cache_name; @@ -462,7 +474,13 @@ kmem_cache_create(const char *name, size_t size, size_t align, */ flags &= CACHE_CREATE_MASK; - s = __kmem_cache_alias(name, size, align, flags, ctor); + /* Fail closed on bad usersize of useroffset values. */ + if (WARN_ON(!usersize && useroffset) || + WARN_ON(size < usersize || size - usersize < useroffset)) + usersize = useroffset = 0; + + if (!usersize) + s = __kmem_cache_alias(name, size, align, flags, ctor); if (s) goto out_unlock; @@ -474,7 +492,7 @@ kmem_cache_create(const char *name, size_t size, size_t align, s = create_cache(cache_name, size, size, calculate_alignment(flags, align, size), - flags, ctor, NULL, NULL); + flags, useroffset, usersize, ctor, NULL, NULL); if (IS_ERR(s)) { err = PTR_ERR(s); kfree_const(cache_name); @@ -500,6 +518,15 @@ out_unlock: } return s; } +EXPORT_SYMBOL(kmem_cache_create_usercopy); + +struct kmem_cache * +kmem_cache_create(const char *name, size_t size, size_t align, + slab_flags_t flags, void (*ctor)(void *)) +{ + return kmem_cache_create_usercopy(name, size, align, flags, 0, size, + ctor); +} EXPORT_SYMBOL(kmem_cache_create); static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) @@ -612,6 +639,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, s = create_cache(cache_name, root_cache->object_size, root_cache->size, root_cache->align, root_cache->flags & CACHE_CREATE_MASK, + root_cache->useroffset, root_cache->usersize, root_cache->ctor, memcg, root_cache); /* * If we could not create a memcg cache, do not complain, because @@ -879,13 +907,15 @@ bool slab_is_available(void) #ifndef CONFIG_SLOB /* Create a cache during boot when no slab services are available yet */ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, - slab_flags_t flags) + slab_flags_t flags, size_t useroffset, size_t usersize) { int err; s->name = name; s->size = s->object_size = size; s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); + s->useroffset = useroffset; + s->usersize = usersize; slab_init_memcg_params(s); @@ -906,7 +936,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, if (!s) panic("Out of memory when creating slab %s\n", name); - create_boot_cache(s, name, size, flags); + create_boot_cache(s, name, size, flags, 0, size); list_add(&s->list, &slab_caches); memcg_link_cache(s); s->refcount = 1; diff --git a/mm/slub.c b/mm/slub.c index bcd22332300a..f40a57164dd6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4183,7 +4183,7 @@ void __init kmem_cache_init(void) kmem_cache = &boot_kmem_cache; create_boot_cache(kmem_cache_node, "kmem_cache_node", - sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN); + sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0); register_hotmemory_notifier(&slab_memory_callback_nb); @@ -4193,7 +4193,7 @@ void __init kmem_cache_init(void) create_boot_cache(kmem_cache, "kmem_cache", offsetof(struct kmem_cache, node) + nr_node_ids * sizeof(struct kmem_cache_node *), - SLAB_HWCACHE_ALIGN); + SLAB_HWCACHE_ALIGN, 0, 0); kmem_cache = bootstrap(&boot_kmem_cache); @@ -5063,6 +5063,12 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) SLAB_ATTR_RO(cache_dma); #endif +static ssize_t usersize_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%zu\n", s->usersize); +} +SLAB_ATTR_RO(usersize); + static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU)); @@ -5437,6 +5443,7 @@ static struct attribute *slab_attrs[] = { #ifdef CONFIG_FAILSLAB &failslab_attr.attr, #endif + &usersize_attr.attr, NULL }; -- cgit v1.2.3 From afcc90f8621e289cd082ba97900e76f01afe778c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jan 2018 15:17:01 -0800 Subject: usercopy: WARN() on slab cache usercopy region violations This patch adds checking of usercopy cache whitelisting, and is modified from Brad Spengler/PaX Team's PAX_USERCOPY whitelisting code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. The SLAB and SLUB allocators are modified to WARN() on all copy operations in which the kernel heap memory being modified falls outside of the cache's defined usercopy region. Based on an earlier patch from David Windsor. Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrew Morton Cc: Laura Abbott Cc: Ingo Molnar Cc: Mark Rutland Cc: linux-mm@kvack.org Cc: linux-xfs@vger.kernel.org Signed-off-by: Kees Cook --- include/linux/uaccess.h | 2 ++ mm/slab.c | 22 +++++++++++++++++++--- mm/slub.c | 23 +++++++++++++++++++---- mm/usercopy.c | 21 ++++++++++++++++++--- 4 files changed, 58 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 38b6442dc569..efe79c1cdd47 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -274,6 +274,8 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); #endif #ifdef CONFIG_HARDENED_USERCOPY +void usercopy_warn(const char *name, const char *detail, bool to_user, + unsigned long offset, unsigned long len); void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); diff --git a/mm/slab.c b/mm/slab.c index 47acfe54e1ae..1c02f6e94235 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4392,7 +4392,9 @@ module_init(slab_proc_init); #ifdef CONFIG_HARDENED_USERCOPY /* - * Rejects objects that are incorrectly sized. + * Rejects incorrectly sized objects and objects that are to be copied + * to/from userspace but do not fall entirely within the containing slab + * cache's usercopy region. * * Returns NULL if check passes, otherwise const char * to name of cache * to indicate an error. @@ -4412,10 +4414,24 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, /* Find offset within object. */ offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); - /* Allow address range falling entirely within object size. */ - if (offset <= cachep->object_size && n <= cachep->object_size - offset) + /* Allow address range falling entirely within usercopy region. */ + if (offset >= cachep->useroffset && + offset - cachep->useroffset <= cachep->usersize && + n <= cachep->useroffset - offset + cachep->usersize) return; + /* + * If the copy is still within the allocated object, produce + * a warning instead of rejecting the copy. This is intended + * to be a temporary method to find any missing usercopy + * whitelists. + */ + if (offset <= cachep->object_size && + n <= cachep->object_size - offset) { + usercopy_warn("SLAB object", cachep->name, to_user, offset, n); + return; + } + usercopy_abort("SLAB object", cachep->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ diff --git a/mm/slub.c b/mm/slub.c index f40a57164dd6..6d9b1e7d3226 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3813,7 +3813,9 @@ EXPORT_SYMBOL(__kmalloc_node); #ifdef CONFIG_HARDENED_USERCOPY /* - * Rejects objects that are incorrectly sized. + * Rejects incorrectly sized objects and objects that are to be copied + * to/from userspace but do not fall entirely within the containing slab + * cache's usercopy region. * * Returns NULL if check passes, otherwise const char * to name of cache * to indicate an error. @@ -3827,7 +3829,6 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, /* Find object and usable object size. */ s = page->slab_cache; - object_size = slab_ksize(s); /* Reject impossible pointers. */ if (ptr < page_address(page)) @@ -3845,10 +3846,24 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, offset -= s->red_left_pad; } - /* Allow address range falling entirely within object size. */ - if (offset <= object_size && n <= object_size - offset) + /* Allow address range falling entirely within usercopy region. */ + if (offset >= s->useroffset && + offset - s->useroffset <= s->usersize && + n <= s->useroffset - offset + s->usersize) return; + /* + * If the copy is still within the allocated object, produce + * a warning instead of rejecting the copy. This is intended + * to be a temporary method to find any missing usercopy + * whitelists. + */ + object_size = slab_ksize(s); + if (offset <= object_size && n <= object_size - offset) { + usercopy_warn("SLUB object", s->name, to_user, offset, n); + return; + } + usercopy_abort("SLUB object", s->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ diff --git a/mm/usercopy.c b/mm/usercopy.c index a562dd094ace..e9e9325f7638 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -59,13 +59,28 @@ static noinline int check_stack_object(const void *obj, unsigned long len) } /* - * If this function is reached, then CONFIG_HARDENED_USERCOPY has found an - * unexpected state during a copy_from_user() or copy_to_user() call. + * If these functions are reached, then CONFIG_HARDENED_USERCOPY has found + * an unexpected state during a copy_from_user() or copy_to_user() call. * There are several checks being performed on the buffer by the * __check_object_size() function. Normal stack buffer usage should never * trip the checks, and kernel text addressing will always trip the check. - * For cache objects, copies must be within the object size. + * For cache objects, it is checking that only the whitelisted range of + * bytes for a given cache is being accessed (via the cache's usersize and + * useroffset fields). To adjust a cache whitelist, use the usercopy-aware + * kmem_cache_create_usercopy() function to create the cache (and + * carefully audit the whitelist range). */ +void usercopy_warn(const char *name, const char *detail, bool to_user, + unsigned long offset, unsigned long len) +{ + WARN_ONCE(1, "Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", + to_user ? "exposure" : "overwrite", + to_user ? "from" : "to", + name ? : "unknown?!", + detail ? " '" : "", detail ? : "", detail ? "'" : "", + offset, len); +} + void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len) -- cgit v1.2.3 From 2d891fbc3bb681ba1f826e7ee70dbe38ca7465fe Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 30 Nov 2017 13:04:32 -0800 Subject: usercopy: Allow strict enforcement of whitelists This introduces CONFIG_HARDENED_USERCOPY_FALLBACK to control the behavior of hardened usercopy whitelist violations. By default, whitelist violations will continue to WARN() so that any bad or missing usercopy whitelists can be discovered without being too disruptive. If this config is disabled at build time or a system is booted with "slab_common.usercopy_fallback=0", usercopy whitelists will BUG() instead of WARN(). This is useful for admins that want to use usercopy whitelists immediately. Suggested-by: Matthew Garrett Signed-off-by: Kees Cook --- include/linux/slab.h | 2 ++ mm/slab.c | 3 ++- mm/slab_common.c | 8 ++++++++ mm/slub.c | 3 ++- security/Kconfig | 14 ++++++++++++++ 5 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 8bf14d9762ec..231abc8976c5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -135,6 +135,8 @@ struct mem_cgroup; void __init kmem_cache_init(void); bool slab_is_available(void); +extern bool usercopy_fallback; + struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align, slab_flags_t flags, void (*ctor)(void *)); diff --git a/mm/slab.c b/mm/slab.c index 1c02f6e94235..b9b0df620bb9 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4426,7 +4426,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, * to be a temporary method to find any missing usercopy * whitelists. */ - if (offset <= cachep->object_size && + if (usercopy_fallback && + offset <= cachep->object_size && n <= cachep->object_size - offset) { usercopy_warn("SLAB object", cachep->name, to_user, offset, n); return; diff --git a/mm/slab_common.c b/mm/slab_common.c index fc3e66bdce75..a51f65408637 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -31,6 +31,14 @@ LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); struct kmem_cache *kmem_cache; +#ifdef CONFIG_HARDENED_USERCOPY +bool usercopy_fallback __ro_after_init = + IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK); +module_param(usercopy_fallback, bool, 0400); +MODULE_PARM_DESC(usercopy_fallback, + "WARN instead of reject usercopy whitelist violations"); +#endif + static LIST_HEAD(slab_caches_to_rcu_destroy); static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work); static DECLARE_WORK(slab_caches_to_rcu_destroy_work, diff --git a/mm/slub.c b/mm/slub.c index 6d9b1e7d3226..862d835b3042 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3859,7 +3859,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, * whitelists. */ object_size = slab_ksize(s); - if (offset <= object_size && n <= object_size - offset) { + if (usercopy_fallback && + offset <= object_size && n <= object_size - offset) { usercopy_warn("SLUB object", s->name, to_user, offset, n); return; } diff --git a/security/Kconfig b/security/Kconfig index e8e449444e65..ae457b018da5 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -152,6 +152,20 @@ config HARDENED_USERCOPY or are part of the kernel text. This kills entire classes of heap overflow exploits and similar kernel memory exposures. +config HARDENED_USERCOPY_FALLBACK + bool "Allow usercopy whitelist violations to fallback to object size" + depends on HARDENED_USERCOPY + default y + help + This is a temporary option that allows missing usercopy whitelists + to be discovered via a WARN() to the kernel log, instead of + rejecting the copy, falling back to non-whitelisted hardened + usercopy that checks the slab allocation size instead of the + whitelist size. This option will be removed once it seems like + all missing usercopy whitelists have been identified and fixed. + Booting with "slab_common.usercopy_fallback=Y/N" can change + this setting. + config HARDENED_USERCOPY_PAGESPAN bool "Refuse to copy allocations that span multiple pages" depends on HARDENED_USERCOPY -- cgit v1.2.3 From 5905429ad85657c28d93ec3d826ddeea1f44c3ce Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Aug 2017 13:00:58 -0700 Subject: fork: Provide usercopy whitelisting for task_struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the blocked and saved_sigmask fields of task_struct are copied to userspace (via sigmask_to_save() and setup_rt_frame()), it is always copied with a static length (i.e. sizeof(sigset_t)). The only portion of task_struct that is potentially dynamically sized and may be copied to userspace is in the architecture-specific thread_struct at the end of task_struct. cache object allocation: kernel/fork.c: alloc_task_struct_node(...): return kmem_cache_alloc_node(task_struct_cachep, ...); dup_task_struct(...): ... tsk = alloc_task_struct_node(node); copy_process(...): ... dup_task_struct(...) _do_fork(...): ... copy_process(...) example usage trace: arch/x86/kernel/fpu/signal.c: __fpu__restore_sig(...): ... struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; ... __copy_from_user(&fpu->state.xsave, ..., state_size); fpu__restore_sig(...): ... return __fpu__restore_sig(...); arch/x86/kernel/signal.c: restore_sigcontext(...): ... fpu__restore_sig(...) This introduces arch_thread_struct_whitelist() to let an architecture declare specifically where the whitelist should be within thread_struct. If undefined, the entire thread_struct field is left whitelisted. Cc: Andrew Morton Cc: Nicholas Piggin Cc: Laura Abbott Cc: "Mickaël Salaün" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andy Lutomirski Signed-off-by: Kees Cook Acked-by: Rik van Riel --- arch/Kconfig | 11 +++++++++++ include/linux/sched/task.h | 14 ++++++++++++++ kernel/fork.c | 22 ++++++++++++++++++++-- 3 files changed, 45 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..8911ff37335a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -242,6 +242,17 @@ config ARCH_INIT_TASK config ARCH_TASK_STRUCT_ALLOCATOR bool +config HAVE_ARCH_THREAD_STRUCT_WHITELIST + bool + depends on !ARCH_TASK_STRUCT_ALLOCATOR + help + An architecture should select this to provide hardened usercopy + knowledge about what region of the thread_struct should be + whitelisted for copying to userspace. Normally this is only the + FPU registers. Specifically, arch_thread_struct_whitelist() + should be implemented. Without this, the entire thread_struct + field in task_struct will be left whitelisted. + # Select if arch has its private alloc_thread_stack() function config ARCH_THREAD_STACK_ALLOCATOR bool diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 05b8650f06f5..5be31eb7b266 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -104,6 +104,20 @@ extern int arch_task_struct_size __read_mostly; # define arch_task_struct_size (sizeof(struct task_struct)) #endif +#ifndef CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST +/* + * If an architecture has not declared a thread_struct whitelist we + * must assume something there may need to be copied to userspace. + */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = 0; + /* Handle dynamically sized thread_struct. */ + *size = arch_task_struct_size - offsetof(struct task_struct, thread); +} +#endif + #ifdef CONFIG_VMAP_STACK static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) { diff --git a/kernel/fork.c b/kernel/fork.c index 0e086af148f2..5977e691c754 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -458,6 +458,21 @@ static void set_max_threads(unsigned int max_threads_suggested) int arch_task_struct_size __read_mostly; #endif +static void task_struct_whitelist(unsigned long *offset, unsigned long *size) +{ + /* Fetch thread_struct whitelist for the architecture. */ + arch_thread_struct_whitelist(offset, size); + + /* + * Handle zero-sized whitelist or empty thread_struct, otherwise + * adjust offset to position of thread_struct in task_struct. + */ + if (unlikely(*size == 0)) + *offset = 0; + else + *offset += offsetof(struct task_struct, thread); +} + void __init fork_init(void) { int i; @@ -466,11 +481,14 @@ void __init fork_init(void) #define ARCH_MIN_TASKALIGN 0 #endif int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); + unsigned long useroffset, usersize; /* create a slab on which task_structs can be allocated */ - task_struct_cachep = kmem_cache_create("task_struct", + task_struct_whitelist(&useroffset, &usersize); + task_struct_cachep = kmem_cache_create_usercopy("task_struct", arch_task_struct_size, align, - SLAB_PANIC|SLAB_ACCOUNT, NULL); + SLAB_PANIC|SLAB_ACCOUNT, + useroffset, usersize, NULL); #endif /* do the arch specific task caches init */ -- cgit v1.2.3