Diffstat (limited to 'lib')
-rw-r--r--   lib/Kconfig.debug              |  46
-rw-r--r--   lib/Kconfig.kcsan              |   2
-rw-r--r--   lib/Makefile                   |  13
-rw-r--r--   lib/bug.c                      |  15
-rw-r--r--   lib/cpumask.c                  |  52
-rw-r--r--   lib/crypto/blake2s-selftest.c  |  25
-rw-r--r--   lib/dec_and_lock.c             |  31
-rw-r--r--   lib/errname.c                  |  22
-rw-r--r--   lib/find_bit.c                 |   9
-rw-r--r--   lib/group_cpus.c               | 428
-rw-r--r--   lib/hashtable_test.c           | 317
-rw-r--r--   lib/iov_iter.c                 | 299
-rw-r--r--   lib/kunit/Makefile             |   4
-rw-r--r--   lib/kunit/assert.c             |  40
-rw-r--r--   lib/kunit/hooks-impl.h         |  31
-rw-r--r--   lib/kunit/hooks.c              |  21
-rw-r--r--   lib/kunit/kunit-example-test.c |  38
-rw-r--r--   lib/kunit/static_stub.c        | 123
-rw-r--r--   lib/kunit/test.c               |  14
-rw-r--r--   lib/memcpy_kunit.c             |   2
-rw-r--r--   lib/mpi/mpicoder.c             |   3
-rw-r--r--   lib/nlattr.c                   |   3
-rw-r--r--   lib/nmi_backtrace.c            |   2
-rw-r--r--   lib/sbitmap.c                  | 102
-rw-r--r--   lib/scatterlist.c              |  25
-rw-r--r--   lib/string.c                   |  10
-rw-r--r--   lib/test_kmod.c                |  11
-rw-r--r--   lib/test_kprobes.c             |  39
-rw-r--r--   lib/ubsan.c                    |  73
-rw-r--r--   lib/ubsan.h                    |  32
-rw-r--r--   lib/usercopy.c                 |   7
31 files changed, 1679 insertions, 160 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 958087475edb..2e91421e096e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -389,6 +389,15 @@ config PAHOLE_HAS_BTF_TAG btf_decl_tag) or not. Currently only clang compiler implements these attributes, so make the config depend on CC_IS_CLANG. +config PAHOLE_HAS_LANG_EXCLUDE + def_bool PAHOLE_VERSION >= 124 + help + Support for the --lang_exclude flag which makes pahole exclude + compilation units from the supplied language. Used in Kbuild to + omit Rust CUs which are not supported in version 1.24 of pahole, + otherwise it would emit malformed kernel and module binaries when + using DEBUG_INFO_BTF_MODULES. + config DEBUG_INFO_BTF_MODULES def_bool y depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF @@ -1482,6 +1491,17 @@ config TRACE_IRQFLAGS_NMI depends on TRACE_IRQFLAGS depends on TRACE_IRQFLAGS_NMI_SUPPORT +config NMI_CHECK_CPU + bool "Debugging for CPUs failing to respond to backtrace requests" + depends on DEBUG_KERNEL + depends on X86 + default n + help + Enables debug prints when a CPU fails to respond to a given + backtrace NMI. These prints provide some reasons why a CPU + might legitimately be failing to respond, for example, if it + is offline of if ignore_nmis is set. + config DEBUG_IRQFLAGS bool "Debug IRQ flag manipulation" help @@ -1847,7 +1867,7 @@ config FUNCTION_ERROR_INJECTION help Add fault injections into various functions that are annotated with ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return - value of theses functions. This is useful to test error paths of code. + value of these functions. This is useful to test error paths of code. If unsure, say N @@ -2426,6 +2446,19 @@ config LIST_KUNIT_TEST If unsure, say N. +config HASHTABLE_KUNIT_TEST + tristate "KUnit Test for Kernel Hashtable structures" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds the hashtable KUnit test suite. + It tests the basic functionality of the API defined in + include/linux/hashtable.h. For more information on KUnit and + unit tests in general please refer to the KUnit documentation + in Documentation/dev-tools/kunit/. + + If unsure, say N. + config LINEAR_RANGES_TEST tristate "KUnit test for linear_ranges" depends on KUNIT @@ -2496,6 +2529,15 @@ config MEMCPY_KUNIT_TEST If unsure, say N. +config MEMCPY_SLOW_KUNIT_TEST + bool "Include exhaustive memcpy tests" + depends on MEMCPY_KUNIT_TEST + default y + help + Some memcpy tests are quite exhaustive in checking for overlaps + and bit ranges. These can be very slow, so they are split out + as a separate config, in case they need to be disabled. + config IS_SIGNED_TYPE_KUNIT_TEST tristate "Test is_signed_type() macro" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2802,6 +2844,4 @@ config RUST_BUILD_ASSERT_ALLOW endmenu # "Rust" -source "Documentation/Kconfig" - endmenu # Kernel hacking diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 375575a5a0e3..4dedd61e5192 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -194,7 +194,7 @@ config KCSAN_WEAK_MEMORY Enable support for modeling a subset of weak memory, which allows detecting a subset of data races due to missing memory barriers. - Depends on KCSAN_STRICT, because the options strenghtening certain + Depends on KCSAN_STRICT, because the options strengthening certain plain accesses by default (depending on !KCSAN_STRICT) reduce the ability to detect any data races invoving reordered accesses, in particular reordered writes. 
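For orientation, the new HASHTABLE_KUNIT_TEST entry above targets the hashtable API declared in include/linux/hashtable.h. Below is a minimal, hypothetical usage sketch of the calls that the test suite added later in this diff exercises; the struct item type and example() function are illustrative only, not part of the patch.

#include <linux/hashtable.h>
#include <linux/printk.h>

struct item {
	int key;
	int data;
	struct hlist_node node;	/* hook linking the entry into its bucket */
};

/* 2^3 = 8 buckets; the table itself is just a static array of hlist heads. */
static DEFINE_HASHTABLE(items, 3);

static void example(void)
{
	static struct item a = { .key = 1, .data = 13 };
	struct item *it;
	int bkt;

	hash_add(items, &a.node, a.key);		/* insert, hashed by key */

	hash_for_each(items, bkt, it, node)		/* walk every bucket */
		pr_info("key=%d data=%d\n", it->key, it->data);

	hash_for_each_possible(items, it, node, 1)	/* only key 1's bucket */
		if (it->key == 1)
			pr_info("found %d\n", it->data);

	hash_del(&a.node);				/* unlink the entry */
}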
diff --git a/lib/Makefile b/lib/Makefile index 4d9461bfea42..a269af847e2e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -126,6 +126,14 @@ CFLAGS_test_fpu.o += $(FPU_CFLAGS) obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/ obj-$(CONFIG_KUNIT) += kunit/ +# Include the KUnit hooks unconditionally. They'll compile to nothing if +# CONFIG_KUNIT=n, otherwise will be a small table of static data (static key, +# function pointers) which need to be built-in even when KUnit is a module. +ifeq ($(CONFIG_KUNIT), m) +obj-y += kunit/hooks.o +else +obj-$(CONFIG_KUNIT) += kunit/hooks.o +endif ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -340,9 +348,7 @@ quiet_cmd_build_OID_registry = GEN $@ clean-files += oid_registry_data.c obj-$(CONFIG_UCS2_STRING) += ucs2_string.o -ifneq ($(CONFIG_UBSAN_TRAP),y) obj-$(CONFIG_UBSAN) += ubsan.o -endif UBSAN_SANITIZE_ubsan.o := n KASAN_SANITIZE_ubsan.o := n @@ -353,6 +359,8 @@ obj-$(CONFIG_SBITMAP) += sbitmap.o obj-$(CONFIG_PARMAN) += parman.o +obj-y += group_cpus.o + # GCC library routines obj-$(CONFIG_GENERIC_LIB_ASHLDI3) += ashldi3.o obj-$(CONFIG_GENERIC_LIB_ASHRDI3) += ashrdi3.o @@ -369,6 +377,7 @@ obj-$(CONFIG_PLDMFW) += pldmfw/ CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o +obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o obj-$(CONFIG_BITS_TEST) += test_bits.o obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o diff --git a/lib/bug.c b/lib/bug.c index c223a2575b72..e0ff21989990 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -47,6 +47,7 @@ #include <linux/sched.h> #include <linux/rculist.h> #include <linux/ftrace.h> +#include <linux/context_tracking.h> extern struct bug_entry __start___bug_table[], __stop___bug_table[]; @@ -153,7 +154,7 @@ struct bug_entry *find_bug(unsigned long bugaddr) return module_find_bug(bugaddr); } -enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) +static enum bug_trap_type __report_bug(unsigned long bugaddr, struct pt_regs *regs) { struct bug_entry *bug; const char *file; @@ -209,6 +210,18 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) return BUG_TRAP_TYPE_BUG; } +enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) +{ + enum bug_trap_type ret; + bool rcu = false; + + rcu = warn_rcu_enter(); + ret = __report_bug(bugaddr, regs); + warn_rcu_exit(rcu); + + return ret; +} + static void clear_once_table(struct bug_entry *start, struct bug_entry *end) { struct bug_entry *bug; diff --git a/lib/cpumask.c b/lib/cpumask.c index c7c392514fd3..e7258836b60b 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -110,15 +110,33 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) #endif /** - * cpumask_local_spread - select the i'th cpu with local numa cpu's first + * cpumask_local_spread - select the i'th cpu based on NUMA distances * @i: index number * @node: local numa_node * - * This function selects an online CPU according to a numa aware policy; - * local cpus are returned first, followed by non-local ones, then it - * wraps around. + * Returns online CPU according to a numa aware policy; local cpus are returned + * first, followed by non-local ones, then it wraps around. * - * It's not very efficient, but useful for setup. + * For those who wants to enumerate all CPUs based on their NUMA distances, + * i.e. 
call this function in a loop, like: + * + * for (i = 0; i < num_online_cpus(); i++) { + * cpu = cpumask_local_spread(i, node); + * do_something(cpu); + * } + * + * There's a better alternative based on for_each()-like iterators: + * + * for_each_numa_hop_mask(mask, node) { + * for_each_cpu_andnot(cpu, mask, prev) + * do_something(cpu); + * prev = mask; + * } + * + * It's simpler and more verbose than above. Complexity of iterator-based + * enumeration is O(sched_domains_numa_levels * nr_cpu_ids), while + * cpumask_local_spread() when called for each cpu is + * O(sched_domains_numa_levels * nr_cpu_ids * log(nr_cpu_ids)). */ unsigned int cpumask_local_spread(unsigned int i, int node) { @@ -127,24 +145,12 @@ unsigned int cpumask_local_spread(unsigned int i, int node) /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (node == NUMA_NO_NODE) { - cpu = cpumask_nth(i, cpu_online_mask); - if (cpu < nr_cpu_ids) - return cpu; - } else { - /* NUMA first. */ - cpu = cpumask_nth_and(i, cpu_online_mask, cpumask_of_node(node)); - if (cpu < nr_cpu_ids) - return cpu; - - i -= cpumask_weight_and(cpu_online_mask, cpumask_of_node(node)); - - /* Skip NUMA nodes, done above. */ - cpu = cpumask_nth_andnot(i, cpu_online_mask, cpumask_of_node(node)); - if (cpu < nr_cpu_ids) - return cpu; - } - BUG(); + cpu = (node == NUMA_NO_NODE) ? + cpumask_nth(i, cpu_online_mask) : + sched_numa_find_nth_cpu(cpu_online_mask, i, node); + + WARN_ON(cpu >= nr_cpu_ids); + return cpu; } EXPORT_SYMBOL(cpumask_local_spread); diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 7d77dea15587..d0634ed6a937 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -545,7 +545,7 @@ static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, }; -bool __init blake2s_selftest(void) +static bool __init noinline_for_stack blake2s_digest_test(void) { u8 key[BLAKE2S_KEY_SIZE]; u8 buf[ARRAY_SIZE(blake2s_testvecs)]; @@ -589,11 +589,20 @@ bool __init blake2s_selftest(void) } } + return success; +} + +static bool __init noinline_for_stack blake2s_random_test(void) +{ + struct blake2s_state state; + bool success = true; + int i, l; + for (i = 0; i < 32; ++i) { enum { TEST_ALIGNMENT = 16 }; - u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1] + u8 blocks[BLAKE2S_BLOCK_SIZE * 2 + TEST_ALIGNMENT - 1] __aligned(TEST_ALIGNMENT); - u8 blocks[BLAKE2S_BLOCK_SIZE * 2]; + u8 *unaligned_block = blocks + BLAKE2S_BLOCK_SIZE; struct blake2s_state state1, state2; get_random_bytes(blocks, sizeof(blocks)); @@ -630,3 +639,13 @@ bool __init blake2s_selftest(void) return success; } + +bool __init blake2s_selftest(void) +{ + bool success; + + success = blake2s_digest_test(); + success &= blake2s_random_test(); + + return success; +} diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index 9555b68bb774..1dcca8f2e194 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -49,3 +49,34 @@ int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, return 0; } EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); + +int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. 
it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; + + /* Otherwise do it the slow way */ + raw_spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + raw_spin_unlock(lock); + return 0; +} +EXPORT_SYMBOL(_atomic_dec_and_raw_lock); + +int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock, + unsigned long *flags) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; + + /* Otherwise do it the slow way */ + raw_spin_lock_irqsave(lock, *flags); + if (atomic_dec_and_test(atomic)) + return 1; + raw_spin_unlock_irqrestore(lock, *flags); + return 0; +} +EXPORT_SYMBOL(_atomic_dec_and_raw_lock_irqsave); diff --git a/lib/errname.c b/lib/errname.c index 05cbf731545f..67739b174a8c 100644 --- a/lib/errname.c +++ b/lib/errname.c @@ -21,6 +21,7 @@ static const char *names_0[] = { E(EADDRNOTAVAIL), E(EADV), E(EAFNOSUPPORT), + E(EAGAIN), /* EWOULDBLOCK */ E(EALREADY), E(EBADE), E(EBADF), @@ -31,15 +32,17 @@ static const char *names_0[] = { E(EBADSLT), E(EBFONT), E(EBUSY), -#ifdef ECANCELLED - E(ECANCELLED), -#endif + E(ECANCELED), /* ECANCELLED */ E(ECHILD), E(ECHRNG), E(ECOMM), E(ECONNABORTED), + E(ECONNREFUSED), /* EREFUSED */ E(ECONNRESET), + E(EDEADLK), /* EDEADLOCK */ +#if EDEADLK != EDEADLOCK /* mips, sparc, powerpc */ E(EDEADLOCK), +#endif E(EDESTADDRREQ), E(EDOM), E(EDOTDOT), @@ -166,14 +169,17 @@ static const char *names_0[] = { E(EUSERS), E(EXDEV), E(EXFULL), - - E(ECANCELED), /* ECANCELLED */ - E(EAGAIN), /* EWOULDBLOCK */ - E(ECONNREFUSED), /* EREFUSED */ - E(EDEADLK), /* EDEADLOCK */ }; #undef E +#ifdef EREFUSED /* parisc */ +static_assert(EREFUSED == ECONNREFUSED); +#endif +#ifdef ECANCELLED /* parisc */ +static_assert(ECANCELLED == ECANCELED); +#endif +static_assert(EAGAIN == EWOULDBLOCK); /* everywhere */ + #define E(err) [err - 512 + BUILD_BUG_ON_ZERO(err < 512 || err > 550)] = "-" #err static const char *names_512[] = { E(ERESTARTSYS), diff --git a/lib/find_bit.c b/lib/find_bit.c index 18bc0a7ac8ee..c10920e66788 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -155,6 +155,15 @@ unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned l } EXPORT_SYMBOL(__find_nth_andnot_bit); +unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, + const unsigned long *addr2, + const unsigned long *addr3, + unsigned long size, unsigned long n) +{ + return FIND_NTH_BIT(addr1[idx] & addr2[idx] & ~addr3[idx], size, n); +} +EXPORT_SYMBOL(__find_nth_and_andnot_bit); + #ifndef find_next_and_bit unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start) diff --git a/lib/group_cpus.c b/lib/group_cpus.c new file mode 100644 index 000000000000..9c837a35fef7 --- /dev/null +++ b/lib/group_cpus.c @@ -0,0 +1,428 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2016 Thomas Gleixner. + * Copyright (C) 2016-2017 Christoph Hellwig. 
+ */ +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/sort.h> +#include <linux/group_cpus.h> + +#ifdef CONFIG_SMP + +static void grp_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, + unsigned int cpus_per_grp) +{ + const struct cpumask *siblmsk; + int cpu, sibl; + + for ( ; cpus_per_grp > 0; ) { + cpu = cpumask_first(nmsk); + + /* Should not happen, but I'm too lazy to think about it */ + if (cpu >= nr_cpu_ids) + return; + + cpumask_clear_cpu(cpu, nmsk); + cpumask_set_cpu(cpu, irqmsk); + cpus_per_grp--; + + /* If the cpu has siblings, use them first */ + siblmsk = topology_sibling_cpumask(cpu); + for (sibl = -1; cpus_per_grp > 0; ) { + sibl = cpumask_next(sibl, siblmsk); + if (sibl >= nr_cpu_ids) + break; + if (!cpumask_test_and_clear_cpu(sibl, nmsk)) + continue; + cpumask_set_cpu(sibl, irqmsk); + cpus_per_grp--; + } + } +} + +static cpumask_var_t *alloc_node_to_cpumask(void) +{ + cpumask_var_t *masks; + int node; + + masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL); + if (!masks) + return NULL; + + for (node = 0; node < nr_node_ids; node++) { + if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL)) + goto out_unwind; + } + + return masks; + +out_unwind: + while (--node >= 0) + free_cpumask_var(masks[node]); + kfree(masks); + return NULL; +} + +static void free_node_to_cpumask(cpumask_var_t *masks) +{ + int node; + + for (node = 0; node < nr_node_ids; node++) + free_cpumask_var(masks[node]); + kfree(masks); +} + +static void build_node_to_cpumask(cpumask_var_t *masks) +{ + int cpu; + + for_each_possible_cpu(cpu) + cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); +} + +static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, + const struct cpumask *mask, nodemask_t *nodemsk) +{ + int n, nodes = 0; + + /* Calculate the number of nodes in the supplied affinity mask */ + for_each_node(n) { + if (cpumask_intersects(mask, node_to_cpumask[n])) { + node_set(n, *nodemsk); + nodes++; + } + } + return nodes; +} + +struct node_groups { + unsigned id; + + union { + unsigned ngroups; + unsigned ncpus; + }; +}; + +static int ncpus_cmp_func(const void *l, const void *r) +{ + const struct node_groups *ln = l; + const struct node_groups *rn = r; + + return ln->ncpus - rn->ncpus; +} + +/* + * Allocate group number for each node, so that for each node: + * + * 1) the allocated number is >= 1 + * + * 2) the allocated number is <= active CPU number of this node + * + * The actual allocated total groups may be less than @numgrps when + * active total CPU number is less than @numgrps. + * + * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' + * for each node. + */ +static void alloc_nodes_groups(unsigned int numgrps, + cpumask_var_t *node_to_cpumask, + const struct cpumask *cpu_mask, + const nodemask_t nodemsk, + struct cpumask *nmsk, + struct node_groups *node_groups) +{ + unsigned n, remaining_ncpus = 0; + + for (n = 0; n < nr_node_ids; n++) { + node_groups[n].id = n; + node_groups[n].ncpus = UINT_MAX; + } + + for_each_node_mask(n, nodemsk) { + unsigned ncpus; + + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + ncpus = cpumask_weight(nmsk); + + if (!ncpus) + continue; + remaining_ncpus += ncpus; + node_groups[n].ncpus = ncpus; + } + + numgrps = min_t(unsigned, remaining_ncpus, numgrps); + + sort(node_groups, nr_node_ids, sizeof(node_groups[0]), + ncpus_cmp_func, NULL); + + /* + * Allocate groups for each node according to the ratio of this + * node's nr_cpus to remaining un-assigned ncpus. 
'numgrps' is + * bigger than number of active numa nodes. Always start the + * allocation from the node with minimized nr_cpus. + * + * This way guarantees that each active node gets allocated at + * least one group, and the theory is simple: over-allocation + * is only done when this node is assigned by one group, so + * other nodes will be allocated >= 1 groups, since 'numgrps' is + * bigger than number of numa nodes. + * + * One perfect invariant is that number of allocated groups for + * each node is <= CPU count of this node: + * + * 1) suppose there are two nodes: A and B + * ncpu(X) is CPU count of node X + * grps(X) is the group count allocated to node X via this + * algorithm + * + * ncpu(A) <= ncpu(B) + * ncpu(A) + ncpu(B) = N + * grps(A) + grps(B) = G + * + * grps(A) = max(1, round_down(G * ncpu(A) / N)) + * grps(B) = G - grps(A) + * + * both N and G are integer, and 2 <= G <= N, suppose + * G = N - delta, and 0 <= delta <= N - 2 + * + * 2) obviously grps(A) <= ncpu(A) because: + * + * if grps(A) is 1, then grps(A) <= ncpu(A) given + * ncpu(A) >= 1 + * + * otherwise, + * grps(A) <= G * ncpu(A) / N <= ncpu(A), given G <= N + * + * 3) prove how grps(B) <= ncpu(B): + * + * if round_down(G * ncpu(A) / N) == 0, vecs(B) won't be + * over-allocated, so grps(B) <= ncpu(B), + * + * otherwise: + * + * grps(A) = + * round_down(G * ncpu(A) / N) = + * round_down((N - delta) * ncpu(A) / N) = + * round_down((N * ncpu(A) - delta * ncpu(A)) / N) >= + * round_down((N * ncpu(A) - delta * N) / N) = + * cpu(A) - delta + * + * then: + * + * grps(A) - G >= ncpu(A) - delta - G + * => + * G - grps(A) <= G + delta - ncpu(A) + * => + * grps(B) <= N - ncpu(A) + * => + * grps(B) <= cpu(B) + * + * For nodes >= 3, it can be thought as one node and another big + * node given that is exactly what this algorithm is implemented, + * and we always re-calculate 'remaining_ncpus' & 'numgrps', and + * finally for each node X: grps(X) <= ncpu(X). + * + */ + for (n = 0; n < nr_node_ids; n++) { + unsigned ngroups, ncpus; + + if (node_groups[n].ncpus == UINT_MAX) + continue; + + WARN_ON_ONCE(numgrps == 0); + + ncpus = node_groups[n].ncpus; + ngroups = max_t(unsigned, 1, + numgrps * ncpus / remaining_ncpus); + WARN_ON_ONCE(ngroups > ncpus); + + node_groups[n].ngroups = ngroups; + + remaining_ncpus -= ncpus; + numgrps -= ngroups; + } +} + +static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, + cpumask_var_t *node_to_cpumask, + const struct cpumask *cpu_mask, + struct cpumask *nmsk, struct cpumask *masks) +{ + unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0; + unsigned int last_grp = numgrps; + unsigned int curgrp = startgrp; + nodemask_t nodemsk = NODE_MASK_NONE; + struct node_groups *node_groups; + + if (cpumask_empty(cpu_mask)) + return 0; + + nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk); + + /* + * If the number of nodes in the mask is greater than or equal the + * number of groups we just spread the groups across the nodes. 
+ */ + if (numgrps <= nodes) { + for_each_node_mask(n, nodemsk) { + /* Ensure that only CPUs which are in both masks are set */ + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + cpumask_or(&masks[curgrp], &masks[curgrp], nmsk); + if (++curgrp == last_grp) + curgrp = 0; + } + return numgrps; + } + + node_groups = kcalloc(nr_node_ids, + sizeof(struct node_groups), + GFP_KERNEL); + if (!node_groups) + return -ENOMEM; + + /* allocate group number for each node */ + alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask, + nodemsk, nmsk, node_groups); + for (i = 0; i < nr_node_ids; i++) { + unsigned int ncpus, v; + struct node_groups *nv = &node_groups[i]; + + if (nv->ngroups == UINT_MAX) + continue; + + /* Get the cpus on this node which are in the mask */ + cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]); + ncpus = cpumask_weight(nmsk); + if (!ncpus) + continue; + + WARN_ON_ONCE(nv->ngroups > ncpus); + + /* Account for rounding errors */ + extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups); + + /* Spread allocated groups on CPUs of the current node */ + for (v = 0; v < nv->ngroups; v++, curgrp++) { + cpus_per_grp = ncpus / nv->ngroups; + + /* Account for extra groups to compensate rounding errors */ + if (extra_grps) { + cpus_per_grp++; + --extra_grps; + } + + /* + * wrapping has to be considered given 'startgrp' + * may start anywhere + */ + if (curgrp >= last_grp) + curgrp = 0; + grp_spread_init_one(&masks[curgrp], nmsk, + cpus_per_grp); + } + done += nv->ngroups; + } + kfree(node_groups); + return done; +} + +/** + * group_cpus_evenly - Group all CPUs evenly per NUMA/CPU locality + * @numgrps: number of groups + * + * Return: cpumask array if successful, NULL otherwise. And each element + * includes CPUs assigned to this group + * + * Try to put close CPUs from viewpoint of CPU and NUMA locality into + * same group, and run two-stage grouping: + * 1) allocate present CPUs on these groups evenly first + * 2) allocate other possible CPUs on these groups evenly + * + * We guarantee in the resulted grouping that all CPUs are covered, and + * no same CPU is assigned to multiple groups + */ +struct cpumask *group_cpus_evenly(unsigned int numgrps) +{ + unsigned int curgrp = 0, nr_present = 0, nr_others = 0; + cpumask_var_t *node_to_cpumask; + cpumask_var_t nmsk, npresmsk; + int ret = -ENOMEM; + struct cpumask *masks = NULL; + + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) + return NULL; + + if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) + goto fail_nmsk; + + node_to_cpumask = alloc_node_to_cpumask(); + if (!node_to_cpumask) + goto fail_npresmsk; + + masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); + if (!masks) + goto fail_node_to_cpumask; + + /* Stabilize the cpumasks */ + cpus_read_lock(); + build_node_to_cpumask(node_to_cpumask); + + /* grouping present CPUs first */ + ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, + cpu_present_mask, nmsk, masks); + if (ret < 0) + goto fail_build_affinity; + nr_present = ret; + + /* + * Allocate non present CPUs starting from the next group to be + * handled. If the grouping of present CPUs already exhausted the + * group space, assign the non present CPUs to the already + * allocated out groups. 
+ */ + if (nr_present >= numgrps) + curgrp = 0; + else + curgrp = nr_present; + cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); + ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, + npresmsk, nmsk, masks); + if (ret >= 0) + nr_others = ret; + + fail_build_affinity: + cpus_read_unlock(); + + if (ret >= 0) + WARN_ON(nr_present + nr_others < numgrps); + + fail_node_to_cpumask: + free_node_to_cpumask(node_to_cpumask); + + fail_npresmsk: + free_cpumask_var(npresmsk); + + fail_nmsk: + free_cpumask_var(nmsk); + if (ret < 0) { + kfree(masks); + return NULL; + } + return masks; +} +#else /* CONFIG_SMP */ +struct cpumask *group_cpus_evenly(unsigned int numgrps) +{ + struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); + + if (!masks) + return NULL; + + /* assign all CPUs(cpu 0) to the 1st group only */ + cpumask_copy(&masks[0], cpu_possible_mask); + return masks; +} +#endif /* CONFIG_SMP */ diff --git a/lib/hashtable_test.c b/lib/hashtable_test.c new file mode 100644 index 000000000000..1d1b3288dee2 --- /dev/null +++ b/lib/hashtable_test.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit test for the Kernel Hashtable structures. + * + * Copyright (C) 2022, Google LLC. + * Author: Rae Moar <rmoar@google.com> + */ +#include <kunit/test.h> + +#include <linux/hashtable.h> + +struct hashtable_test_entry { + int key; + int data; + struct hlist_node node; + int visited; +}; + +static void hashtable_test_hash_init(struct kunit *test) +{ + /* Test the different ways of initialising a hashtable. */ + DEFINE_HASHTABLE(hash1, 2); + DECLARE_HASHTABLE(hash2, 3); + + /* When using DECLARE_HASHTABLE, must use hash_init to + * initialize the hashtable. + */ + hash_init(hash2); + + KUNIT_EXPECT_TRUE(test, hash_empty(hash1)); + KUNIT_EXPECT_TRUE(test, hash_empty(hash2)); +} + +static void hashtable_test_hash_empty(struct kunit *test) +{ + struct hashtable_test_entry a; + DEFINE_HASHTABLE(hash, 1); + + KUNIT_EXPECT_TRUE(test, hash_empty(hash)); + + a.key = 1; + a.data = 13; + hash_add(hash, &a.node, a.key); + + /* Hashtable should no longer be empty. */ + KUNIT_EXPECT_FALSE(test, hash_empty(hash)); +} + +static void hashtable_test_hash_hashed(struct kunit *test) +{ + struct hashtable_test_entry a, b; + DEFINE_HASHTABLE(hash, 4); + + a.key = 1; + a.data = 13; + hash_add(hash, &a.node, a.key); + b.key = 1; + b.data = 2; + hash_add(hash, &b.node, b.key); + + KUNIT_EXPECT_TRUE(test, hash_hashed(&a.node)); + KUNIT_EXPECT_TRUE(test, hash_hashed(&b.node)); +} + +static void hashtable_test_hash_add(struct kunit *test) +{ + struct hashtable_test_entry a, b, *x; + int bkt; + DEFINE_HASHTABLE(hash, 3); + + a.key = 1; + a.data = 13; + a.visited = 0; + hash_add(hash, &a.node, a.key); + b.key = 2; + b.data = 10; + b.visited = 0; + hash_add(hash, &b.node, b.key); + + hash_for_each(hash, bkt, x, node) { + x->visited++; + if (x->key == a.key) + KUNIT_EXPECT_EQ(test, x->data, 13); + else if (x->key == b.key) + KUNIT_EXPECT_EQ(test, x->data, 10); + else + KUNIT_FAIL(test, "Unexpected key in hashtable."); + } + + /* Both entries should have been visited exactly once. 
*/ + KUNIT_EXPECT_EQ(test, a.visited, 1); + KUNIT_EXPECT_EQ(test, b.visited, 1); +} + +static void hashtable_test_hash_del(struct kunit *test) +{ + struct hashtable_test_entry a, b, *x; + DEFINE_HASHTABLE(hash, 6); + + a.key = 1; + a.data = 13; + hash_add(hash, &a.node, a.key); + b.key = 2; + b.data = 10; + b.visited = 0; + hash_add(hash, &b.node, b.key); + + hash_del(&b.node); + hash_for_each_possible(hash, x, node, b.key) { + x->visited++; + KUNIT_EXPECT_NE(test, x->key, b.key); + } + + /* The deleted entry should not have been visited. */ + KUNIT_EXPECT_EQ(test, b.visited, 0); + + hash_del(&a.node); + + /* The hashtable should be empty. */ + KUNIT_EXPECT_TRUE(test, hash_empty(hash)); +} + +static void hashtable_test_hash_for_each(struct kunit *test) +{ + struct hashtable_test_entry entries[3]; + struct hashtable_test_entry *x; + int bkt, i, j, count; + DEFINE_HASHTABLE(hash, 3); + + /* Add three entries to the hashtable. */ + for (i = 0; i < 3; i++) { + entries[i].key = i; + entries[i].data = i + 10; + entries[i].visited = 0; + hash_add(hash, &entries[i].node, entries[i].key); + } + + count = 0; + hash_for_each(hash, bkt, x, node) { + x->visited += 1; + KUNIT_ASSERT_GE_MSG(test, x->key, 0, "Unexpected key in hashtable."); + KUNIT_ASSERT_LT_MSG(test, x->key, 3, "Unexpected key in hashtable."); + count++; + } + + /* Should have visited each entry exactly once. */ + KUNIT_EXPECT_EQ(test, count, 3); + for (j = 0; j < 3; j++) + KUNIT_EXPECT_EQ(test, entries[j].visited, 1); +} + +static void hashtable_test_hash_for_each_safe(struct kunit *test) +{ + struct hashtable_test_entry entries[3]; + struct hashtable_test_entry *x; + struct hlist_node *tmp; + int bkt, i, j, count; + DEFINE_HASHTABLE(hash, 3); + + /* Add three entries to the hashtable. */ + for (i = 0; i < 3; i++) { + entries[i].key = i; + entries[i].data = i + 10; + entries[i].visited = 0; + hash_add(hash, &entries[i].node, entries[i].key); + } + + count = 0; + hash_for_each_safe(hash, bkt, tmp, x, node) { + x->visited += 1; + KUNIT_ASSERT_GE_MSG(test, x->key, 0, "Unexpected key in hashtable."); + KUNIT_ASSERT_LT_MSG(test, x->key, 3, "Unexpected key in hashtable."); + count++; + + /* Delete entry during loop. */ + hash_del(&x->node); + } + + /* Should have visited each entry exactly once. */ + KUNIT_EXPECT_EQ(test, count, 3); + for (j = 0; j < 3; j++) + KUNIT_EXPECT_EQ(test, entries[j].visited, 1); +} + +static void hashtable_test_hash_for_each_possible(struct kunit *test) +{ + struct hashtable_test_entry entries[4]; + struct hashtable_test_entry *x, *y; + int buckets[2]; + int bkt, i, j, count; + DEFINE_HASHTABLE(hash, 5); + + /* Add three entries with key = 0 to the hashtable. */ + for (i = 0; i < 3; i++) { + entries[i].key = 0; + entries[i].data = i; + entries[i].visited = 0; + hash_add(hash, &entries[i].node, entries[i].key); + } + + /* Add an entry with key = 1. */ + entries[3].key = 1; + entries[3].data = 3; + entries[3].visited = 0; + hash_add(hash, &entries[3].node, entries[3].key); + + count = 0; + hash_for_each_possible(hash, x, node, 0) { + x->visited += 1; + KUNIT_ASSERT_GE_MSG(test, x->data, 0, "Unexpected data in hashtable."); + KUNIT_ASSERT_LT_MSG(test, x->data, 4, "Unexpected data in hashtable."); + count++; + } + + /* Should have visited each entry with key = 0 exactly once. */ + for (j = 0; j < 3; j++) + KUNIT_EXPECT_EQ(test, entries[j].visited, 1); + + /* Save the buckets for the different keys. 
*/ + hash_for_each(hash, bkt, y, node) { + KUNIT_ASSERT_GE_MSG(test, y->key, 0, "Unexpected key in hashtable."); + KUNIT_ASSERT_LE_MSG(test, y->key, 1, "Unexpected key in hashtable."); + buckets[y->key] = bkt; + } + + /* If entry with key = 1 is in the same bucket as the entries with + * key = 0, check it was visited. Otherwise ensure that only three + * entries were visited. + */ + if (buckets[0] == buckets[1]) { + KUNIT_EXPECT_EQ(test, count, 4); + KUNIT_EXPECT_EQ(test, entries[3].visited, 1); + } else { + KUNIT_EXPECT_EQ(test, count, 3); + KUNIT_EXPECT_EQ(test, entries[3].visited, 0); + } +} + +static void hashtable_test_hash_for_each_possible_safe(struct kunit *test) +{ + struct hashtable_test_entry entries[4]; + struct hashtable_test_entry *x, *y; + struct hlist_node *tmp; + int buckets[2]; + int bkt, i, j, count; + DEFINE_HASHTABLE(hash, 5); + + /* Add three entries with key = 0 to the hashtable. */ + for (i = 0; i < 3; i++) { + entries[i].key = 0; + entries[i].data = i; + entries[i].visited = 0; + hash_add(hash, &entries[i].node, entries[i].key); + } + + /* Add an entry with key = 1. */ + entries[3].key = 1; + entries[3].data = 3; + entries[3].visited = 0; + hash_add(hash, &entries[3].node, entries[3].key); + + count = 0; + hash_for_each_possible_safe(hash, x, tmp, node, 0) { + x->visited += 1; + KUNIT_ASSERT_GE_MSG(test, x->data, 0, "Unexpected data in hashtable."); + KUNIT_ASSERT_LT_MSG(test, x->data, 4, "Unexpected data in hashtable."); + count++; + + /* Delete entry during loop. */ + hash_del(&x->node); + } + + /* Should have visited each entry with key = 0 exactly once. */ + for (j = 0; j < 3; j++) + KUNIT_EXPECT_EQ(test, entries[j].visited, 1); + + /* Save the buckets for the different keys. */ + hash_for_each(hash, bkt, y, node) { + KUNIT_ASSERT_GE_MSG(test, y->key, 0, "Unexpected key in hashtable."); + KUNIT_ASSERT_LE_MSG(test, y->key, 1, "Unexpected key in hashtable."); + buckets[y->key] = bkt; + } + + /* If entry with key = 1 is in the same bucket as the entries with + * key = 0, check it was visited. Otherwise ensure that only three + * entries were visited. 
+ */ + if (buckets[0] == buckets[1]) { + KUNIT_EXPECT_EQ(test, count, 4); + KUNIT_EXPECT_EQ(test, entries[3].visited, 1); + } else { + KUNIT_EXPECT_EQ(test, count, 3); + KUNIT_EXPECT_EQ(test, entries[3].visited, 0); + } +} + +static struct kunit_case hashtable_test_cases[] = { + KUNIT_CASE(hashtable_test_hash_init), + KUNIT_CASE(hashtable_test_hash_empty), + KUNIT_CASE(hashtable_test_hash_hashed), + KUNIT_CASE(hashtable_test_hash_add), + KUNIT_CASE(hashtable_test_hash_del), + KUNIT_CASE(hashtable_test_hash_for_each), + KUNIT_CASE(hashtable_test_hash_for_each_safe), + KUNIT_CASE(hashtable_test_hash_for_each_possible), + KUNIT_CASE(hashtable_test_hash_for_each_possible_safe), + {}, +}; + +static struct kunit_suite hashtable_test_module = { + .name = "hashtable", + .test_cases = hashtable_test_cases, +}; + +kunit_test_suites(&hashtable_test_module); + +MODULE_LICENSE("GPL"); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f9a3ff37ecd1..274014e4eafe 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -186,12 +186,6 @@ static int copyin(void *to, const void __user *from, size_t n) return res; } -static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, - unsigned int slot) -{ - return &pipe->bufs[slot & (pipe->ring_size - 1)]; -} - #ifdef PIPE_PARANOIA static bool sanity(const struct iov_iter *i) { @@ -1432,9 +1426,9 @@ static struct page *first_bvec_segment(const struct iov_iter *i, static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, size_t *start, - unsigned int gup_flags) + iov_iter_extraction_t extraction_flags) { - unsigned int n; + unsigned int n, gup_flags = 0; if (maxsize > i->count) maxsize = i->count; @@ -1442,6 +1436,8 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, return 0; if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; + if (extraction_flags & ITER_ALLOW_P2PDMA) + gup_flags |= FOLL_PCI_P2PDMA; if (likely(user_backed_iter(i))) { unsigned long addr; @@ -1495,14 +1491,14 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, - size_t *start, unsigned gup_flags) + size_t *start, iov_iter_extraction_t extraction_flags) { if (!maxpages) return 0; BUG_ON(!pages); return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, - start, gup_flags); + start, extraction_flags); } EXPORT_SYMBOL_GPL(iov_iter_get_pages); @@ -1515,14 +1511,14 @@ EXPORT_SYMBOL(iov_iter_get_pages2); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, - size_t *start, unsigned gup_flags) + size_t *start, iov_iter_extraction_t extraction_flags) { ssize_t len; *pages = NULL; len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start, - gup_flags); + extraction_flags); if (len <= 0) { kvfree(*pages); *pages = NULL; @@ -1877,6 +1873,17 @@ int import_single_range(int rw, void __user *buf, size_t len, } EXPORT_SYMBOL(import_single_range); +int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) +{ + if (len > MAX_RW_COUNT) + len = MAX_RW_COUNT; + if (unlikely(!access_ok(buf, len))) + return -EFAULT; + + iov_iter_ubuf(i, rw, buf, len); + return 0; +} + /** * iov_iter_restore() - Restore a &struct iov_iter to the same state as when * iov_iter_save_state() was called. 
@@ -1891,8 +1898,8 @@ EXPORT_SYMBOL(import_single_range); */ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) { - if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && - !iov_iter_is_kvec(i) && !iter_is_ubuf(i)) + if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) && + !iter_is_ubuf(i)) && !iov_iter_is_kvec(i)) return; i->iov_offset = state->iov_offset; i->count = state->count; @@ -1914,3 +1921,267 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) i->iov -= state->nr_segs - i->nr_segs; i->nr_segs = state->nr_segs; } + +/* + * Extract a list of contiguous pages from an ITER_XARRAY iterator. This does not + * get references on the pages, nor does it get a pin on them. + */ +static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i, + struct page ***pages, size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + struct page *page, **p; + unsigned int nr = 0, offset; + loff_t pos = i->xarray_start + i->iov_offset; + pgoff_t index = pos >> PAGE_SHIFT; + XA_STATE(xas, i->xarray, index); + + offset = pos & ~PAGE_MASK; + *offset0 = offset; + + maxpages = want_pages_array(pages, maxsize, offset, maxpages); + if (!maxpages) + return -ENOMEM; + p = *pages; + + rcu_read_lock(); + for (page = xas_load(&xas); page; page = xas_next(&xas)) { + if (xas_retry(&xas, page)) + continue; + + /* Has the page moved or been split? */ + if (unlikely(page != xas_reload(&xas))) { + xas_reset(&xas); + continue; + } + + p[nr++] = find_subpage(page, xas.xa_index); + if (nr == maxpages) + break; + } + rcu_read_unlock(); + + maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); + iov_iter_advance(i, maxsize); + return maxsize; +} + +/* + * Extract a list of contiguous pages from an ITER_BVEC iterator. This does + * not get references on the pages, nor does it get a pin on them. + */ +static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, + struct page ***pages, size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + struct page **p, *page; + size_t skip = i->iov_offset, offset; + int k; + + for (;;) { + if (i->nr_segs == 0) + return 0; + maxsize = min(maxsize, i->bvec->bv_len - skip); + if (maxsize) + break; + i->iov_offset = 0; + i->nr_segs--; + i->bvec++; + skip = 0; + } + + skip += i->bvec->bv_offset; + page = i->bvec->bv_page + skip / PAGE_SIZE; + offset = skip % PAGE_SIZE; + *offset0 = offset; + + maxpages = want_pages_array(pages, maxsize, offset, maxpages); + if (!maxpages) + return -ENOMEM; + p = *pages; + for (k = 0; k < maxpages; k++) + p[k] = page + k; + + maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset); + iov_iter_advance(i, maxsize); + return maxsize; +} + +/* + * Extract a list of virtually contiguous pages from an ITER_KVEC iterator. + * This does not get references on the pages, nor does it get a pin on them. 
+ */ +static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i, + struct page ***pages, size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + struct page **p, *page; + const void *kaddr; + size_t skip = i->iov_offset, offset, len; + int k; + + for (;;) { + if (i->nr_segs == 0) + return 0; + maxsize = min(maxsize, i->kvec->iov_len - skip); + if (maxsize) + break; + i->iov_offset = 0; + i->nr_segs--; + i->kvec++; + skip = 0; + } + + kaddr = i->kvec->iov_base + skip; + offset = (unsigned long)kaddr & ~PAGE_MASK; + *offset0 = offset; + + maxpages = want_pages_array(pages, maxsize, offset, maxpages); + if (!maxpages) + return -ENOMEM; + p = *pages; + + kaddr -= offset; + len = offset + maxsize; + for (k = 0; k < maxpages; k++) { + size_t seg = min_t(size_t, len, PAGE_SIZE); + + if (is_vmalloc_or_module_addr(kaddr)) + page = vmalloc_to_page(kaddr); + else + page = virt_to_page(kaddr); + + p[k] = page; + len -= seg; + kaddr += PAGE_SIZE; + } + + maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset); + iov_iter_advance(i, maxsize); + return maxsize; +} + +/* + * Extract a list of contiguous pages from a user iterator and get a pin on + * each of them. This should only be used if the iterator is user-backed + * (IOBUF/UBUF). + * + * It does not get refs on the pages, but the pages must be unpinned by the + * caller once the transfer is complete. + * + * This is safe to be used where background IO/DMA *is* going to be modifying + * the buffer; using a pin rather than a ref makes forces fork() to give the + * child a copy of the page. + */ +static ssize_t iov_iter_extract_user_pages(struct iov_iter *i, + struct page ***pages, + size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + unsigned long addr; + unsigned int gup_flags = 0; + size_t offset; + int res; + + if (i->data_source == ITER_DEST) + gup_flags |= FOLL_WRITE; + if (extraction_flags & ITER_ALLOW_P2PDMA) + gup_flags |= FOLL_PCI_P2PDMA; + if (i->nofault) + gup_flags |= FOLL_NOFAULT; + + addr = first_iovec_segment(i, &maxsize); + *offset0 = offset = addr % PAGE_SIZE; + addr &= PAGE_MASK; + maxpages = want_pages_array(pages, maxsize, offset, maxpages); + if (!maxpages) + return -ENOMEM; + res = pin_user_pages_fast(addr, maxpages, gup_flags, *pages); + if (unlikely(res <= 0)) + return res; + maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - offset); + iov_iter_advance(i, maxsize); + return maxsize; +} + +/** + * iov_iter_extract_pages - Extract a list of contiguous pages from an iterator + * @i: The iterator to extract from + * @pages: Where to return the list of pages + * @maxsize: The maximum amount of iterator to extract + * @maxpages: The maximum size of the list of pages + * @extraction_flags: Flags to qualify request + * @offset0: Where to return the starting offset into (*@pages)[0] + * + * Extract a list of contiguous pages from the current point of the iterator, + * advancing the iterator. The maximum number of pages and the maximum amount + * of page contents can be set. + * + * If *@pages is NULL, a page list will be allocated to the required size and + * *@pages will be set to its base. If *@pages is not NULL, it will be assumed + * that the caller allocated a page list at least @maxpages in size and this + * will be filled in. + * + * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA + * be allowed on the pages extracted. 
+ * + * The iov_iter_extract_will_pin() function can be used to query how cleanup + * should be performed. + * + * Extra refs or pins on the pages may be obtained as follows: + * + * (*) If the iterator is user-backed (ITER_IOVEC/ITER_UBUF), pins will be + * added to the pages, but refs will not be taken. + * iov_iter_extract_will_pin() will return true. + * + * (*) If the iterator is ITER_KVEC, ITER_BVEC or ITER_XARRAY, the pages are + * merely listed; no extra refs or pins are obtained. + * iov_iter_extract_will_pin() will return 0. + * + * Note also: + * + * (*) Use with ITER_DISCARD is not supported as that has no content. + * + * On success, the function sets *@pages to the new pagelist, if allocated, and + * sets *offset0 to the offset into the first page. + * + * It may also return -ENOMEM and -EFAULT. + */ +ssize_t iov_iter_extract_pages(struct iov_iter *i, + struct page ***pages, + size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + maxsize = min_t(size_t, min_t(size_t, maxsize, i->count), MAX_RW_COUNT); + if (!maxsize) + return 0; + + if (likely(user_backed_iter(i))) + return iov_iter_extract_user_pages(i, pages, maxsize, + maxpages, extraction_flags, + offset0); + if (iov_iter_is_kvec(i)) + return iov_iter_extract_kvec_pages(i, pages, maxsize, + maxpages, extraction_flags, + offset0); + if (iov_iter_is_bvec(i)) + return iov_iter_extract_bvec_pages(i, pages, maxsize, + maxpages, extraction_flags, + offset0); + if (iov_iter_is_xarray(i)) + return iov_iter_extract_xarray_pages(i, pages, maxsize, + maxpages, extraction_flags, + offset0); + return -EFAULT; +} +EXPORT_SYMBOL_GPL(iov_iter_extract_pages); diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile index 29aff6562b42..da665cd4ea12 100644 --- a/lib/kunit/Makefile +++ b/lib/kunit/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_KUNIT) += kunit.o kunit-objs += test.o \ resource.o \ + static_stub.o \ string-stream.o \ assert.o \ try-catch.o \ @@ -11,6 +12,9 @@ ifeq ($(CONFIG_KUNIT_DEBUGFS),y) kunit-objs += debugfs.o endif +# KUnit 'hooks' are built-in even when KUnit is built as a module. +lib-y += hooks.o + obj-$(CONFIG_KUNIT_TEST) += kunit-test.o # string-stream-test compiles built-in only. 
diff --git a/lib/kunit/assert.c b/lib/kunit/assert.c index f5b50babe38d..05a09652f5a1 100644 --- a/lib/kunit/assert.c +++ b/lib/kunit/assert.c @@ -241,24 +241,34 @@ void kunit_mem_assert_format(const struct kunit_assert *assert, mem_assert = container_of(assert, struct kunit_mem_assert, assert); - string_stream_add(stream, - KUNIT_SUBTEST_INDENT "Expected %s %s %s, but\n", - mem_assert->text->left_text, - mem_assert->text->operation, - mem_assert->text->right_text); + if (!mem_assert->left_value) { + string_stream_add(stream, + KUNIT_SUBTEST_INDENT "Expected %s is not null, but is\n", + mem_assert->text->left_text); + } else if (!mem_assert->right_value) { + string_stream_add(stream, + KUNIT_SUBTEST_INDENT "Expected %s is not null, but is\n", + mem_assert->text->right_text); + } else { + string_stream_add(stream, + KUNIT_SUBTEST_INDENT "Expected %s %s %s, but\n", + mem_assert->text->left_text, + mem_assert->text->operation, + mem_assert->text->right_text); - string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s ==\n", - mem_assert->text->left_text); - kunit_assert_hexdump(stream, mem_assert->left_value, - mem_assert->right_value, mem_assert->size); + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s ==\n", + mem_assert->text->left_text); + kunit_assert_hexdump(stream, mem_assert->left_value, + mem_assert->right_value, mem_assert->size); - string_stream_add(stream, "\n"); + string_stream_add(stream, "\n"); - string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s ==\n", - mem_assert->text->right_text); - kunit_assert_hexdump(stream, mem_assert->right_value, - mem_assert->left_value, mem_assert->size); + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s ==\n", + mem_assert->text->right_text); + kunit_assert_hexdump(stream, mem_assert->right_value, + mem_assert->left_value, mem_assert->size); - kunit_assert_print_msg(message, stream); + kunit_assert_print_msg(message, stream); + } } EXPORT_SYMBOL_GPL(kunit_mem_assert_format); diff --git a/lib/kunit/hooks-impl.h b/lib/kunit/hooks-impl.h new file mode 100644 index 000000000000..4e71b2d0143b --- /dev/null +++ b/lib/kunit/hooks-impl.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Declarations for hook implementations. + * + * These will be set as the function pointers in struct kunit_hook_table, + * found in include/kunit/test-bug.h. + * + * Copyright (C) 2023, Google LLC. + * Author: David Gow <davidgow@google.com> + */ + +#ifndef _KUNIT_HOOKS_IMPL_H +#define _KUNIT_HOOKS_IMPL_H + +#include <kunit/test-bug.h> + +/* List of declarations. */ +void __printf(3, 4) __kunit_fail_current_test_impl(const char *file, + int line, + const char *fmt, ...); +void *__kunit_get_static_stub_address_impl(struct kunit *test, void *real_fn_addr); + +/* Code to set all of the function pointers. */ +static inline void kunit_install_hooks(void) +{ + /* Install the KUnit hook functions. */ + kunit_hooks.fail_current_test = __kunit_fail_current_test_impl; + kunit_hooks.get_static_stub_address = __kunit_get_static_stub_address_impl; +} + +#endif /* _KUNIT_HOOKS_IMPL_H */ diff --git a/lib/kunit/hooks.c b/lib/kunit/hooks.c new file mode 100644 index 000000000000..365d98d4953c --- /dev/null +++ b/lib/kunit/hooks.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit 'Hooks' implementation. + * + * This file contains code / structures which should be built-in even when + * KUnit itself is built as a module. + * + * Copyright (C) 2022, Google LLC. 
+ * Author: David Gow <davidgow@google.com> + */ + + +#include <kunit/test-bug.h> + +DEFINE_STATIC_KEY_FALSE(kunit_running); +EXPORT_SYMBOL(kunit_running); + +/* Function pointers for hooks. */ +struct kunit_hooks_table kunit_hooks; +EXPORT_SYMBOL(kunit_hooks); + diff --git a/lib/kunit/kunit-example-test.c b/lib/kunit/kunit-example-test.c index 66cc4e2365ec..cd8b7e51d02b 100644 --- a/lib/kunit/kunit-example-test.c +++ b/lib/kunit/kunit-example-test.c @@ -7,6 +7,7 @@ */ #include <kunit/test.h> +#include <kunit/static_stub.h> /* * This is the most fundamental element of KUnit, the test case. A test case @@ -130,6 +131,42 @@ static void example_all_expect_macros_test(struct kunit *test) KUNIT_ASSERT_GT_MSG(test, sizeof(int), 0, "Your ints are 0-bit?!"); } +/* This is a function we'll replace with static stubs. */ +static int add_one(int i) +{ + /* This will trigger the stub if active. */ + KUNIT_STATIC_STUB_REDIRECT(add_one, i); + + return i + 1; +} + +/* This is used as a replacement for the above function. */ +static int subtract_one(int i) +{ + /* We don't need to trigger the stub from the replacement. */ + + return i - 1; +} + +/* + * This test shows the use of static stubs. + */ +static void example_static_stub_test(struct kunit *test) +{ + /* By default, function is not stubbed. */ + KUNIT_EXPECT_EQ(test, add_one(1), 2); + + /* Replace add_one() with subtract_one(). */ + kunit_activate_static_stub(test, add_one, subtract_one); + + /* add_one() is now replaced. */ + KUNIT_EXPECT_EQ(test, add_one(1), 0); + + /* Return add_one() to normal. */ + kunit_deactivate_static_stub(test, add_one); + KUNIT_EXPECT_EQ(test, add_one(1), 2); +} + /* * Here we make a list of all the test cases we want to add to the test suite * below. @@ -145,6 +182,7 @@ static struct kunit_case example_test_cases[] = { KUNIT_CASE(example_skip_test), KUNIT_CASE(example_mark_skipped_test), KUNIT_CASE(example_all_expect_macros_test), + KUNIT_CASE(example_static_stub_test), {} }; diff --git a/lib/kunit/static_stub.c b/lib/kunit/static_stub.c new file mode 100644 index 000000000000..92b2cccd5e76 --- /dev/null +++ b/lib/kunit/static_stub.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit function redirection (static stubbing) API. + * + * Copyright (C) 2022, Google LLC. + * Author: David Gow <davidgow@google.com> + */ + +#include <kunit/test.h> +#include <kunit/static_stub.h> +#include "hooks-impl.h" + + +/* Context for a static stub. This is stored in the resource data. */ +struct kunit_static_stub_ctx { + void *real_fn_addr; + void *replacement_addr; +}; + +static void __kunit_static_stub_resource_free(struct kunit_resource *res) +{ + kfree(res->data); +} + +/* Matching function for kunit_find_resource(). match_data is real_fn_addr. */ +static bool __kunit_static_stub_resource_match(struct kunit *test, + struct kunit_resource *res, + void *match_real_fn_addr) +{ + /* This pointer is only valid if res is a static stub resource. */ + struct kunit_static_stub_ctx *ctx = res->data; + + /* Make sure the resource is a static stub resource. */ + if (res->free != &__kunit_static_stub_resource_free) + return false; + + return ctx->real_fn_addr == match_real_fn_addr; +} + +/* Hook to return the address of the replacement function. 
*/ +void *__kunit_get_static_stub_address_impl(struct kunit *test, void *real_fn_addr) +{ + struct kunit_resource *res; + struct kunit_static_stub_ctx *ctx; + void *replacement_addr; + + res = kunit_find_resource(test, + __kunit_static_stub_resource_match, + real_fn_addr); + + if (!res) + return NULL; + + ctx = res->data; + replacement_addr = ctx->replacement_addr; + kunit_put_resource(res); + return replacement_addr; +} + +void kunit_deactivate_static_stub(struct kunit *test, void *real_fn_addr) +{ + struct kunit_resource *res; + + KUNIT_ASSERT_PTR_NE_MSG(test, real_fn_addr, NULL, + "Tried to deactivate a NULL stub."); + + /* Look up the existing stub for this function. */ + res = kunit_find_resource(test, + __kunit_static_stub_resource_match, + real_fn_addr); + + /* Error out if the stub doesn't exist. */ + KUNIT_ASSERT_PTR_NE_MSG(test, res, NULL, + "Tried to deactivate a nonexistent stub."); + + /* Free the stub. We 'put' twice, as we got a reference + * from kunit_find_resource() + */ + kunit_remove_resource(test, res); + kunit_put_resource(res); +} +EXPORT_SYMBOL_GPL(kunit_deactivate_static_stub); + +/* Helper function for kunit_activate_static_stub(). The macro does + * typechecking, so use it instead. + */ +void __kunit_activate_static_stub(struct kunit *test, + void *real_fn_addr, + void *replacement_addr) +{ + struct kunit_static_stub_ctx *ctx; + struct kunit_resource *res; + + KUNIT_ASSERT_PTR_NE_MSG(test, real_fn_addr, NULL, + "Tried to activate a stub for function NULL"); + + /* If the replacement address is NULL, deactivate the stub. */ + if (!replacement_addr) { + kunit_deactivate_static_stub(test, replacement_addr); + return; + } + + /* Look up any existing stubs for this function, and replace them. */ + res = kunit_find_resource(test, + __kunit_static_stub_resource_match, + real_fn_addr); + if (res) { + ctx = res->data; + ctx->replacement_addr = replacement_addr; + + /* We got an extra reference from find_resource(), so put it. */ + kunit_put_resource(res); + } else { + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ctx->real_fn_addr = real_fn_addr; + ctx->replacement_addr = replacement_addr; + res = kunit_alloc_resource(test, NULL, + &__kunit_static_stub_resource_free, + GFP_KERNEL, ctx); + } +} +EXPORT_SYMBOL_GPL(__kunit_activate_static_stub); diff --git a/lib/kunit/test.c b/lib/kunit/test.c index c9ebf975e56b..c9e15bb60058 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -17,16 +17,14 @@ #include <linux/sched.h> #include "debugfs.h" +#include "hooks-impl.h" #include "string-stream.h" #include "try-catch-impl.h" -DEFINE_STATIC_KEY_FALSE(kunit_running); - -#if IS_BUILTIN(CONFIG_KUNIT) /* - * Fail the current test and print an error message to the log. + * Hook to fail the current test and print an error message to the log. */ -void __kunit_fail_current_test(const char *file, int line, const char *fmt, ...) +void __printf(3, 4) __kunit_fail_current_test_impl(const char *file, int line, const char *fmt, ...) { va_list args; int len; @@ -53,8 +51,6 @@ void __kunit_fail_current_test(const char *file, int line, const char *fmt, ...) kunit_err(current->kunit_test, "%s:%d: %s", file, line, buffer); kunit_kfree(current->kunit_test, buffer); } -EXPORT_SYMBOL_GPL(__kunit_fail_current_test); -#endif /* * Enable KUnit tests to run. @@ -777,6 +773,9 @@ EXPORT_SYMBOL_GPL(kunit_cleanup); static int __init kunit_init(void) { + /* Install the KUnit hook functions. 
*/ + kunit_install_hooks(); + kunit_debugfs_init(); #ifdef CONFIG_MODULES return register_module_notifier(&kunit_mod_nb); @@ -788,6 +787,7 @@ late_initcall(kunit_init); static void __exit kunit_exit(void) { + memset(&kunit_hooks, 0, sizeof(kunit_hooks)); #ifdef CONFIG_MODULES unregister_module_notifier(&kunit_mod_nb); #endif diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c index 89128551448d..887926f04731 100644 --- a/lib/memcpy_kunit.c +++ b/lib/memcpy_kunit.c @@ -309,6 +309,8 @@ static void set_random_nonzero(struct kunit *test, u8 *byte) static void init_large(struct kunit *test) { + if (!IS_ENABLED(CONFIG_MEMCPY_SLOW_KUNIT_TEST)) + kunit_skip(test, "Slow test skipped. Enable with CONFIG_MEMCPY_SLOW_KUNIT_TEST=y"); /* Get many bit patterns. */ get_random_bytes(large_src, ARRAY_SIZE(large_src)); diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 39c4c6731094..3cb6bd148fa9 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -504,7 +504,8 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) while (sg_miter_next(&miter)) { buff = miter.addr; - len = miter.length; + len = min_t(unsigned, miter.length, nbytes); + nbytes -= len; for (x = 0; x < len; x++) { a <<= 8; diff --git a/lib/nlattr.c b/lib/nlattr.c index 9055e8b4d144..489e15bde5c1 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/jiffies.h> +#include <linux/nospec.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/types.h> @@ -381,6 +382,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, if (type <= 0 || type > maxtype) return 0; + type = array_index_nospec(type, maxtype + 1); pt = &policy[type]; BUG_ON(pt->type > NLA_TYPE_MAX); @@ -596,6 +598,7 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, } continue; } + type = array_index_nospec(type, maxtype + 1); if (policy) { int err = validate_nla(nla, maxtype, policy, validate, extack, depth); diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index d01aec6ae15c..5274bbb026d7 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -64,6 +64,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, if (!cpumask_empty(to_cpumask(backtrace_mask))) { pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n", this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask)); + nmi_backtrace_stall_snap(to_cpumask(backtrace_mask)); raise(to_cpumask(backtrace_mask)); } @@ -74,6 +75,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, mdelay(1); touch_softlockup_watchdog(); } + nmi_backtrace_stall_check(to_cpumask(backtrace_mask)); /* * Force flush any remote buffers that might be stuck in IRQ context diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 1fcede228fa2..eff4e42c425a 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -167,15 +167,16 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, return nr; } -static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, - unsigned int alloc_hint) +static int sbitmap_find_bit_in_word(struct sbitmap_word *map, + unsigned int depth, + unsigned int alloc_hint, + bool wrap) { - struct sbitmap_word *map = &sb->map[index]; int nr; do { - nr = __sbitmap_get_word(&map->word, __map_depth(sb, index), - alloc_hint, !sb->round_robin); + nr = __sbitmap_get_word(&map->word, depth, + alloc_hint, wrap); if (nr != -1) break; if (!sbitmap_deferred_clear(map)) @@ -185,25 +186,22 @@ static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, 
return nr; } -static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) +static int sbitmap_find_bit(struct sbitmap *sb, + unsigned int depth, + unsigned int index, + unsigned int alloc_hint, + bool wrap) { - unsigned int i, index; + unsigned int i; int nr = -1; - index = SB_NR_TO_INDEX(sb, alloc_hint); - - /* - * Unless we're doing round robin tag allocation, just use the - * alloc_hint to find the right word index. No point in looping - * twice in find_next_zero_bit() for that case. - */ - if (sb->round_robin) - alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); - else - alloc_hint = 0; - for (i = 0; i < sb->map_nr; i++) { - nr = sbitmap_find_bit_in_index(sb, index, alloc_hint); + nr = sbitmap_find_bit_in_word(&sb->map[index], + min_t(unsigned int, + __map_depth(sb, index), + depth), + alloc_hint, wrap); + if (nr != -1) { nr += index << sb->shift; break; @@ -218,6 +216,26 @@ static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) return nr; } +static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) +{ + unsigned int index; + + index = SB_NR_TO_INDEX(sb, alloc_hint); + + /* + * Unless we're doing round robin tag allocation, just use the + * alloc_hint to find the right word index. No point in looping + * twice in find_next_zero_bit() for that case. + */ + if (sb->round_robin) + alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); + else + alloc_hint = 0; + + return sbitmap_find_bit(sb, UINT_MAX, index, alloc_hint, + !sb->round_robin); +} + int sbitmap_get(struct sbitmap *sb) { int nr; @@ -239,37 +257,12 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, unsigned long shallow_depth) { - unsigned int i, index; - int nr = -1; + unsigned int index; index = SB_NR_TO_INDEX(sb, alloc_hint); + alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); - for (i = 0; i < sb->map_nr; i++) { -again: - nr = __sbitmap_get_word(&sb->map[index].word, - min_t(unsigned int, - __map_depth(sb, index), - shallow_depth), - SB_NR_TO_BIT(sb, alloc_hint), true); - if (nr != -1) { - nr += index << sb->shift; - break; - } - - if (sbitmap_deferred_clear(&sb->map[index])) - goto again; - - /* Jump to next index. */ - index++; - alloc_hint = index << sb->shift; - - if (index >= sb->map_nr) { - index = 0; - alloc_hint = 0; - } - } - - return nr; + return sbitmap_find_bit(sb, shallow_depth, index, alloc_hint, true); } int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) @@ -464,13 +457,10 @@ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users) { unsigned int wake_batch; - unsigned int min_batch; unsigned int depth = (sbq->sb.depth + users - 1) / users; - min_batch = sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES) ? 
4 : 1; - wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, - min_batch, SBQ_WAKE_BATCH); + 1, SBQ_WAKE_BATCH); WRITE_ONCE(sbq->wake_batch, wake_batch); } @@ -521,11 +511,9 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, get_mask = ((1UL << nr_tags) - 1) << nr; val = READ_ONCE(map->word); - do { - if ((val & ~get_mask) != val) - goto next; - } while (!atomic_long_try_cmpxchg(ptr, &val, - get_mask | val)); + while (!atomic_long_try_cmpxchg(ptr, &val, + get_mask | val)) + ; get_mask = (get_mask & ~val) >> nr; if (get_mask) { *offset = nr + (index << sb->shift); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index f72aa50c6654..8d7519a8f308 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -470,22 +470,27 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, return -EOPNOTSUPP; if (sgt_append->prv) { + unsigned long next_pfn = (page_to_phys(sg_page(sgt_append->prv)) + + sgt_append->prv->offset + sgt_append->prv->length) / PAGE_SIZE; + if (WARN_ON(offset)) return -EINVAL; /* Merge contiguous pages into the last SG */ prv_len = sgt_append->prv->length; - last_pg = sg_page(sgt_append->prv); - while (n_pages && pages_are_mergeable(pages[0], last_pg)) { - if (sgt_append->prv->length + PAGE_SIZE > max_segment) - break; - sgt_append->prv->length += PAGE_SIZE; - last_pg = pages[0]; - pages++; - n_pages--; + if (page_to_pfn(pages[0]) == next_pfn) { + last_pg = pfn_to_page(next_pfn - 1); + while (n_pages && pages_are_mergeable(pages[0], last_pg)) { + if (sgt_append->prv->length + PAGE_SIZE > max_segment) + break; + sgt_append->prv->length += PAGE_SIZE; + last_pg = pages[0]; + pages++; + n_pages--; + } + if (!n_pages) + goto out; } - if (!n_pages) - goto out; } /* compute number of contiguous chunks */ diff --git a/lib/string.c b/lib/string.c index 4fb566ea610f..3d55ef890106 100644 --- a/lib/string.c +++ b/lib/string.c @@ -480,13 +480,11 @@ EXPORT_SYMBOL(strcspn); */ char *strpbrk(const char *cs, const char *ct) { - const char *sc1, *sc2; + const char *sc; - for (sc1 = cs; *sc1 != '\0'; ++sc1) { - for (sc2 = ct; *sc2 != '\0'; ++sc2) { - if (*sc1 == *sc2) - return (char *)sc1; - } + for (sc = cs; *sc != '\0'; ++sc) { + if (strchr(ct, *sc)) + return (char *)sc; } return NULL; } diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 6423df9fa8dd..43d9dfd57ab7 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -51,12 +51,11 @@ static int num_test_devs; /** * enum kmod_test_case - linker table test case - * - * If you add a test case, please be sure to review if you need to se - * @need_mod_put for your tests case. - * * @TEST_KMOD_DRIVER: stress tests request_module() * @TEST_KMOD_FS_TYPE: stress tests get_fs_type() + * + * If you add a test case, please be sure to review if you need to set + * @need_mod_put for your tests case. 
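The scatterlist change above only enters the page-merge loop when the first new page is physically contiguous with the end of the previous SG entry, computed from that entry's page, offset and length. A rough sketch of the contiguity test, assuming the usual page/pfn helpers are reachable from the included headers; the helper name is invented:

#include <linux/mm.h>
#include <linux/scatterlist.h>

/*
 * Sketch: does @page start at the exact pfn where the last SG entry
 * ends?  Mirrors the next_pfn computation in the hunk above.
 */
static bool example_page_continues_sg(struct scatterlist *prv,
				      struct page *page)
{
	unsigned long next_pfn = (page_to_phys(sg_page(prv)) +
				  prv->offset + prv->length) / PAGE_SIZE;

	return page_to_pfn(page) == next_pfn;
}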
*/ enum kmod_test_case { __TEST_KMOD_INVALID = 0, @@ -78,7 +77,7 @@ struct test_config { struct kmod_test_device; /** - * kmod_test_device_info - thread info + * struct kmod_test_device_info - thread info * * @ret_sync: return value if request_module() is used, sync request for * @TEST_KMOD_DRIVER @@ -101,7 +100,7 @@ struct kmod_test_device_info { }; /** - * kmod_test_device - test device to help test kmod + * struct kmod_test_device - test device to help test kmod * * @dev_idx: unique ID for test device * @config: configuration for the test diff --git a/lib/test_kprobes.c b/lib/test_kprobes.c index 1c95e5719802..0648f7154f5c 100644 --- a/lib/test_kprobes.c +++ b/lib/test_kprobes.c @@ -14,6 +14,7 @@ static u32 rand1, preh_val, posth_val; static u32 (*target)(u32 value); +static u32 (*recursed_target)(u32 value); static u32 (*target2)(u32 value); static struct kunit *current_test; @@ -27,18 +28,27 @@ static noinline u32 kprobe_target(u32 value) return (value / div_factor); } +static noinline u32 kprobe_recursed_target(u32 value) +{ + return (value / div_factor); +} + static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs) { KUNIT_EXPECT_FALSE(current_test, preemptible()); - preh_val = (rand1 / div_factor); + + preh_val = recursed_target(rand1); return 0; } static void kp_post_handler(struct kprobe *p, struct pt_regs *regs, unsigned long flags) { + u32 expval = recursed_target(rand1); + KUNIT_EXPECT_FALSE(current_test, preemptible()); - KUNIT_EXPECT_EQ(current_test, preh_val, (rand1 / div_factor)); + KUNIT_EXPECT_EQ(current_test, preh_val, expval); + posth_val = preh_val + div_factor; } @@ -136,6 +146,29 @@ static void test_kprobes(struct kunit *test) unregister_kprobes(kps, 2); } +static struct kprobe kp_missed = { + .symbol_name = "kprobe_recursed_target", + .pre_handler = kp_pre_handler, + .post_handler = kp_post_handler, +}; + +static void test_kprobe_missed(struct kunit *test) +{ + current_test = test; + preh_val = 0; + posth_val = 0; + + KUNIT_EXPECT_EQ(test, 0, register_kprobe(&kp_missed)); + + recursed_target(rand1); + + KUNIT_EXPECT_EQ(test, 2, kp_missed.nmissed); + KUNIT_EXPECT_NE(test, 0, preh_val); + KUNIT_EXPECT_NE(test, 0, posth_val); + + unregister_kprobe(&kp_missed); +} + #ifdef CONFIG_KRETPROBES static u32 krph_val; @@ -336,6 +369,7 @@ static int kprobes_test_init(struct kunit *test) { target = kprobe_target; target2 = kprobe_target2; + recursed_target = kprobe_recursed_target; stacktrace_target = kprobe_stacktrace_target; internal_target = kprobe_stacktrace_internal_target; stacktrace_driver = kprobe_stacktrace_driver; @@ -346,6 +380,7 @@ static int kprobes_test_init(struct kunit *test) static struct kunit_case kprobes_testcases[] = { KUNIT_CASE(test_kprobe), KUNIT_CASE(test_kprobes), + KUNIT_CASE(test_kprobe_missed), #ifdef CONFIG_KRETPROBES KUNIT_CASE(test_kretprobe), KUNIT_CASE(test_kretprobes), diff --git a/lib/ubsan.c b/lib/ubsan.c index 60c7099857a0..e2cc4a799312 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -14,10 +14,76 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/uaccess.h> +#include <linux/ubsan.h> #include <kunit/test-bug.h> #include "ubsan.h" +#ifdef CONFIG_UBSAN_TRAP +/* + * Only include matches for UBSAN checks that are actually compiled in. + * The mappings of struct SanitizerKind (the -fsanitize=xxx args) to + * enum SanitizerHandler (the traps) in Clang is in clang/lib/CodeGen/. 
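The new test_kprobe_missed case relies on kprobes not recursing: when a probe handler calls a function that is itself probed, the nested probe is skipped and its nmissed counter is incremented instead. A minimal sketch of inspecting that counter outside the test suite; the probed symbol is only an assumed example.

#include <linux/kprobes.h>
#include <linux/module.h>

static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	/*
	 * Any probed function called from this handler does not trigger
	 * its own probe; its nmissed count is bumped instead.
	 */
	return 0;
}

static struct kprobe example_kp = {
	.symbol_name = "kernel_clone",	/* assumed target, for illustration */
	.pre_handler = example_pre,
};

static int __init example_init(void)
{
	return register_kprobe(&example_kp);
}

static void __exit example_exit(void)
{
	pr_info("probe hits missed: %lu\n", example_kp.nmissed);
	unregister_kprobe(&example_kp);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");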
+ */ +const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type) +{ + switch (check_type) { +#ifdef CONFIG_UBSAN_BOUNDS + /* + * SanitizerKind::ArrayBounds and SanitizerKind::LocalBounds + * emit SanitizerHandler::OutOfBounds. + */ + case ubsan_out_of_bounds: + return "UBSAN: array index out of bounds"; +#endif +#ifdef CONFIG_UBSAN_SHIFT + /* + * SanitizerKind::ShiftBase and SanitizerKind::ShiftExponent + * emit SanitizerHandler::ShiftOutOfBounds. + */ + case ubsan_shift_out_of_bounds: + return "UBSAN: shift out of bounds"; +#endif +#ifdef CONFIG_UBSAN_DIV_ZERO + /* + * SanitizerKind::IntegerDivideByZero emits + * SanitizerHandler::DivremOverflow. + */ + case ubsan_divrem_overflow: + return "UBSAN: divide/remainder overflow"; +#endif +#ifdef CONFIG_UBSAN_UNREACHABLE + /* + * SanitizerKind::Unreachable emits + * SanitizerHandler::BuiltinUnreachable. + */ + case ubsan_builtin_unreachable: + return "UBSAN: unreachable code"; +#endif +#if defined(CONFIG_UBSAN_BOOL) || defined(CONFIG_UBSAN_ENUM) + /* + * SanitizerKind::Bool and SanitizerKind::Enum emit + * SanitizerHandler::LoadInvalidValue. + */ + case ubsan_load_invalid_value: + return "UBSAN: loading invalid value"; +#endif +#ifdef CONFIG_UBSAN_ALIGNMENT + /* + * SanitizerKind::Alignment emits SanitizerHandler::TypeMismatch + * or SanitizerHandler::AlignmentAssumption. + */ + case ubsan_alignment_assumption: + return "UBSAN: alignment assumption"; + case ubsan_type_mismatch: + return "UBSAN: type mismatch"; +#endif + default: + return "UBSAN: unrecognized failure code"; + } +} + +#else static const char * const type_check_kinds[] = { "load of", "store to", @@ -339,9 +405,10 @@ void __ubsan_handle_load_invalid_value(void *_data, void *val) { struct invalid_value_data *data = _data; char val_str[VALUE_LENGTH]; + unsigned long ua_flags = user_access_save(); if (suppress_report(&data->location)) - return; + goto out; ubsan_prologue(&data->location, "invalid-load"); @@ -351,6 +418,8 @@ void __ubsan_handle_load_invalid_value(void *_data, void *val) val_str, data->type->type_name); ubsan_epilogue(); +out: + user_access_restore(ua_flags); } EXPORT_SYMBOL(__ubsan_handle_load_invalid_value); @@ -384,3 +453,5 @@ void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr, ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_alignment_assumption); + +#endif /* !CONFIG_UBSAN_TRAP */ diff --git a/lib/ubsan.h b/lib/ubsan.h index 9a0b71c5ff9f..cc5cb94895a6 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -2,6 +2,38 @@ #ifndef _LIB_UBSAN_H #define _LIB_UBSAN_H +/* + * ABI defined by Clang's UBSAN enum SanitizerHandler: + * https://github.com/llvm/llvm-project/blob/release/16.x/clang/lib/CodeGen/CodeGenFunction.h#L113 + */ +enum ubsan_checks { + ubsan_add_overflow, + ubsan_builtin_unreachable, + ubsan_cfi_check_fail, + ubsan_divrem_overflow, + ubsan_dynamic_type_cache_miss, + ubsan_float_cast_overflow, + ubsan_function_type_mismatch, + ubsan_implicit_conversion, + ubsan_invalid_builtin, + ubsan_invalid_objc_cast, + ubsan_load_invalid_value, + ubsan_missing_return, + ubsan_mul_overflow, + ubsan_negate_overflow, + ubsan_nullability_arg, + ubsan_nullability_return, + ubsan_nonnull_arg, + ubsan_nonnull_return, + ubsan_out_of_bounds, + ubsan_pointer_overflow, + ubsan_shift_out_of_bounds, + ubsan_sub_overflow, + ubsan_type_mismatch, + ubsan_alignment_assumption, + ubsan_vla_bound_not_positive, +}; + enum { type_kind_int = 0, type_kind_float = 1, diff --git a/lib/usercopy.c b/lib/usercopy.c index 1505a52f23a0..d29fe29c6849 100644 --- 
a/lib/usercopy.c +++ b/lib/usercopy.c @@ -3,6 +3,7 @@ #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/uaccess.h> +#include <linux/nospec.h> /* out-of-line parts */ @@ -12,6 +13,12 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { + /* + * Ensure that bad access_ok() speculation will not + * lead to nasty side effects *after* the copy is + * finished: + */ + barrier_nospec(); instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, res); |
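The final usercopy hunk inserts barrier_nospec() between access_ok() and the copy itself, so a mispredicted access check cannot let the copy run speculatively on an unchecked user address. A sketch of the resulting pattern, assuming barrier_nospec() is available via <linux/nospec.h> as the hunk's new include suggests; the function name is illustrative only.

#include <linux/nospec.h>
#include <linux/uaccess.h>

/*
 * Sketch of the hardened copy-in pattern; returns bytes not copied,
 * like copy_from_user().
 */
static unsigned long example_copy_in(void *dst, const void __user *src,
				     unsigned long len)
{
	unsigned long left = len;

	if (access_ok(src, len)) {
		/*
		 * Keep a mispredicted access_ok() from letting the copy
		 * below execute speculatively on an unchecked address.
		 */
		barrier_nospec();
		left = raw_copy_from_user(dst, src, len);
	}
	return left;
}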