From 48b1fc190a180d971fb69217c88c7247f4f2ca19 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 21 Feb 2020 23:02:09 +0100 Subject: kcsan: Add option to allow watcher interruptions Add option to allow interrupts while a watchpoint is set up. This can be enabled either via CONFIG_KCSAN_INTERRUPT_WATCHER or via the boot parameter 'kcsan.interrupt_watcher=1'. Note that, currently not all safe per-CPU access primitives and patterns are accounted for, which could result in false positives. For example, asm-generic/percpu.h uses plain operations, which by default are instrumented. On interrupts and subsequent accesses to the same variable, KCSAN would currently report a data race with this option. Therefore, this option should currently remain disabled by default, but may be enabled for specific test scenarios. To avoid new warnings, changes all uses of smp_processor_id() to use the raw version (as already done in kcsan_found_watchpoint()). The exact SMP processor id is for informational purposes in the report, and correctness is not affected. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) (limited to 'kernel/kcsan/core.c') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 589b1e7f0f25..e7387fec6679 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -21,6 +21,7 @@ static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE); static unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK; static unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT; static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH; +static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER); #ifdef MODULE_PARAM_PREFIX #undef MODULE_PARAM_PREFIX @@ -30,6 +31,7 @@ module_param_named(early_enable, kcsan_early_enable, bool, 0); module_param_named(udelay_task, kcsan_udelay_task, uint, 0644); module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644); module_param_named(skip_watch, kcsan_skip_watch, long, 0644); +module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444); bool kcsan_enabled; @@ -354,7 +356,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) unsigned long access_mask; enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE; unsigned long ua_flags = user_access_save(); - unsigned long irq_flags; + unsigned long irq_flags = 0; /* * Always reset kcsan_skip counter in slow-path to avoid underflow; see @@ -370,26 +372,9 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) goto out; } - /* - * Disable interrupts & preemptions to avoid another thread on the same - * CPU accessing memory locations for the set up watchpoint; this is to - * avoid reporting races to e.g. CPU-local data. - * - * An alternative would be adding the source CPU to the watchpoint - * encoding, and checking that watchpoint-CPU != this-CPU. There are - * several problems with this: - * 1. we should avoid stealing more bits from the watchpoint encoding - * as it would affect accuracy, as well as increase performance - * overhead in the fast-path; - * 2. if we are preempted, but there *is* a genuine data race, we - * would *not* report it -- since this is the common case (vs. - * CPU-local data accesses), it makes more sense (from a data race - * detection point of view) to simply disable preemptions to ensure - * as many tasks as possible run on other CPUs. - * - * Use raw versions, to avoid lockdep recursion via IRQ flags tracing. - */ - raw_local_irq_save(irq_flags); + if (!kcsan_interrupt_watcher) + /* Use raw to avoid lockdep recursion via IRQ flags tracing. */ + raw_local_irq_save(irq_flags); watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write); if (watchpoint == NULL) { @@ -507,7 +492,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); - kcsan_report(ptr, size, type, value_change, smp_processor_id(), + kcsan_report(ptr, size, type, value_change, raw_smp_processor_id(), KCSAN_REPORT_RACE_SIGNAL); } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { /* Inferring a race, since the value should not have changed. */ @@ -518,13 +503,14 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE, - smp_processor_id(), + raw_smp_processor_id(), KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); } kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS); out_unlock: - raw_local_irq_restore(irq_flags); + if (!kcsan_interrupt_watcher) + raw_local_irq_restore(irq_flags); out: user_access_restore(ua_flags); } -- cgit v1.2.3 From 2402d0eae589a31ee7b1774cb220d84d0f5605b4 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Sat, 22 Feb 2020 00:10:27 +0100 Subject: kcsan: Add option for verbose reporting Adds CONFIG_KCSAN_VERBOSE to optionally enable more verbose reports. Currently information about the reporting task's held locks and IRQ trace events are shown, if they are enabled. Signed-off-by: Marco Elver Suggested-by: Qian Cai Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 4 +- kernel/kcsan/kcsan.h | 3 ++ kernel/kcsan/report.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++- lib/Kconfig.kcsan | 13 +++++++ 4 files changed, 120 insertions(+), 3 deletions(-) (limited to 'kernel/kcsan/core.c') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index e7387fec6679..065615df88ea 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -18,8 +18,8 @@ #include "kcsan.h" static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE); -static unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK; -static unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT; +unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK; +unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT; static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH; static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER); diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 892de5120c1b..e282f8b5749e 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -13,6 +13,9 @@ /* The number of adjacent watchpoints to check. */ #define KCSAN_CHECK_ADJACENT 1 +extern unsigned int kcsan_udelay_task; +extern unsigned int kcsan_udelay_interrupt; + /* * Globally enable and disable KCSAN. */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 11c791b886f3..18f9d3bc93a5 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include #include @@ -31,7 +33,26 @@ static struct { int cpu_id; unsigned long stack_entries[NUM_STACK_ENTRIES]; int num_stack_entries; -} other_info = { .ptr = NULL }; + + /* + * Optionally pass @current. Typically we do not need to pass @current + * via @other_info since just @task_pid is sufficient. Passing @current + * has additional overhead. + * + * To safely pass @current, we must either use get_task_struct/ + * put_task_struct, or stall the thread that populated @other_info. + * + * We cannot rely on get_task_struct/put_task_struct in case + * release_report() races with a task being released, and would have to + * free it in release_report(). This may result in deadlock if we want + * to use KCSAN on the allocators. + * + * Since we also want to reliably print held locks for + * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread + * that populated @other_info until it has been consumed. + */ + struct task_struct *task; +} other_info; /* * Information about reported races; used to rate limit reporting. @@ -245,6 +266,16 @@ static int sym_strcmp(void *addr1, void *addr2) return strncmp(buf1, buf2, sizeof(buf1)); } +static void print_verbose_info(struct task_struct *task) +{ + if (!task) + return; + + pr_err("\n"); + debug_show_held_locks(task); + print_irqtrace_events(task); +} + /* * Returns true if a report was generated, false otherwise. */ @@ -319,6 +350,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, other_info.num_stack_entries - other_skipnr, 0); + if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) + print_verbose_info(other_info.task); + pr_err("\n"); pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", get_access_type(access_type), ptr, size, @@ -340,6 +374,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, 0); + if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) + print_verbose_info(current); + /* Print report footer. */ pr_err("\n"); pr_err("Reported by Kernel Concurrency Sanitizer on:\n"); @@ -357,6 +394,67 @@ static void release_report(unsigned long *flags, enum kcsan_report_type type) spin_unlock_irqrestore(&report_lock, *flags); } +/* + * Sets @other_info.task and awaits consumption of @other_info. + * + * Precondition: report_lock is held. + * Postcondition: report_lock is held. + */ +static void +set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) +{ + /* + * We may be instrumenting a code-path where current->state is already + * something other than TASK_RUNNING. + */ + const bool is_running = current->state == TASK_RUNNING; + /* + * To avoid deadlock in case we are in an interrupt here and this is a + * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a + * timeout to ensure this works in all contexts. + * + * Await approximately the worst case delay of the reporting thread (if + * we are not interrupted). + */ + int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt); + + other_info.task = current; + do { + if (is_running) { + /* + * Let lockdep know the real task is sleeping, to print + * the held locks (recall we turned lockdep off, so + * locking/unlocking @report_lock won't be recorded). + */ + set_current_state(TASK_UNINTERRUPTIBLE); + } + spin_unlock_irqrestore(&report_lock, *flags); + /* + * We cannot call schedule() since we also cannot reliably + * determine if sleeping here is permitted -- see in_atomic(). + */ + + udelay(1); + spin_lock_irqsave(&report_lock, *flags); + if (timeout-- < 0) { + /* + * Abort. Reset other_info.task to NULL, since it + * appears the other thread is still going to consume + * it. It will result in no verbose info printed for + * this task. + */ + other_info.task = NULL; + break; + } + /* + * If @ptr nor @current matches, then our information has been + * consumed and we may continue. If not, retry. + */ + } while (other_info.ptr == ptr && other_info.task == current); + if (is_running) + set_current_state(TASK_RUNNING); +} + /* * Depending on the report type either sets other_info and returns false, or * acquires the matching other_info and returns true. If other_info is not @@ -388,6 +486,9 @@ retry: other_info.cpu_id = cpu_id; other_info.num_stack_entries = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1); + if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) + set_other_info_task_blocking(flags, ptr); + spin_unlock_irqrestore(&report_lock, *flags); /* diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 081ed2e1bf7b..0f1447ff8f55 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -20,6 +20,19 @@ menuconfig KCSAN if KCSAN +config KCSAN_VERBOSE + bool "Show verbose reports with more information about system state" + depends on PROVE_LOCKING + help + If enabled, reports show more information about the system state that + may help better analyze and debug races. This includes held locks and + IRQ trace events. + + While this option should generally be benign, we call into more + external functions on report generation; if a race report is + generated from any one of them, system stability may suffer due to + deadlocks or recursion. If in doubt, say N. + config KCSAN_DEBUG bool "Debugging of KCSAN internals" -- cgit v1.2.3 From 44656d3dc4f0dc20010d054f27397a4a1469fabf Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 25 Feb 2020 15:32:58 +0100 Subject: kcsan: Add current->state to implicitly atomic accesses Add volatile current->state to list of implicitly atomic accesses. This is in preparation to eventually enable KCSAN on kernel/sched (which currently still has KCSAN_SANITIZE := n). Since accesses that match the special check in atomic.h are rare, it makes more sense to move this check to the slow-path, avoiding the additional compare in the fast-path. With the microbenchmark, a speedup of ~6% is measured. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/atomic.h | 21 +++++++-------------- kernel/kcsan/core.c | 22 +++++++++++++++------- kernel/kcsan/debugfs.c | 27 ++++++++++++++++++--------- 3 files changed, 40 insertions(+), 30 deletions(-) (limited to 'kernel/kcsan/core.c') diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h index a9c193053491..be9e625227f3 100644 --- a/kernel/kcsan/atomic.h +++ b/kernel/kcsan/atomic.h @@ -4,24 +4,17 @@ #define _KERNEL_KCSAN_ATOMIC_H #include +#include /* - * Helper that returns true if access to @ptr should be considered an atomic - * access, even though it is not explicitly atomic. - * - * List all volatile globals that have been observed in races, to suppress - * data race reports between accesses to these variables. - * - * For now, we assume that volatile accesses of globals are as strong as atomic - * accesses (READ_ONCE, WRITE_ONCE cast to volatile). The situation is still not - * entirely clear, as on some architectures (Alpha) READ_ONCE/WRITE_ONCE do more - * than cast to volatile. Eventually, we hope to be able to remove this - * function. + * Special rules for certain memory where concurrent conflicting accesses are + * common, however, the current convention is to not mark them; returns true if + * access to @ptr should be considered atomic. Called from slow-path. */ -static __always_inline bool kcsan_is_atomic(const volatile void *ptr) +static bool kcsan_is_atomic_special(const volatile void *ptr) { - /* only jiffies for now */ - return ptr == &jiffies; + /* volatile globals that have been observed in data races. */ + return ptr == &jiffies || ptr == ¤t->state; } #endif /* _KERNEL_KCSAN_ATOMIC_H */ diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 065615df88ea..eb30ecdc8c00 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -188,12 +188,13 @@ static __always_inline struct kcsan_ctx *get_ctx(void) return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); } +/* Rules for generic atomic accesses. Called from fast-path. */ static __always_inline bool is_atomic(const volatile void *ptr, size_t size, int type) { struct kcsan_ctx *ctx; - if ((type & KCSAN_ACCESS_ATOMIC) != 0) + if (type & KCSAN_ACCESS_ATOMIC) return true; /* @@ -201,16 +202,16 @@ is_atomic(const volatile void *ptr, size_t size, int type) * as atomic. This allows using them also in atomic regions, such as * seqlocks, without implicitly changing their semantics. */ - if ((type & KCSAN_ACCESS_ASSERT) != 0) + if (type & KCSAN_ACCESS_ASSERT) return false; if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) && - (type & KCSAN_ACCESS_WRITE) != 0 && size <= sizeof(long) && + (type & KCSAN_ACCESS_WRITE) && size <= sizeof(long) && IS_ALIGNED((unsigned long)ptr, size)) return true; /* Assume aligned writes up to word size are atomic. */ ctx = get_ctx(); - if (unlikely(ctx->atomic_next > 0)) { + if (ctx->atomic_next > 0) { /* * Because we do not have separate contexts for nested * interrupts, in case atomic_next is set, we simply assume that @@ -224,10 +225,8 @@ is_atomic(const volatile void *ptr, size_t size, int type) --ctx->atomic_next; /* in task, or outer interrupt */ return true; } - if (unlikely(ctx->atomic_nest_count > 0 || ctx->in_flat_atomic)) - return true; - return kcsan_is_atomic(ptr); + return ctx->atomic_nest_count > 0 || ctx->in_flat_atomic; } static __always_inline bool @@ -367,6 +366,15 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (!kcsan_is_enabled()) goto out; + /* + * Special atomic rules: unlikely to be true, so we check them here in + * the slow-path, and not in the fast-path in is_atomic(). Call after + * kcsan_is_enabled(), as we may access memory that is not yet + * initialized during early boot. + */ + if (!is_assert && kcsan_is_atomic_special(ptr)) + goto out; + if (!check_encodable((unsigned long)ptr, size)) { kcsan_counter_inc(KCSAN_COUNTER_UNENCODABLE_ACCESSES); goto out; diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 2ff196123977..72ee188ebc54 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -74,25 +74,34 @@ void kcsan_counter_dec(enum kcsan_counter_id id) */ static noinline void microbenchmark(unsigned long iters) { + const struct kcsan_ctx ctx_save = current->kcsan_ctx; + const bool was_enabled = READ_ONCE(kcsan_enabled); cycles_t cycles; + /* We may have been called from an atomic region; reset context. */ + memset(¤t->kcsan_ctx, 0, sizeof(current->kcsan_ctx)); + /* + * Disable to benchmark fast-path for all accesses, and (expected + * negligible) call into slow-path, but never set up watchpoints. + */ + WRITE_ONCE(kcsan_enabled, false); + pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters); cycles = get_cycles(); while (iters--) { - /* - * We can run this benchmark from multiple tasks; this address - * calculation increases likelyhood of some accesses - * overlapping. Make the access type an atomic read, to never - * set up watchpoints and test the fast-path only. - */ - unsigned long addr = - iters % (CONFIG_KCSAN_NUM_WATCHPOINTS * PAGE_SIZE); - __kcsan_check_access((void *)addr, sizeof(long), KCSAN_ACCESS_ATOMIC); + unsigned long addr = iters & ((PAGE_SIZE << 8) - 1); + int type = !(iters & 0x7f) ? KCSAN_ACCESS_ATOMIC : + (!(iters & 0xf) ? KCSAN_ACCESS_WRITE : 0); + __kcsan_check_access((void *)addr, sizeof(long), type); } cycles = get_cycles() - cycles; pr_info("KCSAN: %s end | cycles: %llu\n", __func__, cycles); + + WRITE_ONCE(kcsan_enabled, was_enabled); + /* restore context */ + current->kcsan_ctx = ctx_save; } /* -- cgit v1.2.3 From e7b34100500733f7e052ce3dee94e6338b86e6bc Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Thu, 5 Mar 2020 15:21:07 +0100 Subject: kcsan: Fix a typo in a comment s/slots slots/slots/ Signed-off-by: Qiujun Huang Reviewed-by: Nick Desaulniers [elver: commit message] Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/kcsan/core.c') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index eb30ecdc8c00..ee8200835b60 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -45,7 +45,7 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { }; /* - * Helper macros to index into adjacent slots slots, starting from address slot + * Helper macros to index into adjacent slots, starting from address slot * itself, followed by the right and left slots. * * The purpose is 2-fold: -- cgit v1.2.3 From 135c0872d86948046d11d7083e36c930cc43ac93 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 18 Mar 2020 18:38:44 +0100 Subject: kcsan: Introduce report access_info and other_info Improve readability by introducing access_info and other_info structs, and in preparation of the following commit in this series replaces the single instance of other_info with an array of size 1. No functional change intended. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 6 +-- kernel/kcsan/kcsan.h | 2 +- kernel/kcsan/report.c | 147 +++++++++++++++++++++++++------------------------- 3 files changed, 77 insertions(+), 78 deletions(-) (limited to 'kernel/kcsan/core.c') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index ee8200835b60..f1c38620e3cf 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -321,7 +321,7 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, flags = user_access_save(); if (consumed) { - kcsan_report(ptr, size, type, true, raw_smp_processor_id(), + kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE, KCSAN_REPORT_CONSUMED_WATCHPOINT); } else { /* @@ -500,8 +500,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); - kcsan_report(ptr, size, type, value_change, raw_smp_processor_id(), - KCSAN_REPORT_RACE_SIGNAL); + kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL); } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { /* Inferring a race, since the value should not have changed. */ @@ -511,7 +510,6 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE, - raw_smp_processor_id(), KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); } diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index e282f8b5749e..6630dfe32f31 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -135,7 +135,7 @@ enum kcsan_report_type { * Print a race report from thread that encountered the race. */ extern void kcsan_report(const volatile void *ptr, size_t size, int access_type, - enum kcsan_value_change value_change, int cpu_id, + enum kcsan_value_change value_change, enum kcsan_report_type type); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 18f9d3bc93a5..de234d1c1b3d 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -19,18 +19,23 @@ */ #define NUM_STACK_ENTRIES 64 +/* Common access info. */ +struct access_info { + const volatile void *ptr; + size_t size; + int access_type; + int task_pid; + int cpu_id; +}; + /* * Other thread info: communicated from other racing thread to thread that set * up the watchpoint, which then prints the complete report atomically. Only * need one struct, as all threads should to be serialized regardless to print * the reports, with reporting being in the slow-path. */ -static struct { - const volatile void *ptr; - size_t size; - int access_type; - int task_pid; - int cpu_id; +struct other_info { + struct access_info ai; unsigned long stack_entries[NUM_STACK_ENTRIES]; int num_stack_entries; @@ -52,7 +57,9 @@ static struct { * that populated @other_info until it has been consumed. */ struct task_struct *task; -} other_info; +}; + +static struct other_info other_infos[1]; /* * Information about reported races; used to rate limit reporting. @@ -238,7 +245,7 @@ static const char *get_thread_desc(int task_id) } /* Helper to skip KCSAN-related functions in stack-trace. */ -static int get_stack_skipnr(unsigned long stack_entries[], int num_entries) +static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries) { char buf[64]; int skip = 0; @@ -279,9 +286,10 @@ static void print_verbose_info(struct task_struct *task) /* * Returns true if a report was generated, false otherwise. */ -static bool print_report(const volatile void *ptr, size_t size, int access_type, - enum kcsan_value_change value_change, int cpu_id, - enum kcsan_report_type type) +static bool print_report(enum kcsan_value_change value_change, + enum kcsan_report_type type, + const struct access_info *ai, + const struct other_info *other_info) { unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); @@ -297,9 +305,9 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, return false; if (type == KCSAN_REPORT_RACE_SIGNAL) { - other_skipnr = get_stack_skipnr(other_info.stack_entries, - other_info.num_stack_entries); - other_frame = other_info.stack_entries[other_skipnr]; + other_skipnr = get_stack_skipnr(other_info->stack_entries, + other_info->num_stack_entries); + other_frame = other_info->stack_entries[other_skipnr]; /* @value_change is only known for the other thread */ if (skip_report(value_change, other_frame)) @@ -321,13 +329,13 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, */ cmp = sym_strcmp((void *)other_frame, (void *)this_frame); pr_err("BUG: KCSAN: %s in %ps / %ps\n", - get_bug_type(access_type | other_info.access_type), + get_bug_type(ai->access_type | other_info->ai.access_type), (void *)(cmp < 0 ? other_frame : this_frame), (void *)(cmp < 0 ? this_frame : other_frame)); } break; case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: - pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(access_type), + pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type), (void *)this_frame); break; @@ -341,30 +349,28 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, switch (type) { case KCSAN_REPORT_RACE_SIGNAL: pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", - get_access_type(other_info.access_type), other_info.ptr, - other_info.size, get_thread_desc(other_info.task_pid), - other_info.cpu_id); + get_access_type(other_info->ai.access_type), other_info->ai.ptr, + other_info->ai.size, get_thread_desc(other_info->ai.task_pid), + other_info->ai.cpu_id); /* Print the other thread's stack trace. */ - stack_trace_print(other_info.stack_entries + other_skipnr, - other_info.num_stack_entries - other_skipnr, + stack_trace_print(other_info->stack_entries + other_skipnr, + other_info->num_stack_entries - other_skipnr, 0); if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) - print_verbose_info(other_info.task); + print_verbose_info(other_info->task); pr_err("\n"); pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", - get_access_type(access_type), ptr, size, - get_thread_desc(in_task() ? task_pid_nr(current) : -1), - cpu_id); + get_access_type(ai->access_type), ai->ptr, ai->size, + get_thread_desc(ai->task_pid), ai->cpu_id); break; case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n", - get_access_type(access_type), ptr, size, - get_thread_desc(in_task() ? task_pid_nr(current) : -1), - cpu_id); + get_access_type(ai->access_type), ai->ptr, ai->size, + get_thread_desc(ai->task_pid), ai->cpu_id); break; default: @@ -386,22 +392,23 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, return true; } -static void release_report(unsigned long *flags, enum kcsan_report_type type) +static void release_report(unsigned long *flags, struct other_info *other_info) { - if (type == KCSAN_REPORT_RACE_SIGNAL) - other_info.ptr = NULL; /* mark for reuse */ + if (other_info) + other_info->ai.ptr = NULL; /* Mark for reuse. */ spin_unlock_irqrestore(&report_lock, *flags); } /* - * Sets @other_info.task and awaits consumption of @other_info. + * Sets @other_info->task and awaits consumption of @other_info. * * Precondition: report_lock is held. * Postcondition: report_lock is held. */ -static void -set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) +static void set_other_info_task_blocking(unsigned long *flags, + const struct access_info *ai, + struct other_info *other_info) { /* * We may be instrumenting a code-path where current->state is already @@ -418,7 +425,7 @@ set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) */ int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt); - other_info.task = current; + other_info->task = current; do { if (is_running) { /* @@ -438,19 +445,19 @@ set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) spin_lock_irqsave(&report_lock, *flags); if (timeout-- < 0) { /* - * Abort. Reset other_info.task to NULL, since it + * Abort. Reset @other_info->task to NULL, since it * appears the other thread is still going to consume * it. It will result in no verbose info printed for * this task. */ - other_info.task = NULL; + other_info->task = NULL; break; } /* * If @ptr nor @current matches, then our information has been * consumed and we may continue. If not, retry. */ - } while (other_info.ptr == ptr && other_info.task == current); + } while (other_info->ai.ptr == ai->ptr && other_info->task == current); if (is_running) set_current_state(TASK_RUNNING); } @@ -460,9 +467,8 @@ set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) * acquires the matching other_info and returns true. If other_info is not * required for the report type, simply acquires report_lock and returns true. */ -static bool prepare_report(unsigned long *flags, const volatile void *ptr, - size_t size, int access_type, int cpu_id, - enum kcsan_report_type type) +static bool prepare_report(unsigned long *flags, enum kcsan_report_type type, + const struct access_info *ai, struct other_info *other_info) { if (type != KCSAN_REPORT_CONSUMED_WATCHPOINT && type != KCSAN_REPORT_RACE_SIGNAL) { @@ -476,18 +482,14 @@ retry: switch (type) { case KCSAN_REPORT_CONSUMED_WATCHPOINT: - if (other_info.ptr != NULL) + if (other_info->ai.ptr) break; /* still in use, retry */ - other_info.ptr = ptr; - other_info.size = size; - other_info.access_type = access_type; - other_info.task_pid = in_task() ? task_pid_nr(current) : -1; - other_info.cpu_id = cpu_id; - other_info.num_stack_entries = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1); + other_info->ai = *ai; + other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 1); if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) - set_other_info_task_blocking(flags, ptr); + set_other_info_task_blocking(flags, ai, other_info); spin_unlock_irqrestore(&report_lock, *flags); @@ -498,37 +500,31 @@ retry: return false; case KCSAN_REPORT_RACE_SIGNAL: - if (other_info.ptr == NULL) + if (!other_info->ai.ptr) break; /* no data available yet, retry */ /* * First check if this is the other_info we are expecting, i.e. * matches based on how watchpoint was encoded. */ - if (!matching_access((unsigned long)other_info.ptr & - WATCHPOINT_ADDR_MASK, - other_info.size, - (unsigned long)ptr & WATCHPOINT_ADDR_MASK, - size)) + if (!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size, + (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)) break; /* mismatching watchpoint, retry */ - if (!matching_access((unsigned long)other_info.ptr, - other_info.size, (unsigned long)ptr, - size)) { + if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, + (unsigned long)ai->ptr, ai->size)) { /* * If the actual accesses to not match, this was a false * positive due to watchpoint encoding. */ - kcsan_counter_inc( - KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); + kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); /* discard this other_info */ - release_report(flags, KCSAN_REPORT_RACE_SIGNAL); + release_report(flags, other_info); return false; } - access_type |= other_info.access_type; - if ((access_type & KCSAN_ACCESS_WRITE) == 0) { + if (!((ai->access_type | other_info->ai.access_type) & KCSAN_ACCESS_WRITE)) { /* * While the address matches, this is not the other_info * from the thread that consumed our watchpoint, since @@ -561,15 +557,11 @@ retry: * data, and at this point the likelihood that we * re-report the same race again is high. */ - release_report(flags, KCSAN_REPORT_RACE_SIGNAL); + release_report(flags, other_info); return false; } - /* - * Matching & usable access in other_info: keep other_info_lock - * locked, as this thread consumes it to print the full report; - * unlocked in release_report. - */ + /* Matching access in other_info. */ return true; default: @@ -582,10 +574,19 @@ retry: } void kcsan_report(const volatile void *ptr, size_t size, int access_type, - enum kcsan_value_change value_change, int cpu_id, + enum kcsan_value_change value_change, enum kcsan_report_type type) { unsigned long flags = 0; + const struct access_info ai = { + .ptr = ptr, + .size = size, + .access_type = access_type, + .task_pid = in_task() ? task_pid_nr(current) : -1, + .cpu_id = raw_smp_processor_id() + }; + struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN + ? NULL : &other_infos[0]; /* * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if @@ -596,19 +597,19 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, lockdep_off(); kcsan_disable_current(); - if (prepare_report(&flags, ptr, size, access_type, cpu_id, type)) { + if (prepare_report(&flags, type, &ai, other_info)) { /* * Never report if value_change is FALSE, only if we it is * either TRUE or MAYBE. In case of MAYBE, further filtering may * be done once we know the full stack trace in print_report(). */ bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE && - print_report(ptr, size, access_type, value_change, cpu_id, type); + print_report(value_change, type, &ai, other_info); if (reported && panic_on_warn) panic("panic_on_warn set ...\n"); - release_report(&flags, type); + release_report(&flags, other_info); } kcsan_enable_current(); -- cgit v1.2.3 From 6119418f94ca5314392d258d27eb0cb58bb1774e Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 18 Mar 2020 18:38:45 +0100 Subject: kcsan: Avoid blocking producers in prepare_report() To avoid deadlock in case watchers can be interrupted, we need to ensure that producers of the struct other_info can never be blocked by an unrelated consumer. (Likely to occur with KCSAN_INTERRUPT_WATCHER.) There are several cases that can lead to this scenario, for example: 1. A watchpoint A was set up by task T1, but interrupted by interrupt I1. Some other thread (task or interrupt) finds watchpoint A consumes it, and sets other_info. Then I1 also finds some unrelated watchpoint B, consumes it, but is blocked because other_info is in use. T1 cannot consume other_info because I1 never returns -> deadlock. 2. A watchpoint A was set up by task T1, but interrupted by interrupt I1, which also sets up a watchpoint B. Some other thread finds watchpoint A, and consumes it and sets up other_info with its information. Similarly some other thread finds watchpoint B and consumes it, but is then blocked because other_info is in use. When I1 continues it sees its watchpoint was consumed, and that it must wait for other_info, which currently contains information to be consumed by T1. However, T1 cannot unblock other_info because I1 never returns -> deadlock. To avoid this, we need to ensure that producers of struct other_info always have a usable other_info entry. This is obviously not the case with only a single instance of struct other_info, as concurrent producers must wait for the entry to be released by some consumer (which may be locked up as illustrated above). While it would be nice if producers could simply call kmalloc() and append their instance of struct other_info to a list, we are very limited in this code path: since KCSAN can instrument the allocators themselves, calling kmalloc() could lead to deadlock or corrupted allocator state. Since producers of the struct other_info will always succeed at try_consume_watchpoint(), preceding the call into kcsan_report(), we know that the particular watchpoint slot cannot simply be reused or consumed by another potential other_info producer. If we move removal of a watchpoint after reporting (by the consumer of struct other_info), we can see a consumed watchpoint as a held lock on elements of other_info, if we create a one-to-one mapping of a watchpoint to an other_info element. Therefore, the simplest solution is to create an array of struct other_info that is as large as the watchpoints array in core.c, and pass the watchpoint index to kcsan_report() for producers and consumers, and change watchpoints to be removed after reporting is done. With a default config on a 64-bit system, the array other_infos consumes ~37KiB. For most systems today this is not a problem. On smaller memory constrained systems, the config value CONFIG_KCSAN_NUM_WATCHPOINTS can be reduced appropriately. Overall, this change is a simplification of the prepare_report() code, and makes some of the checks (such as checking if at least one access is a write) redundant. Tested: $ tools/testing/selftests/rcutorture/bin/kvm.sh \ --cpus 12 --duration 10 --kconfig "CONFIG_DEBUG_INFO=y \ CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n \ CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n \ CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y \ CONFIG_KCSAN_INTERRUPT_WATCHER=y CONFIG_PROVE_LOCKING=y" \ --configs TREE03 => No longer hangs and runs to completion as expected. Reported-by: Paul E. McKenney Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 31 +++++--- kernel/kcsan/kcsan.h | 3 +- kernel/kcsan/report.c | 212 ++++++++++++++++++++++++-------------------------- 3 files changed, 124 insertions(+), 122 deletions(-) (limited to 'kernel/kcsan/core.c') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index f1c38620e3cf..4d8ea0fca5f1 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -69,7 +69,6 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { * slot=9: [10, 11, 9] * slot=63: [64, 65, 63] */ -#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT) #define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS)) /* @@ -171,12 +170,16 @@ try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint) return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT); } -/* - * Return true if watchpoint was not touched, false if consumed. - */ -static inline bool remove_watchpoint(atomic_long_t *watchpoint) +/* Return true if watchpoint was not touched, false if already consumed. */ +static inline bool consume_watchpoint(atomic_long_t *watchpoint) { - return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT; + return atomic_long_xchg_relaxed(watchpoint, CONSUMED_WATCHPOINT) != CONSUMED_WATCHPOINT; +} + +/* Remove the watchpoint -- its slot may be reused after. */ +static inline void remove_watchpoint(atomic_long_t *watchpoint) +{ + atomic_long_set(watchpoint, INVALID_WATCHPOINT); } static __always_inline struct kcsan_ctx *get_ctx(void) @@ -322,7 +325,8 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, if (consumed) { kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE, - KCSAN_REPORT_CONSUMED_WATCHPOINT); + KCSAN_REPORT_CONSUMED_WATCHPOINT, + watchpoint - watchpoints); } else { /* * The other thread may not print any diagnostics, as it has @@ -470,7 +474,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) value_change = KCSAN_VALUE_CHANGE_TRUE; /* Check if this access raced with another. */ - if (!remove_watchpoint(watchpoint)) { + if (!consume_watchpoint(watchpoint)) { /* * Depending on the access type, map a value_change of MAYBE to * TRUE (always report) or FALSE (never report). @@ -500,7 +504,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); - kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL); + kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL, + watchpoint - watchpoints); } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { /* Inferring a race, since the value should not have changed. */ @@ -510,9 +515,15 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE, - KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); + KCSAN_REPORT_RACE_UNKNOWN_ORIGIN, + watchpoint - watchpoints); } + /* + * Remove watchpoint; must be after reporting, since the slot may be + * reused after this point. + */ + remove_watchpoint(watchpoint); kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS); out_unlock: if (!kcsan_interrupt_watcher) diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 6630dfe32f31..763d6d08d94b 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -12,6 +12,7 @@ /* The number of adjacent watchpoints to check. */ #define KCSAN_CHECK_ADJACENT 1 +#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT) extern unsigned int kcsan_udelay_task; extern unsigned int kcsan_udelay_interrupt; @@ -136,6 +137,6 @@ enum kcsan_report_type { */ extern void kcsan_report(const volatile void *ptr, size_t size, int access_type, enum kcsan_value_change value_change, - enum kcsan_report_type type); + enum kcsan_report_type type, int watchpoint_idx); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index de234d1c1b3d..ae0a383238ea 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -30,9 +30,7 @@ struct access_info { /* * Other thread info: communicated from other racing thread to thread that set - * up the watchpoint, which then prints the complete report atomically. Only - * need one struct, as all threads should to be serialized regardless to print - * the reports, with reporting being in the slow-path. + * up the watchpoint, which then prints the complete report atomically. */ struct other_info { struct access_info ai; @@ -59,7 +57,11 @@ struct other_info { struct task_struct *task; }; -static struct other_info other_infos[1]; +/* + * To never block any producers of struct other_info, we need as many elements + * as we have watchpoints (upper bound on concurrent races to report). + */ +static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; /* * Information about reported races; used to rate limit reporting. @@ -96,10 +98,11 @@ struct report_time { static struct report_time report_times[REPORT_TIMES_SIZE]; /* - * This spinlock protects reporting and other_info, since other_info is usually - * required when reporting. + * Spinlock serializing report generation, and access to @other_infos. Although + * it could make sense to have a finer-grained locking story for @other_infos, + * report generation needs to be serialized either way, so not much is gained. */ -static DEFINE_SPINLOCK(report_lock); +static DEFINE_RAW_SPINLOCK(report_lock); /* * Checks if the race identified by thread frames frame1 and frame2 has @@ -395,9 +398,13 @@ static bool print_report(enum kcsan_value_change value_change, static void release_report(unsigned long *flags, struct other_info *other_info) { if (other_info) - other_info->ai.ptr = NULL; /* Mark for reuse. */ + /* + * Use size to denote valid/invalid, since KCSAN entirely + * ignores 0-sized accesses. + */ + other_info->ai.size = 0; - spin_unlock_irqrestore(&report_lock, *flags); + raw_spin_unlock_irqrestore(&report_lock, *flags); } /* @@ -435,14 +442,14 @@ static void set_other_info_task_blocking(unsigned long *flags, */ set_current_state(TASK_UNINTERRUPTIBLE); } - spin_unlock_irqrestore(&report_lock, *flags); + raw_spin_unlock_irqrestore(&report_lock, *flags); /* * We cannot call schedule() since we also cannot reliably * determine if sleeping here is permitted -- see in_atomic(). */ udelay(1); - spin_lock_irqsave(&report_lock, *flags); + raw_spin_lock_irqsave(&report_lock, *flags); if (timeout-- < 0) { /* * Abort. Reset @other_info->task to NULL, since it @@ -454,128 +461,107 @@ static void set_other_info_task_blocking(unsigned long *flags, break; } /* - * If @ptr nor @current matches, then our information has been - * consumed and we may continue. If not, retry. + * If invalid, or @ptr nor @current matches, then @other_info + * has been consumed and we may continue. If not, retry. */ - } while (other_info->ai.ptr == ai->ptr && other_info->task == current); + } while (other_info->ai.size && other_info->ai.ptr == ai->ptr && + other_info->task == current); if (is_running) set_current_state(TASK_RUNNING); } -/* - * Depending on the report type either sets other_info and returns false, or - * acquires the matching other_info and returns true. If other_info is not - * required for the report type, simply acquires report_lock and returns true. - */ -static bool prepare_report(unsigned long *flags, enum kcsan_report_type type, - const struct access_info *ai, struct other_info *other_info) +/* Populate @other_info; requires that the provided @other_info not in use. */ +static void prepare_report_producer(unsigned long *flags, + const struct access_info *ai, + struct other_info *other_info) { - if (type != KCSAN_REPORT_CONSUMED_WATCHPOINT && - type != KCSAN_REPORT_RACE_SIGNAL) { - /* other_info not required; just acquire report_lock */ - spin_lock_irqsave(&report_lock, *flags); - return true; - } + raw_spin_lock_irqsave(&report_lock, *flags); -retry: - spin_lock_irqsave(&report_lock, *flags); + /* + * The same @other_infos entry cannot be used concurrently, because + * there is a one-to-one mapping to watchpoint slots (@watchpoints in + * core.c), and a watchpoint is only released for reuse after reporting + * is done by the consumer of @other_info. Therefore, it is impossible + * for another concurrent prepare_report_producer() to set the same + * @other_info, and are guaranteed exclusivity for the @other_infos + * entry pointed to by @other_info. + * + * To check this property holds, size should never be non-zero here, + * because every consumer of struct other_info resets size to 0 in + * release_report(). + */ + WARN_ON(other_info->ai.size); - switch (type) { - case KCSAN_REPORT_CONSUMED_WATCHPOINT: - if (other_info->ai.ptr) - break; /* still in use, retry */ + other_info->ai = *ai; + other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2); - other_info->ai = *ai; - other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 1); + if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) + set_other_info_task_blocking(flags, ai, other_info); - if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) - set_other_info_task_blocking(flags, ai, other_info); + raw_spin_unlock_irqrestore(&report_lock, *flags); +} - spin_unlock_irqrestore(&report_lock, *flags); +/* Awaits producer to fill @other_info and then returns. */ +static bool prepare_report_consumer(unsigned long *flags, + const struct access_info *ai, + struct other_info *other_info) +{ - /* - * The other thread will print the summary; other_info may now - * be consumed. - */ - return false; + raw_spin_lock_irqsave(&report_lock, *flags); + while (!other_info->ai.size) { /* Await valid @other_info. */ + raw_spin_unlock_irqrestore(&report_lock, *flags); + cpu_relax(); + raw_spin_lock_irqsave(&report_lock, *flags); + } - case KCSAN_REPORT_RACE_SIGNAL: - if (!other_info->ai.ptr) - break; /* no data available yet, retry */ + /* Should always have a matching access based on watchpoint encoding. */ + if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size, + (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size))) + goto discard; + if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, + (unsigned long)ai->ptr, ai->size)) { /* - * First check if this is the other_info we are expecting, i.e. - * matches based on how watchpoint was encoded. + * If the actual accesses to not match, this was a false + * positive due to watchpoint encoding. */ - if (!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size, - (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)) - break; /* mismatching watchpoint, retry */ - - if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, - (unsigned long)ai->ptr, ai->size)) { - /* - * If the actual accesses to not match, this was a false - * positive due to watchpoint encoding. - */ - kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); - - /* discard this other_info */ - release_report(flags, other_info); - return false; - } + kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); + goto discard; + } - if (!((ai->access_type | other_info->ai.access_type) & KCSAN_ACCESS_WRITE)) { - /* - * While the address matches, this is not the other_info - * from the thread that consumed our watchpoint, since - * neither this nor the access in other_info is a write. - * It is invalid to continue with the report, since we - * only have information about reads. - * - * This can happen due to concurrent races on the same - * address, with at least 4 threads. To avoid locking up - * other_info and all other threads, we have to consume - * it regardless. - * - * A concrete case to illustrate why we might lock up if - * we do not consume other_info: - * - * We have 4 threads, all accessing the same address - * (or matching address ranges). Assume the following - * watcher and watchpoint consumer pairs: - * write1-read1, read2-write2. The first to populate - * other_info is write2, however, write1 consumes it, - * resulting in a report of write1-write2. This report - * is valid, however, now read1 populates other_info; - * read2-read1 is an invalid conflict, yet, no other - * conflicting access is left. Therefore, we must - * consume read1's other_info. - * - * Since this case is assumed to be rare, it is - * reasonable to omit this report: one of the other - * reports includes information about the same shared - * data, and at this point the likelihood that we - * re-report the same race again is high. - */ - release_report(flags, other_info); - return false; - } + return true; - /* Matching access in other_info. */ - return true; +discard: + release_report(flags, other_info); + return false; +} +/* + * Depending on the report type either sets @other_info and returns false, or + * awaits @other_info and returns true. If @other_info is not required for the + * report type, simply acquires @report_lock and returns true. + */ +static noinline bool prepare_report(unsigned long *flags, + enum kcsan_report_type type, + const struct access_info *ai, + struct other_info *other_info) +{ + switch (type) { + case KCSAN_REPORT_CONSUMED_WATCHPOINT: + prepare_report_producer(flags, ai, other_info); + return false; + case KCSAN_REPORT_RACE_SIGNAL: + return prepare_report_consumer(flags, ai, other_info); default: - BUG(); + /* @other_info not required; just acquire @report_lock. */ + raw_spin_lock_irqsave(&report_lock, *flags); + return true; } - - spin_unlock_irqrestore(&report_lock, *flags); - - goto retry; } void kcsan_report(const volatile void *ptr, size_t size, int access_type, enum kcsan_value_change value_change, - enum kcsan_report_type type) + enum kcsan_report_type type, int watchpoint_idx) { unsigned long flags = 0; const struct access_info ai = { @@ -586,7 +572,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, .cpu_id = raw_smp_processor_id() }; struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN - ? NULL : &other_infos[0]; + ? NULL : &other_infos[watchpoint_idx]; + + kcsan_disable_current(); + if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos))) + goto out; /* * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if @@ -596,7 +586,6 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, */ lockdep_off(); - kcsan_disable_current(); if (prepare_report(&flags, type, &ai, other_info)) { /* * Never report if value_change is FALSE, only if we it is @@ -611,7 +600,8 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, release_report(&flags, other_info); } - kcsan_enable_current(); lockdep_on(); +out: + kcsan_enable_current(); } -- cgit v1.2.3 From 757a4cefde76697af2b2c284c8a320912b77e7e6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 25 Mar 2020 17:41:56 +0100 Subject: kcsan: Add support for scoped accesses This adds support for scoped accesses, where the memory range is checked for the duration of the scope. The feature is implemented by inserting the relevant access information into a list of scoped accesses for the current execution context, which are then checked (until removed) on every call (through instrumentation) into the KCSAN runtime. An alternative, more complex, implementation could set up a watchpoint for the scoped access, and keep the watchpoint set up. This, however, would require first exposing a handle to the watchpoint, as well as dealing with cases such as accesses by the same thread while the watchpoint is still set up (and several more cases). It is also doubtful if this would provide any benefit, since the majority of delay where the watchpoint is set up is likely due to the injected delays by KCSAN. Therefore, the implementation in this patch is simpler and avoids hurting KCSAN's main use-case (normal data race detection); it also implicitly increases scoped-access race-detection-ability due to increased probability of setting up watchpoints by repeatedly calling __kcsan_check_access() throughout the scope of the access. The implementation required adding an additional conditional branch to the fast-path. However, the microbenchmark showed a *speedup* of ~5% on the fast-path. This appears to be due to subtly improved codegen by GCC from moving get_ctx() and associated load of preempt_count earlier. Suggested-by: Boqun Feng Suggested-by: Paul E. McKenney Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 57 ++++++++++++++++++++++++++++++ include/linux/kcsan.h | 3 ++ init/init_task.c | 1 + kernel/kcsan/core.c | 83 +++++++++++++++++++++++++++++++++++++++----- kernel/kcsan/report.c | 33 ++++++++++++------ 5 files changed, 158 insertions(+), 19 deletions(-) (limited to 'kernel/kcsan/core.c') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 3cd8bb03eb41..b24253d3a442 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -3,6 +3,8 @@ #ifndef _LINUX_KCSAN_CHECKS_H #define _LINUX_KCSAN_CHECKS_H +/* Note: Only include what is already included by compiler.h. */ +#include #include /* @@ -12,10 +14,12 @@ * WRITE : write access; * ATOMIC: access is atomic; * ASSERT: access is not a regular access, but an assertion; + * SCOPED: access is a scoped access; */ #define KCSAN_ACCESS_WRITE 0x1 #define KCSAN_ACCESS_ATOMIC 0x2 #define KCSAN_ACCESS_ASSERT 0x4 +#define KCSAN_ACCESS_SCOPED 0x8 /* * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used @@ -78,6 +82,52 @@ void kcsan_atomic_next(int n); */ void kcsan_set_access_mask(unsigned long mask); +/* Scoped access information. */ +struct kcsan_scoped_access { + struct list_head list; + const volatile void *ptr; + size_t size; + int type; +}; +/* + * Automatically call kcsan_end_scoped_access() when kcsan_scoped_access goes + * out of scope; relies on attribute "cleanup", which is supported by all + * compilers that support KCSAN. + */ +#define __kcsan_cleanup_scoped \ + __maybe_unused __attribute__((__cleanup__(kcsan_end_scoped_access))) + +/** + * kcsan_begin_scoped_access - begin scoped access + * + * Begin scoped access and initialize @sa, which will cause KCSAN to + * continuously check the memory range in the current thread until + * kcsan_end_scoped_access() is called for @sa. + * + * Scoped accesses are implemented by appending @sa to an internal list for the + * current execution context, and then checked on every call into the KCSAN + * runtime. + * + * @ptr: address of access + * @size: size of access + * @type: access type modifier + * @sa: struct kcsan_scoped_access to use for the scope of the access + */ +struct kcsan_scoped_access * +kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, + struct kcsan_scoped_access *sa); + +/** + * kcsan_end_scoped_access - end scoped access + * + * End a scoped access, which will stop KCSAN checking the memory range. + * Requires that kcsan_begin_scoped_access() was previously called once for @sa. + * + * @sa: a previously initialized struct kcsan_scoped_access + */ +void kcsan_end_scoped_access(struct kcsan_scoped_access *sa); + + #else /* CONFIG_KCSAN */ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, @@ -90,6 +140,13 @@ static inline void kcsan_flat_atomic_end(void) { } static inline void kcsan_atomic_next(int n) { } static inline void kcsan_set_access_mask(unsigned long mask) { } +struct kcsan_scoped_access { }; +#define __kcsan_cleanup_scoped __maybe_unused +static inline struct kcsan_scoped_access * +kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, + struct kcsan_scoped_access *sa) { return sa; } +static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { } + #endif /* CONFIG_KCSAN */ /* diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 3b84606e1e67..17ae59e4b685 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -40,6 +40,9 @@ struct kcsan_ctx { * Access mask for all accesses if non-zero. */ unsigned long access_mask; + + /* List of scoped accesses. */ + struct list_head scoped_accesses; }; /** diff --git a/init/init_task.c b/init/init_task.c index 096191d177d5..198943851caf 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -168,6 +168,7 @@ struct task_struct init_task .atomic_nest_count = 0, .in_flat_atomic = false, .access_mask = 0, + .scoped_accesses = {LIST_POISON1, NULL}, }, #endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 4d8ea0fca5f1..a572aae61b98 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { .atomic_nest_count = 0, .in_flat_atomic = false, .access_mask = 0, + .scoped_accesses = {LIST_POISON1, NULL}, }; /* @@ -191,12 +193,23 @@ static __always_inline struct kcsan_ctx *get_ctx(void) return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); } +/* Check scoped accesses; never inline because this is a slow-path! */ +static noinline void kcsan_check_scoped_accesses(void) +{ + struct kcsan_ctx *ctx = get_ctx(); + struct list_head *prev_save = ctx->scoped_accesses.prev; + struct kcsan_scoped_access *scoped_access; + + ctx->scoped_accesses.prev = NULL; /* Avoid recursion. */ + list_for_each_entry(scoped_access, &ctx->scoped_accesses, list) + __kcsan_check_access(scoped_access->ptr, scoped_access->size, scoped_access->type); + ctx->scoped_accesses.prev = prev_save; +} + /* Rules for generic atomic accesses. Called from fast-path. */ static __always_inline bool -is_atomic(const volatile void *ptr, size_t size, int type) +is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx) { - struct kcsan_ctx *ctx; - if (type & KCSAN_ACCESS_ATOMIC) return true; @@ -213,7 +226,6 @@ is_atomic(const volatile void *ptr, size_t size, int type) IS_ALIGNED((unsigned long)ptr, size)) return true; /* Assume aligned writes up to word size are atomic. */ - ctx = get_ctx(); if (ctx->atomic_next > 0) { /* * Because we do not have separate contexts for nested @@ -233,7 +245,7 @@ is_atomic(const volatile void *ptr, size_t size, int type) } static __always_inline bool -should_watch(const volatile void *ptr, size_t size, int type) +should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx) { /* * Never set up watchpoints when memory operations are atomic. @@ -242,7 +254,7 @@ should_watch(const volatile void *ptr, size_t size, int type) * should not count towards skipped instructions, and (2) to actually * decrement kcsan_atomic_next for consecutive instruction stream. */ - if (is_atomic(ptr, size, type)) + if (is_atomic(ptr, size, type, ctx)) return false; if (this_cpu_dec_return(kcsan_skip) >= 0) @@ -563,8 +575,14 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, if (unlikely(watchpoint != NULL)) kcsan_found_watchpoint(ptr, size, type, watchpoint, encoded_watchpoint); - else if (unlikely(should_watch(ptr, size, type))) - kcsan_setup_watchpoint(ptr, size, type); + else { + struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */ + + if (unlikely(should_watch(ptr, size, type, ctx))) + kcsan_setup_watchpoint(ptr, size, type); + else if (unlikely(ctx->scoped_accesses.prev)) + kcsan_check_scoped_accesses(); + } } /* === Public interface ===================================================== */ @@ -660,6 +678,55 @@ void kcsan_set_access_mask(unsigned long mask) } EXPORT_SYMBOL(kcsan_set_access_mask); +struct kcsan_scoped_access * +kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, + struct kcsan_scoped_access *sa) +{ + struct kcsan_ctx *ctx = get_ctx(); + + __kcsan_check_access(ptr, size, type); + + ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */ + + INIT_LIST_HEAD(&sa->list); + sa->ptr = ptr; + sa->size = size; + sa->type = type; + + if (!ctx->scoped_accesses.prev) /* Lazy initialize list head. */ + INIT_LIST_HEAD(&ctx->scoped_accesses); + list_add(&sa->list, &ctx->scoped_accesses); + + ctx->disable_count--; + return sa; +} +EXPORT_SYMBOL(kcsan_begin_scoped_access); + +void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) +{ + struct kcsan_ctx *ctx = get_ctx(); + + if (WARN(!ctx->scoped_accesses.prev, "Unbalanced %s()?", __func__)) + return; + + ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */ + + list_del(&sa->list); + if (list_empty(&ctx->scoped_accesses)) + /* + * Ensure we do not enter kcsan_check_scoped_accesses() + * slow-path if unnecessary, and avoids requiring list_empty() + * in the fast-path (to avoid a READ_ONCE() and potential + * uaccess warning). + */ + ctx->scoped_accesses.prev = NULL; + + ctx->disable_count--; + + __kcsan_check_access(sa->ptr, sa->size, sa->type); +} +EXPORT_SYMBOL(kcsan_end_scoped_access); + void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { check_access(ptr, size, type); diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index ae0a383238ea..ddc18f1224a4 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -205,6 +205,20 @@ skip_report(enum kcsan_value_change value_change, unsigned long top_frame) static const char *get_access_type(int type) { + if (type & KCSAN_ACCESS_ASSERT) { + if (type & KCSAN_ACCESS_SCOPED) { + if (type & KCSAN_ACCESS_WRITE) + return "assert no accesses (scoped)"; + else + return "assert no writes (scoped)"; + } else { + if (type & KCSAN_ACCESS_WRITE) + return "assert no accesses"; + else + return "assert no writes"; + } + } + switch (type) { case 0: return "read"; @@ -214,17 +228,14 @@ static const char *get_access_type(int type) return "write"; case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: return "write (marked)"; - - /* - * ASSERT variants: - */ - case KCSAN_ACCESS_ASSERT: - case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_ATOMIC: - return "assert no writes"; - case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE: - case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: - return "assert no accesses"; - + case KCSAN_ACCESS_SCOPED: + return "read (scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC: + return "read (marked, scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE: + return "write (scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: + return "write (marked, scoped)"; default: BUG(); } -- cgit v1.2.3 From 19acd03d95dad1f50d06f28179a1866fca431fed Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Apr 2020 17:47:29 +0200 Subject: kcsan: Add __kcsan_{enable,disable}_current() variants The __kcsan_{enable,disable}_current() variants only call into KCSAN if KCSAN is enabled for the current compilation unit. Note: This is typically not what we want, as we usually want to ensure that even calls into other functions still have KCSAN disabled. These variants may safely be used in header files that are shared between regular kernel code and code that does not link the KCSAN runtime. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 17 ++++++++++++++--- kernel/kcsan/core.c | 7 +++++++ 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'kernel/kcsan/core.c') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index ef95ddc49182..7b0b9c44f5f3 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -49,6 +49,7 @@ void kcsan_disable_current(void); * Supports nesting. */ void kcsan_enable_current(void); +void kcsan_enable_current_nowarn(void); /* Safe in uaccess regions. */ /** * kcsan_nestable_atomic_begin - begin nestable atomic region @@ -149,6 +150,7 @@ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, static inline void kcsan_disable_current(void) { } static inline void kcsan_enable_current(void) { } +static inline void kcsan_enable_current_nowarn(void) { } static inline void kcsan_nestable_atomic_begin(void) { } static inline void kcsan_nestable_atomic_end(void) { } static inline void kcsan_flat_atomic_begin(void) { } @@ -165,15 +167,24 @@ static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { } #endif /* CONFIG_KCSAN */ +#ifdef __SANITIZE_THREAD__ /* - * kcsan_*: Only calls into the runtime when the particular compilation unit has - * KCSAN instrumentation enabled. May be used in header files. + * Only calls into the runtime when the particular compilation unit has KCSAN + * instrumentation enabled. May be used in header files. */ -#ifdef __SANITIZE_THREAD__ #define kcsan_check_access __kcsan_check_access + +/* + * Only use these to disable KCSAN for accesses in the current compilation unit; + * calls into libraries may still perform KCSAN checks. + */ +#define __kcsan_disable_current kcsan_disable_current +#define __kcsan_enable_current kcsan_enable_current_nowarn #else static inline void kcsan_check_access(const volatile void *ptr, size_t size, int type) { } +static inline void __kcsan_enable_current(void) { } +static inline void __kcsan_disable_current(void) { } #endif /** diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index a572aae61b98..a73a66cf79df 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -625,6 +625,13 @@ void kcsan_enable_current(void) } EXPORT_SYMBOL(kcsan_enable_current); +void kcsan_enable_current_nowarn(void) +{ + if (get_ctx()->disable_count-- == 0) + kcsan_disable_current(); +} +EXPORT_SYMBOL(kcsan_enable_current_nowarn); + void kcsan_nestable_atomic_begin(void) { /* -- cgit v1.2.3