Diffstat (limited to 'kernel/printk')
-rw-r--r--  kernel/printk/.kunitconfig                      3
-rw-r--r--  kernel/printk/Makefile                          2
-rw-r--r--  kernel/printk/internal.h                       68
-rw-r--r--  kernel/printk/nbcon.c                         322
-rw-r--r--  kernel/printk/printk.c                        477
-rw-r--r--  kernel/printk/printk_ringbuffer.c             126
-rw-r--r--  kernel/printk/printk_ringbuffer.h               5
-rw-r--r--  kernel/printk/printk_ringbuffer_kunit_test.c  327
-rw-r--r--  kernel/printk/sysctl.c                          1
9 files changed, 1012 insertions, 319 deletions
diff --git a/kernel/printk/.kunitconfig b/kernel/printk/.kunitconfig
new file mode 100644
index 000000000000..f31458fd1a92
--- /dev/null
+++ b/kernel/printk/.kunitconfig
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_PRINTK=y
+CONFIG_PRINTK_RINGBUFFER_KUNIT_TEST=y
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index 39a2b61c7232..f8004ac3983d 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -7,3 +7,5 @@ obj-$(CONFIG_PRINTK_INDEX) += index.o
obj-$(CONFIG_PRINTK) += printk_support.o
printk_support-y := printk_ringbuffer.o
printk_support-$(CONFIG_SYSCTL) += sysctl.o
+
+obj-$(CONFIG_PRINTK_RINGBUFFER_KUNIT_TEST) += printk_ringbuffer_kunit_test.o
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index a91bdf802967..85fbf1801cbe 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -3,11 +3,10 @@
* internal.h - printk internal definitions
*/
#include <linux/console.h>
-#include <linux/percpu.h>
#include <linux/types.h>
+#include <linux/sysctl.h>
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
-struct ctl_table;
void __init printk_sysctl_init(void);
int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
@@ -64,6 +63,8 @@ struct dev_printk_info;
extern struct printk_ringbuffer *prb;
extern bool printk_kthreads_running;
+extern bool printk_kthreads_ready;
+extern bool debug_non_panic_cpus;
__printf(4, 0)
int vprintk_store(int facility, int level,
@@ -71,7 +72,6 @@ int vprintk_store(int facility, int level,
const char *fmt, va_list args);
__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
-__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
void __printk_safe_enter(void);
void __printk_safe_exit(void);
@@ -111,47 +111,6 @@ bool nbcon_kthread_create(struct console *con);
void nbcon_kthread_stop(struct console *con);
void nbcon_kthreads_wake(void);
-/*
- * Check if the given console is currently capable and allowed to print
- * records. Note that this function does not consider the current context,
- * which can also play a role in deciding if @con can be used to print
- * records.
- */
-static inline bool console_is_usable(struct console *con, short flags, bool use_atomic)
-{
- if (!(flags & CON_ENABLED))
- return false;
-
- if ((flags & CON_SUSPENDED))
- return false;
-
- if (flags & CON_NBCON) {
- /* The write_atomic() callback is optional. */
- if (use_atomic && !con->write_atomic)
- return false;
-
- /*
- * For the !use_atomic case, @printk_kthreads_running is not
- * checked because the write_thread() callback is also used
- * via the legacy loop when the printer threads are not
- * available.
- */
- } else {
- if (!con->write)
- return false;
- }
-
- /*
- * Console drivers may assume that per-cpu resources have been
- * allocated. So unless they're explicitly marked as being able to
- * cope (CON_ANYTIME) don't call them until this CPU is officially up.
- */
- if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
- return false;
-
- return true;
-}
-
/**
* nbcon_kthread_wake - Wake up a console printing thread
* @con: Console to operate on
@@ -179,6 +138,7 @@ static inline void nbcon_kthread_wake(struct console *con)
#define PRINTKRB_RECORD_MAX 0
#define printk_kthreads_running (false)
+#define printk_kthreads_ready (false)
/*
* In !PRINTK builds we still export console_sem
@@ -202,9 +162,6 @@ static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *hand
static inline void nbcon_kthread_wake(struct console *con) { }
static inline void nbcon_kthreads_wake(void) { }
-static inline bool console_is_usable(struct console *con, short flags,
- bool use_atomic) { return false; }
-
#endif /* CONFIG_PRINTK */
extern bool have_boot_console;
@@ -228,6 +185,8 @@ struct console_flush_type {
bool legacy_offload;
};
+extern bool console_irqwork_blocked;
+
/*
* Identify which console flushing methods should be used in the context of
* the caller.
@@ -239,7 +198,7 @@ static inline void printk_get_console_flush_type(struct console_flush_type *ft)
switch (nbcon_get_default_prio()) {
case NBCON_PRIO_NORMAL:
if (have_nbcon_console && !have_boot_console) {
- if (printk_kthreads_running)
+ if (printk_kthreads_running && !console_irqwork_blocked)
ft->nbcon_offload = true;
else
ft->nbcon_atomic = true;
@@ -249,7 +208,7 @@ static inline void printk_get_console_flush_type(struct console_flush_type *ft)
if (have_legacy_console || have_boot_console) {
if (!is_printk_legacy_deferred())
ft->legacy_direct = true;
- else
+ else if (!console_irqwork_blocked)
ft->legacy_offload = true;
}
break;
@@ -262,7 +221,7 @@ static inline void printk_get_console_flush_type(struct console_flush_type *ft)
if (have_legacy_console || have_boot_console) {
if (!is_printk_legacy_deferred())
ft->legacy_direct = true;
- else
+ else if (!console_irqwork_blocked)
ft->legacy_offload = true;
}
break;
@@ -322,15 +281,22 @@ struct printk_buffers {
* nothing to output and this record should be skipped.
* @seq: The sequence number of the record used for @pbufs->outbuf.
* @dropped: The number of dropped records from reading @seq.
+ * @cpu: CPU on which the message was generated.
+ * @pid: PID of the task that generated the message.
+ * @comm: Name of the task that generated the message.
*/
struct printk_message {
struct printk_buffers *pbufs;
unsigned int outbuf_len;
u64 seq;
unsigned long dropped;
+#ifdef CONFIG_PRINTK_EXECUTION_CTX
+ int cpu;
+ pid_t pid;
+ char comm[TASK_COMM_LEN];
+#endif
};
-bool other_cpu_in_panic(void);
bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
			     bool is_extended, bool may_suppress);
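
For orientation, here is a minimal sketch of how a caller typically acts on the
struct console_flush_type decision declared above; it mirrors the
printk_trigger_flush() rework further down in this diff (the example function
name is made up):

static void example_flush_from_this_context(void)
{
	struct console_flush_type ft;

	printk_get_console_flush_type(&ft);
	if (ft.nbcon_atomic)		/* flush nbcon consoles directly */
		nbcon_atomic_flush_pending();
	if (ft.nbcon_offload)		/* let the nbcon kthreads print */
		nbcon_kthreads_wake();
	if (ft.legacy_direct) {		/* legacy consoles, this context */
		if (console_trylock())
			console_unlock();
	}
	if (ft.legacy_offload)		/* legacy consoles, via irq_work */
		defer_console_output();
}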
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index fd12efcc4aed..d7044a7a214b 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -10,8 +10,10 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqflags.h>
+#include <linux/kdb.h>
#include <linux/kthread.h>
#include <linux/minmax.h>
+#include <linux/panic.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/slab.h>
@@ -117,6 +119,9 @@
* from scratch.
*/
+/* Counter of active nbcon emergency contexts. */
+static atomic_t nbcon_cpu_emergency_cnt = ATOMIC_INIT(0);
+
/**
* nbcon_state_set - Helper function to set the console state
* @con: Console to update
@@ -214,8 +219,9 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
/**
* nbcon_context_try_acquire_direct - Try to acquire directly
- * @ctxt: The context of the caller
- * @cur: The current console state
+ * @ctxt: The context of the caller
+ * @cur: The current console state
+ * @is_reacquire: This acquire is a reacquire
*
* Acquire the console when it is released. Also acquire the console when
* the current owner has a lower priority and the console is in a safe state.
@@ -225,17 +231,17 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
*
* Errors:
*
- * -EPERM: A panic is in progress and this is not the panic CPU.
- * Or the current owner or waiter has the same or higher
- * priority. No acquire method can be successful in
- * this case.
+ * -EPERM: A panic is in progress and this is neither the panic
+ * CPU nor is this a reacquire. Or the current owner or
+ * waiter has the same or higher priority. No acquire
+ * method can be successful in these cases.
*
* -EBUSY: The current owner has a lower priority but the console
* in an unsafe state. The caller should try using
* the handover acquire method.
*/
static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt,
- struct nbcon_state *cur)
+ struct nbcon_state *cur, bool is_reacquire)
{
unsigned int cpu = smp_processor_id();
struct console *con = ctxt->console;
@@ -243,14 +249,23 @@ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt,
do {
/*
- * Panic does not imply that the console is owned. However, it
- * is critical that non-panic CPUs during panic are unable to
- * acquire ownership in order to satisfy the assumptions of
- * nbcon_waiter_matches(). In particular, the assumption that
- * lower priorities are ignored during panic.
+ * Panic does not imply that the console is owned. However,
+ * since all non-panic CPUs are stopped during panic(), it
+ * is safer to have them avoid gaining console ownership.
+ *
+		 * One exception is when kdb is printing on this CPU
+		 * (see kdb_printf_on_this_cpu() below).
+		 *
+		 * A second exception is a reacquire, provided an unsafe
+		 * takeover has not previously occurred: the reacquire is
+		 * allowed to attempt a direct acquire in panic. This gives
+		 * console drivers an opportunity to perform any necessary
+		 * cleanup if they were interrupted by the panic CPU while
+		 * printing.
*/
- if (other_cpu_in_panic())
+ if (panic_on_other_cpu() &&
+ !kdb_printf_on_this_cpu() &&
+ (!is_reacquire || cur->unsafe_takeover)) {
return -EPERM;
+ }
if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio)
return -EPERM;
@@ -301,8 +316,9 @@ static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio)
* Event #1 implies this context is EMERGENCY.
* Event #2 implies the new context is PANIC.
* Event #3 occurs when panic() has flushed the console.
- * Events #4 and #5 are not possible due to the other_cpu_in_panic()
- * check in nbcon_context_try_acquire_direct().
+ * Event #4 occurs when a non-panic CPU reacquires.
+ * Event #5 is not possible due to the panic_on_other_cpu() check
+ * in nbcon_context_try_acquire_handover().
*/
return (cur->req_prio == expected_prio);
@@ -340,7 +356,7 @@ static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt,
struct nbcon_state new;
/* Note that the caller must still remove the request! */
- if (other_cpu_in_panic())
+ if (panic_on_other_cpu())
return -EPERM;
/*
@@ -431,6 +447,16 @@ static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt,
WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio);
WARN_ON_ONCE(!cur->unsafe);
+ /*
+ * Panic does not imply that the console is owned. However, it
+ * is critical that non-panic CPUs during panic are unable to
+ * wait for a handover in order to satisfy the assumptions of
+ * nbcon_waiter_matches(). In particular, the assumption that
+ * lower priorities are ignored during panic.
+ */
+ if (panic_on_other_cpu())
+ return -EPERM;
+
/* Handover is not possible on the same CPU. */
if (cur->cpu == cpu)
return -EBUSY;
@@ -558,7 +584,8 @@ static struct printk_buffers panic_nbcon_pbufs;
/**
* nbcon_context_try_acquire - Try to acquire nbcon console
- * @ctxt: The context of the caller
+ * @ctxt: The context of the caller
+ * @is_reacquire: This acquire is a reacquire
*
* Context: Under @ctxt->con->device_lock() or local_irq_save().
* Return: True if the console was acquired. False otherwise.
@@ -568,16 +595,15 @@ static struct printk_buffers panic_nbcon_pbufs;
* in an unsafe state. Otherwise, on success the caller may assume
* the console is not in an unsafe state.
*/
-static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
+static bool nbcon_context_try_acquire(struct nbcon_context *ctxt, bool is_reacquire)
{
- unsigned int cpu = smp_processor_id();
struct console *con = ctxt->console;
struct nbcon_state cur;
int err;
nbcon_state_read(con, &cur);
try_again:
- err = nbcon_context_try_acquire_direct(ctxt, &cur);
+ err = nbcon_context_try_acquire_direct(ctxt, &cur, is_reacquire);
if (err != -EBUSY)
goto out;
@@ -595,7 +621,7 @@ out:
/* Acquire succeeded. */
/* Assign the appropriate buffer for this context. */
- if (atomic_read(&panic_cpu) == cpu)
+ if (panic_on_this_cpu())
ctxt->pbufs = &panic_nbcon_pbufs;
else
ctxt->pbufs = con->pbufs;
@@ -831,8 +857,8 @@ out:
return nbcon_context_can_proceed(ctxt, &cur);
}
-static void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt,
- char *buf, unsigned int len)
+void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt,
+ char *buf, unsigned int len)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
struct console *con = ctxt->console;
@@ -913,13 +939,27 @@ void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
- while (!nbcon_context_try_acquire(ctxt))
+ while (!nbcon_context_try_acquire(ctxt, true))
cpu_relax();
nbcon_write_context_set_buf(wctxt, NULL, 0);
}
EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf);
+#ifdef CONFIG_PRINTK_EXECUTION_CTX
+static void wctxt_load_execution_ctx(struct nbcon_write_context *wctxt,
+ struct printk_message *pmsg)
+{
+ wctxt->cpu = pmsg->cpu;
+ wctxt->pid = pmsg->pid;
+ memcpy(wctxt->comm, pmsg->comm, sizeof(wctxt->comm));
+ static_assert(sizeof(wctxt->comm) == sizeof(pmsg->comm));
+}
+#else
+static void wctxt_load_execution_ctx(struct nbcon_write_context *wctxt,
+ struct printk_message *pmsg) {}
+#endif
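+
+/*
+ * Sketch only: CONFIG_PRINTK_EXECUTION_CTX, the wctxt->cpu/pid/comm fields,
+ * and the example driver below are introduced by or assumed from this
+ * series. A console driver's write callback could annotate its output with
+ * the originating context roughly like this:
+ *
+ *	static void example_write_thread(struct console *con,
+ *					 struct nbcon_write_context *wctxt)
+ *	{
+ *	#ifdef CONFIG_PRINTK_EXECUTION_CTX
+ *		char prefix[64];
+ *
+ *		snprintf(prefix, sizeof(prefix), "[cpu%d %d:%s] ",
+ *			 wctxt->cpu, wctxt->pid, wctxt->comm);
+ *		// ... write prefix to the hardware ...
+ *	#endif
+ *		// ... write wctxt->outbuf (wctxt->len bytes) to the hardware ...
+ *	}
+ */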
+
/**
* nbcon_emit_next_record - Emit a record in the acquired context
* @wctxt: The write context that will be handed to the write function
@@ -1022,6 +1062,8 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_a
/* Initialize the write context for driver callbacks. */
nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len);
+ wctxt_load_execution_ctx(wctxt, &pmsg);
+
if (use_atomic)
con->write_atomic(con, wctxt);
else
@@ -1101,7 +1143,7 @@ static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic)
cant_migrate();
}
- if (!nbcon_context_try_acquire(ctxt))
+ if (!nbcon_context_try_acquire(ctxt, false))
goto out;
/*
@@ -1144,6 +1186,17 @@ static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_contex
if (kthread_should_stop())
return true;
+ /*
+ * Block the kthread when the system is in an emergency or panic mode.
+ * It increases the chance that these contexts would be able to show
+ * the messages directly. And it reduces the risk of interrupted writes
+ * where the context with a higher priority takes over the nbcon console
+ * ownership in the middle of a message.
+ */
+ if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt)) ||
+ unlikely(panic_in_progress()))
+ return false;
+
cookie = console_srcu_read_lock();
flags = console_srcu_read_flags(con);
@@ -1195,6 +1248,14 @@ wait_for_event:
if (kthread_should_stop())
return 0;
+ /*
+ * Block the kthread when the system is in an emergency or panic
+ * mode. See nbcon_kthread_should_wakeup() for more details.
+ */
+ if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt)) ||
+ unlikely(panic_in_progress()))
+ goto wait_for_event;
+
backlog = false;
/*
@@ -1257,6 +1318,13 @@ void nbcon_kthreads_wake(void)
if (!printk_kthreads_running)
return;
+ /*
+ * It is not allowed to call this function when console irq_work
+ * is blocked.
+ */
+ if (WARN_ON_ONCE(console_irqwork_blocked))
+ return;
+
cookie = console_srcu_read_lock();
for_each_console_srcu(con) {
if (!(console_srcu_read_flags(con) & CON_NBCON))
@@ -1375,7 +1443,7 @@ enum nbcon_prio nbcon_get_default_prio(void)
{
unsigned int *cpu_emergency_nesting;
- if (this_cpu_in_panic())
+ if (panic_on_this_cpu())
return NBCON_PRIO_PANIC;
cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
@@ -1385,6 +1453,26 @@ enum nbcon_prio nbcon_get_default_prio(void)
return NBCON_PRIO_NORMAL;
}
+/*
+ * Track if it is allowed to perform unsafe hostile takeovers of console
+ * ownership. When true, console drivers might perform unsafe actions while
+ * printing. It is externally available via nbcon_allow_unsafe_takeover().
+ */
+static bool panic_nbcon_allow_unsafe_takeover;
+
+/**
+ * nbcon_allow_unsafe_takeover - Check if unsafe console takeovers are allowed
+ *
+ * Return: True, when it is permitted to perform unsafe console printing
+ *
+ * This is also used by console_is_usable() to determine if it is allowed to
+ * call write_atomic() callbacks flagged as unsafe (CON_NBCON_ATOMIC_UNSAFE).
+ */
+bool nbcon_allow_unsafe_takeover(void)
+{
+ return panic_on_this_cpu() && panic_nbcon_allow_unsafe_takeover;
+}
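+
+/*
+ * Sketch only: the kdoc above refers to console_is_usable() (moved out of
+ * internal.h by this series). The check it describes would look roughly
+ * like this, with CON_NBCON_ATOMIC_UNSAFE assumed from the kdoc rather than
+ * shown in this diff:
+ *
+ *	if (use_atomic && (flags & CON_NBCON_ATOMIC_UNSAFE) &&
+ *	    !nbcon_allow_unsafe_takeover())
+ *		return false;
+ */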
+
/**
* nbcon_legacy_emit_next_record - Print one record for an nbcon console
* in legacy contexts
@@ -1455,7 +1543,6 @@ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
* write_atomic() callback
* @con: The nbcon console to flush
* @stop_seq: Flush up until this record
- * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers
*
* Return: 0 if @con was flushed up to @stop_seq. Otherwise, error code on
* failure.
@@ -1474,8 +1561,7 @@ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
* returned, it cannot be expected that the unfinalized record will become
* available.
*/
-static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
- bool allow_unsafe_takeover)
+static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq)
{
struct nbcon_write_context wctxt = { };
struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
@@ -1484,19 +1570,30 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
ctxt->console = con;
ctxt->spinwait_max_us = 2000;
ctxt->prio = nbcon_get_default_prio();
- ctxt->allow_unsafe_takeover = allow_unsafe_takeover;
-
- if (!nbcon_context_try_acquire(ctxt))
- return -EPERM;
+ ctxt->allow_unsafe_takeover = nbcon_allow_unsafe_takeover();
while (nbcon_seq_read(con) < stop_seq) {
/*
- * nbcon_emit_next_record() returns false when the console was
- * handed over or taken over. In both cases the context is no
- * longer valid.
+ * Atomic flushing does not use console driver synchronization
+ * (i.e. it does not hold the port lock for uart consoles).
+ * Therefore IRQs must be disabled to avoid being interrupted
+ * and then calling into a driver that will deadlock trying
+ * to acquire console ownership.
*/
- if (!nbcon_emit_next_record(&wctxt, true))
- return -EAGAIN;
+ scoped_guard(irqsave) {
+ if (!nbcon_context_try_acquire(ctxt, false))
+ return -EPERM;
+
+ /*
+ * nbcon_emit_next_record() returns false when
+ * the console was handed over or taken over.
+ * In both cases the context is no longer valid.
+ */
+ if (!nbcon_emit_next_record(&wctxt, true))
+ return -EAGAIN;
+
+ nbcon_context_release(ctxt);
+ }
if (!ctxt->backlog) {
/* Are there reserved but not yet finalized records? */
@@ -1506,7 +1603,6 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
}
}
- nbcon_context_release(ctxt);
return err;
}
@@ -1515,32 +1611,19 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
* write_atomic() callback
* @con: The nbcon console to flush
* @stop_seq: Flush up until this record
- * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers
*
* This will stop flushing before @stop_seq if another context has ownership.
* That context is then responsible for the flushing. Likewise, if new records
* are added while this context was flushing and there is no other context
* to handle the printing, this context must also flush those records.
*/
-static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
- bool allow_unsafe_takeover)
+static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq)
{
struct console_flush_type ft;
- unsigned long flags;
int err;
again:
- /*
- * Atomic flushing does not use console driver synchronization (i.e.
- * it does not hold the port lock for uart consoles). Therefore IRQs
- * must be disabled to avoid being interrupted and then calling into
- * a driver that will deadlock trying to acquire console ownership.
- */
- local_irq_save(flags);
-
- err = __nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover);
-
- local_irq_restore(flags);
+ err = __nbcon_atomic_flush_pending_con(con, stop_seq);
/*
* If there was a new owner (-EPERM, -EAGAIN), that context is
@@ -1570,9 +1653,8 @@ again:
* __nbcon_atomic_flush_pending - Flush all nbcon consoles using their
* write_atomic() callback
* @stop_seq: Flush up until this record
- * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers
*/
-static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeover)
+static void __nbcon_atomic_flush_pending(u64 stop_seq)
{
struct console *con;
int cookie;
@@ -1590,7 +1672,7 @@ static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeove
if (nbcon_seq_read(con) >= stop_seq)
continue;
- nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover);
+ nbcon_atomic_flush_pending_con(con, stop_seq);
}
console_srcu_read_unlock(cookie);
}
@@ -1606,7 +1688,7 @@ static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeove
*/
void nbcon_atomic_flush_pending(void)
{
- __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), false);
+ __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb));
}
/**
@@ -1618,7 +1700,9 @@ void nbcon_atomic_flush_pending(void)
*/
void nbcon_atomic_flush_unsafe(void)
{
- __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true);
+ panic_nbcon_allow_unsafe_takeover = true;
+ __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb));
+ panic_nbcon_allow_unsafe_takeover = false;
}
/**
@@ -1636,6 +1720,8 @@ void nbcon_cpu_emergency_enter(void)
preempt_disable();
+ atomic_inc(&nbcon_cpu_emergency_cnt);
+
cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
(*cpu_emergency_nesting)++;
}
@@ -1650,10 +1736,24 @@ void nbcon_cpu_emergency_exit(void)
unsigned int *cpu_emergency_nesting;
cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
-
if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0))
(*cpu_emergency_nesting)--;
+ /*
+ * Wake up kthreads because there might be some pending messages
+ * added by other CPUs with normal priority since the last flush
+ * in the emergency context.
+ */
+ if (!WARN_ON_ONCE(atomic_read(&nbcon_cpu_emergency_cnt) == 0)) {
+ if (atomic_dec_return(&nbcon_cpu_emergency_cnt) == 0) {
+ struct console_flush_type ft;
+
+ printk_get_console_flush_type(&ft);
+ if (ft.nbcon_offload)
+ nbcon_kthreads_wake();
+ }
+ }
+
preempt_enable();
}
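
A sketch of the typical emergency-section usage this kthread blocking is
designed around (e.g. WARN()/oops reporting paths); the surrounding report
code is illustrative only:

	nbcon_cpu_emergency_enter();	/* kthreads stay blocked from here */
	pr_emerg("------------[ cut here ]------------\n");
	/* ... dump registers, stack trace, etc. ... */
	nbcon_cpu_emergency_exit();	/* last exit may wake the kthreads */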
@@ -1671,9 +1771,15 @@ bool nbcon_alloc(struct console *con)
{
struct nbcon_state state = { };
- /* The write_thread() callback is mandatory. */
- if (WARN_ON(!con->write_thread))
+ /* Synchronize the kthread start. */
+ lockdep_assert_console_list_lock_held();
+
+ /* Check for mandatory nbcon callbacks. */
+ if (WARN_ON(!con->write_thread ||
+ !con->device_lock ||
+ !con->device_unlock)) {
return false;
+ }
rcuwait_init(&con->rcuwait);
init_irq_work(&con->irq_work, nbcon_irq_work);
@@ -1695,18 +1801,21 @@ bool nbcon_alloc(struct console *con)
*/
con->pbufs = &printk_shared_pbufs;
} else {
- con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL);
+ con->pbufs = kmalloc_obj(*con->pbufs);
if (!con->pbufs) {
con_printk(KERN_ERR, con, "failed to allocate printing buffer\n");
return false;
}
- if (printk_kthreads_running) {
+ if (printk_kthreads_ready && !have_boot_console) {
if (!nbcon_kthread_create(con)) {
kfree(con->pbufs);
con->pbufs = NULL;
return false;
}
+
+ /* Might be the first kthread. */
+ printk_kthreads_running = true;
}
}
@@ -1716,14 +1825,30 @@ bool nbcon_alloc(struct console *con)
/**
* nbcon_free - Free and cleanup the nbcon console specific data
* @con: Console to free/cleanup nbcon data
+ *
+ * Important: @have_nbcon_console must be updated before calling
+ * this function. In particular, it may remain set only when another
+ * nbcon console is still registered.
*/
void nbcon_free(struct console *con)
{
struct nbcon_state state = { };
- if (printk_kthreads_running)
+ /* Synchronize the kthread stop. */
+ lockdep_assert_console_list_lock_held();
+
+ if (printk_kthreads_running) {
nbcon_kthread_stop(con);
+	/*
+	 * Might be the last nbcon console.
+ *
+ * Do not rely on printk_kthreads_check_locked(). It is not
+ * called in some code paths, see nbcon_free() callers.
+ */
+ if (!have_nbcon_console)
+ printk_kthreads_running = false;
+ }
+
nbcon_state_set(con, &state);
/* Boot consoles share global printk buffers. */
@@ -1762,7 +1887,7 @@ bool nbcon_device_try_acquire(struct console *con)
ctxt->console = con;
ctxt->prio = NBCON_PRIO_NORMAL;
- if (!nbcon_context_try_acquire(ctxt))
+ if (!nbcon_context_try_acquire(ctxt, false))
return false;
if (!nbcon_context_enter_unsafe(ctxt))
@@ -1803,14 +1928,75 @@ void nbcon_device_release(struct console *con)
* using the legacy loop.
*/
if (ft.nbcon_atomic) {
- __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false);
+ __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb));
} else if (ft.legacy_direct) {
if (console_trylock())
console_unlock();
} else if (ft.legacy_offload) {
- printk_trigger_flush();
+ defer_console_output();
}
}
console_srcu_read_unlock(cookie);
}
EXPORT_SYMBOL_GPL(nbcon_device_release);
+
+/**
+ * nbcon_kdb_try_acquire - Try to acquire nbcon console and enter unsafe
+ * section
+ * @con: The nbcon console to acquire
+ * @wctxt: The nbcon write context to be used on success
+ *
+ * Context: Under console_srcu_read_lock() for emitting a single kdb message
+ * using the given con->write_atomic() callback. Can be called
+ * only when the console is usable at the moment.
+ *
+ * Return: True if the console was acquired. False otherwise.
+ *
+ * kdb emits messages on consoles registered for printk() without
+ * storing them into the ring buffer. It has to acquire the console
+ * storing them into the ring buffer. It has to acquire the console
+ * ownership so that it can call the con->write_atomic() callback in a
+ * safe way.
+ * This function acquires the nbcon console using priority NBCON_PRIO_EMERGENCY
+ * and marks it unsafe for handover/takeover.
+ */
+bool nbcon_kdb_try_acquire(struct console *con,
+ struct nbcon_write_context *wctxt)
+{
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
+
+ memset(ctxt, 0, sizeof(*ctxt));
+ ctxt->console = con;
+ ctxt->prio = NBCON_PRIO_EMERGENCY;
+
+ if (!nbcon_context_try_acquire(ctxt, false))
+ return false;
+
+ if (!nbcon_context_enter_unsafe(ctxt))
+ return false;
+
+ return true;
+}
+
+/**
+ * nbcon_kdb_release - Exit unsafe section and release the nbcon console
+ *
+ * @wctxt: The nbcon write context initialized by a successful
+ * nbcon_kdb_try_acquire()
+ */
+void nbcon_kdb_release(struct nbcon_write_context *wctxt)
+{
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
+
+ if (!nbcon_context_exit_unsafe(ctxt))
+ return;
+
+ nbcon_context_release(ctxt);
+
+ /*
+ * Flush any new printk() messages added when the console was blocked.
+ * Only the console used by the given write context was blocked.
+ * The console was locked only when the write_atomic() callback
+ * was usable.
+ */
+ __nbcon_atomic_flush_pending_con(ctxt->console, prb_next_reserve_seq(prb));
+}
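
Based only on the kdoc above, a sketch of how a kdb output path could use this
pair; the helper name and the msg/len handling are assumptions, and the caller
is expected to hold console_srcu_read_lock() and to have checked that the
console is usable:

static void example_kdb_emit(struct console *con, char *msg, unsigned int len)
{
	struct nbcon_write_context wctxt = { };

	if (!nbcon_kdb_try_acquire(con, &wctxt))
		return;

	/* Hand the kdb message to the driver without going through prb. */
	nbcon_write_context_set_buf(&wctxt, msg, len);
	con->write_atomic(con, &wctxt);

	nbcon_kdb_release(&wctxt);
}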
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 07668433644b..0323149548f6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -48,6 +48,7 @@
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
+#include <linux/panic.h>
#include <linux/uaccess.h>
#include <asm/sections.h>
@@ -244,6 +245,7 @@ int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write,
* For console list or console->flags updates
*/
void console_list_lock(void)
+ __acquires(&console_mutex)
{
/*
* In unregister_console() and console_force_preferred_locked(),
@@ -268,6 +270,7 @@ EXPORT_SYMBOL(console_list_lock);
* Counterpart to console_list_lock()
*/
void console_list_unlock(void)
+ __releases(&console_mutex)
{
mutex_unlock(&console_mutex);
}
@@ -345,34 +348,6 @@ static void __up_console_sem(unsigned long ip)
}
#define up_console_sem() __up_console_sem(_RET_IP_)
-static bool panic_in_progress(void)
-{
- return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
-}
-
-/* Return true if a panic is in progress on the current CPU. */
-bool this_cpu_in_panic(void)
-{
- /*
- * We can use raw_smp_processor_id() here because it is impossible for
- * the task to be migrated to the panic_cpu, or away from it. If
- * panic_cpu has already been set, and we're not currently executing on
- * that CPU, then we never will be.
- */
- return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id());
-}
-
-/*
- * Return true if a panic is in progress on a remote CPU.
- *
- * On true, the local CPU should immediately release any printing resources
- * that may be needed by the panic CPU.
- */
-bool other_cpu_in_panic(void)
-{
- return (panic_in_progress() && !this_cpu_in_panic());
-}
-
/*
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
@@ -489,6 +464,9 @@ bool have_boot_console;
/* See printk_legacy_allow_panic_sync() for details. */
bool legacy_allow_panic_sync;
+/* Avoid using irq_work when suspending. */
+bool console_irqwork_blocked;
+
#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
static DECLARE_WAIT_QUEUE_HEAD(legacy_wait);
@@ -955,7 +933,7 @@ static int devkmsg_open(struct inode *inode, struct file *file)
return err;
}
- user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL);
+ user = kvmalloc_obj(struct devkmsg_user);
if (!user)
return -ENOMEM;
@@ -2155,12 +2133,40 @@ static inline void printk_delay(int level)
}
}
+#define CALLER_ID_MASK 0x80000000
+
static inline u32 printk_caller_id(void)
{
return in_task() ? task_pid_nr(current) :
- 0x80000000 + smp_processor_id();
+ CALLER_ID_MASK + smp_processor_id();
+}
+
+#ifdef CONFIG_PRINTK_EXECUTION_CTX
+/* Store the opposite info than caller_id. */
+static u32 printk_caller_id2(void)
+{
+ return !in_task() ? task_pid_nr(current) :
+ CALLER_ID_MASK + smp_processor_id();
+}
+
+static pid_t printk_info_get_pid(const struct printk_info *info)
+{
+ u32 caller_id = info->caller_id;
+ u32 caller_id2 = info->caller_id2;
+
+ return caller_id & CALLER_ID_MASK ? caller_id2 : caller_id;
}
+static int printk_info_get_cpu(const struct printk_info *info)
+{
+ u32 caller_id = info->caller_id;
+ u32 caller_id2 = info->caller_id2;
+
+ return ((caller_id & CALLER_ID_MASK ?
+ caller_id : caller_id2) & ~CALLER_ID_MASK);
+}
+#endif
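+
+/*
+ * Worked example of the encoding above (sketch only; values illustrative).
+ * @caller_id keeps today's meaning, while the new @caller_id2 stores the
+ * opposite piece of information, so both PID and CPU can be recovered:
+ *
+ *   printk() from task 1234 on CPU 2:
+ *     info->caller_id  == 1234               (PID)
+ *     info->caller_id2 == 0x80000000 + 2     (CPU | mask)
+ *   printk() from IRQ context on CPU 2, interrupting task 1234:
+ *     info->caller_id  == 0x80000000 + 2     (CPU | mask)
+ *     info->caller_id2 == 1234               (PID)
+ *
+ * printk_info_get_pid() returns the unmasked entry (1234) and
+ * printk_info_get_cpu() returns the masked entry with the mask stripped (2).
+ */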
+
/**
* printk_parse_prefix - Parse level and control flags.
*
@@ -2237,6 +2243,28 @@ static u16 printk_sprint(char *text, u16 size, int facility,
return text_len;
}
+#ifdef CONFIG_PRINTK_EXECUTION_CTX
+static void printk_store_execution_ctx(struct printk_info *info)
+{
+ info->caller_id2 = printk_caller_id2();
+ get_task_comm(info->comm, current);
+}
+
+static void pmsg_load_execution_ctx(struct printk_message *pmsg,
+ const struct printk_info *info)
+{
+ pmsg->cpu = printk_info_get_cpu(info);
+ pmsg->pid = printk_info_get_pid(info);
+ memcpy(pmsg->comm, info->comm, sizeof(pmsg->comm));
+ static_assert(sizeof(pmsg->comm) == sizeof(info->comm));
+}
+#else
+static void printk_store_execution_ctx(struct printk_info *info) {}
+
+static void pmsg_load_execution_ctx(struct printk_message *pmsg,
+ const struct printk_info *info) {}
+#endif
+
__printf(4, 0)
int vprintk_store(int facility, int level,
const struct dev_printk_info *dev_info,
@@ -2344,6 +2372,7 @@ int vprintk_store(int facility, int level,
r.info->caller_id = caller_id;
if (dev_info)
memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
+ printk_store_execution_ctx(r.info);
/* A message without a trailing newline can be continued. */
if (!(flags & LOG_NEWLINE))
@@ -2375,6 +2404,22 @@ void printk_legacy_allow_panic_sync(void)
}
}
+bool __read_mostly debug_non_panic_cpus;
+
+#ifdef CONFIG_PRINTK_CALLER
+static int __init debug_non_panic_cpus_setup(char *str)
+{
+ debug_non_panic_cpus = true;
+ pr_info("allow messages from non-panic CPUs in panic()\n");
+
+ return 0;
+}
+early_param("debug_non_panic_cpus", debug_non_panic_cpus_setup);
+module_param(debug_non_panic_cpus, bool, 0644);
+MODULE_PARM_DESC(debug_non_panic_cpus,
+ "allow messages from non-panic CPUs in panic()");
+#endif
+
asmlinkage int vprintk_emit(int facility, int level,
const struct dev_printk_info *dev_info,
const char *fmt, va_list args)
@@ -2391,7 +2436,9 @@ asmlinkage int vprintk_emit(int facility, int level,
* non-panic CPUs are generating any messages, they will be
* silently dropped.
*/
- if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace)
+ if (panic_on_other_cpu() &&
+ !debug_non_panic_cpus &&
+ !panic_triggering_all_cpu_backtrace)
return 0;
printk_get_console_flush_type(&ft);
@@ -2399,7 +2446,7 @@ asmlinkage int vprintk_emit(int facility, int level,
/* If called from the scheduler, we can not call up(). */
if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT;
- ft.legacy_offload |= ft.legacy_direct;
+ ft.legacy_offload |= ft.legacy_direct && !console_irqwork_blocked;
ft.legacy_direct = false;
}
@@ -2435,7 +2482,7 @@ asmlinkage int vprintk_emit(int facility, int level,
if (ft.legacy_offload)
defer_console_output();
- else
+ else if (!console_irqwork_blocked)
wake_up_klogd();
return printed_len;
@@ -2461,7 +2508,6 @@ asmlinkage __visible int _printk(const char *fmt, ...)
}
EXPORT_SYMBOL(_printk);
-static bool pr_flush(int timeout_ms, bool reset_on_progress);
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);
#else /* CONFIG_PRINTK */
@@ -2474,7 +2520,6 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
static u64 syslog_seq;
-static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; }
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
#endif /* CONFIG_PRINTK */
@@ -2733,18 +2778,28 @@ module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool
MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc");
/**
- * suspend_console - suspend the console subsystem
+ * console_suspend_all - suspend the console subsystem
*
* This disables printk() while we go into suspend states
*/
-void suspend_console(void)
+void console_suspend_all(void)
{
struct console *con;
+ if (console_suspend_enabled)
+ pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
+
+ /*
+ * Flush any console backlog and then avoid queueing irq_work until
+	 * console_resume_all(). Until then deferred printing is not
+	 * triggered, nbcon consoles transition to atomic flushing, and
+	 * klogd waiters are not woken.
+ */
+ pr_flush(1000, true);
+ console_irqwork_blocked = true;
+
if (!console_suspend_enabled)
return;
- pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
- pr_flush(1000, true);
console_list_lock();
for_each_console(con)
@@ -2760,31 +2815,39 @@ void suspend_console(void)
synchronize_srcu(&console_srcu);
}
-void resume_console(void)
+void console_resume_all(void)
{
struct console_flush_type ft;
struct console *con;
- if (!console_suspend_enabled)
- return;
-
- console_list_lock();
- for_each_console(con)
- console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED);
- console_list_unlock();
-
/*
- * Ensure that all SRCU list walks have completed. All printing
- * contexts must be able to see they are no longer suspended so
- * that they are guaranteed to wake up and resume printing.
+ * Allow queueing irq_work. After restoring console state, deferred
+ * printing and any klogd waiters need to be triggered in case there
+ * is now a console backlog.
*/
- synchronize_srcu(&console_srcu);
+ console_irqwork_blocked = false;
+
+ if (console_suspend_enabled) {
+ console_list_lock();
+ for_each_console(con)
+ console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED);
+ console_list_unlock();
+
+ /*
+ * Ensure that all SRCU list walks have completed. All printing
+ * contexts must be able to see they are no longer suspended so
+ * that they are guaranteed to wake up and resume printing.
+ */
+ synchronize_srcu(&console_srcu);
+ }
printk_get_console_flush_type(&ft);
if (ft.nbcon_offload)
nbcon_kthreads_wake();
if (ft.legacy_offload)
defer_console_output();
+ else
+ wake_up_klogd();
pr_flush(1000, true);
}
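
A sketch of the expected ordering across a suspend/resume cycle (the PM-core
call sites are assumed, not shown in this diff):

	console_suspend_all();	/* pr_flush(), then block printk irq_work */
	/* ... system suspended; printk falls back to atomic flushing ... */
	console_resume_all();	/* unblock irq_work, unsuspend consoles,
				 * wake kthreads/klogd for any backlog */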
@@ -2827,7 +2890,7 @@ void console_lock(void)
might_sleep();
/* On panic, the console_lock must be left to the panic cpu. */
- while (other_cpu_in_panic())
+ while (panic_on_other_cpu())
msleep(1000);
down_console_sem();
@@ -2847,7 +2910,7 @@ EXPORT_SYMBOL(console_lock);
int console_trylock(void)
{
/* On panic, the console_lock must be left to the panic cpu. */
- if (other_cpu_in_panic())
+ if (panic_on_other_cpu())
return 0;
if (down_trylock_console_sem())
return 0;
@@ -2992,6 +3055,7 @@ bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
pmsg->seq = r.info->seq;
pmsg->dropped = r.info->seq - seq;
force_con = r.info->flags & LOG_FORCE_CON;
+ pmsg_load_execution_ctx(pmsg, r.info);
/*
* Skip records that are not forced to be printed on consoles and that
@@ -3013,21 +3077,18 @@ out:
}
/*
- * Legacy console printing from printk() caller context does not respect
- * raw_spinlock/spinlock nesting. For !PREEMPT_RT the lockdep warning is a
- * false positive. For PREEMPT_RT the false positive condition does not
- * occur.
- *
- * This map is used to temporarily establish LD_WAIT_SLEEP context for the
- * console write() callback when legacy printing to avoid false positive
- * lockdep complaints, thus allowing lockdep to continue to function for
- * real issues.
+ * The legacy console always acquires a spinlock_t from its printing
+ * callback. This violates lock nesting if the caller acquired an always
+ * spinning lock (raw_spinlock_t) while invoking printk(). This is not a
+ * problem on PREEMPT_RT because legacy consoles print always from a
+ * dedicated thread and never from within printk(). Therefore we tell
+ * lockdep that a sleeping spin lock (spinlock_t) is valid here.
*/
#ifdef CONFIG_PREEMPT_RT
static inline void printk_legacy_allow_spinlock_enter(void) { }
static inline void printk_legacy_allow_spinlock_exit(void) { }
#else
-static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP);
+static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_CONFIG);
static inline void printk_legacy_allow_spinlock_enter(void)
{
@@ -3145,104 +3206,147 @@ static inline void printk_kthreads_check_locked(void) { }
#endif /* CONFIG_PRINTK */
+
/*
- * Print out all remaining records to all consoles.
+ * Print out one record for each console.
*
* @do_cond_resched is set by the caller. It can be true only in schedulable
* context.
*
* @next_seq is set to the sequence number after the last available record.
- * The value is valid only when this function returns true. It means that all
- * usable consoles are completely flushed.
+ * The value is valid only when all usable consoles were flushed. That is
+ * the case when the function returns true (it could do the job) and the
+ * @try_again parameter is set to false, see below.
*
* @handover will be set to true if a printk waiter has taken over the
* console_lock, in which case the caller is no longer holding the
* console_lock. Otherwise it is set to false.
*
- * Returns true when there was at least one usable console and all messages
- * were flushed to all usable consoles. A returned false informs the caller
- * that everything was not flushed (either there were no usable consoles or
- * another context has taken over printing or it is a panic situation and this
- * is not the panic CPU). Regardless the reason, the caller should assume it
- * is not useful to immediately try again.
+ * @try_again will be set to true when it still makes sense to call this
+ * function again, i.e. the function could do the job (see the return
+ * value) and some consoles are still making progress.
+ *
+ * Returns true when the function could do the job: some consoles are
+ * usable, there was no handover/takeover, and no other CPU is in panic
+ * (panic_on_other_cpu()).
*
* Requires the console_lock.
*/
-static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
+static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *handover,
+ bool *try_again)
{
struct console_flush_type ft;
bool any_usable = false;
struct console *con;
- bool any_progress;
int cookie;
- *next_seq = 0;
- *handover = false;
+ *try_again = false;
- do {
- any_progress = false;
+ printk_get_console_flush_type(&ft);
- printk_get_console_flush_type(&ft);
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
+ short flags = console_srcu_read_flags(con);
+ u64 printk_seq;
+ bool progress;
- cookie = console_srcu_read_lock();
- for_each_console_srcu(con) {
- short flags = console_srcu_read_flags(con);
- u64 printk_seq;
- bool progress;
+ /*
+ * console_flush_one_record() is only responsible for
+ * nbcon consoles when the nbcon consoles cannot print via
+ * their atomic or threaded flushing.
+ */
+ if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
+ continue;
- /*
- * console_flush_all() is only responsible for nbcon
- * consoles when the nbcon consoles cannot print via
- * their atomic or threaded flushing.
- */
- if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
- continue;
+ if (!console_is_usable(con, flags, !do_cond_resched))
+ continue;
+ any_usable = true;
- if (!console_is_usable(con, flags, !do_cond_resched))
- continue;
- any_usable = true;
+ if (flags & CON_NBCON) {
+ progress = nbcon_legacy_emit_next_record(con, handover, cookie,
+ !do_cond_resched);
+ printk_seq = nbcon_seq_read(con);
+ } else {
+ progress = console_emit_next_record(con, handover, cookie);
+ printk_seq = con->seq;
+ }
- if (flags & CON_NBCON) {
- progress = nbcon_legacy_emit_next_record(con, handover, cookie,
- !do_cond_resched);
- printk_seq = nbcon_seq_read(con);
- } else {
- progress = console_emit_next_record(con, handover, cookie);
- printk_seq = con->seq;
- }
+ /*
+ * If a handover has occurred, the SRCU read lock
+ * is already released.
+ */
+ if (*handover)
+ goto fail;
- /*
- * If a handover has occurred, the SRCU read lock
- * is already released.
- */
- if (*handover)
- return false;
+ /* Track the next of the highest seq flushed. */
+ if (printk_seq > *next_seq)
+ *next_seq = printk_seq;
- /* Track the next of the highest seq flushed. */
- if (printk_seq > *next_seq)
- *next_seq = printk_seq;
+ if (!progress)
+ continue;
- if (!progress)
- continue;
- any_progress = true;
+ /*
+		 * A usable console made progress. There might still be
+ * pending messages.
+ */
+ *try_again = true;
- /* Allow panic_cpu to take over the consoles safely. */
- if (other_cpu_in_panic())
- goto abandon;
+ /* Allow panic_cpu to take over the consoles safely. */
+ if (panic_on_other_cpu())
+ goto fail_srcu;
- if (do_cond_resched)
- cond_resched();
- }
- console_srcu_read_unlock(cookie);
- } while (any_progress);
+ if (do_cond_resched)
+ cond_resched();
+ }
+ console_srcu_read_unlock(cookie);
return any_usable;
-abandon:
+fail_srcu:
console_srcu_read_unlock(cookie);
+fail:
+ *try_again = false;
return false;
}
+/*
+ * Print out all remaining records to all consoles.
+ *
+ * @do_cond_resched is set by the caller. It can be true only in schedulable
+ * context.
+ *
+ * @next_seq is set to the sequence number after the last available record.
+ * The value is valid only when this function returns true. It means that all
+ * usable consoles are completely flushed.
+ *
+ * @handover will be set to true if a printk waiter has taken over the
+ * console_lock, in which case the caller is no longer holding the
+ * console_lock. Otherwise it is set to false.
+ *
+ * Returns true when there was at least one usable console and all messages
+ * were flushed to all usable consoles. A returned false informs the caller
+ * that everything was not flushed (either there were no usable consoles or
+ * another context has taken over printing or it is a panic situation and this
+ * is not the panic CPU). Regardless the reason, the caller should assume it
+ * is not useful to immediately try again.
+ *
+ * Requires the console_lock.
+ */
+static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
+{
+ bool try_again;
+ bool ret;
+
+ *next_seq = 0;
+ *handover = false;
+
+ do {
+ ret = console_flush_one_record(do_cond_resched, next_seq,
+ handover, &try_again);
+ } while (try_again);
+
+ return ret;
+}
+
static void __console_flush_and_unlock(void)
{
bool do_cond_resched;
@@ -3312,22 +3416,6 @@ void console_unlock(void)
}
EXPORT_SYMBOL(console_unlock);
-/**
- * console_conditional_schedule - yield the CPU if required
- *
- * If the console code is currently allowed to sleep, and
- * if this CPU should yield the CPU to another task, do
- * so here.
- *
- * Must be called within console_lock();.
- */
-void __sched console_conditional_schedule(void)
-{
- if (console_may_schedule)
- cond_resched();
-}
-EXPORT_SYMBOL(console_conditional_schedule);
-
void console_unblank(void)
{
bool found_unblank = false;
@@ -3342,7 +3430,10 @@ void console_unblank(void)
*/
cookie = console_srcu_read_lock();
for_each_console_srcu(c) {
- if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) {
+ if (!console_is_usable(c, console_srcu_read_flags(c), true))
+ continue;
+
+ if (c->unblank) {
found_unblank = true;
break;
}
@@ -3379,7 +3470,10 @@ void console_unblank(void)
cookie = console_srcu_read_lock();
for_each_console_srcu(c) {
- if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank)
+ if (!console_is_usable(c, console_srcu_read_flags(c), true))
+ continue;
+
+ if (c->unblank)
c->unblank();
}
console_srcu_read_unlock(cookie);
@@ -3497,10 +3591,10 @@ struct tty_driver *console_device(int *index)
/*
* Prevent further output on the passed console device so that (for example)
- * serial drivers can disable console output before suspending a port, and can
+ * serial drivers can suspend console output before suspending a port, and can
* re-enable output afterwards.
*/
-void console_stop(struct console *console)
+void console_suspend(struct console *console)
{
__pr_flush(console, 1000, true);
console_list_lock();
@@ -3515,9 +3609,9 @@ void console_stop(struct console *console)
*/
synchronize_srcu(&console_srcu);
}
-EXPORT_SYMBOL(console_stop);
+EXPORT_SYMBOL(console_suspend);
-void console_start(struct console *console)
+void console_resume(struct console *console)
{
struct console_flush_type ft;
bool is_nbcon;
@@ -3542,13 +3636,13 @@ void console_start(struct console *console)
__pr_flush(console, 1000, true);
}
-EXPORT_SYMBOL(console_start);
+EXPORT_SYMBOL(console_resume);
#ifdef CONFIG_PRINTK
static int unregister_console_locked(struct console *console);
/* True when system boot is far enough to create printer threads. */
-static bool printk_kthreads_ready __ro_after_init;
+bool printk_kthreads_ready __ro_after_init;
static struct task_struct *printk_legacy_kthread;
@@ -3602,17 +3696,26 @@ static bool legacy_kthread_should_wakeup(void)
static int legacy_kthread_func(void *unused)
{
- for (;;) {
- wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup());
+ bool try_again;
+
+wait_for_event:
+ wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup());
+
+ do {
+ bool handover = false;
+ u64 next_seq = 0;
if (kthread_should_stop())
- break;
+ return 0;
console_lock();
- __console_flush_and_unlock();
- }
+ console_flush_one_record(true, &next_seq, &handover, &try_again);
+ if (!handover)
+ __console_unlock();
- return 0;
+ } while (try_again);
+
+ goto wait_for_event;
}
static bool legacy_kthread_create(void)
@@ -3640,12 +3743,13 @@ static bool legacy_kthread_create(void)
/**
* printk_kthreads_shutdown - shutdown all threaded printers
+ * @data: syscore context
*
* On system shutdown all threaded printers are stopped. This allows printk
* to transition back to atomic printing, thus providing a robust mechanism
* for the final shutdown/reboot messages to be output.
*/
-static void printk_kthreads_shutdown(void)
+static void printk_kthreads_shutdown(void *data)
{
struct console *con;
@@ -3667,10 +3771,14 @@ static void printk_kthreads_shutdown(void)
console_list_unlock();
}
-static struct syscore_ops printk_syscore_ops = {
+static const struct syscore_ops printk_syscore_ops = {
.shutdown = printk_kthreads_shutdown,
};
+static struct syscore printk_syscore = {
+ .ops = &printk_syscore_ops,
+};
+
/*
* If appropriate, start nbcon kthreads and set @printk_kthreads_running.
* If any kthreads fail to start, those consoles are unregistered.
@@ -3687,6 +3795,7 @@ static void printk_kthreads_check_locked(void)
if (!printk_kthreads_ready)
return;
+ /* Start or stop the legacy kthread when needed. */
if (have_legacy_console || have_boot_console) {
if (!printk_legacy_kthread &&
force_legacy_kthread() &&
@@ -3737,7 +3846,7 @@ static void printk_kthreads_check_locked(void)
static int __init printk_set_kthreads_ready(void)
{
- register_syscore_ops(&printk_syscore_ops);
+ register_syscore(&printk_syscore);
console_list_lock();
printk_kthreads_ready = true;
@@ -4178,14 +4287,6 @@ static int unregister_console_locked(struct console *console)
*/
synchronize_srcu(&console_srcu);
- if (console->flags & CON_NBCON)
- nbcon_free(console);
-
- console_sysfs_notify();
-
- if (console->exit)
- res = console->exit(console);
-
/*
* With this console gone, the global flags tracking registered
* console types may have changed. Update them.
@@ -4206,6 +4307,15 @@ static int unregister_console_locked(struct console *console)
if (!found_nbcon_con)
have_nbcon_console = found_nbcon_con;
+ /* @have_nbcon_console must be updated before calling nbcon_free(). */
+ if (console->flags & CON_NBCON)
+ nbcon_free(console);
+
+ console_sysfs_notify();
+
+ if (console->exit)
+ res = console->exit(console);
+
/* Changed console list, may require printer threads to start/stop. */
printk_kthreads_check_locked();
@@ -4277,6 +4387,11 @@ void __init console_init(void)
initcall_t call;
initcall_entry_t *ce;
+#ifdef CONFIG_NULL_TTY_DEFAULT_CONSOLE
+ if (!console_set_on_cmdline)
+ add_preferred_console("ttynull", 0, NULL);
+#endif
+
/* Setup the default TTY line discipline. */
n_tty_init();
@@ -4466,7 +4581,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
* Context: Process context. May sleep while acquiring console lock.
* Return: true if all usable printers are caught up.
*/
-static bool pr_flush(int timeout_ms, bool reset_on_progress)
+bool pr_flush(int timeout_ms, bool reset_on_progress)
{
return __pr_flush(NULL, timeout_ms, reset_on_progress);
}
@@ -4505,6 +4620,13 @@ static void __wake_up_klogd(int val)
if (!printk_percpu_data_ready())
return;
+ /*
+ * It is not allowed to call this function when console irq_work
+ * is blocked.
+ */
+ if (WARN_ON_ONCE(console_irqwork_blocked))
+ return;
+
preempt_disable();
/*
* Guarantee any new records can be seen by tasks preparing to wait
@@ -4561,9 +4683,30 @@ void defer_console_output(void)
__wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
}
+/**
+ * printk_trigger_flush - Attempt to flush printk buffer to consoles.
+ *
+ * If possible, flush the printk buffer to all consoles in the caller's
+ * context. If offloading is available, trigger deferred printing.
+ *
+ * This is best effort. Depending on the system state, console states,
+ * and caller context, no actual flushing may result from this call.
+ */
void printk_trigger_flush(void)
{
- defer_console_output();
+ struct console_flush_type ft;
+
+ printk_get_console_flush_type(&ft);
+ if (ft.nbcon_atomic)
+ nbcon_atomic_flush_pending();
+ if (ft.nbcon_offload)
+ nbcon_kthreads_wake();
+ if (ft.legacy_direct) {
+ if (console_trylock())
+ console_unlock();
+ }
+ if (ft.legacy_offload)
+ defer_console_output();
}
int vprintk_deferred(const char *fmt, va_list args)
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index 88e8f3a61922..56c8e3d031f4 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <kunit/visibility.h>
#include <linux/kernel.h>
#include <linux/irqflags.h>
#include <linux/string.h>
@@ -393,25 +394,38 @@ static unsigned int to_blk_size(unsigned int size)
* Sanity checker for reserve size. The ringbuffer code assumes that a data
* block does not exceed the maximum possible size that could fit within the
* ringbuffer. This function provides that basic size check so that the
- * assumption is safe.
+ * assumption is safe. In particular, it guarantees that data_push_tail() will
+ * never attempt to push the tail beyond the head.
*/
static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size)
{
- struct prb_data_block *db = NULL;
-
+ /* Data-less blocks take no space. */
if (size == 0)
return true;
/*
- * Ensure the alignment padded size could possibly fit in the data
- * array. The largest possible data block must still leave room for
- * at least the ID of the next block.
+ * If data blocks were allowed to be larger than half the data ring
+ * size, a wrapping data block could require more space than the full
+ * ringbuffer.
*/
- size = to_blk_size(size);
- if (size > DATA_SIZE(data_ring) - sizeof(db->id))
- return false;
+ return to_blk_size(size) <= DATA_SIZE(data_ring) / 2;
+}
- return true;
+/*
+ * Compare the current and requested logical position and decide
+ * whether more space is needed.
+ *
+ * Return false when @lpos_current is already at or beyond @lpos_target.
+ *
+ * Also return false when the difference between the positions is bigger
+ * than the size of the data buffer. It might happen only when the caller
+ * raced with another CPU(s) which already made and used the space.
+ */
+static bool need_more_space(struct prb_data_ring *data_ring,
+ unsigned long lpos_current,
+ unsigned long lpos_target)
+{
+ return lpos_target - lpos_current - 1 < DATA_SIZE(data_ring);
}
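
A few worked values for the helper above (sketch only, assuming
DATA_SIZE() == 256):

	/*
	 * need_more_space(ring, 100, 100) -> false  (0 - 1 underflows: already
	 *                                           at the target)
	 * need_more_space(ring, 100, 180) -> true   ( 79 < 256)
	 * need_more_space(ring, 100, 356) -> true   (255 < 256, exactly fits)
	 * need_more_space(ring, 100, 400) -> false  (299 >= 256: the caller
	 *                                           raced, space already made
	 *                                           and used)
	 */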
/* Query the state of a descriptor. */
@@ -580,7 +594,7 @@ static bool data_make_reusable(struct printk_ringbuffer *rb,
unsigned long id;
/* Loop until @lpos_begin has advanced to or beyond @lpos_end. */
- while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) {
+ while (need_more_space(data_ring, lpos_begin, lpos_end)) {
blk = to_block(data_ring, lpos_begin);
/*
@@ -671,7 +685,7 @@ static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos)
* sees the new tail lpos, any descriptor states that transitioned to
* the reusable state must already be visible.
*/
- while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) {
+ while (need_more_space(data_ring, tail_lpos, lpos)) {
/*
* Make all descriptors reusable that are associated with
* data blocks before @lpos.
@@ -1002,6 +1016,17 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
return true;
}
+static bool is_blk_wrapped(struct prb_data_ring *data_ring,
+ unsigned long begin_lpos, unsigned long next_lpos)
+{
+ /*
+ * Subtract one from next_lpos since it's not actually part of this data
+ * block. This allows perfectly fitting records to not wrap.
+ */
+ return DATA_WRAPS(data_ring, begin_lpos) !=
+ DATA_WRAPS(data_ring, next_lpos - 1);
+}
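+
+/*
+ * Worked values for is_blk_wrapped() (sketch only, again assuming
+ * DATA_SIZE() == 256, so DATA_WRAPS() increments every 256 bytes of lpos):
+ *
+ *   is_blk_wrapped(ring, 200, 256) -> false  (next - 1 = 255 is still in
+ *                                            wrap 0: a block ending exactly
+ *                                            on the boundary does not wrap)
+ *   is_blk_wrapped(ring, 200, 300) -> true   (begin in wrap 0, data crosses
+ *                                            into wrap 1)
+ */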
+
/* Determine the end of a data block. */
static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
unsigned long lpos, unsigned int size)
@@ -1013,7 +1038,7 @@ static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
next_lpos = lpos + size;
/* First check if the data block does not wrap. */
- if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos))
+ if (!is_blk_wrapped(data_ring, begin_lpos, next_lpos))
return next_lpos;
/* Wrapping data blocks store their data at the beginning. */
@@ -1051,8 +1076,17 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size,
do {
next_lpos = get_next_lpos(data_ring, begin_lpos, size);
- if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) {
- /* Failed to allocate, specify a data-less block. */
+ /*
+ * data_check_size() prevents data block allocation that could
+ * cause illegal ringbuffer states. But double check that the
+ * used space will not be bigger than the ring buffer. Wrapped
+ * messages need to reserve more space, see get_next_lpos().
+ *
+ * Specify a data-less block when the check or the allocation
+ * fails.
+ */
+ if (WARN_ON_ONCE(next_lpos - begin_lpos > DATA_SIZE(data_ring)) ||
+ !data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) {
blk_lpos->begin = FAILED_LPOS;
blk_lpos->next = FAILED_LPOS;
return NULL;
@@ -1081,7 +1115,7 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size,
blk = to_block(data_ring, begin_lpos);
blk->id = id; /* LMM(data_alloc:B) */
- if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) {
+ if (is_blk_wrapped(data_ring, begin_lpos, next_lpos)) {
/* Wrapping data blocks store their data at the beginning. */
blk = to_block(data_ring, 0);
@@ -1125,14 +1159,21 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size,
return NULL;
/* Keep track if @blk_lpos was a wrapping data block. */
- wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next));
+ wrapped = is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next);
size = to_blk_size(size);
next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size);
- /* If the data block does not increase, there is nothing to do. */
- if (head_lpos - next_lpos < DATA_SIZE(data_ring)) {
+ /*
+ * Use the current data block when the size does not increase, i.e.
+ * when @head_lpos is already able to accommodate the new @next_lpos.
+ *
+ * Note that need_more_space() can never return false here merely because
+ * the difference between the positions exceeds the data buffer size: the
+ * data block is reopened and therefore can't get reused.
+ */
+ if (!need_more_space(data_ring, head_lpos, next_lpos)) {
if (wrapped)
blk = to_block(data_ring, 0);
else
@@ -1140,8 +1181,18 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size,
return &blk->data[0];
}
- if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring)))
+ /*
+ * data_check_size() prevents data block reallocation that could
+ * cause illegal ringbuffer states. But double check that the
+ * new used space will not be bigger than the ring buffer. Wrapped
+ * messages need to reserve more space, see get_next_lpos().
+ *
+ * Specify failure when the check or the allocation fails.
+ */
+ if (WARN_ON_ONCE(next_lpos - blk_lpos->begin > DATA_SIZE(data_ring)) ||
+ !data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) {
return NULL;
+ }
/* The memory barrier involvement is the same as data_alloc:A. */
if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos,
@@ -1151,7 +1202,7 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size,
blk = to_block(data_ring, blk_lpos->begin);
- if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) {
+ if (is_blk_wrapped(data_ring, blk_lpos->begin, next_lpos)) {
struct prb_data_block *old_blk = blk;
/* Wrapping data blocks store their data at the beginning. */
@@ -1187,7 +1238,7 @@ static unsigned int space_used(struct prb_data_ring *data_ring,
if (BLK_DATALESS(blk_lpos))
return 0;
- if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) {
+ if (!is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next)) {
/* Data block does not wrap. */
return (DATA_INDEX(data_ring, blk_lpos->next) -
DATA_INDEX(data_ring, blk_lpos->begin));
@@ -1233,15 +1284,15 @@ static const char *get_data(struct prb_data_ring *data_ring,
return NULL;
}
- /* Regular data block: @begin less than @next and in same wrap. */
- if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) &&
- blk_lpos->begin < blk_lpos->next) {
+ /* Regular data block: @begin and @next in the same wrap. */
+ if (!is_blk_wrapped(data_ring, blk_lpos->begin, blk_lpos->next)) {
db = to_block(data_ring, blk_lpos->begin);
*data_size = blk_lpos->next - blk_lpos->begin;
/* Wrapping data block: @begin is one wrap behind @next. */
- } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) ==
- DATA_WRAPS(data_ring, blk_lpos->next)) {
+ } else if (!is_blk_wrapped(data_ring,
+ blk_lpos->begin + DATA_SIZE(data_ring),
+ blk_lpos->next)) {
db = to_block(data_ring, 0);
*data_size = DATA_INDEX(data_ring, blk_lpos->next);
@@ -1251,6 +1302,10 @@ static const char *get_data(struct prb_data_ring *data_ring,
return NULL;
}
+ /* Sanity check. Data-less blocks were handled earlier. */
+ if (WARN_ON_ONCE(!data_check_size(data_ring, *data_size) || !*data_size))
+ return NULL;
+
/* A valid data block will always be aligned to the ID size. */
if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) ||
WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) {
@@ -1685,6 +1740,7 @@ fail:
memset(r, 0, sizeof(*r));
return false;
}
+EXPORT_SYMBOL_IF_KUNIT(prb_reserve);
/* Commit the data (possibly finalizing it) and restore interrupts. */
static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val)
@@ -1759,6 +1815,7 @@ void prb_commit(struct prb_reserved_entry *e)
if (head_id != e->id)
desc_make_final(e->rb, e->id);
}
+EXPORT_SYMBOL_IF_KUNIT(prb_commit);
/**
* prb_final_commit() - Commit and finalize (previously reserved) data to
@@ -2133,9 +2190,9 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
* there may be other finalized records beyond that
* need to be printed for a panic situation. If this
* is the panic CPU, skip this
- * non-existent/non-finalized record unless it is
- * at or beyond the head, in which case it is not
- * possible to continue.
+ * non-existent/non-finalized record unless non-panic
+ * CPUs are still running and their debugging is
+ * explicitly enabled.
*
* Note that new messages printed on panic CPU are
* finalized when we are here. The only exception
@@ -2143,10 +2200,13 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
* But it would have the sequence number returned
* by "prb_next_reserve_seq() - 1".
*/
- if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb)))
+ if (panic_on_this_cpu() &&
+ (!debug_non_panic_cpus || legacy_allow_panic_sync) &&
+ ((*seq + 1) < prb_next_reserve_seq(rb))) {
(*seq)++;
- else
+ } else {
return false;
+ }
}
}
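The new skip condition can be read as the following standalone sketch; the function name and boolean parameters are made up here to mirror the panic-CPU check, the debug_non_panic_cpus and legacy_allow_panic_sync flags, and the "(*seq + 1) < prb_next_reserve_seq(rb)" test:

#include <stdbool.h>

/* Illustration only: when a non-finalized gap is skipped during panic. */
bool skip_nonfinalized_gap(bool on_panic_cpu, bool debug_non_panic_cpus,
			   bool legacy_allow_panic_sync, bool records_may_follow)
{
	return on_panic_cpu &&
	       (!debug_non_panic_cpus || legacy_allow_panic_sync) &&
	       records_may_follow;
}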
@@ -2181,6 +2241,7 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
{
return _prb_read_valid(rb, &seq, r, NULL);
}
+EXPORT_SYMBOL_IF_KUNIT(prb_read_valid);
/**
* prb_read_valid_info() - Non-blocking read of meta data for a requested
@@ -2330,6 +2391,7 @@ void prb_init(struct printk_ringbuffer *rb,
infos[0].seq = -(u64)_DESCS_COUNT(descbits);
infos[_DESCS_COUNT(descbits) - 1].seq = 0;
}
+EXPORT_SYMBOL_IF_KUNIT(prb_init);
/**
* prb_record_text_space() - Query the full actual used ringbuffer space for
diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h
index 4ef81349d9fb..1651b53ece34 100644
--- a/kernel/printk/printk_ringbuffer.h
+++ b/kernel/printk/printk_ringbuffer.h
@@ -23,6 +23,11 @@ struct printk_info {
u8 flags:5; /* internal record flags */
u8 level:3; /* syslog level */
u32 caller_id; /* thread id or processor id */
+#ifdef CONFIG_PRINTK_EXECUTION_CTX
+ u32 caller_id2; /* caller_id complement */
+ /* name of the task that generated the message */
+ char comm[TASK_COMM_LEN];
+#endif
struct dev_printk_info dev_info;
};
diff --git a/kernel/printk/printk_ringbuffer_kunit_test.c b/kernel/printk/printk_ringbuffer_kunit_test.c
new file mode 100644
index 000000000000..2282348e869a
--- /dev/null
+++ b/kernel/printk/printk_ringbuffer_kunit_test.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cpuhplock.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <kunit/resource.h>
+#include <kunit/test.h>
+
+#include "printk_ringbuffer.h"
+
+/*
+ * This KUnit tests the data integrity of the lockless printk_ringbuffer.
+ * From multiple CPUs it writes messages of varying length and content while
+ * a reader validates the correctness of the messages.
+ *
+ * IMPORTANT: The more CPUs you can use for this KUnit, the better!
+ *
+ * The test works by starting "num_online_cpus() - 1" writer threads, each
+ * pinned to their own CPU. Each writer thread loops, writing data of varying
+ * length into a printk_ringbuffer as fast as possible. The data content is
+ * an embedded data struct followed by string content repeating the byte:
+ *
+ * 'A' + CPUID
+ *
+ * The reader runs on the remaining online CPU or, if there is only one
+ * CPU, on the same CPU as the writer.
+ * It ensures that the embedded struct content is consistent with the string
+ * and that the string is terminated and is composed of the same repeating
+ * byte as its first byte.
+ *
+ * Because the threads run in such tight loops, they call cond_resched()
+ * from time to time so that the system stays functional.
+ *
+ * If the reader encounters an error, the test is aborted and some
+ * information about the error is reported.
+ * The runtime of the test can be configured with the runtime_ms module parameter.
+ *
+ * Note that the test is performed on a separate printk_ringbuffer instance
+ * and not the instance used by printk().
+ */
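As a concrete illustration of the record format and the checks described above, the following userspace sketch builds and verifies one record the way the writer pinned to CPU 2 and the reader would; the structure is a simplified stand-in for struct prbtest_rbdata and all values are hypothetical:

#include <assert.h>
#include <stdlib.h>
#include <string.h>

struct rbdata {			/* simplified stand-in for prbtest_rbdata */
	unsigned int size;	/* text bytes including the trailing '\0' */
	char text[];
};

int main(void)
{
	unsigned int cpu = 2, text_size = 5;
	struct rbdata *dat = malloc(sizeof(*dat) + text_size);

	if (!dat)
		return 1;

	/* writer side: repeat 'A' + CPUID and terminate the string */
	dat->size = text_size;
	memset(dat->text, 'A' + cpu, text_size - 1);
	dat->text[text_size - 1] = '\0';

	/* reader side: terminator present, every byte equals the first one */
	assert(dat->text[dat->size - 1] == '\0');
	for (unsigned int i = 0; i < dat->size - 1; i++)
		assert(dat->text[i] == dat->text[0]);

	free(dat);
	return 0;
}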
+
+static unsigned long runtime_ms = 10 * MSEC_PER_SEC;
+module_param(runtime_ms, ulong, 0400);
+
+/* test data structure */
+struct prbtest_rbdata {
+ unsigned int size;
+ char text[] __counted_by(size);
+};
+
+#define MAX_RBDATA_TEXT_SIZE 0x80
+#define MAX_PRB_RECORD_SIZE (sizeof(struct prbtest_rbdata) + MAX_RBDATA_TEXT_SIZE)
+
+struct prbtest_data {
+ struct kunit *test;
+ struct printk_ringbuffer *ringbuffer;
+ /* used by writers to signal reader of new records */
+ wait_queue_head_t new_record_wait;
+};
+
+struct prbtest_thread_data {
+ unsigned long num;
+ struct prbtest_data *test_data;
+};
+
+static void prbtest_fail_record(struct kunit *test, const struct prbtest_rbdata *dat, u64 seq)
+{
+ unsigned int len;
+
+ len = dat->size - 1;
+
+ KUNIT_FAIL(test, "BAD RECORD: seq=%llu size=%u text=%.*s\n",
+ seq, dat->size,
+ len < MAX_RBDATA_TEXT_SIZE ? len : -1,
+ len < MAX_RBDATA_TEXT_SIZE ? dat->text : "<invalid>");
+}
+
+static bool prbtest_check_data(const struct prbtest_rbdata *dat)
+{
+ unsigned int len;
+
+ /* Sane size? At least one character + trailing '\0' */
+ if (dat->size < 2 || dat->size > MAX_RBDATA_TEXT_SIZE)
+ return false;
+
+ len = dat->size - 1;
+ if (dat->text[len] != '\0')
+ return false;
+
+ /* String repeats with the same character? */
+ while (len--) {
+ if (dat->text[len] != dat->text[0])
+ return false;
+ }
+
+ return true;
+}
+
+static int prbtest_writer(void *data)
+{
+ struct prbtest_thread_data *tr = data;
+ char text_id = 'A' + tr->num;
+ struct prb_reserved_entry e;
+ struct prbtest_rbdata *dat;
+ u32 record_size, text_size;
+ unsigned long count = 0;
+ struct printk_record r;
+
+ kunit_info(tr->test_data->test, "start thread %03lu (writer)\n", tr->num);
+
+ for (;;) {
+ /* ensure at least 1 character + trailing '\0' */
+ text_size = get_random_u32_inclusive(2, MAX_RBDATA_TEXT_SIZE);
+ if (WARN_ON_ONCE(text_size < 2))
+ text_size = 2;
+ if (WARN_ON_ONCE(text_size > MAX_RBDATA_TEXT_SIZE))
+ text_size = MAX_RBDATA_TEXT_SIZE;
+
+ record_size = sizeof(struct prbtest_rbdata) + text_size;
+ WARN_ON_ONCE(record_size > MAX_PRB_RECORD_SIZE);
+
+ /* specify the text sizes for reservation */
+ prb_rec_init_wr(&r, record_size);
+
+ /*
+ * Reservation can fail if:
+ *
+ * - No free descriptor is available.
+ * - The buffer is full, and the oldest record is reserved
+ * but not yet committed.
+ *
+ * It actually happens in this test because all CPUs are trying
+ * to write an unbounded number of messages in a tight loop.
+ * These failures are intentionally ignored because this test
+ * focuses on races, ringbuffer consistency, and pushing system
+ * usability limits.
+ */
+ if (prb_reserve(&e, tr->test_data->ringbuffer, &r)) {
+ r.info->text_len = record_size;
+
+ dat = (struct prbtest_rbdata *)r.text_buf;
+ dat->size = text_size;
+ memset(dat->text, text_id, text_size - 1);
+ dat->text[text_size - 1] = '\0';
+
+ prb_commit(&e);
+
+ wake_up_interruptible(&tr->test_data->new_record_wait);
+ }
+
+ if ((count++ & 0x3fff) == 0)
+ cond_resched();
+
+ if (kthread_should_stop())
+ break;
+ }
+
+ kunit_info(tr->test_data->test, "end thread %03lu: wrote=%lu\n", tr->num, count);
+
+ return 0;
+}
+
+struct prbtest_wakeup_timer {
+ struct timer_list timer;
+ struct task_struct *task;
+};
+
+static void prbtest_wakeup_callback(struct timer_list *timer)
+{
+ struct prbtest_wakeup_timer *wakeup = timer_container_of(wakeup, timer, timer);
+
+ set_tsk_thread_flag(wakeup->task, TIF_NOTIFY_SIGNAL);
+ wake_up_process(wakeup->task);
+}
+
+static int prbtest_reader(struct prbtest_data *test_data, unsigned long timeout_ms)
+{
+ struct prbtest_wakeup_timer wakeup;
+ char text_buf[MAX_PRB_RECORD_SIZE];
+ unsigned long count = 0;
+ struct printk_info info;
+ struct printk_record r;
+ u64 seq = 0;
+
+ wakeup.task = current;
+ timer_setup_on_stack(&wakeup.timer, prbtest_wakeup_callback, 0);
+ mod_timer(&wakeup.timer, jiffies + msecs_to_jiffies(timeout_ms));
+
+ prb_rec_init_rd(&r, &info, text_buf, sizeof(text_buf));
+
+ kunit_info(test_data->test, "start reader\n");
+
+ while (!wait_event_interruptible(test_data->new_record_wait,
+ prb_read_valid(test_data->ringbuffer, seq, &r))) {
+ /* check/track the sequence */
+ if (info.seq < seq)
+ KUNIT_FAIL(test_data->test, "BAD SEQ READ: request=%llu read=%llu\n",
+ seq, info.seq);
+
+ if (!prbtest_check_data((struct prbtest_rbdata *)r.text_buf))
+ prbtest_fail_record(test_data->test,
+ (struct prbtest_rbdata *)r.text_buf, info.seq);
+
+ if ((count++ & 0x3fff) == 0)
+ cond_resched();
+
+ seq = info.seq + 1;
+ }
+
+ timer_delete_sync(&wakeup.timer);
+ timer_destroy_on_stack(&wakeup.timer);
+
+ kunit_info(test_data->test, "end reader: read=%lu seq=%llu\n", count, info.seq);
+
+ return 0;
+}
+
+KUNIT_DEFINE_ACTION_WRAPPER(prbtest_cpumask_cleanup, free_cpumask_var, struct cpumask *);
+KUNIT_DEFINE_ACTION_WRAPPER(prbtest_kthread_cleanup, kthread_stop, struct task_struct *);
+
+static void prbtest_add_cpumask_cleanup(struct kunit *test, cpumask_var_t mask)
+{
+ int err;
+
+ err = kunit_add_action_or_reset(test, prbtest_cpumask_cleanup, mask);
+ KUNIT_ASSERT_EQ(test, err, 0);
+}
+
+static void prbtest_add_kthread_cleanup(struct kunit *test, struct task_struct *kthread)
+{
+ int err;
+
+ err = kunit_add_action_or_reset(test, prbtest_kthread_cleanup, kthread);
+ KUNIT_ASSERT_EQ(test, err, 0);
+}
+
+static inline void prbtest_prb_reinit(struct printk_ringbuffer *rb)
+{
+ prb_init(rb, rb->text_data_ring.data, rb->text_data_ring.size_bits, rb->desc_ring.descs,
+ rb->desc_ring.count_bits, rb->desc_ring.infos);
+}
+
+static void test_readerwriter(struct kunit *test)
+{
+ /* Equivalent to CONFIG_LOG_BUF_SHIFT=13 */
+ DEFINE_PRINTKRB(test_rb, 8, 5);
+
+ struct prbtest_thread_data *thread_data;
+ struct prbtest_data *test_data;
+ struct task_struct *thread;
+ cpumask_var_t test_cpus;
+ int cpu, reader_cpu;
+
+ KUNIT_ASSERT_TRUE(test, alloc_cpumask_var(&test_cpus, GFP_KERNEL));
+ prbtest_add_cpumask_cleanup(test, test_cpus);
+
+ cpus_read_lock();
+ /*
+ * A KUNIT_ASSERT() failure kills the current task, so it cannot be
+ * called while the CPU hotplug lock is held.
+ * Instead use a snapshot of the online CPUs.
+ * If they change during test execution, it is unfortunate but not a grave error.
+ */
+ cpumask_copy(test_cpus, cpu_online_mask);
+ cpus_read_unlock();
+
+ /* One CPU is for the reader, all others are writers */
+ reader_cpu = cpumask_first(test_cpus);
+ if (cpumask_weight(test_cpus) == 1)
+ kunit_warn(test, "more than one CPU is recommended");
+ else
+ cpumask_clear_cpu(reader_cpu, test_cpus);
+
+ /* The KUnit test can be restarted multiple times. */
+ prbtest_prb_reinit(&test_rb);
+
+ test_data = kunit_kmalloc(test, sizeof(*test_data), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, test_data);
+ test_data->test = test;
+ test_data->ringbuffer = &test_rb;
+ init_waitqueue_head(&test_data->new_record_wait);
+
+ kunit_info(test, "running for %lu ms\n", runtime_ms);
+
+ for_each_cpu(cpu, test_cpus) {
+ thread_data = kunit_kmalloc(test, sizeof(*thread_data), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, thread_data);
+ thread_data->test_data = test_data;
+ thread_data->num = cpu;
+
+ thread = kthread_run_on_cpu(prbtest_writer, thread_data, cpu,
+ "prbtest writer %u");
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, thread);
+ prbtest_add_kthread_cleanup(test, thread);
+ }
+
+ kunit_info(test, "starting test\n");
+
+ set_cpus_allowed_ptr(current, cpumask_of(reader_cpu));
+ prbtest_reader(test_data, runtime_ms);
+
+ kunit_info(test, "completed test\n");
+}
+
+static struct kunit_case prb_test_cases[] = {
+ KUNIT_CASE_SLOW(test_readerwriter),
+ {}
+};
+
+static struct kunit_suite prb_test_suite = {
+ .name = "printk-ringbuffer",
+ .test_cases = prb_test_cases,
+};
+kunit_test_suite(prb_test_suite);
+
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
+MODULE_AUTHOR("John Ogness <john.ogness@linutronix.de>");
+MODULE_DESCRIPTION("printk_ringbuffer KUnit test");
+MODULE_LICENSE("GPL");
diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c
index da77f3f5c1fe..f15732e93c2e 100644
--- a/kernel/printk/sysctl.c
+++ b/kernel/printk/sysctl.c
@@ -3,7 +3,6 @@
* sysctl.c: General linux system control interface
*/
-#include <linux/sysctl.h>
#include <linux/printk.h>
#include <linux/capability.h>
#include <linux/ratelimit.h>