summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
authorNathan Lynch <nathanl@linux.ibm.com>2023-02-10 12:41:49 -0600
committerMichael Ellerman <mpe@ellerman.id.au>2023-02-13 22:35:01 +1100
commit09d1ea72c88198ef5a9e6b8208f544fe18acbff1 (patch)
tree710afb87dc07641a85b6a14670426d6658ddb952 /arch/powerpc/kernel
parent4b3e71e9a34c48f370b6281e9477515d588e7b26 (diff)
downloadlwn-09d1ea72c88198ef5a9e6b8208f544fe18acbff1.tar.gz
lwn-09d1ea72c88198ef5a9e6b8208f544fe18acbff1.zip
powerpc/rtas: handle extended delays safely in early boot
Some code that runs early in boot calls RTAS functions that can return -2 or 990x statuses, which mean the caller should retry. An example is pSeries_cmo_feature_init(), which invokes ibm,get-system-parameter but treats these benign statuses as errors instead of retrying. pSeries_cmo_feature_init() and similar code should be made to retry until they succeed or receive a real error, using the usual pattern: do { rc = rtas_call(token, etc...); } while (rtas_busy_delay(rc)); But rtas_busy_delay() will perform a timed sleep on any 990x status. This isn't safe so early in boot, before the CPU scheduler and timer subsystem have initialized. The -2 RTAS status is much more likely to occur during single-threaded boot than 990x in practice, at least on PowerVM. This is because -2 usually means that RTAS made progress but exhausted its self-imposed timeslice, while 990x is associated with concurrent requests from the OS causing internal contention. Regardless, according to the language in PAPR, the OS should be prepared to handle either type of status at any time. Add a fallback path to rtas_busy_delay() to handle this as safely as possible, performing a small delay on 990x. Include a counter to detect retry loops that aren't making progress and bail out. Add __ref to rtas_busy_delay() since it now conditionally calls an __init function. This was found by inspection and I'm not aware of any real failures. However, the implementation of rtas_busy_delay() before commit 38f7b7067dae ("powerpc/rtas: rtas_busy_delay() improvements") was not susceptible to this problem, so let's treat this as a regression. Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com> Fixes: 38f7b7067dae ("powerpc/rtas: rtas_busy_delay() improvements") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-1-26929c8cce78@linux.ibm.com
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/rtas.c49
1 files changed, 48 insertions, 1 deletions
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 795225d7f138..86aff1cb8a0d 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -606,6 +606,47 @@ unsigned int rtas_busy_delay_time(int status)
return ms;
}
+/*
+ * Early boot fallback for rtas_busy_delay().
+ */
+static bool __init rtas_busy_delay_early(int status)
+{
+ static size_t successive_ext_delays __initdata;
+ bool retry;
+
+ switch (status) {
+ case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+ /*
+ * In the unlikely case that we receive an extended
+ * delay status in early boot, the OS is probably not
+ * the cause, and there's nothing we can do to clear
+ * the condition. Best we can do is delay for a bit
+ * and hope it's transient. Lie to the caller if it
+ * seems like we're stuck in a retry loop.
+ */
+ mdelay(1);
+ retry = true;
+ successive_ext_delays += 1;
+ if (successive_ext_delays > 1000) {
+ pr_err("too many extended delays, giving up\n");
+ dump_stack();
+ retry = false;
+ successive_ext_delays = 0;
+ }
+ break;
+ case RTAS_BUSY:
+ retry = true;
+ successive_ext_delays = 0;
+ break;
+ default:
+ retry = false;
+ successive_ext_delays = 0;
+ break;
+ }
+
+ return retry;
+}
+
/**
* rtas_busy_delay() - helper for RTAS busy and extended delay statuses
*
@@ -624,11 +665,17 @@ unsigned int rtas_busy_delay_time(int status)
* * false - @status is not @RTAS_BUSY nor an extended delay hint. The
* caller is responsible for handling @status.
*/
-bool rtas_busy_delay(int status)
+bool __ref rtas_busy_delay(int status)
{
unsigned int ms;
bool ret;
+ /*
+ * Can't do timed sleeps before timekeeping is up.
+ */
+ if (system_state < SYSTEM_SCHEDULING)
+ return rtas_busy_delay_early(status);
+
switch (status) {
case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
ret = true;