summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Piggin <nickpiggin@yahoo.com.au>2005-11-08 21:39:04 -0800
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-09 07:56:33 -0800
commit64c7c8f88559624abdbe12b5da6502e8879f8d28 (patch)
tree02f85a35ddd0f24dec70e5d6ecd61073578fd8d6
parent5bfb5d690f36d316a5f3b4f7775fda996faa6b12 (diff)
downloadlwn-64c7c8f88559624abdbe12b5da6502e8879f8d28.tar.gz
lwn-64c7c8f88559624abdbe12b5da6502e8879f8d28.zip
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce confusion, and make their semantics rigid. Improves efficiency of resched_task and some cpu_idle routines. * In resched_task: - TIF_NEED_RESCHED is only cleared with the task's runqueue lock held, and as we hold it during resched_task, then there is no need for an atomic test and set there. The only other time this should be set is when the task's quantum expires, in the timer interrupt - this is protected against because the rq lock is irq-safe. - If TIF_NEED_RESCHED is set, then we don't need to do anything. It won't get unset until the task get's schedule()d off. - If we are running on the same CPU as the task we resched, then set TIF_NEED_RESCHED and no further action is required. - If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set after TIF_NEED_RESCHED has been set, then we need to send an IPI. Using these rules, we are able to remove the test and set operation in resched_task, and make clear the previously vague semantics of POLLING_NRFLAG. * In idle routines: - Enter cpu_idle with preempt disabled. When the need_resched() condition becomes true, explicitly call schedule(). This makes things a bit clearer (IMO), but haven't updated all architectures yet. - Many do a test and clear of TIF_NEED_RESCHED for some reason. According to the resched_task rules, this isn't needed (and actually breaks the assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock held). So remove that. Generally one less locked memory op when switching to the idle thread. - Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner most polling idle loops. The above resched_task semantics allow it to be set until before the last time need_resched() is checked before going into a halt requiring interrupt wakeup. Many idle routines simply never enter such a halt, and so POLLING_NRFLAG can be always left set, completely eliminating resched IPIs when rescheduling the idle task. POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs. Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Con Kolivas <kernel@kolivas.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--Documentation/sched-arch.txt89
-rw-r--r--arch/alpha/kernel/process.c10
-rw-r--r--arch/arm/kernel/process.c14
-rw-r--r--arch/i386/kernel/apm.c20
-rw-r--r--arch/i386/kernel/process.c64
-rw-r--r--arch/ia64/kernel/process.c32
-rw-r--r--arch/parisc/kernel/process.c2
-rw-r--r--arch/powerpc/platforms/iseries/setup.c10
-rw-r--r--arch/powerpc/platforms/pseries/setup.c12
-rw-r--r--arch/ppc/kernel/idle.c20
-rw-r--r--arch/ppc64/kernel/idle.c11
-rw-r--r--arch/s390/kernel/process.c13
-rw-r--r--arch/sh/kernel/process.c12
-rw-r--r--arch/sh64/kernel/process.c14
-rw-r--r--arch/sparc/kernel/process.c35
-rw-r--r--arch/sparc64/kernel/process.c20
-rw-r--r--arch/sparc64/kernel/smp.c13
-rw-r--r--arch/x86_64/kernel/process.c67
-rw-r--r--drivers/acpi/processor_idle.c37
-rw-r--r--kernel/sched.c21
20 files changed, 296 insertions, 220 deletions
diff --git a/Documentation/sched-arch.txt b/Documentation/sched-arch.txt
new file mode 100644
index 000000000000..941615a9769b
--- /dev/null
+++ b/Documentation/sched-arch.txt
@@ -0,0 +1,89 @@
+ CPU Scheduler implementation hints for architecture specific code
+
+ Nick Piggin, 2005
+
+Context switch
+==============
+1. Runqueue locking
+By default, the switch_to arch function is called with the runqueue
+locked. This is usually not a problem unless switch_to may need to
+take the runqueue lock. This is usually due to a wake up operation in
+the context switch. See include/asm-ia64/system.h for an example.
+
+To request the scheduler call switch_to with the runqueue unlocked,
+you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
+(typically the one where switch_to is defined).
+
+Unlocked context switches introduce only a very minor performance
+penalty to the core scheduler implementation in the CONFIG_SMP case.
+
+2. Interrupt status
+By default, the switch_to arch function is called with interrupts
+disabled. Interrupts may be enabled over the call if it is likely to
+introduce a significant interrupt latency by adding the line
+`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
+unlocked context switches. This define also implies
+`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an
+example.
+
+
+CPU idle
+========
+Your cpu_idle routines need to obey the following rules:
+
+1. Preempt should now disabled over idle routines. Should only
+ be enabled to call schedule() then disabled again.
+
+2. need_resched/TIF_NEED_RESCHED is only ever set, and will never
+ be cleared until the running task has called schedule(). Idle
+ threads need only ever query need_resched, and may never set or
+ clear it.
+
+3. When cpu_idle finds (need_resched() == 'true'), it should call
+ schedule(). It should not call schedule() otherwise.
+
+4. The only time interrupts need to be disabled when checking
+ need_resched is if we are about to sleep the processor until
+ the next interrupt (this doesn't provide any protection of
+ need_resched, it prevents losing an interrupt).
+
+ 4a. Common problem with this type of sleep appears to be:
+ local_irq_disable();
+ if (!need_resched()) {
+ local_irq_enable();
+ *** resched interrupt arrives here ***
+ __asm__("sleep until next interrupt");
+ }
+
+5. TIF_POLLING_NRFLAG can be set by idle routines that do not
+ need an interrupt to wake them up when need_resched goes high.
+ In other words, they must be periodically polling need_resched,
+ although it may be reasonable to do some background work or enter
+ a low CPU priority.
+
+ 5a. If TIF_POLLING_NRFLAG is set, and we do decide to enter
+ an interrupt sleep, it needs to be cleared then a memory
+ barrier issued (followed by a test of need_resched with
+ interrupts disabled, as explained in 3).
+
+arch/i386/kernel/process.c has examples of both polling and
+sleeping idle functions.
+
+
+Possible arch/ problems
+=======================
+
+Possible arch problems I found (and either tried to fix or didn't):
+
+h8300 - Is such sleeping racy vs interrupts? (See #4a).
+ The H8/300 manual I found indicates yes, however disabling IRQs
+ over the sleep mean only NMIs can wake it up, so can't fix easily
+ without doing spin waiting.
+
+ia64 - is safe_halt call racy vs interrupts? (does it sleep?) (See #4a)
+
+sh64 - Is sleeping racy vs interrupts? (See #4a)
+
+sparc - IRQs on at this point(?), change local_irq_save to _disable.
+ - TODO: needs secondary CPUs to disable preempt (See #1)
+
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index eb20c3afff58..a8682612abc0 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -43,21 +43,17 @@
#include "proto.h"
#include "pci_impl.h"
-void default_idle(void)
-{
- barrier();
-}
-
void
cpu_idle(void)
{
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
while (1) {
- void (*idle)(void) = default_idle;
/* FIXME -- EV6 and LCA45 know how to power down
the CPU. */
while (!need_resched())
- idle();
+ cpu_relax();
schedule();
}
}
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 93dd92cc12f8..c0f6a119de3b 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -86,12 +86,16 @@ EXPORT_SYMBOL(pm_power_off);
*/
void default_idle(void)
{
- local_irq_disable();
- if (!need_resched() && !hlt_counter) {
- timer_dyn_reprogram();
- arch_idle();
+ if (hlt_counter)
+ cpu_relax();
+ else {
+ local_irq_disable();
+ if (!need_resched()) {
+ timer_dyn_reprogram();
+ arch_idle();
+ }
+ local_irq_enable();
}
- local_irq_enable();
}
/*
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 86e80c551478..003548b8735f 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -769,8 +769,26 @@ static int set_system_power_state(u_short state)
static int apm_do_idle(void)
{
u32 eax;
+ u8 ret = 0;
+ int idled = 0;
+ int polling;
+
+ polling = test_thread_flag(TIF_POLLING_NRFLAG);
+ if (polling) {
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
+ }
+ if (!need_resched()) {
+ idled = 1;
+ ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
+ }
+ if (polling)
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ if (!idled)
+ return 0;
- if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) {
+ if (ret) {
static unsigned long t;
/* This always fails on some SMP boards running UP kernels.
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 5296e284ea36..1cb261f225d5 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -99,14 +99,22 @@ EXPORT_SYMBOL(enable_hlt);
*/
void default_idle(void)
{
+ local_irq_enable();
+
if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
- local_irq_disable();
- if (!need_resched())
- safe_halt();
- else
- local_irq_enable();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
+ while (!need_resched()) {
+ local_irq_disable();
+ if (!need_resched())
+ safe_halt();
+ else
+ local_irq_enable();
+ }
+ set_thread_flag(TIF_POLLING_NRFLAG);
} else {
- cpu_relax();
+ while (!need_resched())
+ cpu_relax();
}
}
#ifdef CONFIG_APM_MODULE
@@ -120,29 +128,14 @@ EXPORT_SYMBOL(default_idle);
*/
static void poll_idle (void)
{
- int oldval;
-
local_irq_enable();
- /*
- * Deal with another CPU just having chosen a thread to
- * run here:
- */
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
- asm volatile(
- "2:"
- "testl %0, %1;"
- "rep; nop;"
- "je 2b;"
- : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
-
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
- }
+ asm volatile(
+ "2:"
+ "testl %0, %1;"
+ "rep; nop;"
+ "je 2b;"
+ : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -181,6 +174,8 @@ void cpu_idle(void)
{
int cpu = smp_processor_id();
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
@@ -246,15 +241,12 @@ static void mwait_idle(void)
{
local_irq_enable();
- if (!need_resched()) {
- set_thread_flag(TIF_POLLING_NRFLAG);
- do {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- if (need_resched())
- break;
- __mwait(0, 0);
- } while (!need_resched());
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ while (!need_resched()) {
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (need_resched())
+ break;
+ __mwait(0, 0);
}
}
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 4c621fc3c3b9..640d6908f8ec 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -197,11 +197,15 @@ void
default_idle (void)
{
local_irq_enable();
- while (!need_resched())
- if (can_do_pal_halt)
- safe_halt();
- else
+ while (!need_resched()) {
+ if (can_do_pal_halt) {
+ local_irq_disable();
+ if (!need_resched())
+ safe_halt();
+ local_irq_enable();
+ } else
cpu_relax();
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -263,16 +267,16 @@ void __attribute__((noreturn))
cpu_idle (void)
{
void (*mark_idle)(int) = ia64_mark_idle;
+ int cpu = smp_processor_id();
+ set_thread_flag(TIF_POLLING_NRFLAG);
/* endless idle loop with no priority at all */
while (1) {
+ if (!need_resched()) {
+ void (*idle)(void);
#ifdef CONFIG_SMP
- if (!need_resched())
min_xtp();
#endif
- while (!need_resched()) {
- void (*idle)(void);
-
if (__get_cpu_var(cpu_idle_state))
__get_cpu_var(cpu_idle_state) = 0;
@@ -284,19 +288,17 @@ cpu_idle (void)
if (!idle)
idle = default_idle;
(*idle)();
- }
-
- if (mark_idle)
- (*mark_idle)(0);
-
+ if (mark_idle)
+ (*mark_idle)(0);
#ifdef CONFIG_SMP
- normal_xtp();
+ normal_xtp();
#endif
+ }
preempt_enable_no_resched();
schedule();
preempt_disable();
check_pgt_cache();
- if (cpu_is_offline(smp_processor_id()))
+ if (cpu_is_offline(cpu))
play_dead();
}
}
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index f482f78de435..fee4f1f09adc 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -88,6 +88,8 @@ void default_idle(void)
*/
void cpu_idle(void)
{
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched())
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 0130f2619dac..7f8f0cda6a74 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -703,13 +703,10 @@ static void iseries_shared_idle(void)
static void iseries_dedicated_idle(void)
{
long oldval;
+ set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
-
+ if (!need_resched()) {
while (!need_resched()) {
ppc64_runlatch_off();
HMT_low();
@@ -722,9 +719,6 @@ static void iseries_dedicated_idle(void)
}
HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
}
ppc64_runlatch_on();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 4854f5eb5c3d..a093a0d4dd69 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -469,6 +469,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
* more.
*/
clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
/*
* SMT dynamic mode. Cede will result in this thread going
@@ -481,6 +482,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
cede_processor();
else
local_irq_enable();
+ set_thread_flag(TIF_POLLING_NRFLAG);
} else {
/*
* Give the HV an opportunity at the processor, since we are
@@ -492,11 +494,11 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
static void pseries_dedicated_idle(void)
{
- long oldval;
struct paca_struct *lpaca = get_paca();
unsigned int cpu = smp_processor_id();
unsigned long start_snooze;
unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
+ set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
/*
@@ -505,10 +507,7 @@ static void pseries_dedicated_idle(void)
*/
lpaca->lppaca.idle = 1;
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
-
+ if (!need_resched()) {
start_snooze = __get_tb() +
*smt_snooze_delay * tb_ticks_per_usec;
@@ -531,9 +530,6 @@ static void pseries_dedicated_idle(void)
}
HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
}
lpaca->lppaca.idle = 0;
diff --git a/arch/ppc/kernel/idle.c b/arch/ppc/kernel/idle.c
index a6141f05c919..3c4e4cb61074 100644
--- a/arch/ppc/kernel/idle.c
+++ b/arch/ppc/kernel/idle.c
@@ -63,18 +63,18 @@ void cpu_idle(void)
int cpu = smp_processor_id();
for (;;) {
- if (ppc_md.idle != NULL)
- ppc_md.idle();
- else
- default_idle();
- if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
- cpu_die();
- if (need_resched()) {
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ while (need_resched()) {
+ if (ppc_md.idle != NULL)
+ ppc_md.idle();
+ else
+ default_idle();
}
+ if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
+ cpu_die();
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
}
}
diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c
index 909ea669af91..715bc0e71e0f 100644
--- a/arch/ppc64/kernel/idle.c
+++ b/arch/ppc64/kernel/idle.c
@@ -34,15 +34,11 @@ extern void power4_idle(void);
void default_idle(void)
{
- long oldval;
unsigned int cpu = smp_processor_id();
+ set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
-
+ if (!need_resched()) {
while (!need_resched() && !cpu_is_offline(cpu)) {
ppc64_runlatch_off();
@@ -55,9 +51,6 @@ void default_idle(void)
}
HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
}
ppc64_runlatch_on();
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 66ca5757e368..78b64fe5e7c2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -99,14 +99,15 @@ void default_idle(void)
{
int cpu, rc;
+ /* CPU is going idle. */
+ cpu = smp_processor_id();
+
local_irq_disable();
- if (need_resched()) {
+ if (need_resched()) {
local_irq_enable();
- return;
- }
+ return;
+ }
- /* CPU is going idle. */
- cpu = smp_processor_id();
rc = notifier_call_chain(&idle_chain, CPU_IDLE, (void *)(long) cpu);
if (rc != NOTIFY_OK && rc != NOTIFY_DONE)
BUG();
@@ -119,7 +120,7 @@ void default_idle(void)
__ctl_set_bit(8, 15);
#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_is_offline(smp_processor_id()))
+ if (cpu_is_offline(cpu))
cpu_die();
#endif
diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c
index 1cbc26b796ad..fd4f240b833d 100644
--- a/arch/sh/kernel/process.c
+++ b/arch/sh/kernel/process.c
@@ -51,14 +51,13 @@ void enable_hlt(void)
EXPORT_SYMBOL(enable_hlt);
-void default_idle(void)
+void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
if (hlt_counter) {
- while (1)
- if (need_resched())
- break;
+ while (!need_resched())
+ cpu_relax();
} else {
while (!need_resched())
cpu_sleep();
@@ -70,11 +69,6 @@ void default_idle(void)
}
}
-void cpu_idle(void)
-{
- default_idle();
-}
-
void machine_restart(char * __unused)
{
/* SR.BL=1 and invoke address error to let CPU reset (manual reset) */
diff --git a/arch/sh64/kernel/process.c b/arch/sh64/kernel/process.c
index 0c09537449b3..b95d04141855 100644
--- a/arch/sh64/kernel/process.c
+++ b/arch/sh64/kernel/process.c
@@ -307,23 +307,19 @@ __setup("hlt", hlt_setup);
static inline void hlt(void)
{
- if (hlt_counter)
- return;
-
__asm__ __volatile__ ("sleep" : : : "memory");
}
/*
* The idle loop on a uniprocessor SH..
*/
-void default_idle(void)
+void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
if (hlt_counter) {
- while (1)
- if (need_resched())
- break;
+ while (!need_resched())
+ cpu_relax();
} else {
local_irq_disable();
while (!need_resched()) {
@@ -338,11 +334,7 @@ void default_idle(void)
schedule();
preempt_disable();
}
-}
-void cpu_idle(void)
-{
- default_idle();
}
void machine_restart(char * __unused)
diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c
index c39f4d01096d..ea8647411462 100644
--- a/arch/sparc/kernel/process.c
+++ b/arch/sparc/kernel/process.c
@@ -67,13 +67,6 @@ extern void fpsave(unsigned long *, unsigned long *, void *, unsigned long *);
struct task_struct *last_task_used_math = NULL;
struct thread_info *current_set[NR_CPUS];
-/*
- * default_idle is new in 2.5. XXX Review, currently stolen from sparc64.
- */
-void default_idle(void)
-{
-}
-
#ifndef CONFIG_SMP
#define SUN4C_FAULT_HIGH 100
@@ -92,12 +85,11 @@ void cpu_idle(void)
static unsigned long fps;
unsigned long now;
unsigned long faults;
- unsigned long flags;
extern unsigned long sun4c_kernel_faults;
extern void sun4c_grow_kernel_ring(void);
- local_irq_save(flags);
+ local_irq_disable();
now = jiffies;
count -= (now - last_jiffies);
last_jiffies = now;
@@ -113,13 +105,16 @@ void cpu_idle(void)
sun4c_grow_kernel_ring();
}
}
- local_irq_restore(flags);
+ local_irq_enable();
}
- while((!need_resched()) && pm_idle) {
- (*pm_idle)();
+ if (pm_idle) {
+ while (!need_resched())
+ (*pm_idle)();
+ } else {
+ while (!need_resched())
+ cpu_relax();
}
-
preempt_enable_no_resched();
schedule();
preempt_disable();
@@ -132,15 +127,15 @@ void cpu_idle(void)
/* This is being executed in task 0 'user space'. */
void cpu_idle(void)
{
+ set_thread_flag(TIF_POLLING_NRFLAG);
/* endless idle loop with no priority at all */
while(1) {
- if(need_resched()) {
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
- check_pgt_cache();
- }
- barrier(); /* or else gcc optimizes... */
+ while (!need_resched())
+ cpu_relax();
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
+ check_pgt_cache();
}
}
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 2f89206e008f..02f9dec1d459 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -85,23 +85,31 @@ void cpu_idle(void)
/*
* the idle loop on a UltraMultiPenguin...
+ *
+ * TIF_POLLING_NRFLAG is set because we do not sleep the cpu
+ * inside of the idler task, so an interrupt is not needed
+ * to get a clean fast response.
+ *
+ * XXX Reverify this assumption... -DaveM
+ *
+ * Addendum: We do want it to do something for the signal
+ * delivery case, we detect that by just seeing
+ * if we are trying to send this to an idler or not.
*/
-#define idle_me_harder() (cpu_data(smp_processor_id()).idle_volume += 1)
-#define unidle_me() (cpu_data(smp_processor_id()).idle_volume = 0)
void cpu_idle(void)
{
+ cpuinfo_sparc *cpuinfo = &local_cpu_data();
set_thread_flag(TIF_POLLING_NRFLAG);
+
while(1) {
if (need_resched()) {
- unidle_me();
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ cpuinfo->idle_volume = 0;
preempt_enable_no_resched();
schedule();
preempt_disable();
- set_thread_flag(TIF_POLLING_NRFLAG);
check_pgt_cache();
}
- idle_me_harder();
+ cpuinfo->idle_volume++;
/* The store ordering is so that IRQ handlers on
* other cpus see our increasing idleness for the buddy
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 8aca4b1dc04e..797a65493fb8 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1152,20 +1152,9 @@ void __init smp_cpus_done(unsigned int max_cpus)
(bogosum/(5000/HZ))%100);
}
-/* This needn't do anything as we do not sleep the cpu
- * inside of the idler task, so an interrupt is not needed
- * to get a clean fast response.
- *
- * XXX Reverify this assumption... -DaveM
- *
- * Addendum: We do want it to do something for the signal
- * delivery case, we detect that by just seeing
- * if we are trying to send this to an idler or not.
- */
void smp_send_reschedule(int cpu)
{
- if (cpu_data(cpu).idle_volume == 0)
- smp_receive_signal(cpu);
+ smp_receive_signal(cpu);
}
/* This is a nop because we capture all other cpus
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 571f9fe490ce..59be85d9a4bc 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -86,12 +86,22 @@ EXPORT_SYMBOL(enable_hlt);
*/
void default_idle(void)
{
+ local_irq_enable();
+
if (!atomic_read(&hlt_counter)) {
- local_irq_disable();
- if (!need_resched())
- safe_halt();
- else
- local_irq_enable();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
+ while (!need_resched()) {
+ local_irq_disable();
+ if (!need_resched())
+ safe_halt();
+ else
+ local_irq_enable();
+ }
+ set_thread_flag(TIF_POLLING_NRFLAG);
+ } else {
+ while (!need_resched())
+ cpu_relax();
}
}
@@ -102,30 +112,16 @@ void default_idle(void)
*/
static void poll_idle (void)
{
- int oldval;
-
local_irq_enable();
- /*
- * Deal with another CPU just having chosen a thread to
- * run here:
- */
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
- asm volatile(
- "2:"
- "testl %0,%1;"
- "rep; nop;"
- "je 2b;"
- : :
- "i" (_TIF_NEED_RESCHED),
- "m" (current_thread_info()->flags));
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
- }
+ asm volatile(
+ "2:"
+ "testl %0,%1;"
+ "rep; nop;"
+ "je 2b;"
+ : :
+ "i" (_TIF_NEED_RESCHED),
+ "m" (current_thread_info()->flags));
}
void cpu_idle_wait(void)
@@ -187,6 +183,8 @@ static inline void play_dead(void)
*/
void cpu_idle (void)
{
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
@@ -221,15 +219,12 @@ static void mwait_idle(void)
{
local_irq_enable();
- if (!need_resched()) {
- set_thread_flag(TIF_POLLING_NRFLAG);
- do {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- if (need_resched())
- break;
- __mwait(0, 0);
- } while (!need_resched());
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ while (!need_resched()) {
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (need_resched())
+ break;
+ __mwait(0, 0);
}
}
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 161db4acfb91..573b6a97bb1f 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -167,6 +167,19 @@ acpi_processor_power_activate(struct acpi_processor *pr,
return;
}
+static void acpi_safe_halt(void)
+{
+ int polling = test_thread_flag(TIF_POLLING_NRFLAG);
+ if (polling) {
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
+ }
+ if (!need_resched())
+ safe_halt();
+ if (polling)
+ set_thread_flag(TIF_POLLING_NRFLAG);
+}
+
static atomic_t c3_cpu_count;
static void acpi_processor_idle(void)
@@ -177,7 +190,7 @@ static void acpi_processor_idle(void)
int sleep_ticks = 0;
u32 t1, t2 = 0;
- pr = processors[raw_smp_processor_id()];
+ pr = processors[smp_processor_id()];
if (!pr)
return;
@@ -197,8 +210,13 @@ static void acpi_processor_idle(void)
}
cx = pr->power.state;
- if (!cx)
- goto easy_out;
+ if (!cx) {
+ if (pm_idle_save)
+ pm_idle_save();
+ else
+ acpi_safe_halt();
+ return;
+ }
/*
* Check BM Activity
@@ -278,7 +296,8 @@ static void acpi_processor_idle(void)
if (pm_idle_save)
pm_idle_save();
else
- safe_halt();
+ acpi_safe_halt();
+
/*
* TBD: Can't get time duration while in C1, as resumes
* go to an ISR rather than here. Need to instrument
@@ -414,16 +433,6 @@ static void acpi_processor_idle(void)
*/
if (next_state != pr->power.state)
acpi_processor_power_activate(pr, next_state);
-
- return;
-
- easy_out:
- /* do C1 instead of busy loop */
- if (pm_idle_save)
- pm_idle_save();
- else
- safe_halt();
- return;
}
static int acpi_processor_set_power_policy(struct acpi_processor *pr)
diff --git a/kernel/sched.c b/kernel/sched.c
index 0f2def822296..ac3f5cc3bb51 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -864,21 +864,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
#ifdef CONFIG_SMP
static void resched_task(task_t *p)
{
- int need_resched, nrpolling;
+ int cpu;
assert_spin_locked(&task_rq(p)->lock);
- /* minimise the chance of sending an interrupt to poll_idle() */
- nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
- need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
- nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
+ if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+ return;
+
+ set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+
+ cpu = task_cpu(p);
+ if (cpu == smp_processor_id())
+ return;
- if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
- smp_send_reschedule(task_cpu(p));
+ /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */
+ smp_mb();
+ if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG))
+ smp_send_reschedule(cpu);
}
#else
static inline void resched_task(task_t *p)
{
+ assert_spin_locked(&task_rq(p)->lock);
set_tsk_need_resched(p);
}
#endif