author     Ingo Molnar <mingo@elte.hu>              2009-07-03 08:30:03 -0500
committer  Thomas Gleixner <tglx@linutronix.de>     2009-07-29 23:30:37 +0200
commit     0a930ce98838ed0a03530fd4960eb3423c9b55bc
tree       660e743d49ecbb04a6bd08b577720b86de327f7d
parent     42cd561b099de734b16c92b7e29f418f0d62daad
sched: preempt-rt support
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--  include/linux/hardirq.h |  13
-rw-r--r--  include/linux/sched.h   |  49
-rw-r--r--  kernel/mutex.c          |   7
-rw-r--r--  kernel/sched.c          | 337
-rw-r--r--  kernel/sched_rt.c       |  54
-rw-r--r--  lib/kernel_lock.c       |   4
6 files changed, 381 insertions(+), 83 deletions(-)
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 1518625411a4..70b12547cfb2 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -96,19 +96,6 @@
 #define in_atomic()    ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
 
 #ifdef CONFIG_PREEMPT
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_CHECK_OFFSET 0
-#endif
-
-/*
- * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler)
- */
-#define in_atomic_preempt_off() \
-    ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
-
-#ifdef CONFIG_PREEMPT
 # define preemptible()    (preempt_count() == 0 && !irqs_disabled())
 # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
 #else
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 69faf651b10a..2e5be662deef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -100,6 +100,17 @@ struct fs_struct;
 struct bts_context;
 struct perf_counter_context;
 
+#ifdef CONFIG_PREEMPT
+extern int kernel_preemption;
+#else
+# define kernel_preemption 0
+#endif
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+extern int voluntary_preemption;
+#else
+# define voluntary_preemption 0
+#endif
+
 #ifdef CONFIG_PREEMPT_SOFTIRQS
 extern int softirq_preemption;
 #else
@@ -225,6 +236,28 @@ extern struct semaphore kernel_sem;
 #define set_task_state(tsk, state_value) \
     set_mb((tsk)->state, (state_value))
 
+// #define PREEMPT_DIRECT
+
+#ifdef CONFIG_X86_LOCAL_APIC
+extern void nmi_show_all_regs(void);
+#else
+# define nmi_show_all_regs() do { } while (0)
+#endif
+
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/signal.h>
+#include <linux/securebits.h>
+#include <linux/fs_struct.h>
+#include <linux/compiler.h>
+#include <linux/completion.h>
+#include <linux/pid.h>
+#include <linux/percpu.h>
+#include <linux/topology.h>
+#include <linux/seccomp.h>
+
+struct exec_domain;
+
 /*
  * set_current_state() includes a barrier so that the write of current->state
  * is correctly serialised wrt the caller's subsequent test of whether to
@@ -354,6 +387,11 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void __schedule(void);
 asmlinkage void schedule(void);
 extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
+/*
+ * This one can be called with interrupts disabled, only
+ * to be used by lowlevel arch code!
+ */
+asmlinkage void __sched __schedule(void);
 
 struct nsproxy;
 struct user_namespace;
@@ -1686,6 +1724,15 @@ extern struct pid *cad_pid;
 extern void free_task(struct task_struct *tsk);
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
 
+#ifdef CONFIG_PREEMPT_RT
+extern void __put_task_struct_cb(struct rcu_head *rhp);
+
+static inline void put_task_struct(struct task_struct *t)
+{
+    if (atomic_dec_and_test(&t->usage))
+        call_rcu(&t->rcu, __put_task_struct_cb);
+}
+#else
 extern void __put_task_struct(struct task_struct *t);
 
 static inline void put_task_struct(struct task_struct *t)
@@ -1693,6 +1740,7 @@ static inline void put_task_struct(struct task_struct *t)
     if (atomic_dec_and_test(&t->usage))
         __put_task_struct(t);
 }
+#endif
 
 extern cputime_t task_utime(struct task_struct *p);
 extern cputime_t task_stime(struct task_struct *p);
@@ -1910,6 +1958,7 @@ extern struct task_struct *curr_task(int cpu);
 extern void set_curr_task(int cpu, struct task_struct *p);
 
 void yield(void);
+void __yield(void);
 
 /*
  * The default (Linux) execution domain.
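The PREEMPT_RT variant of put_task_struct() above defers the final free through
call_rcu() instead of invoking __put_task_struct() synchronously, so dropping
the last reference never has to free memory right in the release context. Below
is a standalone userspace sketch of that deferred-release pattern — not kernel
code; struct task, put_task() and run_deferred() are invented names, and a plain
list stands in for the RCU callback machinery:

/* Sketch of refcount-with-deferred-free; illustrative only. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct task {
    atomic_int usage;                 /* reference count */
    void (*free_cb)(struct task *);   /* deferred destructor */
    struct task *next;                /* deferred-free queue linkage */
};

static struct task *deferred_head;    /* stands in for the RCU callback list */

static void task_free(struct task *t)
{
    printf("freeing task %p\n", (void *)t);
    free(t);
}

/* Mirrors the PREEMPT_RT put_task_struct(): queue instead of freeing now. */
static void put_task(struct task *t)
{
    if (atomic_fetch_sub(&t->usage, 1) == 1) {
        t->free_cb = task_free;
        t->next = deferred_head;      /* like call_rcu(&t->rcu, ...) */
        deferred_head = t;
    }
}

/* Stands in for the point where RCU callbacks actually run. */
static void run_deferred(void)
{
    while (deferred_head) {
        struct task *t = deferred_head;
        deferred_head = t->next;
        t->free_cb(t);
    }
}

int main(void)
{
    struct task *t = calloc(1, sizeof(*t));
    atomic_init(&t->usage, 2);
    put_task(t);        /* drops to 1: nothing happens */
    put_task(t);        /* drops to 0: queued, not freed */
    run_deferred();     /* "grace period" over: now it is freed */
    return 0;
}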
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 3714ee5bc638..73ad8a627e36 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -249,8 +249,13 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 
         /* didnt get the lock, go to sleep: */
         spin_unlock_mutex(&lock->wait_lock, flags);
-        preempt_enable_and_schedule();
+
+        local_irq_disable();
+        __preempt_enable_no_resched();
+        __schedule();
         preempt_disable();
+        local_irq_enable();
+
         spin_lock_mutex(&lock->wait_lock, flags);
     }
diff --git a/kernel/sched.c b/kernel/sched.c
index 7de11ead31ca..eac72fee3546 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4,6 +4,7 @@
  *  Kernel scheduler and related syscalls
  *
  *  Copyright (C) 1991-2002  Linus Torvalds
+ *  Copyright (C) 2004 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  *
  *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
  *              make semaphores SMP safe
@@ -16,6 +17,7 @@
  *              by Davide Libenzi, preemptible kernel bits by Robert Love.
  *  2003-09-03  Interactivity tuning by Con Kolivas.
  *  2004-04-02  Scheduler domains code by Nick Piggin
+ *  2004-10-13  Real-Time Preemption support by Ingo Molnar
  *  2007-04-15  Work begun on replacing all interactivity tuning with a
  *              fair scheduling design by Con Kolivas.
  *  2007-05-05  Load balancing (smp-nice) and other improvements
@@ -61,6 +63,7 @@
 #include <linux/sysctl.h>
 #include <linux/syscalls.h>
 #include <linux/times.h>
+#include <linux/kallsyms.h>
 #include <linux/tsacct_kern.h>
 #include <linux/kprobes.h>
 #include <linux/delayacct.h>
@@ -107,6 +110,20 @@
 #define NICE_0_LOAD    SCHED_LOAD_SCALE
 #define NICE_0_SHIFT   SCHED_LOAD_SHIFT
 
+#if (BITS_PER_LONG < 64)
+#define JIFFIES_TO_NS64(TIME) \
+    ((unsigned long long)(TIME) * ((unsigned long) (1000000000 / HZ)))
+
+#define NS64_TO_JIFFIES(TIME) \
+    ((((unsigned long long)((TIME)) >> BITS_PER_LONG) * \
+    (1 + NS_TO_JIFFIES(~0UL))) + NS_TO_JIFFIES((unsigned long)(TIME)))
+#else /* BITS_PER_LONG < 64 */
+
+#define NS64_TO_JIFFIES(TIME) NS_TO_JIFFIES(TIME)
+#define JIFFIES_TO_NS64(TIME) JIFFIES_TO_NS(TIME)
+
+#endif /* BITS_PER_LONG < 64 */
+
 /*
  * These are the 'tuning knobs' of the scheduler:
  *
@@ -144,6 +161,32 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
 }
 #endif
 
+#define TASK_PREEMPTS_CURR(p, rq) \
+    ((p)->prio < (rq)->curr->prio)
+
+/*
+ * Tweaks for current
+ */
+
+#ifdef CURRENT_PTR
+struct task_struct * const ___current = &init_task;
+struct task_struct ** const current_ptr = (struct task_struct ** const)&___current;
+struct thread_info * const current_ti = &init_thread_union.thread_info;
+struct thread_info ** const current_ti_ptr = (struct thread_info ** const)&current_ti;
+
+EXPORT_SYMBOL(___current);
+EXPORT_SYMBOL(current_ti);
+
+/*
+ * The scheduler itself doesnt want 'current' to be cached
+ * during context-switches:
+ */
+# undef current
+# define current __current()
+# undef current_thread_info
+# define current_thread_info() __current_thread_info()
+#endif
+
 static inline int rt_policy(int policy)
 {
     if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
@@ -497,6 +540,7 @@ struct rt_rq {
     int overloaded;
     struct plist_head pushable_tasks;
 #endif
+    unsigned long rt_nr_uninterruptible;
     int rt_throttled;
     u64 rt_time;
     u64 rt_runtime;
@@ -602,6 +646,8 @@ struct rq {
      */
     unsigned long nr_uninterruptible;
 
+    unsigned long switch_timestamp;
+    unsigned long slice_avg;
     struct task_struct *curr, *idle;
     unsigned long next_balance;
     struct mm_struct *prev_mm;
@@ -660,6 +706,13 @@ struct rq {
 
     /* BKL stats */
     unsigned int bkl_count;
+
+    /* RT-overload stats: */
+    unsigned long rto_schedule;
+    unsigned long rto_schedule_tail;
+    unsigned long rto_wakeup;
+    unsigned long rto_pulled;
+    unsigned long rto_pushed;
 #endif
 };
@@ -892,11 +945,23 @@ static inline u64 global_rt_runtime(void)
     return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
+/*
+ * We really dont want to do anything complex within switch_to()
+ * on PREEMPT_RT - this check enforces this.
+ */
+#ifdef prepare_arch_switch
+# ifdef CONFIG_PREEMPT_RT
+#  error FIXME
+# else
+#  define _finish_arch_switch finish_arch_switch
+# endif
+#endif
+
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(next)    do { } while (0)
 #endif
 #ifndef finish_arch_switch
-# define finish_arch_switch(prev)    do { } while (0)
+# define _finish_arch_switch(prev)    do { } while (0)
 #endif
 
 static inline int task_current(struct rq *rq, struct task_struct *p)
@@ -927,7 +992,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
      */
     spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
 
-    atomic_spin_unlock_irq(&rq->lock);
+    atomic_spin_unlock(&rq->lock);
 }
 
 #else /* __ARCH_WANT_UNLOCKED_CTXSW */
@@ -968,8 +1033,8 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
     smp_wmb();
     prev->oncpu = 0;
 #endif
-#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
-    local_irq_enable();
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+    local_irq_disable();
 #endif
 }
 #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
@@ -1837,6 +1902,8 @@ static inline int normal_prio(struct task_struct *p)
         prio = MAX_RT_PRIO-1 - p->rt_priority;
     else
         prio = __normal_prio(p);
+
+//  trace_special_pid(p->pid, PRIO(p), __PRIO(prio));
     return prio;
 }
@@ -2447,6 +2514,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int sync, int mutex)
     }
 #endif
 
+#ifdef CONFIG_PREEMPT_RT
+    /*
+     * sync wakeups can increase wakeup latencies:
+     */
+    if (rt_task(p))
+        sync = 0;
+#endif
     smp_wmb();
     rq = task_rq_lock(p, &flags);
     update_rq_clock(rq);
@@ -2855,7 +2929,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
      * Manfred Spraul <manfred@colorfullife.com>
      */
     prev_state = prev->state;
-    finish_arch_switch(prev);
+    _finish_arch_switch(prev);
     perf_counter_task_sched_in(current, cpu_of(rq));
     finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
@@ -2883,12 +2957,15 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 asmlinkage void schedule_tail(struct task_struct *prev)
     __releases(rq->lock)
 {
-    struct rq *rq = this_rq();
-
-    finish_task_switch(rq, prev);
+    preempt_disable();
+    finish_task_switch(this_rq(), prev);
+    __preempt_enable_no_resched();
+    local_irq_enable();
 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
     /* In this case, finish_task_switch does not reenable preemption */
     preempt_enable();
+#else
+    preempt_check_resched();
 #endif
     if (current->set_child_tid)
         put_user(task_pid_vnr(current), current->set_child_tid);
@@ -2936,6 +3013,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
     spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 #endif
 
+#ifdef CURRENT_PTR
+    barrier();
+    *current_ptr = next;
+    *current_ti_ptr = next->thread_info;
+#endif
     /* Here we just switch the register state and the stack. */
     switch_to(prev, next, prev);
@@ -2982,6 +3064,11 @@ unsigned long nr_uninterruptible(void)
     return sum;
 }
 
+unsigned long nr_uninterruptible_cpu(int cpu)
+{
+    return cpu_rq(cpu)->nr_uninterruptible;
+}
+
 unsigned long long nr_context_switches(void)
 {
     int i;
@@ -5184,6 +5271,8 @@ void scheduler_tick(void)
 
     sched_clock_tick();
 
+    BUG_ON(!irqs_disabled());
+
     atomic_spin_lock(&rq->lock);
     update_rq_clock(rq);
     update_cpu_load(rq);
@@ -5277,8 +5366,8 @@ static noinline void __schedule_bug(struct task_struct *prev)
 {
     struct pt_regs *regs = get_irq_regs();
 
-    printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
-        prev->comm, prev->pid, preempt_count());
+    printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d, CPU#%d\n",
+        prev->comm, preempt_count(), prev->pid, smp_processor_id());
     debug_show_held_locks(prev);
     print_modules();
@@ -5296,12 +5385,14 @@ static noinline void __schedule_bug(struct task_struct *prev)
  */
 static inline void schedule_debug(struct task_struct *prev)
 {
+//  WARN_ON(system_state == SYSTEM_BOOTING);
+
     /*
      * Test if we are atomic. Since do_exit() needs to call into
      * schedule() atomically, we ignore that path for now.
     * Otherwise, whine if we are scheduling when we should not be.
     */
-    if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
+    if (unlikely(in_atomic() && !prev->exit_state))
         __schedule_bug(prev);
 
     profile_hit(SCHED_PROFILING, __builtin_return_address(0));
@@ -5386,10 +5477,11 @@ asmlinkage void __sched __schedule(void)
     switch_count = &prev->nivcsw;
 
     release_kernel_lock(prev);
-need_resched_nonpreemptible:
 
     schedule_debug(prev);
 
+    preempt_disable();
+
     if (sched_feat(HRTICK))
         hrtick_clear(rq);
@@ -5401,11 +5493,16 @@ need_resched_nonpreemptible:
         !(preempt_count() & PREEMPT_ACTIVE)) {
         if (unlikely(signal_pending_state(prev->state, prev)))
             prev->state = TASK_RUNNING;
-        else
+        else {
+            touch_softlockup_watchdog();
             deactivate_task(rq, prev, 1);
+        }
         switch_count = &prev->nvcsw;
     }
 
+    if (preempt_count() & PREEMPT_ACTIVE)
+        sub_preempt_count(PREEMPT_ACTIVE);
+
 #ifdef CONFIG_SMP
     if (prev->sched_class->pre_schedule)
         prev->sched_class->pre_schedule(rq, prev);
@@ -5432,19 +5529,22 @@ need_resched_nonpreemptible:
          */
         cpu = smp_processor_id();
         rq = cpu_rq(cpu);
-    } else
-        atomic_spin_unlock_irq(&rq->lock);
+        __preempt_enable_no_resched();
+    } else {
+        __preempt_enable_no_resched();
+        atomic_spin_unlock(&rq->lock);
+    }
 
-    if (unlikely(reacquire_kernel_lock(current) < 0))
-        goto need_resched_nonpreemptible;
+    reacquire_kernel_lock(current);
 }
 
 asmlinkage void __sched schedule(void)
 {
 need_resched:
-    preempt_disable();
+    local_irq_disable();
     __schedule();
-    __preempt_enable_no_resched();
+    local_irq_enable();
+
     if (need_resched())
         goto need_resched;
 }
@@ -5512,6 +5612,35 @@ out:
 #endif
 
 #ifdef CONFIG_PREEMPT
+
+/*
+ * Global flag to turn preemption off on a CONFIG_PREEMPT kernel:
+ */
+int kernel_preemption = 1;
+
+static int __init preempt_setup (char *str)
+{
+    if (!strncmp(str, "off", 3)) {
+        if (kernel_preemption) {
+            printk(KERN_INFO "turning off kernel preemption!\n");
+            kernel_preemption = 0;
+        }
+        return 1;
+    }
+    if (!strncmp(str, "on", 2)) {
+        if (!kernel_preemption) {
+            printk(KERN_INFO "turning on kernel preemption!\n");
+            kernel_preemption = 1;
+        }
+        return 1;
+    }
+    get_option(&str, &kernel_preemption);
+
+    return 1;
+}
+
+__setup("preempt=", preempt_setup);
+
 /*
  * this is the entry point to schedule() from in-kernel preemption
  * off of preempt_enable. Kernel preemptions off return from interrupt
@@ -5523,6 +5652,8 @@ asmlinkage void __sched preempt_schedule(void)
     struct task_struct *task = current;
     int saved_lock_depth;
 
+    if (!kernel_preemption)
+        return;
     /*
      * If there is a non-zero preempt_count or interrupts are disabled,
      * we do not want to preempt the current task. Just return..
@@ -5531,6 +5662,7 @@ asmlinkage void __sched preempt_schedule(void)
         return;
 
     do {
+        local_irq_disable();
         add_preempt_count(PREEMPT_ACTIVE);
 
         /*
@@ -5540,9 +5672,9 @@ asmlinkage void __sched preempt_schedule(void)
          */
         saved_lock_depth = task->lock_depth;
         task->lock_depth = -1;
-        schedule();
+        __schedule();
         task->lock_depth = saved_lock_depth;
-        sub_preempt_count(PREEMPT_ACTIVE);
+        local_irq_enable();
 
         /*
          * Check again in case we missed a preemption opportunity
@@ -5554,10 +5686,10 @@ asmlinkage void __sched preempt_schedule(void)
 EXPORT_SYMBOL(preempt_schedule);
 
 /*
- * this is the entry point to schedule() from kernel preemption
- * off of irq context.
- * Note, that this is called and return with irqs disabled. This will
- * protect us against recursive calling from irq.
+ * this is is the entry point for the IRQ return path. Called with
+ * interrupts disabled.  To avoid infinite irq-entry recursion problems
+ * with fast-paced IRQ sources we do all of this carefully to never
+ * enable interrupts again.
  */
 asmlinkage void __sched preempt_schedule_irq(void)
 {
@@ -5565,10 +5697,17 @@ asmlinkage void __sched preempt_schedule_irq(void)
     struct task_struct *task = current;
     int saved_lock_depth;
 
-    /* Catch callers which need to be fixed */
-    WARN_ON_ONCE(ti->preempt_count || !irqs_disabled());
+    if (!kernel_preemption)
+        return;
+    /*
+     * If there is a non-zero preempt_count then just return.
+     * (interrupts are disabled)
+     */
+    if (unlikely(ti->preempt_count))
+        return;
 
     do {
+        local_irq_disable();
         add_preempt_count(PREEMPT_ACTIVE);
 
         /*
@@ -5578,11 +5717,9 @@ asmlinkage void __sched preempt_schedule_irq(void)
          */
         saved_lock_depth = task->lock_depth;
         task->lock_depth = -1;
-        local_irq_enable();
-        schedule();
+        __schedule();
         local_irq_disable();
         task->lock_depth = saved_lock_depth;
-        sub_preempt_count(PREEMPT_ACTIVE);
 
         /*
          * Check again in case we missed a preemption opportunity
@@ -6002,6 +6139,7 @@ void task_setprio(struct task_struct *p, int prio)
 
         check_class_changed(rq, p, prev_class, oldprio, running);
     }
+
     task_rq_unlock(rq, &flags);
 }
@@ -6642,6 +6780,7 @@ SYSCALL_DEFINE0(sched_yield)
     __release(rq->lock);
     spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
     _raw_spin_unlock(&rq->lock);
+    local_irq_enable();
 
     preempt_enable_and_schedule();
 
@@ -6653,9 +6792,40 @@ static inline int should_resched(void)
     return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
 }
 
+#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT)
+void __might_sleep(char *file, int line)
+{
+#ifdef in_atomic
+    static unsigned long prev_jiffy;    /* ratelimiting */
+
+    if ((!in_atomic() && !irqs_disabled()) ||
+            system_state != SYSTEM_RUNNING || oops_in_progress)
+        return;
+
+    if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+        return;
+    prev_jiffy = jiffies;
+
+    printk(KERN_ERR
+        "BUG: sleeping function called from invalid context at %s:%d\n",
+            file, line);
+    printk(KERN_ERR
+        "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
+            in_atomic(), irqs_disabled(),
+            current->pid, current->comm);
+
+    debug_show_held_locks(current);
+    if (irqs_disabled())
+        print_irqtrace_events(current);
+    dump_stack();
+#endif
+}
+EXPORT_SYMBOL(__might_sleep);
+#endif
+
 static void __cond_resched(void)
 {
-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT)
     __might_sleep(__FILE__, __LINE__);
 #endif
     /*
@@ -6664,10 +6834,11 @@ static void __cond_resched(void)
      * cond_resched() call.
      */
     do {
+        local_irq_disable();
         add_preempt_count(PREEMPT_ACTIVE);
-        schedule();
-        sub_preempt_count(PREEMPT_ACTIVE);
+        __schedule();
     } while (need_resched());
+    local_irq_enable();
 }
 
 int __sched _cond_resched(void)
@@ -6711,7 +6882,11 @@ EXPORT_SYMBOL(cond_resched_lock);
  */
 int __sched cond_resched_softirq(void)
 {
+#ifndef CONFIG_PREEMPT_SOFTIRQS
     WARN_ON_ONCE(!in_softirq());
+    if (!in_softirq())
+        return 0;
+#endif
 
     if (should_resched()) {
         local_bh_enable();
@@ -6742,17 +6917,56 @@ int __sched cond_resched_softirq_context(void)
 }
 EXPORT_SYMBOL(cond_resched_softirq_context);
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+int voluntary_preemption = 1;
+EXPORT_SYMBOL(voluntary_preemption);
+
+static int __init voluntary_preempt_setup (char *str)
+{
+    if (!strncmp(str, "off", 3))
+        voluntary_preemption = 0;
+    else
+        get_option(&str, &voluntary_preemption);
+    if (!voluntary_preemption)
+        printk("turning off voluntary preemption!\n");
+
+    return 1;
+}
+
+__setup("voluntary-preempt=", voluntary_preempt_setup);
+
+#endif
+
 /**
  * yield - yield the current processor to other threads.
  *
  * This is a shortcut for kernel-space yielding - it marks the
  * thread runnable and calls sys_sched_yield().
  */
-void __sched yield(void)
+void __sched __yield(void)
 {
     set_current_state(TASK_RUNNING);
     sys_sched_yield();
 }
+
+void __sched yield(void)
+{
+    static int once = 1;
+
+    /*
+     * it's a bug to rely on yield() with RT priorities. We print
+     * the first occurance after bootup ... this will still give
+     * us an idea about the scope of the problem, without spamming
+     * the syslog:
+     */
+    if (once && rt_task(current)) {
+        once = 0;
+        printk(KERN_ERR "BUG: %s:%d RT task yield()-ing!\n",
+            current->comm, current->pid);
+        dump_stack();
+    }
+    __yield();
+}
 EXPORT_SYMBOL(yield);
 
 /*
@@ -6926,6 +7140,7 @@ void sched_show_task(struct task_struct *p)
 void show_state_filter(unsigned long state_filter)
 {
     struct task_struct *g, *p;
+    int do_unlock = 1;
 
 #if BITS_PER_LONG == 32
     printk(KERN_INFO
@@ -6934,7 +7149,16 @@ void show_state_filter(unsigned long state_filter)
     printk(KERN_INFO
         "  task                        PC stack   pid father\n");
 #endif
+#ifdef CONFIG_PREEMPT_RT
+    if (!read_trylock(&tasklist_lock)) {
+        printk("hm, tasklist_lock write-locked.\n");
+        printk("ignoring ...\n");
+        do_unlock = 0;
+    }
+#else
     read_lock(&tasklist_lock);
+#endif
+
     do_each_thread(g, p) {
         /*
          * reset the NMI-timeout, listing all files on a slow
@@ -6950,7 +7174,8 @@ void show_state_filter(unsigned long state_filter)
 #ifdef CONFIG_SCHED_DEBUG
     sysrq_sched_debug_show();
 #endif
-    read_unlock(&tasklist_lock);
+    if (do_unlock)
+        read_unlock(&tasklist_lock);
     /*
      * Only show locks if all tasks are dumped:
      */
@@ -7122,11 +7347,18 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 {
     struct rq *rq_dest, *rq_src;
+    unsigned long flags;
     int ret = 0, on_rq;
 
     if (unlikely(!cpu_active(dest_cpu)))
         return ret;
 
+    /*
+     * PREEMPT_RT: this relies on write_lock_irq(&tasklist_lock)
+     * disabling interrupts - which on PREEMPT_RT does not do:
+     */
+    local_irq_save(flags);
+
     rq_src = cpu_rq(src_cpu);
     rq_dest = cpu_rq(dest_cpu);
 
@@ -7151,6 +7383,8 @@ done:
     ret = 1;
 fail:
     double_rq_unlock(rq_src, rq_dest);
+    local_irq_restore(flags);
+
     return ret;
 }
@@ -9470,6 +9704,9 @@ void __init sched_init(void)
     atomic_inc(&init_mm.mm_count);
     enter_lazy_tlb(&init_mm, current);
 
+#ifdef CONFIG_PREEMPT_RT
+    printk("Real-Time Preemption Support (C) 2004-2007 Ingo Molnar\n");
+#endif
     /*
      * Make us the idle thread. Technically, schedule() should not be
      * called from this thread, however somewhere below it might be,
@@ -9500,36 +9737,6 @@ void __init sched_init(void)
     scheduler_running = 1;
 }
 
-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-void __might_sleep(char *file, int line)
-{
-#ifdef in_atomic
-    static unsigned long prev_jiffy;    /* ratelimiting */
-
-    if ((!in_atomic() && !irqs_disabled()) ||
-            system_state != SYSTEM_RUNNING || oops_in_progress)
-        return;
-    if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
-        return;
-    prev_jiffy = jiffies;
-
-    printk(KERN_ERR
-        "BUG: sleeping function called from invalid context at %s:%d\n",
-            file, line);
-    printk(KERN_ERR
-        "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
-            in_atomic(), irqs_disabled(),
-            current->pid, current->comm);
-
-    debug_show_held_locks(current);
-    if (irqs_disabled())
-        print_irqtrace_events(current);
-    dump_stack();
-#endif
-}
-EXPORT_SYMBOL(__might_sleep);
-#endif
-
 #ifdef CONFIG_MAGIC_SYSRQ
 static void normalize_task(struct rq *rq, struct task_struct *p)
 {
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 54779ebe48cc..5d6bb327121d 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -860,6 +860,48 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
     }
 }
 
+static inline void incr_rt_nr_uninterruptible(struct task_struct *p,
+                                              struct rq *rq)
+{
+    rq->rt.rt_nr_uninterruptible++;
+}
+
+static inline void decr_rt_nr_uninterruptible(struct task_struct *p,
+                                              struct rq *rq)
+{
+    rq->rt.rt_nr_uninterruptible--;
+}
+
+unsigned long rt_nr_running(void)
+{
+    unsigned long i, sum = 0;
+
+    for_each_online_cpu(i)
+        sum += cpu_rq(i)->rt.rt_nr_running;
+
+    return sum;
+}
+
+unsigned long rt_nr_running_cpu(int cpu)
+{
+    return cpu_rq(cpu)->rt.rt_nr_running;
+}
+
+unsigned long rt_nr_uninterruptible(void)
+{
+    unsigned long i, sum = 0;
+
+    for_each_online_cpu(i)
+        sum += cpu_rq(i)->rt.rt_nr_uninterruptible;
+
+    return sum;
+}
+
+unsigned long rt_nr_uninterruptible_cpu(int cpu)
+{
+    return cpu_rq(cpu)->rt.rt_nr_uninterruptible;
+}
+
 /*
  * Adding/removing a task to/from a priority array:
  */
@@ -872,6 +914,9 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 
     enqueue_rt_entity(rt_se);
 
+    if (p->state == TASK_UNINTERRUPTIBLE)
+        decr_rt_nr_uninterruptible(p, rq);
+
     if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
         enqueue_pushable_task(rq, p);
 
@@ -883,6 +928,10 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
     struct sched_rt_entity *rt_se = &p->rt;
 
     update_curr_rt(rq);
+
+    if (p->state == TASK_UNINTERRUPTIBLE)
+        incr_rt_nr_uninterruptible(p, rq);
+
     dequeue_rt_entity(rt_se);
 
     dequeue_pushable_task(rq, p);
@@ -1462,8 +1511,10 @@ static int pull_rt_task(struct rq *this_rq)
 static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
 {
     /* Try to pull RT tasks here if we lower this rq's prio */
-    if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
+    if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio) {
         pull_rt_task(rq);
+        schedstat_inc(rq, rto_schedule);
+    }
 }
 
 /*
@@ -1545,7 +1596,6 @@ static void set_cpus_allowed_rt(struct task_struct *p,
          */
         if (weight > 1)
             enqueue_pushable_task(rq, p);
-
     }
 
     if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 7e2ad7bd4223..54625bec6fb9 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -41,15 +41,15 @@ int __lockfunc __reacquire_kernel_lock(void)
     struct task_struct *task = current;
     int saved_lock_depth = task->lock_depth;
 
+    local_irq_enable();
     BUG_ON(saved_lock_depth < 0);
 
     task->lock_depth = -1;
-    __preempt_enable_no_resched();
     down(&kernel_sem);
-    preempt_disable();
     task->lock_depth = saved_lock_depth;
 
+    local_irq_enable();
+
     return 0;
 }
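The JIFFIES_TO_NS64()/NS64_TO_JIFFIES() macros added to sched.c exist because
on a 32-bit kernel (BITS_PER_LONG < 64) the jiffies-to-nanoseconds multiply
overflows unsigned long; widening to unsigned long long before multiplying
avoids that. A minimal standalone demonstration, with HZ hardcoded to 1000
purely for illustration and unsigned int standing in for a 32-bit kernel's
unsigned long:

#include <stdio.h>

#define HZ 1000
/* 32-bit-safe form from the patch: widen before multiplying */
#define JIFFIES_TO_NS64(TIME) \
    ((unsigned long long)(TIME) * ((unsigned long)(1000000000 / HZ)))

int main(void)
{
    unsigned int jiffies = 5000;  /* 5 seconds of ticks at HZ=1000 */

    /* narrow 32-bit multiply: 5,000,000,000 wraps modulo 2^32 */
    unsigned int narrow = jiffies * (1000000000 / HZ);

    printf("narrow 32-bit result: %u ns (wrapped)\n", narrow);
    printf("widened 64-bit form:  %llu ns\n", JIFFIES_TO_NS64(jiffies));
    return 0;
}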
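Likewise, the new "preempt=" boot option (and its "voluntary-preempt=" sibling)
is parsed by preempt_setup(): the keywords "off" and "on" are checked first,
then a numeric value is accepted as a fallback. A userspace model of that
parsing logic — get_option() and __setup() are kernel facilities, replaced here
with strtol() and main() purely for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int kernel_preemption = 1;  /* default: preemption enabled */

static int preempt_setup(const char *str)
{
    /* keyword forms first, mirroring the order in the patch */
    if (!strncmp(str, "off", 3))
        kernel_preemption = 0;
    else if (!strncmp(str, "on", 2))
        kernel_preemption = 1;
    else
        kernel_preemption = (int)strtol(str, NULL, 0); /* numeric fallback */
    return 1;  /* in the kernel, 1 means the option was consumed */
}

int main(int argc, char **argv)
{
    if (argc > 1)
        preempt_setup(argv[1]);   /* e.g. "off", "on", "0", "1" */
    printf("kernel_preemption = %d\n", kernel_preemption);
    return 0;
}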