author		Ingo Molnar <mingo@elte.hu>		2009-07-03 08:30:07 -0500
committer	Thomas Gleixner <tglx@linutronix.de>	2009-07-29 23:30:38 +0200
commit		e9888fb95225bb3b786d79fd983eb67e1acad338 (patch)
tree		d09bc138fec534e17b57a559f36a5d8c7e49973f
parent		94b3cbf2548a023b4187e252043eac367f84740c (diff)
rt: core implementation
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 include/linux/hardirq.h    |  6
 include/linux/kernel.h     |  9
 include/linux/profile.h    | 11
 include/linux/radix-tree.h | 13
 include/linux/smp.h        | 11
 include/linux/smp_lock.h   |  2
 include/linux/workqueue.h  |  3
 kernel/Kconfig.preempt     | 87
 kernel/exit.c              | 22
 kernel/fork.c              | 12
 kernel/notifier.c          |  4
 kernel/signal.c            | 12
 kernel/softirq.c           | 14
 kernel/workqueue.c         | 54
 lib/Kconfig.debug          |  4
 lib/Makefile               |  3
 lib/kernel_lock.c          | 14
 lib/locking-selftest.c     | 29
 lib/radix-tree.c           |  6
 19 files changed, 244 insertions(+), 72 deletions(-)
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 70b12547cfb2..16966fbab185 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -77,9 +77,9 @@
  * Are we doing bottom half or hardware interrupt processing?
  * Are we in a softirq context? Interrupt context?
  */
-#define in_irq()		(hardirq_count())
-#define in_softirq()		(softirq_count())
-#define in_interrupt()		(irq_count())
+#define in_irq()	(hardirq_count() || (current->flags & PF_HARDIRQ))
+#define in_softirq()	(softirq_count() || (current->flags & PF_SOFTIRQ))
+#define in_interrupt()	(irq_count())
 
 /*
  * Are we in NMI context?
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d6320a3e8def..4651e0971d75 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -124,7 +124,7 @@ extern int _cond_resched(void);
 # define might_resched() do { } while (0)
 #endif
 
-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT)
 void __might_sleep(char *file, int line);
 /**
  * might_sleep - annotation for functions that can sleep
@@ -284,6 +284,12 @@ extern void printk_tick(void);
 extern void asmlinkage __attribute__((format(printf, 1, 2)))
 	early_printk(const char *fmt, ...);
 
+#ifdef CONFIG_PREEMPT_RT
+extern void zap_rt_locks(void);
+#else
+# define zap_rt_locks()	do { } while (0)
+#endif
+
 unsigned long int_sqrt(unsigned long);
 
 static inline void console_silent(void)
@@ -313,6 +319,7 @@ extern int root_mountflags;
 /* Values used for system_state */
 extern enum system_states {
 	SYSTEM_BOOTING,
+	SYSTEM_BOOTING_SCHEDULER_OK,
 	SYSTEM_RUNNING,
 	SYSTEM_HALT,
 	SYSTEM_POWER_OFF,
diff --git a/include/linux/profile.h b/include/linux/profile.h
index a0fc32279fc0..5b72082c273e 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -8,10 +8,11 @@
 
 #include <asm/errno.h>
 
-#define CPU_PROFILING	1
-#define SCHED_PROFILING	2
-#define SLEEP_PROFILING	3
-#define KVM_PROFILING	4
+#define CPU_PROFILING		1
+#define SCHED_PROFILING		2
+#define SLEEP_PROFILING		3
+#define KVM_PROFILING		4
+#define PREEMPT_PROFILING	5
 
 struct proc_dir_entry;
 struct pt_regs;
@@ -36,6 +37,8 @@ enum profile_type {
 	PROFILE_MUNMAP
 };
 
+extern int prof_pid;
+
 #ifdef CONFIG_PROFILING
 
 extern int prof_on __read_mostly;
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index c5da74918096..9eb17f95857b 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -169,7 +169,18 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root,
 				unsigned long index, unsigned long max_scan);
 unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
 				unsigned long index, unsigned long max_scan);
+/*
+ * On a mutex based kernel we can freely schedule within the radix code:
+ */
+#ifdef CONFIG_PREEMPT_RT
+static inline int radix_tree_preload(gfp_t gfp_mask)
+{
+	return 0;
+}
+#else
 int radix_tree_preload(gfp_t gfp_mask);
+#endif
+
 void radix_tree_init(void);
 void *radix_tree_tag_set(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag);
@@ -189,7 +200,9 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
 
 static inline void radix_tree_preload_end(void)
 {
+#ifndef CONFIG_PREEMPT_RT
 	preempt_enable();
+#endif
 }
 
 #endif /* _LINUX_RADIX_TREE_H */
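The radix-tree.h hunks above preserve the usual preload idiom for callers on both kernels: radix_tree_preload() pins a per-CPU node reserve with preemption disabled on mainline, while on PREEMPT_RT it is a no-op because the insertion path may simply sleep in the allocator. A minimal sketch of the caller pattern (the function and lock names below are hypothetical, not part of this patch):

/* Hypothetical caller illustrating the preload idiom this hunk keeps
 * working under both !PREEMPT_RT and PREEMPT_RT: */
static DEFINE_SPINLOCK(example_tree_lock);

static int example_insert(struct radix_tree_root *root,
			  unsigned long index, void *item)
{
	int ret;

	ret = radix_tree_preload(GFP_KERNEL);	/* no-op, returns 0 on -rt */
	if (ret)
		return ret;

	spin_lock(&example_tree_lock);		/* a sleeping lock on -rt */
	ret = radix_tree_insert(root, index, item);
	spin_unlock(&example_tree_lock);

	radix_tree_preload_end();	/* preempt_enable() only on !RT */
	return ret;
}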
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 9e3d8af09207..378005121d60 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -50,6 +50,16 @@ extern void smp_send_stop(void);
  */
 extern void smp_send_reschedule(int cpu);
 
+/*
+ * trigger a reschedule on all other CPUs:
+ */
+extern void smp_send_reschedule_allbutself(void);
+
+/*
+ * trigger a reschedule on all other CPUs:
+ */
+extern void smp_send_reschedule_allbutself(void);
+
 /*
  * Prepare machine for booting other CPUs.
  */
@@ -142,6 +152,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info)
 	0;					\
 })
 static inline void smp_send_reschedule(int cpu) { }
+static inline void smp_send_reschedule_allbutself(void) { }
 #define num_booting_cpus()			1
 #define smp_prepare_boot_cpu()			do {} while (0)
 #define smp_call_function_mask(mask, func, info, wait) \
diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index 813be59bf345..0cb3cf9a68ef 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -45,7 +45,7 @@ static inline void cycle_kernel_lock(void)
 #define unlock_kernel()				do { } while(0)
 #define release_kernel_lock(task)		do { } while(0)
 #define cycle_kernel_lock()			do { } while(0)
-#define reacquire_kernel_lock(task)		0
+#define reacquire_kernel_lock(task)		do { } while(0)
 #define kernel_locked()				1
 
 #endif /* CONFIG_LOCK_KERNEL */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 13e1adf55c4c..3f363b7168c4 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -195,6 +195,9 @@ __create_workqueue_key(const char *name, int singlethread,
 #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0)
 #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0)
 
+extern void set_workqueue_prio(struct workqueue_struct *wq, int policy,
+			       int rt_priority, int nice);
+
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
 extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
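The new smp_send_reschedule_allbutself() follows the standard smp.h convention: the SMP build gets a real declaration, the UP build a static inline no-op, so callers need no #ifdefs. A generic sketch of that stub pattern (names here are invented, for illustration only):

/* Generic sketch of the SMP/UP stub pattern used above: */
#ifdef CONFIG_SMP
extern void example_notify_other_cpus(void);
#else
static inline void example_notify_other_cpus(void) { }
#endif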
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 4bb60418779f..f4602f8f35d4 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,14 +1,13 @@
-
 choice
-	prompt "Preemption Model"
-	default PREEMPT_NONE
+	prompt "Preemption Mode"
+	default PREEMPT_RT
 
 config PREEMPT_NONE
 	bool "No Forced Preemption (Server)"
 	help
-	  This is the traditional Linux preemption model, geared towards
+	  This is the traditional Linux preemption model geared towards
 	  throughput. It will still provide good latencies most of the
-	  time, but there are no guarantees and occasional longer delays
+	  time but there are no guarantees and occasional long delays
 	  are possible.
 
 	  Select this option if you are building a kernel for a server or
@@ -21,7 +20,7 @@ config PREEMPT_VOLUNTARY
 	help
 	  This option reduces the latency of the kernel by adding more
 	  "explicit preemption points" to the kernel code. These new
-	  preemption points have been selected to reduce the maximum
+	  preemption points have been selected to minimize the maximum
 	  latency of rescheduling, providing faster application reactions,
 	  at the cost of slightly lower throughput.
 
@@ -33,38 +32,73 @@ config PREEMPT_VOLUNTARY
 
 	  Select this if you are building a kernel for a desktop system.
 
-config PREEMPT
+config PREEMPT_DESKTOP
 	bool "Preemptible Kernel (Low-Latency Desktop)"
 	help
 	  This option reduces the latency of the kernel by making
-	  all kernel code (that is not executing in a critical section)
+	  all kernel code that is not executing in a critical section
 	  preemptible.  This allows reaction to interactive events by
 	  permitting a low priority process to be preempted involuntarily
 	  even if it is in kernel mode executing a system call and would
-	  otherwise not be about to reach a natural preemption point.
-	  This allows applications to run more 'smoothly' even when the
-	  system is under load, at the cost of slightly lower throughput
-	  and a slight runtime overhead to kernel code.
+	  otherwise not be about to reach a preemption point.  This allows
+	  applications to run more 'smoothly' even when the system is
+	  under load, at the cost of slightly lower throughput and a
+	  slight runtime overhead to kernel code.
+
+	  (According to profiles, when this mode is selected then even
+	  during kernel-intense workloads the system is in an immediately
+	  preemptible state more than 50% of the time.)
 
 	  Select this if you are building a kernel for a desktop or
 	  embedded system with latency requirements in the milliseconds
 	  range.
 
+config PREEMPT_RT
+	bool "Complete Preemption (Real-Time)"
+	select PREEMPT_SOFTIRQS
+	select PREEMPT_HARDIRQS
+	select PREEMPT_RCU
+	select RT_MUTEXES
+	help
+	  This option further reduces the scheduling latency of the
+	  kernel by replacing almost every spinlock used by the kernel
+	  with preemptible mutexes and thus making all but the most
+	  critical kernel code involuntarily preemptible. The remaining
+	  handful of lowlevel non-preemptible codepaths are short and
+	  have a deterministic latency of a couple of tens of
+	  microseconds (depending on the hardware).  This also allows
+	  applications to run more 'smoothly' even when the system is
+	  under load, at the cost of lower throughput and runtime
+	  overhead to kernel code.
+
+	  (According to profiles, when this mode is selected then even
+	  during kernel-intense workloads the system is in an immediately
+	  preemptible state more than 95% of the time.)
+
+	  Select this if you are building a kernel for a desktop,
+	  embedded or real-time system with guaranteed latency
+	  requirements of 100 usecs or lower.
+
 endchoice
 
+config PREEMPT
+	bool
+	default y
+	depends on PREEMPT_DESKTOP || PREEMPT_RT
+
 config PREEMPT_SOFTIRQS
 	bool "Thread Softirqs"
 	default n
 #	depends on PREEMPT
 	help
 	  This option reduces the latency of the kernel by 'threading'
-	  soft interrupts. This means that all softirqs will execute
-	  in softirqd's context. While this helps latency, it can also
-	  reduce performance.
+	  soft interrupts. This means that all softirqs will execute
+	  in softirqd's context. While this helps latency, it can also
+	  reduce performance.
 
-	  The threading of softirqs can also be controlled via
-	  /proc/sys/kernel/softirq_preemption runtime flag and the
-	  sofirq-preempt=0/1 boot-time option.
+	  The threading of softirqs can also be controlled via the
+	  /proc/sys/kernel/softirq_preemption runtime flag and the
+	  softirq-preempt=0/1 boot-time option.
 
 	  Say N if you are unsure.
 
@@ -75,15 +109,14 @@ config PREEMPT_HARDIRQS
 	select PREEMPT_SOFTIRQS
 	help
 	  This option reduces the latency of the kernel by 'threading'
-	  hardirqs. This means that all (or selected) hardirqs will run
-	  in their own kernel thread context. While this helps latency,
-	  this feature can also reduce performance.
+	  hardirqs. This means that all (or selected) hardirqs will run
+	  in their own kernel thread context. While this helps latency,
+	  this feature can also reduce performance.
 
-	  The threading of hardirqs can also be controlled via the
-	  /proc/sys/kernel/hardirq_preemption runtime flag and the
-	  hardirq-preempt=0/1 boot-time option. Per-irq threading can
-	  be enabled/disable via the /proc/irq/<IRQ>/<handler>/threaded
-	  runtime flags.
+	  The threading of hardirqs can also be controlled via the
+	  /proc/sys/kernel/hardirq_preemption runtime flag and the
+	  hardirq-preempt=0/1 boot-time option.  Per-irq threading can
+	  be enabled/disabled via the /proc/irq/<IRQ>/<handler>/threaded
+	  runtime flags.
 
 	  Say N if you are unsure.
-
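Note how the choice is restructured: PREEMPT becomes a derived, promptless bool selected by either PREEMPT_DESKTOP or PREEMPT_RT, so existing code conditional on CONFIG_PREEMPT stays active under both models. A hedged illustration (the helper below is made up, not from this patch):

/* Active for PREEMPT_DESKTOP and PREEMPT_RT alike, since both now
 * imply CONFIG_PREEMPT; RT-only code would test CONFIG_PREEMPT_RT. */
#ifdef CONFIG_PREEMPT
static inline void example_cond_preempt(void)
{
	if (need_resched())
		preempt_schedule();
}
#else
static inline void example_cond_preempt(void) { }
#endif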
diff --git a/kernel/exit.c b/kernel/exit.c
index a27d47d3d0c8..4441e623e671 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -67,7 +67,9 @@ static void __unhash_process(struct task_struct *p)
 		detach_pid(p, PIDTYPE_SID);
 
 		list_del_rcu(&p->tasks);
+		preempt_disable();
 		__get_cpu_var(process_counts)--;
+		preempt_enable();
 	}
 	list_del_rcu(&p->thread_group);
 	list_del_init(&p->sibling);
@@ -685,9 +687,11 @@ static void exit_mm(struct task_struct * tsk)
 	task_lock(tsk);
 	tsk->mm = NULL;
 	up_read(&mm->mmap_sem);
+	preempt_disable();	// FIXME
 	enter_lazy_tlb(mm, current);
 	/* We don't want this task to be frozen prematurely */
 	clear_freeze_flag(tsk);
+	preempt_enable();
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
@@ -1009,14 +1013,17 @@ NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
-	preempt_disable();
+again:
+	local_irq_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
-	schedule();
-	BUG();
-	/* Avoid "noreturn function does return".  */
-	for (;;)
-		cpu_relax();	/* For when BUG is null */
+	__schedule();
+	printk(KERN_ERR "BUG: dead task %s:%d back from the grave!\n",
+		current->comm, current->pid);
+	printk(KERN_ERR ".... flags: %08x, count: %d, state: %08lx\n",
+		current->flags, atomic_read(&current->usage), current->state);
+	printk(KERN_ERR ".... trying again ...\n");
+	goto again;
 }
 
 EXPORT_SYMBOL_GPL(do_exit);
@@ -1476,6 +1483,9 @@ static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent,
 				int ptrace, struct task_struct *p)
 {
 	int ret = eligible_child(wo, p);
+
+	BUG_ON(!atomic_read(&p->usage));
+
 	if (!ret)
 		return ret;
diff --git a/kernel/fork.c b/kernel/fork.c
index 7a35caaef037..b013c7ed4b5a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -175,6 +175,16 @@ void __put_task_struct(struct task_struct *tsk)
 		free_task(tsk);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+void __put_task_struct_cb(struct rcu_head *rhp)
+{
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+	__put_task_struct(tsk);
+
+}
+#endif
+
 /*
  * macro override instead of weak attribute alias, to workaround
  * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
@@ -1235,11 +1245,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * to ensure it is on a valid CPU (and if not, just force it back to
 	 * parent's CPU). This avoids a lot of nasty races.
 	 */
+	preempt_disable();
 	p->cpus_allowed = current->cpus_allowed;
 	p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
 	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
 			!cpu_online(task_cpu(p))))
 		set_task_cpu(p, smp_processor_id());
+	preempt_enable();
 
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 61d5aa5eced3..cf40c2d9817f 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -71,7 +71,7 @@ static int notifier_chain_unregister(struct notifier_block **nl,
  *	@returns:	notifier_call_chain returns the value returned by the
  *			last notifier function called.
  */
-static int __kprobes notifier_call_chain(struct notifier_block **nl,
+static int __kprobes notrace notifier_call_chain(struct notifier_block **nl,
 					unsigned long val, void *v,
 					int nr_to_call, int *nr_calls)
 {
@@ -217,7 +217,7 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
 	 * not yet working and interrupts must remain disabled.  At
 	 * such times we must not call down_write().
 	 */
-	if (unlikely(system_state == SYSTEM_BOOTING))
+	if (unlikely(system_state < SYSTEM_RUNNING))
 		return notifier_chain_register(&nh->head, n);
 
 	down_write(&nh->rwsem);
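The new __put_task_struct_cb() is the callback half of the standard call_rcu() deferral pattern; a sketch of how the release side would arm it (the wrapper below is illustrative only — the matching put_task_struct() rework lives elsewhere in the -rt series):

/* Hypothetical release path: on -rt the final free is deferred past
 * an RCU grace period instead of happening in the caller's context. */
static inline void example_put_task_struct(struct task_struct *t)
{
	if (atomic_dec_and_test(&t->usage))
		call_rcu(&t->rcu, __put_task_struct_cb);
}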
diff --git a/kernel/signal.c b/kernel/signal.c
index b1889c55fd53..3dd14285faef 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -916,8 +916,9 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
 
 	trace_sched_signal_send(sig, t);
 
+#ifdef CONFIG_SMP
 	assert_spin_locked(&t->sighand->siglock);
-
+#endif
 	if (!prepare_signal(sig, t, from_ancestor_ns))
 		return 0;
 
@@ -1692,15 +1693,8 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
 	read_lock(&tasklist_lock);
 	if (may_ptrace_stop()) {
 		do_notify_parent_cldstop(current, CLD_TRAPPED);
-		/*
-		 * Don't want to allow preemption here, because
-		 * sys_ptrace() needs this task to be inactive.
-		 *
-		 * XXX: implement read_unlock_no_resched().
-		 */
-		preempt_disable();
 		read_unlock(&tasklist_lock);
-		preempt_enable_and_schedule();
+		schedule();
 	} else {
 		/*
 		 * By the time we got the lock, our tracer went away.
diff --git a/kernel/softirq.c b/kernel/softirq.c
index e53eb38bab84..87821717bb3c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -20,6 +20,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/delay.h>
 #include <linux/mm.h>
 #include <linux/notifier.h>
 #include <linux/percpu.h>
@@ -106,6 +107,8 @@ static void trigger_softirqs(void)
 	}
 }
 
+#ifndef CONFIG_PREEMPT_RT
+
 /*
  * This one is for softirq.c-internal use,
  * where hardirqs are disabled legitimately:
@@ -207,6 +210,8 @@ void local_bh_enable_ip(unsigned long ip)
 }
 EXPORT_SYMBOL(local_bh_enable_ip);
 
+#endif
+
 /*
  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
  * and we fall back to softirqd after that.
@@ -606,7 +611,7 @@ void tasklet_kill(struct tasklet_struct *t)
 
 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
 		do {
-			yield();
+			msleep(1);
 		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
 	}
 	tasklet_unlock_wait(t);
@@ -1064,6 +1069,11 @@ int softirq_preemption = 1;
 
 EXPORT_SYMBOL(softirq_preemption);
 
+/*
+ * Real-Time Preemption depends on softirq threading:
+ */
+#ifndef CONFIG_PREEMPT_RT
+
 static int __init softirq_preempt_setup (char *str)
 {
 	if (!strncmp(str, "off", 3))
@@ -1077,7 +1087,7 @@ static int __init softirq_preempt_setup (char *str)
 }
 
 __setup("softirq-preempt=", softirq_preempt_setup);
-
+#endif
 #endif
 
 #ifdef CONFIG_SMP
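The tasklet_kill() change swaps a yield() busy-loop for msleep(1): with threaded softirqs, a SCHED_FIFO caller could otherwise spin without ever letting the softirq thread clear TASKLET_STATE_SCHED. The generic sleep-and-poll idiom, with hypothetical names:

#include <linux/delay.h>

/* Sleep rather than yield while polling: progress is guaranteed even
 * when the waiter outranks the thread that must clear the bit. */
static void example_wait_bit_clear(unsigned long *word, int bit)
{
	while (test_bit(bit, word))
		msleep(1);
}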
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0668795d8818..d3f9b451f289 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/syscalls.h>
 #include <linux/kthread.h>
 #include <linux/hardirq.h>
 #include <linux/mempolicy.h>
@@ -36,6 +37,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
 
+#include <asm/uaccess.h>
+
 /*
  * The per-CPU workqueue (if single thread, we always use the first
  * possible cpu).
@@ -159,13 +162,14 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
  *
  * We queue the work to the CPU on which it was submitted, but if the CPU dies
  * it can be processed by another CPU.
+ *
+ * Especially no such guarantee on PREEMPT_RT.
  */
 int queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
-	int ret;
+	int ret = 0, cpu = raw_smp_processor_id();
 
-	ret = queue_work_on(get_cpu(), wq, work);
-	put_cpu();
+	ret = queue_work_on(cpu, wq, work);
 
 	return ret;
 }
@@ -883,6 +887,49 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 	cwq->thread = NULL;
 }
 
+void set_workqueue_thread_prio(struct workqueue_struct *wq, int cpu,
+			       int policy, int rt_priority, int nice)
+{
+	struct sched_param param = { .sched_priority = rt_priority };
+	struct cpu_workqueue_struct *cwq;
+	mm_segment_t oldfs = get_fs();
+	struct task_struct *p;
+	unsigned long flags;
+	int ret;
+
+	cwq = per_cpu_ptr(wq->cpu_wq, cpu);
+	spin_lock_irqsave(&cwq->lock, flags);
+	p = cwq->thread;
+	spin_unlock_irqrestore(&cwq->lock, flags);
+
+	set_user_nice(p, nice);
+
+	set_fs(KERNEL_DS);
+	ret = sys_sched_setscheduler(p->pid, policy, &param);
+	set_fs(oldfs);
+
+	WARN_ON(ret);
+}
+
+void set_workqueue_prio(struct workqueue_struct *wq, int policy,
+			int rt_priority, int nice)
+{
+	int cpu;
+
+	/* We don't need the distraction of CPUs appearing and vanishing. */
+	get_online_cpus();
+	spin_lock(&workqueue_lock);
+	if (is_wq_single_threaded(wq))
+		set_workqueue_thread_prio(wq, 0, policy, rt_priority, nice);
+	else {
+		for_each_online_cpu(cpu)
+			set_workqueue_thread_prio(wq, cpu, policy,
+						  rt_priority, nice);
+	}
+	spin_unlock(&workqueue_lock);
+	put_online_cpus();
+}
+
 /**
  * destroy_workqueue - safely terminate a workqueue
  * @wq: target workqueue
@@ -1015,4 +1062,5 @@ void __init init_workqueues(void)
 	hotcpu_notifier(workqueue_cpu_callback, 0);
 	keventd_wq = create_workqueue("events");
 	BUG_ON(!keventd_wq);
+	set_workqueue_prio(keventd_wq, SCHED_FIFO, 1, -20);
 }
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 12327b2bb785..8a2ddd3f0922 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -397,6 +397,8 @@ config DEBUG_RT_MUTEXES
 	help
 	 This allows rt mutex semantics violations and rt mutex related
 	 deadlocks (lockups) to be detected and reported automatically.
+	 When realtime preemption is enabled this includes spinlocks,
+	 rwlocks, mutexes and (rw)semaphores.
 
 config DEBUG_PI_LIST
 	bool
@@ -420,7 +422,7 @@ config DEBUG_SPINLOCK
 
 config DEBUG_MUTEXES
 	bool "Mutex debugging: basic checks"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && !PREEMPT_RT
 	help
 	 This feature allows mutex semantics violations to be detected and
 	 reported.
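For the set_workqueue_prio() API added in kernel/workqueue.c above, a hedged usage sketch (the workqueue name and priority are invented): a subsystem with latency-critical work items can raise its worker threads to a real-time policy the same way init_workqueues() now does for keventd:

/* Hypothetical subsystem init raising its workqueue to SCHED_FIFO. */
static struct workqueue_struct *example_wq;

static int __init example_subsys_init(void)
{
	example_wq = create_workqueue("example");
	if (!example_wq)
		return -ENOMEM;
	/* (policy, rt_priority, nice) - priority 42 is arbitrary here */
	set_workqueue_prio(example_wq, SCHED_FIFO, 42, -20);
	return 0;
}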
diff --git a/lib/Makefile b/lib/Makefile
index b6d1857bbf08..0d2ee155f4c9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -34,7 +34,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
-lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+obj-$(CONFIG_PREEMPT_RT) += plist.o
+obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 54625bec6fb9..709c432b8a92 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -35,6 +35,8 @@ DEFINE_SEMAPHORE(kernel_sem);
  * about recursion, both due to the down() and due to the enabling of
  * preemption. schedule() will re-check the preemption flag after
  * reacquiring the semaphore.
+ *
+ * Called with interrupts disabled.
  */
 int __lockfunc __reacquire_kernel_lock(void)
 {
@@ -67,11 +69,15 @@ void __lockfunc lock_kernel(void)
 	struct task_struct *task = current;
 	int depth = task->lock_depth + 1;
 
-	if (likely(!depth))
+	if (likely(!depth)) {
 		/*
 		 * No recursion worries - we set up lock_depth _after_
 		 */
 		down(&kernel_sem);
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+		current->last_kernel_lock = __builtin_return_address(0);
+#endif
+	}
 
 	task->lock_depth = depth;
 }
@@ -82,8 +88,12 @@ void __lockfunc unlock_kernel(void)
 
 	BUG_ON(task->lock_depth < 0);
 
-	if (likely(--task->lock_depth < 0))
+	if (likely(--task->lock_depth < 0)) {
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+		current->last_kernel_lock = NULL;
+#endif
 		up(&kernel_sem);
+	}
 }
 
 EXPORT_SYMBOL(lock_kernel);
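For context on the lock_kernel()/unlock_kernel() hunks: lock_depth implements a per-task recursion count (-1 means not held), so only the outermost acquisition and the final release touch the semaphore. A stripped-down sketch of that pattern, with invented names:

/* Generic recursive-lock sketch; not the actual BKL code. */
struct example_owner {
	int lock_depth;		/* -1 == not held */
};

static void example_lock(struct example_owner *o, struct semaphore *sem)
{
	if (o->lock_depth < 0)		/* outermost acquisition only */
		down(sem);
	o->lock_depth++;
}

static void example_unlock(struct example_owner *o, struct semaphore *sem)
{
	if (--o->lock_depth < 0)	/* final release only */
		up(sem);
}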
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 619313ed6c46..65e7eab8498e 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -158,7 +158,7 @@ static void init_shared_classes(void)
 	local_bh_disable();		\
 	local_irq_disable();		\
 	lockdep_softirq_enter();	\
-	WARN_ON(!in_softirq());
+	/* FIXME: preemptible softirqs. WARN_ON(!in_softirq()); */
 
 #define SOFTIRQ_EXIT()			\
 	lockdep_softirq_exit();		\
@@ -550,6 +550,11 @@ GENERATE_TESTCASE(init_held_rsem)
 #undef E
 
 /*
+ * FIXME: turn these into raw-spinlock tests on -rt
+ */
+#ifndef CONFIG_PREEMPT_RT
+
+/*
  * locking an irq-safe lock with irqs enabled:
  */
 #define E1()				\
@@ -890,6 +895,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 #include "locking-selftest-softirq.h"
 // GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft)
 
+#endif /* !CONFIG_PREEMPT_RT */
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define I_SPINLOCK(x)	lockdep_reset_lock(&lock_##x.dep_map)
 # define I_RWLOCK(x)	lockdep_reset_lock(&rwlock_##x.dep_map)
@@ -998,7 +1005,7 @@ static inline void print_testname(const char *testname)
 
 #define DO_TESTCASE_1(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
-	dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
 	printk("\n");
 
 #define DO_TESTCASE_1B(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
 	dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK);		\
 	printk("\n");
 
-#define DO_TESTCASE_3(desc, name, nr)				\
-	print_testname(desc"/"#nr);				\
-	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN);	\
-	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
+#define DO_TESTCASE_3(desc, name, nr)			\
+	print_testname(desc"/"#nr);			\
+	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN);	\
+	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
 	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
 	printk("\n");
 
-#define DO_TESTCASE_3RW(desc, name, nr)				\
-	print_testname(desc"/"#nr);				\
+#define DO_TESTCASE_3RW(desc, name, nr)			\
+	print_testname(desc"/"#nr);			\
 	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
-	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
+	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
 	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
 	printk("\n");
 
@@ -1047,7 +1054,7 @@ static inline void print_testname(const char *testname)
 	print_testname(desc);					\
 	dotest(name##_spin, FAILURE, LOCKTYPE_SPIN);		\
 	dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK);		\
-	dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK);	\
 	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
@@ -1179,6 +1186,7 @@ void locking_selftest(void)
 	/*
 	 * irq-context testcases:
 	 */
+#ifndef CONFIG_PREEMPT_RT
 	DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
 	DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
 	DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
@@ -1188,6 +1196,7 @@ void locking_selftest(void)
 
 	DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
 //	DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
+#endif
 
 	if (unexpected_testcase_failures) {
 		printk("-----------------------------------------------------------------\n");
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 23abbd93cae1..e209012e5d31 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -157,12 +157,14 @@ radix_tree_node_alloc(struct radix_tree_root *root)
 		 * succeed in getting a node here (and never reach
 		 * kmem_cache_alloc)
 		 */
+		rtp = &get_cpu_var(radix_tree_preloads);
 		rtp = &__get_cpu_var(radix_tree_preloads);
 		if (rtp->nr) {
 			ret = rtp->nodes[rtp->nr - 1];
 			rtp->nodes[rtp->nr - 1] = NULL;
 			rtp->nr--;
 		}
+		put_cpu_var(radix_tree_preloads);
 	}
 	if (ret == NULL)
 		ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
@@ -195,6 +197,8 @@ radix_tree_node_free(struct radix_tree_node *node)
 	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
 }
 
+#ifndef CONFIG_PREEMPT_RT
+
 /*
  * Load up this CPU's radix_tree_node buffer with sufficient objects to
  * ensure that the addition of a single element in the tree cannot fail.  On
@@ -227,6 +231,8 @@ out:
 }
 EXPORT_SYMBOL(radix_tree_preload);
 
+#endif
+
 /*
  * Return the maximum key which can be store into a
  * radix tree with height HEIGHT.
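The radix_tree_node_alloc() hunk wraps the per-CPU access in get_cpu_var()/put_cpu_var(), the standard idiom for keeping a per-CPU pointer stable across the access window. A generic sketch of that idiom with made-up names:

/* Generic per-CPU access sketch: get_cpu_var() disables preemption
 * for the access window; put_cpu_var() re-enables it. */
struct example_pcpu {
	int nr;
};
static DEFINE_PER_CPU(struct example_pcpu, example_counts);

static void example_count_event(void)
{
	struct example_pcpu *p = &get_cpu_var(example_counts);

	p->nr++;
	put_cpu_var(example_counts);
}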