diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Kconfig.preempt | 87 | ||||
| -rw-r--r-- | kernel/exit.c | 22 | ||||
| -rw-r--r-- | kernel/fork.c | 12 | ||||
| -rw-r--r-- | kernel/notifier.c | 4 | ||||
| -rw-r--r-- | kernel/signal.c | 12 | ||||
| -rw-r--r-- | kernel/softirq.c | 14 | ||||
| -rw-r--r-- | kernel/workqueue.c | 54 |
7 files changed, 156 insertions, 49 deletions
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 4bb60418779f..f4602f8f35d4 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -1,14 +1,13 @@ - choice - prompt "Preemption Model" - default PREEMPT_NONE + prompt "Preemption Mode" + default PREEMPT_RT config PREEMPT_NONE bool "No Forced Preemption (Server)" help - This is the traditional Linux preemption model, geared towards + This is the traditional Linux preemption model geared towards throughput. It will still provide good latencies most of the - time, but there are no guarantees and occasional longer delays + time but there are no guarantees and occasional long delays are possible. Select this option if you are building a kernel for a server or @@ -21,7 +20,7 @@ config PREEMPT_VOLUNTARY help This option reduces the latency of the kernel by adding more "explicit preemption points" to the kernel code. These new - preemption points have been selected to reduce the maximum + preemption points have been selected to minimize the maximum latency of rescheduling, providing faster application reactions, at the cost of slightly lower throughput. @@ -33,38 +32,73 @@ config PREEMPT_VOLUNTARY Select this if you are building a kernel for a desktop system. -config PREEMPT +config PREEMPT_DESKTOP bool "Preemptible Kernel (Low-Latency Desktop)" help This option reduces the latency of the kernel by making - all kernel code (that is not executing in a critical section) + all kernel code that is not executing in a critical section preemptible. This allows reaction to interactive events by permitting a low priority process to be preempted involuntarily even if it is in kernel mode executing a system call and would - otherwise not be about to reach a natural preemption point. - This allows applications to run more 'smoothly' even when the - system is under load, at the cost of slightly lower throughput - and a slight runtime overhead to kernel code. + otherwise not about to reach a preemption point. This allows + applications to run more 'smoothly' even when the system is + under load, at the cost of slighly lower throughput and a + slight runtime overhead to kernel code. + + (According to profiles, when this mode is selected then even + during kernel-intense workloads the system is in an immediately + preemptible state more than 50% of the time.) Select this if you are building a kernel for a desktop or embedded system with latency requirements in the milliseconds range. +config PREEMPT_RT + bool "Complete Preemption (Real-Time)" + select PREEMPT_SOFTIRQS + select PREEMPT_HARDIRQS + select PREEMPT_RCU + select RT_MUTEXES + help + This option further reduces the scheduling latency of the + kernel by replacing almost every spinlock used by the kernel + with preemptible mutexes and thus making all but the most + critical kernel code involuntarily preemptible. The remaining + handful of lowlevel non-preemptible codepaths are short and + have a deterministic latency of a couple of tens of + microseconds (depending on the hardware). This also allows + applications to run more 'smoothly' even when the system is + under load, at the cost of lower throughput and runtime + overhead to kernel code. + + (According to profiles, when this mode is selected then even + during kernel-intense workloads the system is in an immediately + preemptible state more than 95% of the time.) + + Select this if you are building a kernel for a desktop, + embedded or real-time system with guaranteed latency + requirements of 100 usecs or lower. + endchoice +config PREEMPT + bool + default y + depends on PREEMPT_DESKTOP || PREEMPT_RT + config PREEMPT_SOFTIRQS bool "Thread Softirqs" default n # depends on PREEMPT help This option reduces the latency of the kernel by 'threading' - soft interrupts. This means that all softirqs will execute - in softirqd's context. While this helps latency, it can also - reduce performance. + soft interrupts. This means that all softirqs will execute + in softirqd's context. While this helps latency, it can also + reduce performance. - The threading of softirqs can also be controlled via - /proc/sys/kernel/softirq_preemption runtime flag and the - sofirq-preempt=0/1 boot-time option. + The threading of softirqs can also be controlled via + /proc/sys/kernel/softirq_preemption runtime flag and the + sofirq-preempt=0/1 boot-time option. Say N if you are unsure. @@ -75,15 +109,14 @@ config PREEMPT_HARDIRQS select PREEMPT_SOFTIRQS help This option reduces the latency of the kernel by 'threading' - hardirqs. This means that all (or selected) hardirqs will run - in their own kernel thread context. While this helps latency, - this feature can also reduce performance. + hardirqs. This means that all (or selected) hardirqs will run + in their own kernel thread context. While this helps latency, + this feature can also reduce performance. - The threading of hardirqs can also be controlled via the - /proc/sys/kernel/hardirq_preemption runtime flag and the - hardirq-preempt=0/1 boot-time option. Per-irq threading can - be enabled/disable via the /proc/irq/<IRQ>/<handler>/threaded - runtime flags. + The threading of hardirqs can also be controlled via the + /proc/sys/kernel/hardirq_preemption runtime flag and the + hardirq-preempt=0/1 boot-time option. Per-irq threading can + be enabled/disable via the /proc/irq/<IRQ>/<handler>/threaded + runtime flags. Say N if you are unsure. - diff --git a/kernel/exit.c b/kernel/exit.c index a27d47d3d0c8..4441e623e671 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -67,7 +67,9 @@ static void __unhash_process(struct task_struct *p) detach_pid(p, PIDTYPE_SID); list_del_rcu(&p->tasks); + preempt_disable(); __get_cpu_var(process_counts)--; + preempt_enable(); } list_del_rcu(&p->thread_group); list_del_init(&p->sibling); @@ -685,9 +687,11 @@ static void exit_mm(struct task_struct * tsk) task_lock(tsk); tsk->mm = NULL; up_read(&mm->mmap_sem); + preempt_disable(); // FIXME enter_lazy_tlb(mm, current); /* We don't want this task to be frozen prematurely */ clear_freeze_flag(tsk); + preempt_enable(); task_unlock(tsk); mm_update_next_owner(mm); mmput(mm); @@ -1009,14 +1013,17 @@ NORET_TYPE void do_exit(long code) if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); - preempt_disable(); +again: + local_irq_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; - schedule(); - BUG(); - /* Avoid "noreturn function does return". */ - for (;;) - cpu_relax(); /* For when BUG is null */ + __schedule(); + printk(KERN_ERR "BUG: dead task %s:%d back from the grave!\n", + current->comm, current->pid); + printk(KERN_ERR ".... flags: %08x, count: %d, state: %08lx\n", + current->flags, atomic_read(¤t->usage), current->state); + printk(KERN_ERR ".... trying again ...\n"); + goto again; } EXPORT_SYMBOL_GPL(do_exit); @@ -1476,6 +1483,9 @@ static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent, int ptrace, struct task_struct *p) { int ret = eligible_child(wo, p); + + BUG_ON(!atomic_read(&p->usage)); + if (!ret) return ret; diff --git a/kernel/fork.c b/kernel/fork.c index 7a35caaef037..b013c7ed4b5a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -175,6 +175,16 @@ void __put_task_struct(struct task_struct *tsk) free_task(tsk); } +#ifdef CONFIG_PREEMPT_RT +void __put_task_struct_cb(struct rcu_head *rhp) +{ + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); + + __put_task_struct(tsk); + +} +#endif + /* * macro override instead of weak attribute alias, to workaround * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions. @@ -1235,11 +1245,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids alot of nasty races. */ + preempt_disable(); p->cpus_allowed = current->cpus_allowed; p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); + preempt_enable(); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { diff --git a/kernel/notifier.c b/kernel/notifier.c index 61d5aa5eced3..cf40c2d9817f 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -71,7 +71,7 @@ static int notifier_chain_unregister(struct notifier_block **nl, * @returns: notifier_call_chain returns the value returned by the * last notifier function called. */ -static int __kprobes notifier_call_chain(struct notifier_block **nl, +static int __kprobes notrace notifier_call_chain(struct notifier_block **nl, unsigned long val, void *v, int nr_to_call, int *nr_calls) { @@ -217,7 +217,7 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, * not yet working and interrupts must remain disabled. At * such times we must not call down_write(). */ - if (unlikely(system_state == SYSTEM_BOOTING)) + if (unlikely(system_state < SYSTEM_RUNNING)) return notifier_chain_register(&nh->head, n); down_write(&nh->rwsem); diff --git a/kernel/signal.c b/kernel/signal.c index b1889c55fd53..3dd14285faef 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -916,8 +916,9 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, trace_sched_signal_send(sig, t); +#ifdef CONFIG_SMP assert_spin_locked(&t->sighand->siglock); - +#endif if (!prepare_signal(sig, t, from_ancestor_ns)) return 0; @@ -1692,15 +1693,8 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) read_lock(&tasklist_lock); if (may_ptrace_stop()) { do_notify_parent_cldstop(current, CLD_TRAPPED); - /* - * Don't want to allow preemption here, because - * sys_ptrace() needs this task to be inactive. - * - * XXX: implement read_unlock_no_resched(). - */ - preempt_disable(); read_unlock(&tasklist_lock); - preempt_enable_and_schedule(); + schedule(); } else { /* * By the time we got the lock, our tracer went away. diff --git a/kernel/softirq.c b/kernel/softirq.c index e53eb38bab84..87821717bb3c 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -20,6 +20,7 @@ #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/init.h> +#include <linux/delay.h> #include <linux/mm.h> #include <linux/notifier.h> #include <linux/percpu.h> @@ -106,6 +107,8 @@ static void trigger_softirqs(void) } } +#ifndef CONFIG_PREEMPT_RT + /* * This one is for softirq.c-internal use, * where hardirqs are disabled legitimately: @@ -207,6 +210,8 @@ void local_bh_enable_ip(unsigned long ip) } EXPORT_SYMBOL(local_bh_enable_ip); +#endif + /* * We restart softirq processing MAX_SOFTIRQ_RESTART times, * and we fall back to softirqd after that. @@ -606,7 +611,7 @@ void tasklet_kill(struct tasklet_struct *t) while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { do { - yield(); + msleep(1); } while (test_bit(TASKLET_STATE_SCHED, &t->state)); } tasklet_unlock_wait(t); @@ -1064,6 +1069,11 @@ int softirq_preemption = 1; EXPORT_SYMBOL(softirq_preemption); +/* + * Real-Time Preemption depends on softirq threading: + */ +#ifndef CONFIG_PREEMPT_RT + static int __init softirq_preempt_setup (char *str) { if (!strncmp(str, "off", 3)) @@ -1077,7 +1087,7 @@ static int __init softirq_preempt_setup (char *str) } __setup("softirq-preempt=", softirq_preempt_setup); - +#endif #endif #ifdef CONFIG_SMP diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0668795d8818..d3f9b451f289 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/syscalls.h> #include <linux/kthread.h> #include <linux/hardirq.h> #include <linux/mempolicy.h> @@ -36,6 +37,8 @@ #define CREATE_TRACE_POINTS #include <trace/events/workqueue.h> +#include <asm/uaccess.h> + /* * The per-CPU workqueue (if single thread, we always use the first * possible cpu). @@ -159,13 +162,14 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, * * We queue the work to the CPU on which it was submitted, but if the CPU dies * it can be processed by another CPU. + * + * Especially no such guarantee on PREEMPT_RT. */ int queue_work(struct workqueue_struct *wq, struct work_struct *work) { - int ret; + int ret = 0, cpu = raw_smp_processor_id(); - ret = queue_work_on(get_cpu(), wq, work); - put_cpu(); + ret = queue_work_on(cpu, wq, work); return ret; } @@ -883,6 +887,49 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) cwq->thread = NULL; } +void set_workqueue_thread_prio(struct workqueue_struct *wq, int cpu, + int policy, int rt_priority, int nice) +{ + struct sched_param param = { .sched_priority = rt_priority }; + struct cpu_workqueue_struct *cwq; + mm_segment_t oldfs = get_fs(); + struct task_struct *p; + unsigned long flags; + int ret; + + cwq = per_cpu_ptr(wq->cpu_wq, cpu); + spin_lock_irqsave(&cwq->lock, flags); + p = cwq->thread; + spin_unlock_irqrestore(&cwq->lock, flags); + + set_user_nice(p, nice); + + set_fs(KERNEL_DS); + ret = sys_sched_setscheduler(p->pid, policy, ¶m); + set_fs(oldfs); + + WARN_ON(ret); +} + +void set_workqueue_prio(struct workqueue_struct *wq, int policy, + int rt_priority, int nice) +{ + int cpu; + + /* We don't need the distraction of CPUs appearing and vanishing. */ + get_online_cpus(); + spin_lock(&workqueue_lock); + if (is_wq_single_threaded(wq)) + set_workqueue_thread_prio(wq, 0, policy, rt_priority, nice); + else { + for_each_online_cpu(cpu) + set_workqueue_thread_prio(wq, cpu, policy, + rt_priority, nice); + } + spin_unlock(&workqueue_lock); + put_online_cpus(); +} + /** * destroy_workqueue - safely terminate a workqueue * @wq: target workqueue @@ -1015,4 +1062,5 @@ void __init init_workqueues(void) hotcpu_notifier(workqueue_cpu_callback, 0); keventd_wq = create_workqueue("events"); BUG_ON(!keventd_wq); + set_workqueue_prio(keventd_wq, SCHED_FIFO, 1, -20); } |
