author     Thomas Gleixner <tglx@linutronix.de>    2009-07-28 14:31:31 +0200
committer  Thomas Gleixner <tglx@linutronix.de>    2009-07-28 14:31:31 +0200
commit     54b371e002ee69a47e051e02702bf4eb1a360bab (patch)
tree       123385fd61821a1bf918697b6ef92cb4359a6b1f
parent     b49f8d26493ee0c1f016115ad25912571c284411 (diff)
parent     f9d9cfe4fa444b93238d7c3cf07af2dd85b151e8 (diff)
download   lwn-54b371e002ee69a47e051e02702bf4eb1a360bab.tar.gz
           lwn-54b371e002ee69a47e051e02702bf4eb1a360bab.zip
Merge branch 'rt/core' into rt/base
Conflicts:
        kernel/spinlock.c
        lib/kernel_lock.c

Manual fixup of kernel/rwlocks.c

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--  include/asm-generic/cmpxchg-local.h  |   8
-rw-r--r--  include/linux/interrupt.h            |   1
-rw-r--r--  include/linux/preempt.h              |  18
-rw-r--r--  include/linux/sched.h                |   9
-rw-r--r--  include/linux/signal.h               |   1
-rw-r--r--  include/linux/spinlock_api_up.h      |   3
-rw-r--r--  init/main.c                          |   3
-rw-r--r--  kernel/exit.c                        |   2
-rw-r--r--  kernel/fork.c                        |   2
-rw-r--r--  kernel/futex.c                       |  66
-rw-r--r--  kernel/mutex.c                       |   3
-rw-r--r--  kernel/posix-timers.c                |   4
-rw-r--r--  kernel/rtmutex-debug.h               |  12
-rw-r--r--  kernel/rwlock.c                      |   4
-rw-r--r--  kernel/sched.c                       |  37
-rw-r--r--  kernel/signal.c                      |  96
-rw-r--r--  kernel/softirq.c                     |   9
-rw-r--r--  kernel/spinlock.c                    |   4
-rw-r--r--  kernel/stop_machine.c                |  26
19 files changed, 246 insertions, 62 deletions
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
index b2ba2fc8829a..9793123ea1c9 100644
--- a/include/asm-generic/cmpxchg-local.h
+++ b/include/asm-generic/cmpxchg-local.h
@@ -20,7 +20,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
if (size == 8 && sizeof(unsigned long) != 8)
wrong_size_cmpxchg(ptr);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (size) {
case 1: prev = *(u8 *)ptr;
if (prev == old)
@@ -41,7 +41,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
default:
wrong_size_cmpxchg(ptr);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return prev;
}
@@ -54,11 +54,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr,
u64 prev;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
prev = *(u64 *)ptr;
if (prev == old)
*(u64 *)ptr = new;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return prev;
}
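The hunk above only trades local_irq_save()/local_irq_restore() for their raw_ counterparts, keeping the generic compare-and-exchange emulation inside a real interrupt-off section, which matters on -rt where the plain variants may no longer disable hardware interrupts. A rough user-space sketch of the underlying pattern follows (a pthread spinlock stands in for "interrupts off"; every name below is illustrative, not kernel API):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_spinlock_t emu_lock;      /* stand-in for "hard interrupts off" */

static unsigned long cmpxchg_local_emulated(volatile void *ptr, unsigned long old,
                                            unsigned long new, int size)
{
        unsigned long prev = 0;

        pthread_spin_lock(&emu_lock);            /* ~ raw_local_irq_save()    */
        switch (size) {
        case 1:
                prev = *(volatile uint8_t *)ptr;
                if (prev == old)
                        *(volatile uint8_t *)ptr = (uint8_t)new;
                break;
        case 4:
                prev = *(volatile uint32_t *)ptr;
                if (prev == old)
                        *(volatile uint32_t *)ptr = (uint32_t)new;
                break;
        }
        pthread_spin_unlock(&emu_lock);          /* ~ raw_local_irq_restore() */

        return prev;
}

int main(void)
{
        uint32_t v = 5;

        pthread_spin_init(&emu_lock, PTHREAD_PROCESS_PRIVATE);
        printf("prev=%lu, v=%u\n", cmpxchg_local_emulated(&v, 5, 7, sizeof(v)), v);
        pthread_spin_destroy(&emu_lock);
        return 0;
}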
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index e2ecbe67fe94..c65d2ed54970 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -377,7 +377,6 @@ extern void softirq_init(void);
#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
-extern void wakeup_softirqd(void);
/* This is the worklist that queues up per-cpu softirq work.
*
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 8d97e1d97142..5cb6d2028383 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -33,12 +33,24 @@ do { \
barrier(); \
} while (0)
-#define preempt_enable_no_resched() \
+#define __preempt_enable_no_resched() \
do { \
barrier(); \
dec_preempt_count(); \
} while (0)
+#ifdef CONFIG_DEBUG_PREEMPT
+extern void notrace preempt_enable_no_resched(void);
+#else
+# define preempt_enable_no_resched() __preempt_enable_no_resched()
+#endif
+
+#define preempt_enable_and_schedule() \
+do { \
+ __preempt_enable_no_resched(); \
+ schedule(); \
+} while (0)
+
#define preempt_check_resched() \
do { \
if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
@@ -47,7 +59,7 @@ do { \
#define preempt_enable() \
do { \
- preempt_enable_no_resched(); \
+ __preempt_enable_no_resched(); \
barrier(); \
preempt_check_resched(); \
} while (0)
@@ -84,6 +96,8 @@ do { \
#define preempt_disable() do { } while (0)
#define preempt_enable_no_resched() do { } while (0)
+#define __preempt_enable_no_resched() do { } while (0)
+#define preempt_enable_and_schedule() schedule()
#define preempt_enable() do { } while (0)
#define preempt_check_resched() do { } while (0)
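These hunks split the old preempt_enable_no_resched() into a plain __ variant plus, under CONFIG_DEBUG_PREEMPT, an out-of-line version that can warn when the preempt count drops to zero without a resched check (its body appears in the kernel/sched.c hunk further down), and they add preempt_enable_and_schedule() for the common "drop the count, then schedule()" sequence. A toy user-space mock of how the pieces compose (the counter, schedule() and the warning here are stand-ins, not the kernel implementation):

#include <stdio.h>

static int preempt_count;

#define barrier()                      __asm__ __volatile__("" ::: "memory")
#define preempt_disable()              do { preempt_count++; barrier(); } while (0)
#define __preempt_enable_no_resched()  do { barrier(); preempt_count--; } while (0)
#define preempt_enable_and_schedule()  do { __preempt_enable_no_resched(); schedule(); } while (0)

static void schedule(void)
{
        puts("schedule()");
}

/* Debug flavour: complain when the count hits zero without a resched check. */
static void preempt_enable_no_resched_dbg(void)
{
        __preempt_enable_no_resched();
        if (!preempt_count)
                fprintf(stderr, "BUG: task might have lost a preemption check!\n");
}

int main(void)
{
        preempt_disable();
        preempt_enable_and_schedule();    /* replaces the open-coded pair    */

        preempt_disable();
        preempt_enable_no_resched_dbg();  /* emits the debug warning         */
        return 0;
}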
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9757414fbb12..5dd3fbd1e09b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1317,6 +1317,7 @@ struct task_struct {
/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;
+ struct sigqueue *sigqueue_cache;
sigset_t blocked, real_blocked;
sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
@@ -1423,6 +1424,7 @@ struct task_struct {
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
+ struct task_struct *futex_wakeup;
#endif
#ifdef CONFIG_PERF_COUNTERS
struct perf_counter_context *perf_counter_ctxp;
@@ -1844,9 +1846,14 @@ int sched_rt_handler(struct ctl_table *table, int write,
extern unsigned int sysctl_sched_compat_yield;
+extern void task_setprio(struct task_struct *p, int prio);
+
#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
+static inline void rt_mutex_setprio(struct task_struct *p, int prio)
+{
+ task_setprio(p, prio);
+}
extern void rt_mutex_adjust_pi(struct task_struct *p);
#else
static inline int rt_mutex_getprio(struct task_struct *p)
diff --git a/include/linux/signal.h b/include/linux/signal.h
index c7552836bd95..46b460031542 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -225,6 +225,7 @@ static inline void init_sigpending(struct sigpending *sig)
}
extern void flush_sigqueue(struct sigpending *queue);
+extern void flush_task_sigqueue(struct task_struct *tsk);
/* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
static inline int valid_signal(unsigned long sig)
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index d90b0702a0b8..208e47438b33 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -40,7 +40,8 @@
do { preempt_enable(); __release(lock); (void)(lock); } while (0)
#define __UNLOCK_BH(lock) \
- do { preempt_enable_no_resched(); local_bh_enable(); __release(lock); (void)(lock); } while (0)
+ do { __preempt_enable_no_resched(); local_bh_enable(); __release(lock); \
+ (void)(lock); } while (0)
#define __UNLOCK_IRQ(lock) \
do { local_irq_enable(); __UNLOCK(lock); } while (0)
diff --git a/init/main.c b/init/main.c
index 2c5ade79eb81..252f448d8b9d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -463,8 +463,7 @@ static noinline void __init_refok rest_init(void)
*/
init_idle_bootup_task(current);
rcu_scheduler_starting();
- preempt_enable_no_resched();
- schedule();
+ preempt_enable_and_schedule();
preempt_disable();
/* Call into cpu_idle with preempt disabled */
diff --git a/kernel/exit.c b/kernel/exit.c
index aed4d2e8a66a..a27d47d3d0c8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -130,7 +130,7 @@ static void __exit_signal(struct task_struct *tsk)
* Do this under ->siglock, we can race with another thread
* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
*/
- flush_sigqueue(&tsk->pending);
+ flush_task_sigqueue(tsk);
tsk->signal = NULL;
tsk->sighand = NULL;
diff --git a/kernel/fork.c b/kernel/fork.c
index 714976f65c72..436edf961de6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1029,6 +1029,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
spin_lock_init(&p->alloc_lock);
init_sigpending(&p->pending);
+ p->sigqueue_cache = NULL;
p->utime = cputime_zero;
p->stime = cputime_zero;
@@ -1159,6 +1160,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#endif
INIT_LIST_HEAD(&p->pi_state_list);
p->pi_state_cache = NULL;
+ p->futex_wakeup = NULL;
#endif
/*
* sigaltstack should be cleared when sharing the same VM
diff --git a/kernel/futex.c b/kernel/futex.c
index 974c24b985e1..abce822f3412 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -713,7 +713,7 @@ retry:
* The hash bucket lock must be held when this is called.
* Afterwards, the futex_q must not be accessed.
*/
-static void wake_futex(struct futex_q *q)
+static void wake_futex(struct task_struct **wake_list, struct futex_q *q)
{
struct task_struct *p = q->task;
@@ -736,8 +736,51 @@ static void wake_futex(struct futex_q *q)
smp_wmb();
q->lock_ptr = NULL;
- wake_up_state(p, TASK_NORMAL);
- put_task_struct(p);
+ /*
+ * Atomically grab the task, if ->futex_wakeup is !0 already it means
+ * its already queued (either by us or someone else) and will get the
+ * wakeup due to that.
+ *
+ * This cmpxchg() implies a full barrier, which pairs with the write
+ * barrier implied by the wakeup in wake_futex_list().
+ */
+ if (cmpxchg(&p->futex_wakeup, 0, p) != 0) {
+ /*
+ * It was already queued, drop the extra ref and we're done.
+ */
+ put_task_struct(p);
+ return;
+ }
+
+ /*
+ * Put the task on our wakeup list by atomically switching it with
+ * the list head. (XXX its a local list, no possible concurrency,
+ * this could be written without cmpxchg).
+ */
+ do {
+ p->futex_wakeup = *wake_list;
+ } while (cmpxchg(wake_list, p->futex_wakeup, p) != p->futex_wakeup);
+}
+
+/*
+ * For each task on the list, deliver the pending wakeup and release the
+ * task reference obtained in wake_futex().
+ */
+static void wake_futex_list(struct task_struct *head)
+{
+ while (head != &init_task) {
+ struct task_struct *next = head->futex_wakeup;
+
+ head->futex_wakeup = NULL;
+ /*
+ * wake_up_state() implies a wmb() to pair with the queueing
+ * in wake_futex() so as to not miss wakeups.
+ */
+ wake_up_state(head, TASK_NORMAL);
+ put_task_struct(head);
+
+ head = next;
+ }
}
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
@@ -851,6 +894,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
struct futex_q *this, *next;
struct plist_head *head;
union futex_key key = FUTEX_KEY_INIT;
+ struct task_struct *wake_list = &init_task;
int ret;
if (!bitset)
@@ -875,7 +919,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
if (!(this->bitset & bitset))
continue;
- wake_futex(this);
+ wake_futex(&wake_list, this);
if (++ret >= nr_wake)
break;
}
@@ -883,6 +927,8 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
spin_unlock(&hb->lock);
put_futex_key(fshared, &key);
+
+ wake_futex_list(wake_list);
out:
return ret;
}
@@ -899,6 +945,7 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
struct futex_hash_bucket *hb1, *hb2;
struct plist_head *head;
struct futex_q *this, *next;
+ struct task_struct *wake_list = &init_task;
int ret, op_ret;
retry:
@@ -949,7 +996,7 @@ retry_private:
plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key1)) {
- wake_futex(this);
+ wake_futex(&wake_list, this);
if (++ret >= nr_wake)
break;
}
@@ -961,7 +1008,7 @@ retry_private:
op_ret = 0;
plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key2)) {
- wake_futex(this);
+ wake_futex(&wake_list, this);
if (++op_ret >= nr_wake2)
break;
}
@@ -974,6 +1021,8 @@ out_put_keys:
put_futex_key(fshared, &key2);
out_put_key1:
put_futex_key(fshared, &key1);
+
+ wake_futex_list(wake_list);
out:
return ret;
}
@@ -1119,6 +1168,7 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
struct futex_hash_bucket *hb1, *hb2;
struct plist_head *head1;
struct futex_q *this, *next;
+ struct task_struct *wake_list = &init_task;
u32 curval2;
if (requeue_pi) {
@@ -1256,7 +1306,7 @@ retry_private:
* woken by futex_unlock_pi().
*/
if (++task_count <= nr_wake && !requeue_pi) {
- wake_futex(this);
+ wake_futex(&wake_list, this);
continue;
}
@@ -1302,6 +1352,8 @@ out_put_keys:
put_futex_key(fshared, &key2);
out_put_key1:
put_futex_key(fshared, &key1);
+
+ wake_futex_list(wake_list);
out:
if (pi_state != NULL)
free_pi_state(pi_state);
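The comments in wake_futex() above describe the mechanism: while the hash-bucket lock is held, each woken waiter is only claimed (cmpxchg on ->futex_wakeup) and pushed onto a caller-local list terminated by &init_task; the real wake_up_state() calls happen in wake_futex_list() after the lock has been dropped. A compact user-space sketch of that claim-then-drain pattern, using C11 atomics and mocked task/wakeup types (all names here are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdio.h>

struct task {
        const char *comm;
        struct task *_Atomic futex_wakeup;      /* NULL: not on any wake list */
};

/* Sentinel in the role of &init_task: terminates the wake list, never NULL. */
static struct task list_end = { "end", NULL };

/* Called with the (imaginary) hash-bucket lock held: queue, do not wake yet. */
static void queue_for_wakeup(struct task **wake_list, struct task *p)
{
        struct task *expected = NULL;

        /* Claim the task; a non-NULL value means someone already queued it. */
        if (!atomic_compare_exchange_strong(&p->futex_wakeup, &expected, p))
                return;

        /* The list head is caller-local, so a plain push is sufficient. */
        atomic_store(&p->futex_wakeup, *wake_list);
        *wake_list = p;
}

/* Called after the lock is dropped: deliver the deferred wakeups. */
static void wake_list_drain(struct task *head)
{
        while (head != &list_end) {
                struct task *next = atomic_load(&head->futex_wakeup);

                atomic_store(&head->futex_wakeup, NULL);
                printf("wake_up(%s)\n", head->comm);    /* ~ wake_up_state() */
                head = next;
        }
}

int main(void)
{
        struct task a = { "a", NULL }, b = { "b", NULL };
        struct task *wake_list = &list_end;

        queue_for_wakeup(&wake_list, &a);
        queue_for_wakeup(&wake_list, &b);
        queue_for_wakeup(&wake_list, &a);       /* already claimed: ignored  */
        wake_list_drain(wake_list);
        return 0;
}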
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 947b3ad551f8..3714ee5bc638 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -249,8 +249,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
/* didnt get the lock, go to sleep: */
spin_unlock_mutex(&lock->wait_lock, flags);
- preempt_enable_no_resched();
- schedule();
+ preempt_enable_and_schedule();
preempt_disable();
spin_lock_mutex(&lock->wait_lock, flags);
}
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 052ec4d195c7..d90211c5533e 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -420,6 +420,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
static struct pid *good_sigevent(sigevent_t * event)
{
struct task_struct *rtn = current->group_leader;
+ int sig = event->sigev_signo;
if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
(!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
@@ -428,7 +429,8 @@ static struct pid *good_sigevent(sigevent_t * event)
return NULL;
if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
- ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
+ (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
+ sig_kernel_coredump(sig)))
return NULL;
return task_pid(rtn);
diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h
index 14193d596d78..b031c8afde9a 100644
--- a/kernel/rtmutex-debug.h
+++ b/kernel/rtmutex-debug.h
@@ -17,17 +17,17 @@ extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
extern void debug_rt_mutex_lock(struct rt_mutex *lock);
extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
-extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
- struct task_struct *powner);
+extern void
+debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner);
extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter,
struct rt_mutex *lock);
extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter);
-# define debug_rt_mutex_reset_waiter(w) \
+# define debug_rt_mutex_reset_waiter(w) \
do { (w)->deadlock_lock = NULL; } while (0)
-static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
- int detect)
+static inline int
+debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, int detect)
{
- return (waiter != NULL);
+ return waiter != NULL;
}
diff --git a/kernel/rwlock.c b/kernel/rwlock.c
index 35460b37b815..ce4fa25d2636 100644
--- a/kernel/rwlock.c
+++ b/kernel/rwlock.c
@@ -189,7 +189,7 @@ void __lockfunc _read_unlock_bh(rwlock_t *lock)
{
rwlock_release(&lock->dep_map, 1, _RET_IP_);
_raw_read_unlock(lock);
- preempt_enable_no_resched();
+ __preempt_enable_no_resched();
local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(_read_unlock_bh);
@@ -216,7 +216,7 @@ void __lockfunc _write_unlock_bh(rwlock_t *lock)
{
rwlock_release(&lock->dep_map, 1, _RET_IP_);
_raw_write_unlock(lock);
- preempt_enable_no_resched();
+ __preempt_enable_no_resched();
local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(_write_unlock_bh);
diff --git a/kernel/sched.c b/kernel/sched.c
index bf737a66fca0..54c09fdf311a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2156,7 +2156,10 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
* yield - it could be a while.
*/
if (unlikely(on_rq)) {
- schedule_timeout_uninterruptible(1);
+ ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_hrtimeout(&to, HRTIMER_MODE_REL);
continue;
}
@@ -5166,6 +5169,19 @@ notrace unsigned long get_parent_ip(unsigned long addr)
return addr;
}
+#ifdef CONFIG_DEBUG_PREEMPT
+void notrace preempt_enable_no_resched(void)
+{
+ barrier();
+ dec_preempt_count();
+
+ WARN_ONCE(!preempt_count(),
+ KERN_ERR "BUG: %s:%d task might have lost a preemption check!\n",
+ current->comm, current->pid);
+}
+EXPORT_SYMBOL(preempt_enable_no_resched);
+#endif
+
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
defined(CONFIG_PREEMPT_TRACER))
@@ -5384,7 +5400,7 @@ need_resched_nonpreemptible:
if (unlikely(reacquire_kernel_lock(current) < 0))
goto need_resched_nonpreemptible;
- preempt_enable_no_resched();
+ __preempt_enable_no_resched();
if (need_resched())
goto need_resched;
}
@@ -5894,19 +5910,19 @@ long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
}
EXPORT_SYMBOL(sleep_on_timeout);
-#ifdef CONFIG_RT_MUTEXES
-
/*
- * rt_mutex_setprio - set the current priority of a task
+ * task_setprio - set the current priority of a task
* @p: task
* @prio: prio value (kernel-internal form)
*
* This function changes the 'effective' priority of a task. It does
* not touch ->normal_prio like __setscheduler().
*
- * Used by the rt_mutex code to implement priority inheritance logic.
+ * Used by the rt_mutex code to implement priority inheritance logic
+ * and by rcupreempt-boost to boost priorities of tasks sleeping
+ * with rcu locks.
*/
-void rt_mutex_setprio(struct task_struct *p, int prio)
+void task_setprio(struct task_struct *p, int prio)
{
unsigned long flags;
int oldprio, on_rq, running;
@@ -5945,8 +5961,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
task_rq_unlock(rq, &flags);
}
-#endif
-
void set_user_nice(struct task_struct *p, long nice)
{
int old_prio, delta, on_rq;
@@ -6584,9 +6598,8 @@ SYSCALL_DEFINE0(sched_yield)
__release(rq->lock);
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
_raw_spin_unlock(&rq->lock);
- preempt_enable_no_resched();
- schedule();
+ preempt_enable_and_schedule();
return 0;
}
@@ -7649,7 +7662,7 @@ static int __init migration_init(void)
migration_call(&migration_notifier, CPU_ONLINE, cpu);
register_cpu_notifier(&migration_notifier);
- return err;
+ return 0;
}
early_initcall(migration_init);
#endif
diff --git a/kernel/signal.c b/kernel/signal.c
index ccf1ceedaebe..b1889c55fd53 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -188,13 +188,46 @@ int next_signal(struct sigpending *pending, sigset_t *mask)
return sig;
}
+#ifdef __HAVE_ARCH_CMPXCHG
+static inline struct sigqueue *get_task_cache(struct task_struct *t)
+{
+ struct sigqueue *q = t->sigqueue_cache;
+
+ if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
+ return NULL;
+
+ return q;
+}
+
+static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
+{
+ if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
+ return 0;
+
+ return 1;
+}
+
+#else
+
+static inline struct sigqueue *get_task_cache(struct task_struct *t)
+{
+ return NULL;
+}
+
+static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
+{
+ return 1;
+}
+
+#endif
+
/*
* allocate a new signal queue record
* - this may be called without locks if and only if t == current, otherwise an
* appopriate lock must be held to stop the target task from exiting
*/
-static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
- int override_rlimit)
+static struct sigqueue *__sigqueue_do_alloc(struct task_struct *t, gfp_t flags,
+ int override_rlimit, int fromslab)
{
struct sigqueue *q = NULL;
struct user_struct *user;
@@ -209,8 +242,14 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
atomic_inc(&user->sigpending);
if (override_rlimit ||
atomic_read(&user->sigpending) <=
- t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
- q = kmem_cache_alloc(sigqueue_cachep, flags);
+ t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) {
+
+ if (!fromslab)
+ q = get_task_cache(t);
+ if (!q)
+ q = kmem_cache_alloc(sigqueue_cachep, flags);
+ }
+
if (unlikely(q == NULL)) {
atomic_dec(&user->sigpending);
free_uid(user);
@@ -223,6 +262,12 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
return q;
}
+static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
+ int override_rlimit)
+{
+ return __sigqueue_do_alloc(t, flags, override_rlimit, 0);
+}
+
static void __sigqueue_free(struct sigqueue *q)
{
if (q->flags & SIGQUEUE_PREALLOC)
@@ -232,6 +277,21 @@ static void __sigqueue_free(struct sigqueue *q)
kmem_cache_free(sigqueue_cachep, q);
}
+static void sigqueue_free_current(struct sigqueue *q)
+{
+ struct user_struct *up;
+
+ if (q->flags & SIGQUEUE_PREALLOC)
+ return;
+
+ up = q->user;
+ if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
+ atomic_dec(&up->sigpending);
+ free_uid(up);
+ } else
+ __sigqueue_free(q);
+}
+
void flush_sigqueue(struct sigpending *queue)
{
struct sigqueue *q;
@@ -245,6 +305,21 @@ void flush_sigqueue(struct sigpending *queue)
}
/*
+ * Called from __exit_signal. Flush tsk->pending and
+ * tsk->sigqueue_cache
+ */
+void flush_task_sigqueue(struct task_struct *tsk)
+{
+ struct sigqueue *q;
+
+ flush_sigqueue(&tsk->pending);
+
+ q = get_task_cache(tsk);
+ if (q)
+ kmem_cache_free(sigqueue_cachep, q);
+}
+
+/*
* Flush all pending signals for a task.
*/
void __flush_signals(struct task_struct *t)
@@ -392,7 +467,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
still_pending:
list_del_init(&first->list);
copy_siginfo(info, &first->info);
- __sigqueue_free(first);
+ sigqueue_free_current(first);
} else {
/* Ok, it wasn't in the queue. This must be
a fast-pathed signal or we must have been
@@ -437,6 +512,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
{
int signr;
+ WARN_ON_ONCE(tsk != current);
+
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
@@ -519,6 +596,9 @@ void signal_wake_up(struct task_struct *t, int resume)
set_tsk_thread_flag(t, TIF_SIGPENDING);
+ if (unlikely(t == current))
+ return;
+
/*
* For SIGKILL, we want to wake it up in the stopped/traced/killable
* case. We don't check t->state here because there is a race with it
@@ -1312,7 +1392,8 @@ struct sigqueue *sigqueue_alloc(void)
{
struct sigqueue *q;
- if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0)))
+ /* Preallocated sigqueue objects always from the slabcache ! */
+ if ((q = __sigqueue_do_alloc(current, GFP_KERNEL, 0, 1)))
q->flags |= SIGQUEUE_PREALLOC;
return(q);
}
@@ -1619,8 +1700,7 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
*/
preempt_disable();
read_unlock(&tasklist_lock);
- preempt_enable_no_resched();
- schedule();
+ preempt_enable_and_schedule();
} else {
/*
* By the time we got the lock, our tracer went away.
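The signal.c changes add a one-element, per-task sigqueue cache: get_task_cache() atomically empties t->sigqueue_cache, put_task_cache() refills it only if the slot is empty, and sigqueue_free_current() parks the entry there instead of freeing it when the current task runs at RT priority. A stand-alone C11 sketch of that single-slot cache (illustrative names only; the rt-priority test and the sigpending accounting are left out):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct sigqueue { int pad; };

static struct sigqueue *_Atomic cache_slot;     /* ~ task->sigqueue_cache */

/* Take whatever is parked in the slot; only the cmpxchg winner may reuse it. */
static struct sigqueue *cache_get(void)
{
        struct sigqueue *q = atomic_load(&cache_slot);

        if (!q || !atomic_compare_exchange_strong(&cache_slot, &q, NULL))
                return NULL;
        return q;
}

/* 0: parked in the cache; 1: slot occupied, caller must free it for real. */
static int cache_put(struct sigqueue *q)
{
        struct sigqueue *expected = NULL;

        return atomic_compare_exchange_strong(&cache_slot, &expected, q) ? 0 : 1;
}

static struct sigqueue *sigqueue_alloc_cached(void)
{
        struct sigqueue *q = cache_get();

        return q ? q : malloc(sizeof(*q));      /* ~ kmem_cache_alloc() */
}

static void sigqueue_free_cached(struct sigqueue *q)
{
        if (cache_put(q))
                free(q);                        /* cache already full */
}

int main(void)
{
        struct sigqueue *a = sigqueue_alloc_cached();   /* allocator path  */

        sigqueue_free_cached(a);                        /* parked in cache */
        printf("reused: %s\n", sigqueue_alloc_cached() == a ? "yes" : "no");
        return 0;
}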
diff --git a/kernel/softirq.c b/kernel/softirq.c
index eb5e131a0485..5fc1b0eefe9b 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -67,7 +67,7 @@ char *softirq_to_name[NR_SOFTIRQS] = {
* to the pending events, so lets the scheduler to balance
* the softirq load for us.
*/
-void wakeup_softirqd(void)
+static void wakeup_softirqd(void)
{
/* Interrupts are disabled: no need to stop preemption */
struct task_struct *tsk = __get_cpu_var(ksoftirqd);
@@ -308,7 +308,7 @@ void irq_exit(void)
if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
tick_nohz_stop_sched_tick(0);
#endif
- preempt_enable_no_resched();
+ __preempt_enable_no_resched();
}
/*
@@ -704,8 +704,7 @@ static int ksoftirqd(void * __bind_cpu)
while (!kthread_should_stop()) {
preempt_disable();
if (!local_softirq_pending()) {
- preempt_enable_no_resched();
- schedule();
+ preempt_enable_and_schedule();
preempt_disable();
}
@@ -718,7 +717,7 @@ static int ksoftirqd(void * __bind_cpu)
if (cpu_is_offline((long)__bind_cpu))
goto wait_to_die;
do_softirq();
- preempt_enable_no_resched();
+ __preempt_enable_no_resched();
cond_resched();
preempt_disable();
rcu_qsctr_inc((long)__bind_cpu);
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 6a3c0c474316..d116e821ad13 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -170,7 +170,7 @@ void __lockfunc _atomic_spin_unlock_bh(atomic_spinlock_t *lock)
{
spin_release(&lock->dep_map, 1, _RET_IP_);
_raw_spin_unlock(lock);
- preempt_enable_no_resched();
+ __preempt_enable_no_resched();
local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(_atomic_spin_unlock_bh);
@@ -184,7 +184,7 @@ int __lockfunc _atomic_spin_trylock_bh(atomic_spinlock_t *lock)
return 1;
}
- preempt_enable_no_resched();
+ __preempt_enable_no_resched();
local_bh_enable_ip((unsigned long)__builtin_return_address(0));
return 0;
}
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 912823e2a11b..22d1d77f9a62 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -40,6 +40,8 @@ static atomic_t thread_ack;
static DEFINE_MUTEX(lock);
/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
static DEFINE_MUTEX(setup_lock);
+/* do not start up until all worklets have been placed: */
+static DEFINE_MUTEX(startup_lock);
/* Users of stop_machine. */
static int refcount;
static struct workqueue_struct *stop_machine_wq;
@@ -71,6 +73,15 @@ static void stop_cpu(struct work_struct *unused)
int cpu = smp_processor_id();
int err;
+ /*
+ * Wait for the startup loop to finish:
+ */
+ mutex_lock(&startup_lock);
+ /*
+ * Let other threads continue too:
+ */
+ mutex_unlock(&startup_lock);
+
if (!active_cpus) {
if (cpu == cpumask_first(cpu_online_mask))
smdata = &active;
@@ -166,16 +177,21 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
set_state(STOPMACHINE_PREPARE);
- /* Schedule the stop_cpu work on all cpus: hold this CPU so one
- * doesn't hit this CPU until we're ready. */
- get_cpu();
+ /*
+ * Schedule the stop_cpu work on all cpus before allowing any
+ * of the CPUs to execute it:
+ */
+ mutex_lock(&startup_lock);
+
for_each_online_cpu(i) {
sm_work = per_cpu_ptr(stop_machine_work, i);
INIT_WORK(sm_work, stop_cpu);
queue_work_on(i, stop_machine_wq, sm_work);
}
- /* This will release the thread on our CPU. */
- put_cpu();
+
+ /* This will release the thread on all CPUs: */
+ mutex_unlock(&startup_lock);
+
flush_workqueue(stop_machine_wq);
ret = active.fnret;
mutex_unlock(&lock);
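The stop_machine change replaces the get_cpu()/put_cpu() pinning with a startup_lock mutex used as a release gate: __stop_machine() takes it before queueing one stop_cpu() worklet per CPU, every worklet first acquires and releases it, so dropping the mutex lets all of them proceed only once the whole set has been queued. A minimal pthread sketch of that gate pattern (threads stand in for the per-CPU worklets; not the kernel code):

#include <pthread.h>
#include <stdio.h>

#define NR_WORKERS 4

static pthread_mutex_t startup_lock = PTHREAD_MUTEX_INITIALIZER;

static void *worker(void *arg)
{
        long id = (long)arg;

        /* Wait for the startup loop to finish, then let the others go too. */
        pthread_mutex_lock(&startup_lock);
        pthread_mutex_unlock(&startup_lock);

        printf("worker %ld running\n", id);
        return NULL;
}

int main(void)
{
        pthread_t tid[NR_WORKERS];
        long i;

        pthread_mutex_lock(&startup_lock);              /* gate closed       */
        for (i = 0; i < NR_WORKERS; i++)
                pthread_create(&tid[i], NULL, worker, (void *)i);

        pthread_mutex_unlock(&startup_lock);            /* release them all  */

        for (i = 0; i < NR_WORKERS; i++)
                pthread_join(tid[i], NULL);
        return 0;
}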