summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-04-29 14:46:59 +0200
committerIngo Molnar <mingo@elte.hu>2009-04-29 14:47:05 +0200
commite7fd5d4b3d240f42c30a9e3d20a4689c4d3a795a (patch)
tree4ba588631dd8189a818a91c9e3976526071178b6 /kernel
parent1130b0296184bc21806225fd06d533515a99d2db (diff)
parent56a50adda49b2020156616c4eb15353e0f9ad7de (diff)
downloadlwn-e7fd5d4b3d240f42c30a9e3d20a4689c4d3a795a.tar.gz
lwn-e7fd5d4b3d240f42c30a9e3d20a4689c4d3a795a.zip
Merge branch 'linus' into perfcounters/core
Merge reason: This brach was on -rc1, refresh it to almost-rc4 to pick up the latest upstream fixes. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit_tree.c3
-rw-r--r--kernel/fork.c13
-rw-r--r--kernel/futex.c7
-rw-r--r--kernel/irq/manage.c5
-rw-r--r--kernel/irq/numa_migrate.c1
-rw-r--r--kernel/kthread.c26
-rw-r--r--kernel/lockdep.c22
-rw-r--r--kernel/module.c3
-rw-r--r--kernel/mutex.c3
-rw-r--r--kernel/panic.c12
-rw-r--r--kernel/posix-cpu-timers.c9
-rw-r--r--kernel/power/disk.c45
-rw-r--r--kernel/power/main.c24
-rw-r--r--kernel/power/swap.c2
-rw-r--r--kernel/power/user.c9
-rw-r--r--kernel/ptrace.c27
-rw-r--r--kernel/rcupdate.c18
-rw-r--r--kernel/rcutree.c19
-rw-r--r--kernel/rcutree_trace.c14
-rw-r--r--kernel/resource.c46
-rw-r--r--kernel/sched.c164
-rw-r--r--kernel/sched_cpupri.c5
-rw-r--r--kernel/sched_rt.c15
-rw-r--r--kernel/slow-work.c4
-rw-r--r--kernel/softirq.c4
-rw-r--r--kernel/sys.c24
-rw-r--r--kernel/sysctl.c20
-rw-r--r--kernel/time/clocksource.c8
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/time/timekeeping.c12
-rw-r--r--kernel/timer.c7
-rw-r--r--kernel/trace/Kconfig4
-rw-r--r--kernel/trace/blktrace.c10
-rw-r--r--kernel/trace/trace.c36
-rw-r--r--kernel/trace/trace_branch.c8
-rw-r--r--kernel/trace/trace_events.c12
-rw-r--r--kernel/trace/trace_events_filter.c14
-rw-r--r--kernel/trace/trace_events_stage_2.h4
-rw-r--r--kernel/trace/trace_power.c7
-rw-r--r--kernel/trace/trace_syscalls.c2
-rw-r--r--kernel/workqueue.c36
41 files changed, 441 insertions, 265 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 917ab9525568..6e7351739a82 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -734,9 +734,6 @@ int audit_tag_tree(char *old, char *new)
dentry = dget(path.dentry);
path_put(&path);
- if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
- follow_up(&mnt, &dentry);
-
list_add_tail(&list, &tagged->mnt_list);
mutex_lock(&audit_filter_mutex);
diff --git a/kernel/fork.c b/kernel/fork.c
index 89c1efb3ccf4..d32fef4d38e5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -800,6 +800,12 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
sig->cputime_expires.virt_exp = cputime_zero;
sig->cputime_expires.sched_exp = 0;
+ if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+ sig->cputime_expires.prof_exp =
+ secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+ sig->cputimer.running = 1;
+ }
+
/* The timer lists. */
INIT_LIST_HEAD(&sig->cpu_timers[0]);
INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -815,11 +821,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
atomic_inc(&current->signal->live);
return 0;
}
- sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
-
- if (sig)
- posix_cpu_timers_init_group(sig);
+ sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
tsk->signal = sig;
if (!sig)
return -ENOMEM;
@@ -859,6 +862,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
task_unlock(current->group_leader);
+ posix_cpu_timers_init_group(sig);
+
acct_init_pacct(&sig->pacct);
tty_audit_fork(sig);
diff --git a/kernel/futex.c b/kernel/futex.c
index 6b50a024bca2..eef8cd26b5e5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -883,7 +883,12 @@ retry_private:
out_unlock:
double_unlock_hb(hb1, hb2);
- /* drop_futex_key_refs() must be called outside the spinlocks. */
+ /*
+ * drop_futex_key_refs() must be called outside the spinlocks. During
+ * the requeue we moved futex_q's from the hash bucket at key1 to the
+ * one at key2 and updated their key pointer. We no longer need to
+ * hold the references to key1.
+ */
while (--drop_count >= 0)
drop_futex_key_refs(&key1);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7e2e7dd4cd2f..2734eca59243 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -109,10 +109,9 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
spin_lock_irqsave(&desc->lock, flags);
#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
- cpumask_copy(desc->affinity, cpumask);
+ if (desc->status & IRQ_MOVE_PCNTXT)
desc->chip->set_affinity(irq, cpumask);
- } else {
+ else {
desc->status |= IRQ_MOVE_PENDING;
cpumask_copy(desc->pending_mask, cpumask);
}
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 243d6121e50e..44bbdcbaf8d2 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -54,6 +54,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
{
free_kstat_irqs(old_desc, desc);
+ free_desc_masks(old_desc, desc);
arch_free_chip_data(old_desc, desc);
}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 84bbadd4d021..4ebaf8519abf 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -76,6 +76,7 @@ static int kthread(void *_create)
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE);
+ create->result = current;
complete(&create->started);
schedule();
@@ -96,22 +97,10 @@ static void create_kthread(struct kthread_create_info *create)
/* We want our own signal handler (we take no signals by default). */
pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
- if (pid < 0) {
+ if (pid < 0)
create->result = ERR_PTR(pid);
- } else {
- struct sched_param param = { .sched_priority = 0 };
+ else
wait_for_completion(&create->started);
- read_lock(&tasklist_lock);
- create->result = find_task_by_pid_ns(pid, &init_pid_ns);
- read_unlock(&tasklist_lock);
- /*
- * root may have changed our (kthreadd's) priority or CPU mask.
- * The kernel thread should not inherit these properties.
- */
- sched_setscheduler(create->result, SCHED_NORMAL, &param);
- set_user_nice(create->result, KTHREAD_NICE_LEVEL);
- set_cpus_allowed_ptr(create->result, cpu_all_mask);
- }
complete(&create->done);
}
@@ -154,11 +143,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
wait_for_completion(&create.done);
if (!IS_ERR(create.result)) {
+ struct sched_param param = { .sched_priority = 0 };
va_list args;
+
va_start(args, namefmt);
vsnprintf(create.result->comm, sizeof(create.result->comm),
namefmt, args);
va_end(args);
+ /*
+ * root may have changed our (kthreadd's) priority or CPU mask.
+ * The kernel thread should not inherit these properties.
+ */
+ sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param);
+ set_user_nice(create.result, KTHREAD_NICE_LEVEL);
+ set_cpus_allowed_ptr(create.result, cpu_all_mask);
}
return create.result;
}
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index b0f011866969..accb40cdb12a 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2490,13 +2490,20 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
void lockdep_init_map(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, int subclass)
{
- if (unlikely(!debug_locks))
+ lock->class_cache = NULL;
+#ifdef CONFIG_LOCK_STAT
+ lock->cpu = raw_smp_processor_id();
+#endif
+
+ if (DEBUG_LOCKS_WARN_ON(!name)) {
+ lock->name = "NULL";
return;
+ }
+
+ lock->name = name;
if (DEBUG_LOCKS_WARN_ON(!key))
return;
- if (DEBUG_LOCKS_WARN_ON(!name))
- return;
/*
* Sanity check, the lock-class key must be persistent:
*/
@@ -2505,12 +2512,11 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
DEBUG_LOCKS_WARN_ON(1);
return;
}
- lock->name = name;
lock->key = key;
- lock->class_cache = NULL;
-#ifdef CONFIG_LOCK_STAT
- lock->cpu = raw_smp_processor_id();
-#endif
+
+ if (unlikely(!debug_locks))
+ return;
+
if (subclass)
register_lock_class(lock, subclass, 1);
}
diff --git a/kernel/module.c b/kernel/module.c
index 05f014efa32c..e797812a4d95 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2388,6 +2388,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_LIVE, mod);
+ /* We need to finish all async code before the module init sequence is done */
+ async_synchronize_full();
+
mutex_lock(&module_mutex);
/* Drop initial reference. */
module_put(mod);
diff --git a/kernel/mutex.c b/kernel/mutex.c
index fd95eaa672e6..f415e80a9119 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -148,7 +148,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
preempt_disable();
mutex_acquire(&lock->dep_map, subclass, 0, ip);
-#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES)
+#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && \
+ !defined(CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES)
/*
* Optimistic spinning.
*
diff --git a/kernel/panic.c b/kernel/panic.c
index 3fd8c5bf8b39..3dcaa1661357 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -213,8 +213,16 @@ unsigned long get_taint(void)
void add_taint(unsigned flag)
{
- /* can't trust the integrity of the kernel anymore: */
- debug_locks = 0;
+ /*
+ * Can't trust the integrity of the kernel anymore.
+ * We don't call directly debug_locks_off() because the issue
+ * is not necessarily serious enough to set oops_in_progress to 1
+ * Also we want to keep up lockdep for staging development and
+ * post-warning case.
+ */
+ if (flag != TAINT_CRAP && flag != TAINT_WARN && __debug_locks_off())
+ printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n");
+
set_bit(flag, &tainted_mask);
}
EXPORT_SYMBOL(add_taint);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 8e5d9a68b022..c9dcf98b4463 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -18,7 +18,7 @@ void update_rlimit_cpu(unsigned long rlim_new)
cputime = secs_to_cputime(rlim_new);
if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
- cputime_lt(current->signal->it_prof_expires, cputime)) {
+ cputime_gt(current->signal->it_prof_expires, cputime)) {
spin_lock_irq(&current->sighand->siglock);
set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
spin_unlock_irq(&current->sighand->siglock);
@@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
cpu->cpu = virt_ticks(p);
break;
case CPUCLOCK_SCHED:
- cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
+ cpu->sched = task_sched_runtime(p);
break;
}
return 0;
@@ -305,18 +305,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
{
struct task_cputime cputime;
- thread_group_cputime(p, &cputime);
switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
+ thread_group_cputime(p, &cputime);
cpu->cpu = cputime_add(cputime.utime, cputime.stime);
break;
case CPUCLOCK_VIRT:
+ thread_group_cputime(p, &cputime);
cpu->cpu = cputime.utime;
break;
case CPUCLOCK_SCHED:
- cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+ cpu->sched = thread_group_sched_runtime(p);
break;
}
return 0;
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 5f21ab2bbcdf..e71ca9cd81b2 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -22,6 +22,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
+#include <scsi/scsi_scan.h>
#include <asm/suspend.h>
#include "power.h"
@@ -655,32 +656,42 @@ static int software_resume(void)
* here to avoid lockdep complaining.
*/
mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
+
+ if (swsusp_resume_device)
+ goto Check_image;
+
+ if (!strlen(resume_file)) {
+ error = -ENOENT;
+ goto Unlock;
+ }
+
+ pr_debug("PM: Checking image partition %s\n", resume_file);
+
+ /* Check if the device is there */
+ swsusp_resume_device = name_to_dev_t(resume_file);
if (!swsusp_resume_device) {
- if (!strlen(resume_file)) {
- mutex_unlock(&pm_mutex);
- return -ENOENT;
- }
/*
* Some device discovery might still be in progress; we need
* to wait for this to finish.
*/
wait_for_device_probe();
+ /*
+ * We can't depend on SCSI devices being available after loading
+ * one of their modules until scsi_complete_async_scans() is
+ * called and the resume device usually is a SCSI one.
+ */
+ scsi_complete_async_scans();
+
swsusp_resume_device = name_to_dev_t(resume_file);
- pr_debug("PM: Resume from partition %s\n", resume_file);
- } else {
- pr_debug("PM: Resume from partition %d:%d\n",
- MAJOR(swsusp_resume_device),
- MINOR(swsusp_resume_device));
+ if (!swsusp_resume_device) {
+ error = -ENODEV;
+ goto Unlock;
+ }
}
- if (noresume) {
- /**
- * FIXME: If noresume is specified, we need to find the
- * partition and reset it back to normal swap space.
- */
- mutex_unlock(&pm_mutex);
- return 0;
- }
+ Check_image:
+ pr_debug("PM: Resume from partition %d:%d\n",
+ MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
pr_debug("PM: Checking hibernation image.\n");
error = swsusp_check();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f172f41858bb..f99ed6a75eac 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -291,20 +291,26 @@ static int suspend_enter(suspend_state_t state)
device_pm_lock();
+ if (suspend_ops->prepare) {
+ error = suspend_ops->prepare();
+ if (error)
+ goto Done;
+ }
+
error = device_power_down(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down\n");
- goto Done;
+ goto Platfrom_finish;
}
- if (suspend_ops->prepare) {
- error = suspend_ops->prepare();
+ if (suspend_ops->prepare_late) {
+ error = suspend_ops->prepare_late();
if (error)
goto Power_up_devices;
}
if (suspend_test(TEST_PLATFORM))
- goto Platfrom_finish;
+ goto Platform_wake;
error = disable_nonboot_cpus();
if (error || suspend_test(TEST_CPUS))
@@ -326,13 +332,17 @@ static int suspend_enter(suspend_state_t state)
Enable_cpus:
enable_nonboot_cpus();
- Platfrom_finish:
- if (suspend_ops->finish)
- suspend_ops->finish();
+ Platform_wake:
+ if (suspend_ops->wake)
+ suspend_ops->wake();
Power_up_devices:
device_power_up(PMSG_RESUME);
+ Platfrom_finish:
+ if (suspend_ops->finish)
+ suspend_ops->finish();
+
Done:
device_pm_unlock();
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 505f319e489c..8ba052c86d48 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -64,8 +64,6 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
struct bio *bio;
bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
- if (!bio)
- return -ENOMEM;
bio->bi_sector = page_off * (PAGE_SIZE >> 9);
bio->bi_bdev = resume_bdev;
bio->bi_end_io = end_swap_bio_read;
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6c85359364f2..ed97375daae9 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -24,6 +24,7 @@
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/smp_lock.h>
+#include <scsi/scsi_scan.h>
#include <asm/uaccess.h>
@@ -92,6 +93,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
filp->private_data = data;
memset(&data->handle, 0, sizeof(struct snapshot_handle));
if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
+ /* Hibernating. The image device should be accessible. */
data->swap = swsusp_resume_device ?
swap_type_of(swsusp_resume_device, 0, NULL) : -1;
data->mode = O_RDONLY;
@@ -99,6 +101,13 @@ static int snapshot_open(struct inode *inode, struct file *filp)
if (error)
pm_notifier_call_chain(PM_POST_HIBERNATION);
} else {
+ /*
+ * Resuming. We may need to wait for the image device to
+ * appear.
+ */
+ wait_for_device_probe();
+ scsi_complete_async_scans();
+
data->swap = -1;
data->mode = O_WRONLY;
error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index aaad0ec34194..0692ab5a0d67 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -21,9 +21,7 @@
#include <linux/audit.h>
#include <linux/pid_namespace.h>
#include <linux/syscalls.h>
-
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
/*
@@ -48,7 +46,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
list_add(&child->ptrace_entry, &new_parent->ptraced);
child->parent = new_parent;
}
-
+
/*
* Turn a tracing stop into a normal stop now, since with no tracer there
* would be no way to wake it up with SIGCONT or SIGKILL. If there was a
@@ -173,7 +171,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
task_lock(task);
err = __ptrace_may_access(task, mode);
task_unlock(task);
- return (!err ? true : false);
+ return !err;
}
int ptrace_attach(struct task_struct *task)
@@ -190,7 +188,7 @@ int ptrace_attach(struct task_struct *task)
/* Protect exec's credential calculations against our interference;
* SUID, SGID and LSM creds get determined differently under ptrace.
*/
- retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+ retval = mutex_lock_interruptible(&task->cred_exec_mutex);
if (retval < 0)
goto out;
@@ -234,7 +232,7 @@ repeat:
bad:
write_unlock_irqrestore(&tasklist_lock, flags);
task_unlock(task);
- mutex_unlock(&current->cred_exec_mutex);
+ mutex_unlock(&task->cred_exec_mutex);
out:
return retval;
}
@@ -358,7 +356,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
copied += retval;
src += retval;
dst += retval;
- len -= retval;
+ len -= retval;
}
return copied;
}
@@ -383,7 +381,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
copied += retval;
src += retval;
dst += retval;
- len -= retval;
+ len -= retval;
}
return copied;
}
@@ -496,9 +494,9 @@ static int ptrace_resume(struct task_struct *child, long request, long data)
if (unlikely(!arch_has_single_step()))
return -EIO;
user_enable_single_step(child);
- }
- else
+ } else {
user_disable_single_step(child);
+ }
child->exit_code = data;
wake_up_process(child);
@@ -606,10 +604,11 @@ repeat:
ret = security_ptrace_traceme(current->parent);
/*
- * Set the ptrace bit in the process ptrace flags.
- * Then link us on our parent's ptraced list.
+ * Check PF_EXITING to ensure ->real_parent has not passed
+ * exit_ptrace(). Otherwise we don't report the error but
+ * pretend ->real_parent untraces us right after return.
*/
- if (!ret) {
+ if (!ret && !(current->real_parent->flags & PF_EXITING)) {
current->ptrace |= PT_PTRACED;
__ptrace_link(current, current->real_parent);
}
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2c7b8457d0d2..a967c9feb90a 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -58,6 +58,10 @@ static DEFINE_MUTEX(rcu_barrier_mutex);
static struct completion rcu_barrier_completion;
int rcu_scheduler_active __read_mostly;
+static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
+static struct rcu_head rcu_migrate_head[3];
+static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
+
/*
* Awaken the corresponding synchronize_rcu() instance now that a
* grace period has elapsed.
@@ -122,7 +126,10 @@ static void rcu_barrier_func(void *type)
}
}
-static inline void wait_migrated_callbacks(void);
+static inline void wait_migrated_callbacks(void)
+{
+ wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
+}
/*
* Orchestrate the specified type of RCU barrier, waiting for all
@@ -179,21 +186,12 @@ void rcu_barrier_sched(void)
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);
-static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
-static struct rcu_head rcu_migrate_head[3];
-static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
-
static void rcu_migrate_callback(struct rcu_head *notused)
{
if (atomic_dec_and_test(&rcu_migrate_type_count))
wake_up(&rcu_migrate_wq);
}
-static inline void wait_migrated_callbacks(void)
-{
- wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
-}
-
static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
unsigned long action, void *hcpu)
{
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7f3266922572..d2a372fb0b9b 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -530,8 +530,6 @@ static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
rdp->qs_pending = 1;
rdp->passed_quiesc = 0;
rdp->gpnum = rsp->gpnum;
- rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
- RCU_JIFFIES_TILL_FORCE_QS;
}
/*
@@ -578,8 +576,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rsp->gpnum++;
rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
- rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
- RCU_JIFFIES_TILL_FORCE_QS;
record_gp_stall_check_time(rsp);
dyntick_record_completed(rsp, rsp->completed - 1);
note_new_gpnum(rsp, rdp);
@@ -1055,7 +1051,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
{
unsigned long flags;
long lastcomp;
- struct rcu_data *rdp = rsp->rda[smp_processor_id()];
struct rcu_node *rnp = rcu_get_root(rsp);
u8 signaled;
@@ -1066,16 +1061,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
return; /* Someone else is already on the job. */
}
if (relaxed &&
- (long)(rsp->jiffies_force_qs - jiffies) >= 0 &&
- (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0)
+ (long)(rsp->jiffies_force_qs - jiffies) >= 0)
goto unlock_ret; /* no emergency and done recently. */
rsp->n_force_qs++;
spin_lock(&rnp->lock);
lastcomp = rsp->completed;
signaled = rsp->signaled;
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
- rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
- RCU_JIFFIES_TILL_FORCE_QS;
if (lastcomp == rsp->gpnum) {
rsp->n_force_qs_ngp++;
spin_unlock(&rnp->lock);
@@ -1144,8 +1136,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
* If an RCU GP has gone long enough, go check for dyntick
* idle CPUs and, if needed, send resched IPIs.
*/
- if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
- (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
+ if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
force_quiescent_state(rsp, 1);
/*
@@ -1230,8 +1221,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
if (unlikely(++rdp->qlen > qhimark)) {
rdp->blimit = LONG_MAX;
force_quiescent_state(rsp, 0);
- } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
- (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
+ } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
force_quiescent_state(rsp, 1);
local_irq_restore(flags);
}
@@ -1290,8 +1280,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
/* Has an RCU GP gone long enough to send resched IPIs &c? */
if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
- ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
- (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0))
+ ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
return 1;
/* nothing to do */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4ee954f6a8d5..4b1875ba9404 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -49,14 +49,12 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
{
if (!rdp->beenonline)
return;
- seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x",
+ seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? '!' : ' ',
rdp->completed, rdp->gpnum,
rdp->passed_quiesc, rdp->passed_quiesc_completed,
- rdp->qs_pending,
- rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
- (int)(rdp->n_rcu_pending & 0xffff));
+ rdp->qs_pending);
#ifdef CONFIG_NO_HZ
seq_printf(m, " dt=%d/%d dn=%d df=%lu",
rdp->dynticks->dynticks,
@@ -102,14 +100,12 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
{
if (!rdp->beenonline)
return;
- seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld",
+ seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"",
rdp->completed, rdp->gpnum,
rdp->passed_quiesc, rdp->passed_quiesc_completed,
- rdp->qs_pending,
- rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
- rdp->n_rcu_pending);
+ rdp->qs_pending);
#ifdef CONFIG_NO_HZ
seq_printf(m, ",%d,%d,%d,%lu",
rdp->dynticks->dynticks,
@@ -123,7 +119,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
static int show_rcudata_csv(struct seq_file *m, void *unused)
{
- seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\",");
+ seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
#ifdef CONFIG_NO_HZ
seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
#endif /* #ifdef CONFIG_NO_HZ */
diff --git a/kernel/resource.c b/kernel/resource.c
index fd5d7d574bb9..ac5f3a36923f 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -533,43 +533,21 @@ static void __init __reserve_region_with_split(struct resource *root,
res->end = end;
res->flags = IORESOURCE_BUSY;
- for (;;) {
- conflict = __request_resource(parent, res);
- if (!conflict)
- break;
- if (conflict != parent) {
- parent = conflict;
- if (!(conflict->flags & IORESOURCE_BUSY))
- continue;
- }
-
- /* Uhhuh, that didn't work out.. */
- kfree(res);
- res = NULL;
- break;
- }
-
- if (!res) {
- /* failed, split and try again */
-
- /* conflict covered whole area */
- if (conflict->start <= start && conflict->end >= end)
- return;
+ conflict = __request_resource(parent, res);
+ if (!conflict)
+ return;
- if (conflict->start > start)
- __reserve_region_with_split(root, start, conflict->start-1, name);
- if (!(conflict->flags & IORESOURCE_BUSY)) {
- resource_size_t common_start, common_end;
+ /* failed, split and try again */
+ kfree(res);
- common_start = max(conflict->start, start);
- common_end = min(conflict->end, end);
- if (common_start < common_end)
- __reserve_region_with_split(root, common_start, common_end, name);
- }
- if (conflict->end < end)
- __reserve_region_with_split(root, conflict->end+1, end, name);
- }
+ /* conflict covered whole area */
+ if (conflict->start <= start && conflict->end >= end)
+ return;
+ if (conflict->start > start)
+ __reserve_region_with_split(root, start, conflict->start-1, name);
+ if (conflict->end < end)
+ __reserve_region_with_split(root, conflict->end+1, end, name);
}
void __init reserve_region_with_split(struct resource *root,
diff --git a/kernel/sched.c b/kernel/sched.c
index a69278eef425..2f600e30dcf0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1419,10 +1419,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct rq_iterator *iterator);
#endif
+/* Time spent by the tasks of the cpu accounting group executing in ... */
+enum cpuacct_stat_index {
+ CPUACCT_STAT_USER, /* ... user mode */
+ CPUACCT_STAT_SYSTEM, /* ... kernel mode */
+
+ CPUACCT_STAT_NSTATS,
+};
+
#ifdef CONFIG_CGROUP_CPUACCT
static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val);
#else
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+static inline void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val) {}
#endif
static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@ -4547,9 +4559,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
EXPORT_PER_CPU_SYMBOL(kstat);
/*
- * Return any ns on the sched_clock that have not yet been banked in
+ * Return any ns on the sched_clock that have not yet been accounted in
* @p in case that task is currently running.
+ *
+ * Called with task_rq_lock() held on @rq.
*/
+static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+ u64 ns = 0;
+
+ if (task_current(rq, p)) {
+ update_rq_clock(rq);
+ ns = rq->clock - p->se.exec_start;
+ if ((s64)ns < 0)
+ ns = 0;
+ }
+
+ return ns;
+}
+
unsigned long long task_delta_exec(struct task_struct *p)
{
unsigned long flags;
@@ -4557,16 +4585,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
u64 ns = 0;
rq = task_rq_lock(p, &flags);
+ ns = do_task_delta_exec(p, rq);
+ task_rq_unlock(rq, &flags);
- if (task_current(rq, p)) {
- u64 delta_exec;
+ return ns;
+}
- update_rq_clock(rq);
- delta_exec = rq->clock - p->se.exec_start;
- if ((s64)delta_exec > 0)
- ns = delta_exec;
- }
+/*
+ * Return accounted runtime for the task.
+ * In case the task is currently running, return the runtime plus current's
+ * pending runtime that have not been accounted yet.
+ */
+unsigned long long task_sched_runtime(struct task_struct *p)
+{
+ unsigned long flags;
+ struct rq *rq;
+ u64 ns = 0;
+
+ rq = task_rq_lock(p, &flags);
+ ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+ task_rq_unlock(rq, &flags);
+
+ return ns;
+}
+
+/*
+ * Return sum_exec_runtime for the thread group.
+ * In case the task is currently running, return the sum plus current's
+ * pending runtime that have not been accounted yet.
+ *
+ * Note that the thread group might have other running tasks as well,
+ * so the return value not includes other pending runtime that other
+ * running tasks might have.
+ */
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
+{
+ struct task_cputime totals;
+ unsigned long flags;
+ struct rq *rq;
+ u64 ns;
+ rq = task_rq_lock(p, &flags);
+ thread_group_cputime(p, &totals);
+ ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
task_rq_unlock(rq, &flags);
return ns;
@@ -4595,6 +4656,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
cpustat->nice = cputime64_add(cpustat->nice, tmp);
else
cpustat->user = cputime64_add(cpustat->user, tmp);
+
+ cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
/* Account for user time used */
acct_update_integrals(p);
}
@@ -4656,6 +4719,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
else
cpustat->system = cputime64_add(cpustat->system, tmp);
+ cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+
/* Account for system time used */
acct_update_integrals(p);
}
@@ -4818,7 +4883,7 @@ void scheduler_tick(void)
#endif
}
-unsigned long get_parent_ip(unsigned long addr)
+notrace unsigned long get_parent_ip(unsigned long addr)
{
if (in_lock_functions(addr)) {
addr = CALLER_ADDR2;
@@ -7340,7 +7405,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
cpumask_or(groupmask, groupmask, sched_group_cpus(group));
cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
+
printk(KERN_CONT " %s", str);
+ if (group->__cpu_power != SCHED_LOAD_SCALE) {
+ printk(KERN_CONT " (__cpu_power = %d)",
+ group->__cpu_power);
+ }
group = group->next;
} while (group != sd->groups);
@@ -9963,6 +10033,7 @@ struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
struct cpuacct *parent;
};
@@ -9987,20 +10058,32 @@ static struct cgroup_subsys_state *cpuacct_create(
struct cgroup_subsys *ss, struct cgroup *cgrp)
{
struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ int i;
if (!ca)
- return ERR_PTR(-ENOMEM);
+ goto out;
ca->cpuusage = alloc_percpu(u64);
- if (!ca->cpuusage) {
- kfree(ca);
- return ERR_PTR(-ENOMEM);
- }
+ if (!ca->cpuusage)
+ goto out_free_ca;
+
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+ if (percpu_counter_init(&ca->cpustat[i], 0))
+ goto out_free_counters;
if (cgrp->parent)
ca->parent = cgroup_ca(cgrp->parent);
return &ca->css;
+
+out_free_counters:
+ while (--i >= 0)
+ percpu_counter_destroy(&ca->cpustat[i]);
+ free_percpu(ca->cpuusage);
+out_free_ca:
+ kfree(ca);
+out:
+ return ERR_PTR(-ENOMEM);
}
/* destroy an existing cpu accounting group */
@@ -10008,7 +10091,10 @@ static void
cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
struct cpuacct *ca = cgroup_ca(cgrp);
+ int i;
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+ percpu_counter_destroy(&ca->cpustat[i]);
free_percpu(ca->cpuusage);
kfree(ca);
}
@@ -10095,6 +10181,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
return 0;
}
+static const char *cpuacct_stat_desc[] = {
+ [CPUACCT_STAT_USER] = "user",
+ [CPUACCT_STAT_SYSTEM] = "system",
+};
+
+static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
+ struct cgroup_map_cb *cb)
+{
+ struct cpuacct *ca = cgroup_ca(cgrp);
+ int i;
+
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
+ s64 val = percpu_counter_read(&ca->cpustat[i]);
+ val = cputime64_to_clock_t(val);
+ cb->fill(cb, cpuacct_stat_desc[i], val);
+ }
+ return 0;
+}
+
static struct cftype files[] = {
{
.name = "usage",
@@ -10105,7 +10210,10 @@ static struct cftype files[] = {
.name = "usage_percpu",
.read_seq_string = cpuacct_percpu_seq_read,
},
-
+ {
+ .name = "stat",
+ .read_map = cpuacct_stats_show,
+ },
};
static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -10127,12 +10235,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
return;
cpu = task_cpu(tsk);
+
+ rcu_read_lock();
+
ca = task_ca(tsk);
for (; ca; ca = ca->parent) {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
+
+ rcu_read_unlock();
+}
+
+/*
+ * Charge the system/user time to the task's accounting group.
+ */
+static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val)
+{
+ struct cpuacct *ca;
+
+ if (unlikely(!cpuacct_subsys.active))
+ return;
+
+ rcu_read_lock();
+ ca = task_ca(tsk);
+
+ do {
+ percpu_counter_add(&ca->cpustat[idx], val);
+ ca = ca->parent;
+ } while (ca);
+ rcu_read_unlock();
}
struct cgroup_subsys cpuacct_subsys = {
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 1e00bfacf9b8..cdd3c89574cd 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -55,7 +55,7 @@ static int convert_prio(int prio)
* cpupri_find - find the best (lowest-pri) CPU in the system
* @cp: The cpupri context
* @p: The task
- * @lowest_mask: A mask to fill in with selected CPUs
+ * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
*
* Note: This function returns the recommended CPUs as calculated during the
* current invokation. By the time the call returns, the CPUs may have in
@@ -81,7 +81,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
continue;
- cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+ if (lowest_mask)
+ cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
return 1;
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 299d012b4394..f2c66f8f9712 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -948,20 +948,15 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
- cpumask_var_t mask;
-
if (rq->curr->rt.nr_cpus_allowed == 1)
return;
- if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
- return;
-
if (p->rt.nr_cpus_allowed != 1
- && cpupri_find(&rq->rd->cpupri, p, mask))
- goto free;
+ && cpupri_find(&rq->rd->cpupri, p, NULL))
+ return;
- if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask))
- goto free;
+ if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
+ return;
/*
* There appears to be other cpus that can accept
@@ -970,8 +965,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
*/
requeue_task_rt(rq, p, 1);
resched_task(rq->curr);
-free:
- free_cpumask_var(mask);
}
#endif /* CONFIG_SMP */
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index cf2bc01186ef..b28d19135f43 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -609,14 +609,14 @@ void slow_work_unregister_user(void)
if (slow_work_user_count == 0) {
printk(KERN_NOTICE "Slow work thread pool: Shutting down\n");
slow_work_threads_should_exit = true;
+ del_timer_sync(&slow_work_cull_timer);
+ del_timer_sync(&slow_work_oom_timer);
wake_up_all(&slow_work_thread_wq);
wait_for_completion(&slow_work_last_thread_exited);
printk(KERN_NOTICE "Slow work thread pool:"
" Shut down complete\n");
}
- del_timer_sync(&slow_work_cull_timer);
-
mutex_unlock(&slow_work_user_lock);
}
EXPORT_SYMBOL(slow_work_unregister_user);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 2fecefacdc5b..b525dd348511 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -472,9 +472,9 @@ void tasklet_kill(struct tasklet_struct *t)
printk("Attempt to kill tasklet from interrupt\n");
while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
- do
+ do {
yield();
- while (test_bit(TASKLET_STATE_SCHED, &t->state));
+ } while (test_bit(TASKLET_STATE_SCHED, &t->state));
}
tasklet_unlock_wait(t);
clear_bit(TASKLET_STATE_SCHED, &t->state);
diff --git a/kernel/sys.c b/kernel/sys.c
index 14c4c5613118..438d99a38c87 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -361,6 +361,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
void __user *, arg)
{
char buffer[256];
+ int ret = 0;
/* We only trust the superuser with rebooting the system. */
if (!capable(CAP_SYS_BOOT))
@@ -398,7 +399,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
kernel_halt();
unlock_kernel();
do_exit(0);
- break;
+ panic("cannot halt");
case LINUX_REBOOT_CMD_POWER_OFF:
kernel_power_off();
@@ -418,29 +419,22 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
#ifdef CONFIG_KEXEC
case LINUX_REBOOT_CMD_KEXEC:
- {
- int ret;
- ret = kernel_kexec();
- unlock_kernel();
- return ret;
- }
+ ret = kernel_kexec();
+ break;
#endif
#ifdef CONFIG_HIBERNATION
case LINUX_REBOOT_CMD_SW_SUSPEND:
- {
- int ret = hibernate();
- unlock_kernel();
- return ret;
- }
+ ret = hibernate();
+ break;
#endif
default:
- unlock_kernel();
- return -EINVAL;
+ ret = -EINVAL;
+ break;
}
unlock_kernel();
- return 0;
+ return ret;
}
static void deferred_cad(struct work_struct *dummy)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8ba457838d95..8203d70928d5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -903,16 +903,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
-#ifdef CONFIG_UNEVICTABLE_LRU
- {
- .ctl_name = CTL_UNNUMBERED,
- .procname = "scan_unevictable_pages",
- .data = &scan_unevictable_pages,
- .maxlen = sizeof(scan_unevictable_pages),
- .mode = 0644,
- .proc_handler = &scan_unevictable_handler,
- },
-#endif
#ifdef CONFIG_SLOW_WORK
{
.ctl_name = CTL_UNNUMBERED,
@@ -1313,6 +1303,16 @@ static struct ctl_table vm_table[] = {
.extra2 = &one,
},
#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "scan_unevictable_pages",
+ .data = &scan_unevictable_pages,
+ .maxlen = sizeof(scan_unevictable_pages),
+ .mode = 0644,
+ .proc_handler = &scan_unevictable_handler,
+ },
+#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c46c931a7fe7..ecfd7b5187e0 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -181,12 +181,12 @@ static void clocksource_watchdog(unsigned long data)
resumed = test_and_clear_bit(0, &watchdog_resumed);
- wdnow = watchdog->read();
+ wdnow = watchdog->read(watchdog);
wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask);
watchdog_last = wdnow;
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
- csnow = cs->read();
+ csnow = cs->read(cs);
if (unlikely(resumed)) {
cs->wd_last = csnow;
@@ -247,7 +247,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
list_add(&cs->wd_list, &watchdog_list);
if (!started && watchdog) {
- watchdog_last = watchdog->read();
+ watchdog_last = watchdog->read(watchdog);
watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer,
cpumask_first(cpu_online_mask));
@@ -268,7 +268,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
cse->flags &= ~CLOCK_SOURCE_WATCHDOG;
/* Start if list is not empty */
if (!list_empty(&watchdog_list)) {
- watchdog_last = watchdog->read();
+ watchdog_last = watchdog->read(watchdog);
watchdog_timer.expires =
jiffies + WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer,
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 06f197560f3b..c3f6c30816e3 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -50,7 +50,7 @@
*/
#define JIFFIES_SHIFT 8
-static cycle_t jiffies_read(void)
+static cycle_t jiffies_read(struct clocksource *cs)
{
return (cycle_t) jiffies;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 900f1b6598d1..687dff49f6e7 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -182,7 +182,7 @@ EXPORT_SYMBOL(do_settimeofday);
*/
static void change_clocksource(void)
{
- struct clocksource *new;
+ struct clocksource *new, *old;
new = clocksource_get_next();
@@ -191,11 +191,16 @@ static void change_clocksource(void)
clocksource_forward_now();
- new->raw_time = clock->raw_time;
+ if (clocksource_enable(new))
+ return;
+ new->raw_time = clock->raw_time;
+ old = clock;
clock = new;
+ clocksource_disable(old);
+
clock->cycle_last = 0;
- clock->cycle_last = clocksource_read(new);
+ clock->cycle_last = clocksource_read(clock);
clock->error = 0;
clock->xtime_nsec = 0;
clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
@@ -292,6 +297,7 @@ void __init timekeeping_init(void)
ntp_init();
clock = clocksource_get_next();
+ clocksource_enable(clock);
clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
clock->cycle_last = clocksource_read(clock);
diff --git a/kernel/timer.c b/kernel/timer.c
index 672ca25fbc43..fed53be44fd9 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -532,10 +532,13 @@ static void __init_timer(struct timer_list *timer,
}
/**
- * init_timer - initialize a timer.
+ * init_timer_key - initialize a timer
* @timer: the timer to be initialized
+ * @name: name of the timer
+ * @key: lockdep class key of the fake lock used for tracking timer
+ * sync lock dependencies
*
- * init_timer() must be done to a timer prior calling *any* of the
+ * init_timer_key() must be done to a timer prior calling *any* of the
* other timer functions.
*/
void init_timer_key(struct timer_list *timer,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2246141bda4d..417d1985e299 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -312,7 +312,7 @@ config KMEMTRACE
and profile kernel code.
This requires an userspace application to use. See
- Documentation/vm/kmemtrace.txt for more information.
+ Documentation/trace/kmemtrace.txt for more information.
Saying Y will make the kernel somewhat larger and slower. However,
if you disable kmemtrace at run-time or boot-time, the performance
@@ -403,7 +403,7 @@ config MMIOTRACE
implementation and works via page faults. Tracing is disabled by
default and can be enabled at run-time.
- See Documentation/tracers/mmiotrace.txt.
+ See Documentation/trace/mmiotrace.txt.
If you are not helping to develop drivers, say N.
config MMIOTRACE_TEST
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b32ff446c3fb..921ef5d1f0ba 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1377,12 +1377,12 @@ static int blk_trace_str2mask(const char *str)
{
int i;
int mask = 0;
- char *s, *token;
+ char *buf, *s, *token;
- s = kstrdup(str, GFP_KERNEL);
- if (s == NULL)
+ buf = kstrdup(str, GFP_KERNEL);
+ if (buf == NULL)
return -ENOMEM;
- s = strstrip(s);
+ s = strstrip(buf);
while (1) {
token = strsep(&s, ",");
@@ -1403,7 +1403,7 @@ static int blk_trace_str2mask(const char *str)
break;
}
}
- kfree(s);
+ kfree(buf);
return mask;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9d28476a9851..1ce5dc6372b8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3277,19 +3277,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
info->tr = &global_trace;
info->cpu = cpu;
- info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
+ info->spare = NULL;
/* Force reading ring buffer for first read */
info->read = (unsigned int)-1;
- if (!info->spare)
- goto out;
filp->private_data = info;
- return 0;
-
- out:
- kfree(info);
- return -ENOMEM;
+ return nonseekable_open(inode, filp);
}
static ssize_t
@@ -3304,6 +3298,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
if (!count)
return 0;
+ if (!info->spare)
+ info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
+ if (!info->spare)
+ return -ENOMEM;
+
/* Do we have previous read data to read? */
if (info->read < PAGE_SIZE)
goto read;
@@ -3342,7 +3341,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
{
struct ftrace_buffer_info *info = file->private_data;
- ring_buffer_free_read_page(info->tr->buffer, info->spare);
+ if (info->spare)
+ ring_buffer_free_read_page(info->tr->buffer, info->spare);
kfree(info);
return 0;
@@ -3428,14 +3428,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
int size, i;
size_t ret;
- /*
- * We can't seek on a buffer input
- */
- if (unlikely(*ppos))
- return -ESPIPE;
+ if (*ppos & (PAGE_SIZE - 1)) {
+ WARN_ONCE(1, "Ftrace: previous read must page-align\n");
+ return -EINVAL;
+ }
+ if (len & (PAGE_SIZE - 1)) {
+ WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
+ if (len < PAGE_SIZE)
+ return -EINVAL;
+ len &= PAGE_MASK;
+ }
- for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) {
+ for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
struct page *page;
int r;
@@ -3474,6 +3479,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
spd.partial[i].offset = 0;
spd.partial[i].private = (unsigned long)ref;
spd.nr_pages++;
+ *ppos += PAGE_SIZE;
}
spd.nr_pages = i;
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index ad8c22efff41..8333715e4066 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -155,6 +155,13 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter,
return TRACE_TYPE_HANDLED;
}
+static void branch_print_header(struct seq_file *s)
+{
+ seq_puts(s, "# TASK-PID CPU# TIMESTAMP CORRECT"
+ " FUNC:FILE:LINE\n");
+ seq_puts(s, "# | | | | | "
+ " |\n");
+}
static struct trace_event trace_branch_event = {
.type = TRACE_BRANCH,
@@ -169,6 +176,7 @@ static struct tracer branch_trace __read_mostly =
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_branch,
#endif /* CONFIG_FTRACE_SELFTEST */
+ .print_header = branch_print_header,
};
__init static int init_branch_tracer(void)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 64ec4d278ffb..576f4fa2af0d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -503,6 +503,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
+ buf[cnt] = '\0';
pred = kzalloc(sizeof(*pred), GFP_KERNEL);
if (!pred)
@@ -520,9 +521,10 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
return cnt;
}
- if (filter_add_pred(call, pred)) {
+ err = filter_add_pred(call, pred);
+ if (err < 0) {
filter_free_pred(pred);
- return -EINVAL;
+ return err;
}
*ppos += cnt;
@@ -569,6 +571,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
+ buf[cnt] = '\0';
pred = kzalloc(sizeof(*pred), GFP_KERNEL);
if (!pred)
@@ -586,10 +589,11 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
return cnt;
}
- if (filter_add_subsystem_pred(system, pred)) {
+ err = filter_add_subsystem_pred(system, pred);
+ if (err < 0) {
filter_free_subsystem_preds(system);
filter_free_pred(pred);
- return -EINVAL;
+ return err;
}
*ppos += cnt;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 026be412f356..e03cbf1e38f3 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -215,7 +215,7 @@ static int __filter_add_pred(struct ftrace_event_call *call,
}
}
- return -ENOMEM;
+ return -ENOSPC;
}
static int is_string_field(const char *type)
@@ -319,7 +319,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
}
if (i == MAX_FILTER_PRED)
- return -EINVAL;
+ return -ENOSPC;
events_for_each(call) {
int err;
@@ -410,16 +410,22 @@ int filter_parse(char **pbuf, struct filter_pred *pred)
}
}
+ if (!val_str) {
+ pred->field_name = NULL;
+ return -EINVAL;
+ }
+
pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
if (!pred->field_name)
return -ENOMEM;
- pred->val = simple_strtoull(val_str, &tmp, 10);
+ pred->val = simple_strtoull(val_str, &tmp, 0);
if (tmp == val_str) {
pred->str_val = kstrdup(val_str, GFP_KERNEL);
if (!pred->str_val)
return -ENOMEM;
- }
+ } else if (*tmp != '\0')
+ return -EINVAL;
return 0;
}
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 30743f7d4110..d363c6672c6c 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -105,10 +105,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
return 0;
#undef __entry
-#define __entry "REC"
+#define __entry REC
#undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args
+#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
#undef TP_fast_assign
#define TP_fast_assign(args...) args
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index bae791ebcc51..118439709fb7 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -186,6 +186,12 @@ static enum print_line_t power_print_line(struct trace_iterator *iter)
return TRACE_TYPE_UNHANDLED;
}
+static void power_print_header(struct seq_file *s)
+{
+ seq_puts(s, "# TIMESTAMP STATE EVENT\n");
+ seq_puts(s, "# | | |\n");
+}
+
static struct tracer power_tracer __read_mostly =
{
.name = "power",
@@ -194,6 +200,7 @@ static struct tracer power_tracer __read_mostly =
.stop = stop_power_trace,
.reset = power_trace_reset,
.print_line = power_print_line,
+ .print_header = power_print_header,
};
static int init_power_trace(void)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index a2a3af29c943..5e579645ac86 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,5 @@
+#include <trace/syscall.h>
#include <linux/kernel.h>
-#include <linux/ftrace.h>
#include <asm/syscall.h>
#include "trace_output.h"
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b6b966ce1451..f71fb2a08950 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -966,20 +966,20 @@ undo:
}
#ifdef CONFIG_SMP
-static struct workqueue_struct *work_on_cpu_wq __read_mostly;
struct work_for_cpu {
- struct work_struct work;
+ struct completion completion;
long (*fn)(void *);
void *arg;
long ret;
};
-static void do_work_for_cpu(struct work_struct *w)
+static int do_work_for_cpu(void *_wfc)
{
- struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
-
+ struct work_for_cpu *wfc = _wfc;
wfc->ret = wfc->fn(wfc->arg);
+ complete(&wfc->completion);
+ return 0;
}
/**
@@ -990,17 +990,23 @@ static void do_work_for_cpu(struct work_struct *w)
*
* This will return the value @fn returns.
* It is up to the caller to ensure that the cpu doesn't go offline.
+ * The caller must not hold any locks which would prevent @fn from completing.
*/
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
- struct work_for_cpu wfc;
-
- INIT_WORK(&wfc.work, do_work_for_cpu);
- wfc.fn = fn;
- wfc.arg = arg;
- queue_work_on(cpu, work_on_cpu_wq, &wfc.work);
- flush_work(&wfc.work);
-
+ struct task_struct *sub_thread;
+ struct work_for_cpu wfc = {
+ .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
+ .fn = fn,
+ .arg = arg,
+ };
+
+ sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
+ if (IS_ERR(sub_thread))
+ return PTR_ERR(sub_thread);
+ kthread_bind(sub_thread, cpu);
+ wake_up_process(sub_thread);
+ wait_for_completion(&wfc.completion);
return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
@@ -1016,8 +1022,4 @@ void __init init_workqueues(void)
hotcpu_notifier(workqueue_cpu_callback, 0);
keventd_wq = create_workqueue("events");
BUG_ON(!keventd_wq);
-#ifdef CONFIG_SMP
- work_on_cpu_wq = create_workqueue("work_on_cpu");
- BUG_ON(!work_on_cpu_wq);
-#endif
}