author | Ingo Molnar <mingo@kernel.org> | 2016-03-21 10:47:40 +0100 |
committer | Ingo Molnar <mingo@kernel.org> | 2016-03-21 10:47:40 +0100 |
commit | 42e405f7b1d252c90a2468dd2140f47b8142b7a0 (patch) | |
tree | baeda52d8726fe694d3344a0d0fda1dadc30a901 /kernel/sched/core.c | |
parent | e9532e69b8d1d1284e8ecf8d2586de34aec61244 (diff) | |
parent | 710d60cbf1b312a8075a2158cbfbbd9c66132dcc (diff) | |
Merge branch 'linus' into sched/urgent, to pick up dependencies
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 507 |
1 file changed, 122 insertions(+), 385 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 406182af99ac..4d872e19244b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -26,6 +26,7 @@ * Thomas Gleixner, Mike Kravetz */ +#include <linux/kasan.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/nmi.h> @@ -66,12 +67,10 @@ #include <linux/pagemap.h> #include <linux/hrtimer.h> #include <linux/tick.h> -#include <linux/debugfs.h> #include <linux/ctype.h> #include <linux/ftrace.h> #include <linux/slab.h> #include <linux/init_task.h> -#include <linux/binfmts.h> #include <linux/context_tracking.h> #include <linux/compiler.h> @@ -124,138 +123,6 @@ const_debug unsigned int sysctl_sched_features = #undef SCHED_FEAT -#ifdef CONFIG_SCHED_DEBUG -#define SCHED_FEAT(name, enabled) \ - #name , - -static const char * const sched_feat_names[] = { -#include "features.h" -}; - -#undef SCHED_FEAT - -static int sched_feat_show(struct seq_file *m, void *v) -{ - int i; - - for (i = 0; i < __SCHED_FEAT_NR; i++) { - if (!(sysctl_sched_features & (1UL << i))) - seq_puts(m, "NO_"); - seq_printf(m, "%s ", sched_feat_names[i]); - } - seq_puts(m, "\n"); - - return 0; -} - -#ifdef HAVE_JUMP_LABEL - -#define jump_label_key__true STATIC_KEY_INIT_TRUE -#define jump_label_key__false STATIC_KEY_INIT_FALSE - -#define SCHED_FEAT(name, enabled) \ - jump_label_key__##enabled , - -struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { -#include "features.h" -}; - -#undef SCHED_FEAT - -static void sched_feat_disable(int i) -{ - static_key_disable(&sched_feat_keys[i]); -} - -static void sched_feat_enable(int i) -{ - static_key_enable(&sched_feat_keys[i]); -} -#else -static void sched_feat_disable(int i) { }; -static void sched_feat_enable(int i) { }; -#endif /* HAVE_JUMP_LABEL */ - -static int sched_feat_set(char *cmp) -{ - int i; - int neg = 0; - - if (strncmp(cmp, "NO_", 3) == 0) { - neg = 1; - cmp += 3; - } - - for (i = 0; i < __SCHED_FEAT_NR; i++) { - if (strcmp(cmp, sched_feat_names[i]) == 0) { - if (neg) { - sysctl_sched_features &= ~(1UL << i); - sched_feat_disable(i); - } else { - sysctl_sched_features |= (1UL << i); - sched_feat_enable(i); - } - break; - } - } - - return i; -} - -static ssize_t -sched_feat_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[64]; - char *cmp; - int i; - struct inode *inode; - - if (cnt > 63) - cnt = 63; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - cmp = strstrip(buf); - - /* Ensure the static_key remains in a consistent state */ - inode = file_inode(filp); - inode_lock(inode); - i = sched_feat_set(cmp); - inode_unlock(inode); - if (i == __SCHED_FEAT_NR) - return -EINVAL; - - *ppos += cnt; - - return cnt; -} - -static int sched_feat_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, sched_feat_show, NULL); -} - -static const struct file_operations sched_feat_fops = { - .open = sched_feat_open, - .write = sched_feat_write, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static __init int sched_init_debug(void) -{ - debugfs_create_file("sched_features", 0644, NULL, NULL, - &sched_feat_fops); - - return 0; -} -late_initcall(sched_init_debug); -#endif /* CONFIG_SCHED_DEBUG */ - /* * Number of tasks to iterate in a single balance run. * Limited because this is done with IRQs disabled. 
@@ -453,20 +320,6 @@ static inline void init_hrtick(void) } #endif /* CONFIG_SCHED_HRTICK */ -/* - * cmpxchg based fetch_or, macro so it works for different integer types - */ -#define fetch_or(ptr, val) \ -({ typeof(*(ptr)) __old, __val = *(ptr); \ - for (;;) { \ - __old = cmpxchg((ptr), __val, __val | (val)); \ - if (__old == __val) \ - break; \ - __val = __old; \ - } \ - __old; \ -}) - #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) /* * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, @@ -715,31 +568,36 @@ static inline bool got_nohz_idle_kick(void) #endif /* CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL -bool sched_can_stop_tick(void) +bool sched_can_stop_tick(struct rq *rq) { + int fifo_nr_running; + + /* Deadline tasks, even if single, need the tick */ + if (rq->dl.dl_nr_running) + return false; + /* - * FIFO realtime policy runs the highest priority task. Other runnable - * tasks are of a lower priority. The scheduler tick does nothing. + * FIFO realtime policy runs the highest priority task (after DEADLINE). + * Other runnable tasks are of a lower priority. The scheduler tick + * isn't needed. */ - if (current->policy == SCHED_FIFO) + fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running; + if (fifo_nr_running) return true; /* * Round-robin realtime tasks time slice with other tasks at the same - * realtime priority. Is this task the only one at this priority? + * realtime priority. */ - if (current->policy == SCHED_RR) { - struct sched_rt_entity *rt_se = ¤t->rt; - - return list_is_singular(&rt_se->run_list); + if (rq->rt.rr_nr_running) { + if (rq->rt.rr_nr_running == 1) + return true; + else + return false; } - /* - * More than one running task need preemption. - * nr_running update is assumed to be visible - * after IPI is sent from wakers. 
- */ - if (this_rq()->nr_running > 1) + /* Normal multitasking need periodic preemption checks */ + if (rq->cfs.nr_running > 1) return false; return true; @@ -2093,7 +1951,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) ttwu_queue(p, cpu); stat: - ttwu_stat(p, cpu, wake_flags); + if (schedstat_enabled()) + ttwu_stat(p, cpu, wake_flags); out: raw_spin_unlock_irqrestore(&p->pi_lock, flags); @@ -2141,7 +2000,8 @@ static void try_to_wake_up_local(struct task_struct *p) ttwu_activate(rq, p, ENQUEUE_WAKEUP); ttwu_do_wakeup(rq, p, 0); - ttwu_stat(p, smp_processor_id(), 0); + if (schedstat_enabled()) + ttwu_stat(p, smp_processor_id(), 0); out: raw_spin_unlock(&p->pi_lock); } @@ -2183,7 +2043,6 @@ void __dl_clear_params(struct task_struct *p) dl_se->dl_bw = 0; dl_se->dl_throttled = 0; - dl_se->dl_new = 1; dl_se->dl_yielded = 0; } @@ -2210,6 +2069,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) #endif #ifdef CONFIG_SCHEDSTATS + /* Even if schedstat is disabled, there should not be garbage */ memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif @@ -2218,6 +2078,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) __dl_clear_params(p); INIT_LIST_HEAD(&p->rt.run_list); + p->rt.timeout = 0; + p->rt.time_slice = sched_rr_timeslice; + p->rt.on_rq = 0; + p->rt.on_list = 0; #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); @@ -2281,6 +2145,69 @@ int sysctl_numa_balancing(struct ctl_table *table, int write, #endif #endif +DEFINE_STATIC_KEY_FALSE(sched_schedstats); + +#ifdef CONFIG_SCHEDSTATS +static void set_schedstats(bool enabled) +{ + if (enabled) + static_branch_enable(&sched_schedstats); + else + static_branch_disable(&sched_schedstats); +} + +void force_schedstat_enabled(void) +{ + if (!schedstat_enabled()) { + pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); + static_branch_enable(&sched_schedstats); + } +} + +static int __init setup_schedstats(char *str) +{ + int ret = 0; + if (!str) + goto out; + + if (!strcmp(str, "enable")) { + set_schedstats(true); + ret = 1; + } else if (!strcmp(str, "disable")) { + set_schedstats(false); + ret = 1; + } +out: + if (!ret) + pr_warn("Unable to parse schedstats=\n"); + + return ret; +} +__setup("schedstats=", setup_schedstats); + +#ifdef CONFIG_PROC_SYSCTL +int sysctl_schedstats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int err; + int state = static_branch_likely(&sched_schedstats); + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + t = *table; + t.data = &state; + err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + if (write) + set_schedstats(state); + return err; +} +#endif +#endif + /* * fork()/clone()-time setup: */ @@ -3010,16 +2937,6 @@ u64 scheduler_tick_max_deferment(void) } #endif -notrace unsigned long get_parent_ip(unsigned long addr) -{ - if (in_lock_functions(addr)) { - addr = CALLER_ADDR2; - if (in_lock_functions(addr)) - addr = CALLER_ADDR3; - } - return addr; -} - #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ defined(CONFIG_PREEMPT_TRACER)) @@ -3041,7 +2958,7 @@ void preempt_count_add(int val) PREEMPT_MASK - 10); #endif if (preempt_count() == val) { - unsigned long ip = get_parent_ip(CALLER_ADDR1); + unsigned long ip = get_lock_parent_ip(); #ifdef CONFIG_DEBUG_PREEMPT current->preempt_disable_ip = ip; #endif @@ -3068,7 +2985,7 @@ void 
preempt_count_sub(int val) #endif if (preempt_count() == val) - trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); + trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); __preempt_count_sub(val); } EXPORT_SYMBOL(preempt_count_sub); @@ -3280,7 +3197,6 @@ static void __sched notrace __schedule(bool preempt) trace_sched_switch(preempt, prev, next); rq = context_switch(rq, prev, next); /* unlocks the rq */ - cpu = cpu_of(rq); } else { lockdep_unpin_lock(&rq->lock); raw_spin_unlock_irq(&rq->lock); @@ -3466,7 +3382,7 @@ EXPORT_SYMBOL(default_wake_function); */ void rt_mutex_setprio(struct task_struct *p, int prio) { - int oldprio, queued, running, enqueue_flag = ENQUEUE_RESTORE; + int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; struct rq *rq; const struct sched_class *prev_class; @@ -3494,11 +3410,15 @@ void rt_mutex_setprio(struct task_struct *p, int prio) trace_sched_pi_setprio(p, prio); oldprio = p->prio; + + if (oldprio == prio) + queue_flag &= ~DEQUEUE_MOVE; + prev_class = p->sched_class; queued = task_on_rq_queued(p); running = task_current(rq, p); if (queued) - dequeue_task(rq, p, DEQUEUE_SAVE); + dequeue_task(rq, p, queue_flag); if (running) put_prev_task(rq, p); @@ -3516,7 +3436,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) if (!dl_prio(p->normal_prio) || (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { p->dl.dl_boosted = 1; - enqueue_flag |= ENQUEUE_REPLENISH; + queue_flag |= ENQUEUE_REPLENISH; } else p->dl.dl_boosted = 0; p->sched_class = &dl_sched_class; @@ -3524,7 +3444,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) if (dl_prio(oldprio)) p->dl.dl_boosted = 0; if (oldprio < prio) - enqueue_flag |= ENQUEUE_HEAD; + queue_flag |= ENQUEUE_HEAD; p->sched_class = &rt_sched_class; } else { if (dl_prio(oldprio)) @@ -3539,7 +3459,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) if (running) p->sched_class->set_curr_task(rq); if (queued) - enqueue_task(rq, p, enqueue_flag); + enqueue_task(rq, p, queue_flag); check_class_changed(rq, p, prev_class, oldprio); out_unlock: @@ -3895,6 +3815,7 @@ static int __sched_setscheduler(struct task_struct *p, const struct sched_class *prev_class; struct rq *rq; int reset_on_fork; + int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE; /* may grab non-irq protected spin_locks */ BUG_ON(in_interrupt()); @@ -4077,17 +3998,14 @@ change: * itself. */ new_effective_prio = rt_mutex_get_effective_prio(p, newprio); - if (new_effective_prio == oldprio) { - __setscheduler_params(p, attr); - task_rq_unlock(rq, p, &flags); - return 0; - } + if (new_effective_prio == oldprio) + queue_flags &= ~DEQUEUE_MOVE; } queued = task_on_rq_queued(p); running = task_current(rq, p); if (queued) - dequeue_task(rq, p, DEQUEUE_SAVE); + dequeue_task(rq, p, queue_flags); if (running) put_prev_task(rq, p); @@ -4097,15 +4015,14 @@ change: if (running) p->sched_class->set_curr_task(rq); if (queued) { - int enqueue_flags = ENQUEUE_RESTORE; /* * We enqueue to tail when the priority of a task is * increased (user space view). 
*/ - if (oldprio <= p->prio) - enqueue_flags |= ENQUEUE_HEAD; + if (oldprio < p->prio) + queue_flags |= ENQUEUE_HEAD; - enqueue_task(rq, p, enqueue_flags); + enqueue_task(rq, p, queue_flags); } check_class_changed(rq, p, prev_class, oldprio); @@ -5096,6 +5013,8 @@ void init_idle(struct task_struct *idle, int cpu) idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); + kasan_unpoison_task_stack(idle); + #ifdef CONFIG_SMP /* * Its possible that init_idle() gets called multiple times on a task, @@ -5405,183 +5324,6 @@ static void migrate_tasks(struct rq *dead_rq) } #endif /* CONFIG_HOTPLUG_CPU */ -#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) - -static struct ctl_table sd_ctl_dir[] = { - { - .procname = "sched_domain", - .mode = 0555, - }, - {} -}; - -static struct ctl_table sd_ctl_root[] = { - { - .procname = "kernel", - .mode = 0555, - .child = sd_ctl_dir, - }, - {} -}; - -static struct ctl_table *sd_alloc_ctl_entry(int n) -{ - struct ctl_table *entry = - kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); - - return entry; -} - -static void sd_free_ctl_entry(struct ctl_table **tablep) -{ - struct ctl_table *entry; - - /* - * In the intermediate directories, both the child directory and - * procname are dynamically allocated and could fail but the mode - * will always be set. In the lowest directory the names are - * static strings and all have proc handlers. - */ - for (entry = *tablep; entry->mode; entry++) { - if (entry->child) - sd_free_ctl_entry(&entry->child); - if (entry->proc_handler == NULL) - kfree(entry->procname); - } - - kfree(*tablep); - *tablep = NULL; -} - -static int min_load_idx = 0; -static int max_load_idx = CPU_LOAD_IDX_MAX-1; - -static void -set_table_entry(struct ctl_table *entry, - const char *procname, void *data, int maxlen, - umode_t mode, proc_handler *proc_handler, - bool load_idx) -{ - entry->procname = procname; - entry->data = data; - entry->maxlen = maxlen; - entry->mode = mode; - entry->proc_handler = proc_handler; - - if (load_idx) { - entry->extra1 = &min_load_idx; - entry->extra2 = &max_load_idx; - } -} - -static struct ctl_table * -sd_alloc_ctl_domain_table(struct sched_domain *sd) -{ - struct ctl_table *table = sd_alloc_ctl_entry(14); - - if (table == NULL) - return NULL; - - set_table_entry(&table[0], "min_interval", &sd->min_interval, - sizeof(long), 0644, proc_doulongvec_minmax, false); - set_table_entry(&table[1], "max_interval", &sd->max_interval, - sizeof(long), 0644, proc_doulongvec_minmax, false); - set_table_entry(&table[2], "busy_idx", &sd->busy_idx, - sizeof(int), 0644, proc_dointvec_minmax, true); - set_table_entry(&table[3], "idle_idx", &sd->idle_idx, - sizeof(int), 0644, proc_dointvec_minmax, true); - set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, - sizeof(int), 0644, proc_dointvec_minmax, true); - set_table_entry(&table[5], "wake_idx", &sd->wake_idx, - sizeof(int), 0644, proc_dointvec_minmax, true); - set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, - sizeof(int), 0644, proc_dointvec_minmax, true); - set_table_entry(&table[7], "busy_factor", &sd->busy_factor, - sizeof(int), 0644, proc_dointvec_minmax, false); - set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, - sizeof(int), 0644, proc_dointvec_minmax, false); - set_table_entry(&table[9], "cache_nice_tries", - &sd->cache_nice_tries, - sizeof(int), 0644, proc_dointvec_minmax, false); - set_table_entry(&table[10], "flags", &sd->flags, - sizeof(int), 0644, proc_dointvec_minmax, false); - set_table_entry(&table[11], 
"max_newidle_lb_cost", - &sd->max_newidle_lb_cost, - sizeof(long), 0644, proc_doulongvec_minmax, false); - set_table_entry(&table[12], "name", sd->name, - CORENAME_MAX_SIZE, 0444, proc_dostring, false); - /* &table[13] is terminator */ - - return table; -} - -static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu) -{ - struct ctl_table *entry, *table; - struct sched_domain *sd; - int domain_num = 0, i; - char buf[32]; - - for_each_domain(cpu, sd) - domain_num++; - entry = table = sd_alloc_ctl_entry(domain_num + 1); - if (table == NULL) - return NULL; - - i = 0; - for_each_domain(cpu, sd) { - snprintf(buf, 32, "domain%d", i); - entry->procname = kstrdup(buf, GFP_KERNEL); - entry->mode = 0555; - entry->child = sd_alloc_ctl_domain_table(sd); - entry++; - i++; - } - return table; -} - -static struct ctl_table_header *sd_sysctl_header; -static void register_sched_domain_sysctl(void) -{ - int i, cpu_num = num_possible_cpus(); - struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); - char buf[32]; - - WARN_ON(sd_ctl_dir[0].child); - sd_ctl_dir[0].child = entry; - - if (entry == NULL) - return; - - for_each_possible_cpu(i) { - snprintf(buf, 32, "cpu%d", i); - entry->procname = kstrdup(buf, GFP_KERNEL); - entry->mode = 0555; - entry->child = sd_alloc_ctl_cpu_table(i); - entry++; - } - - WARN_ON(sd_sysctl_header); - sd_sysctl_header = register_sysctl_table(sd_ctl_root); -} - -/* may be called multiple times per register */ -static void unregister_sched_domain_sysctl(void) -{ - unregister_sysctl_table(sd_sysctl_header); - sd_sysctl_header = NULL; - if (sd_ctl_dir[0].child) - sd_free_ctl_entry(&sd_ctl_dir[0].child); -} -#else -static void register_sched_domain_sysctl(void) -{ -} -static void unregister_sched_domain_sysctl(void) -{ -} -#endif /* CONFIG_SCHED_DEBUG && CONFIG_SYSCTL */ - static void set_rq_online(struct rq *rq) { if (!rq->online) { @@ -5693,16 +5435,6 @@ static int sched_cpu_active(struct notifier_block *nfb, set_cpu_rq_start_time(); return NOTIFY_OK; - case CPU_ONLINE: - /* - * At this point a starting CPU has marked itself as online via - * set_cpu_online(). But it might not yet have marked itself - * as active, which is essential from here on. - */ - set_cpu_active(cpu, true); - stop_machine_unpark(cpu); - return NOTIFY_OK; - case CPU_DOWN_FAILED: set_cpu_active(cpu, true); return NOTIFY_OK; @@ -6174,11 +5906,16 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) { + int ret; + alloc_bootmem_cpumask_var(&cpu_isolated_map); - cpulist_parse(str, cpu_isolated_map); + ret = cpulist_parse(str, cpu_isolated_map); + if (ret) { + pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids); + return 0; + } return 1; } - __setup("isolcpus=", isolated_cpu_setup); struct s_data { @@ -7889,7 +7626,7 @@ void sched_move_task(struct task_struct *tsk) queued = task_on_rq_queued(tsk); if (queued) - dequeue_task(rq, tsk, DEQUEUE_SAVE); + dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE); if (unlikely(running)) put_prev_task(rq, tsk); @@ -7913,7 +7650,7 @@ void sched_move_task(struct task_struct *tsk) if (unlikely(running)) tsk->sched_class->set_curr_task(rq); if (queued) - enqueue_task(rq, tsk, ENQUEUE_RESTORE); + enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE); task_rq_unlock(rq, tsk, &flags); } |