diff options
author | Gregory Haskins <ghaskins@novell.com> | 2008-01-25 21:08:09 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-25 21:08:09 +0100 |
commit | e7693a362ec84bb5b6fd441d8a8b4b9d568a7a0c (patch) | |
tree | 078940540641a59aaf199695bfc6de3f062a987b /kernel/sched.c | |
parent | 697f0a487f294e634a342764472b79375bb3158a (diff) | |
download | lwn-e7693a362ec84bb5b6fd441d8a8b4b9d568a7a0c.tar.gz lwn-e7693a362ec84bb5b6fd441d8a8b4b9d568a7a0c.zip |
sched: de-SCHED_OTHER-ize the RT path
The current wake-up code path tries to determine if it can optimize the
wake-up to "this_cpu" by computing load calculations. The problem is that
these calculations are only relevant to SCHED_OTHER tasks where load is king.
For RT tasks, priority is king. So the load calculation is completely wasted
bandwidth.
Therefore, we create a new sched_class interface to help with
pre-wakeup routing decisions and move the load calculation as a function
of CFS task's class.
Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 167 |
1 files changed, 27 insertions, 140 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 66e99b419b31..3344ba776b97 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -960,6 +960,13 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) update_load_sub(&rq->load, load); } +#ifdef CONFIG_SMP +static unsigned long source_load(int cpu, int type); +static unsigned long target_load(int cpu, int type); +static unsigned long cpu_avg_load_per_task(int cpu); +static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); +#endif /* CONFIG_SMP */ + #include "sched_stats.h" #include "sched_idletask.c" #include "sched_fair.c" @@ -1118,7 +1125,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) /* * Is this task likely cache-hot: */ -static inline int +static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) { s64 delta; @@ -1343,7 +1350,7 @@ static unsigned long target_load(int cpu, int type) /* * Return the average load per task on the cpu's run queue */ -static inline unsigned long cpu_avg_load_per_task(int cpu) +static unsigned long cpu_avg_load_per_task(int cpu) { struct rq *rq = cpu_rq(cpu); unsigned long total = weighted_cpuload(cpu); @@ -1500,58 +1507,6 @@ static int sched_balance_self(int cpu, int flag) #endif /* CONFIG_SMP */ -/* - * wake_idle() will wake a task on an idle cpu if task->cpu is - * not idle and an idle cpu is available. The span of cpus to - * search starts with cpus closest then further out as needed, - * so we always favor a closer, idle cpu. - * - * Returns the CPU we should wake onto. - */ -#if defined(ARCH_HAS_SCHED_WAKE_IDLE) -static int wake_idle(int cpu, struct task_struct *p) -{ - cpumask_t tmp; - struct sched_domain *sd; - int i; - - /* - * If it is idle, then it is the best cpu to run this task. - * - * This cpu is also the best, if it has more than one task already. - * Siblings must be also busy(in most cases) as they didn't already - * pickup the extra load from this cpu and hence we need not check - * sibling runqueue info. This will avoid the checks and cache miss - * penalities associated with that. - */ - if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1) - return cpu; - - for_each_domain(cpu, sd) { - if (sd->flags & SD_WAKE_IDLE) { - cpus_and(tmp, sd->span, p->cpus_allowed); - for_each_cpu_mask(i, tmp) { - if (idle_cpu(i)) { - if (i != task_cpu(p)) { - schedstat_inc(p, - se.nr_wakeups_idle); - } - return i; - } - } - } else { - break; - } - } - return cpu; -} -#else -static inline int wake_idle(int cpu, struct task_struct *p) -{ - return cpu; -} -#endif - /*** * try_to_wake_up - wake up a thread * @p: the to-be-woken-up thread @@ -1573,8 +1528,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) long old_state; struct rq *rq; #ifdef CONFIG_SMP - struct sched_domain *sd, *this_sd = NULL; - unsigned long load, this_load; int new_cpu; #endif @@ -1594,90 +1547,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) if (unlikely(task_running(rq, p))) goto out_activate; - new_cpu = cpu; - - schedstat_inc(rq, ttwu_count); - if (cpu == this_cpu) { - schedstat_inc(rq, ttwu_local); - goto out_set_cpu; - } - - for_each_domain(this_cpu, sd) { - if (cpu_isset(cpu, sd->span)) { - schedstat_inc(sd, ttwu_wake_remote); - this_sd = sd; - break; - } - } - - if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) - goto out_set_cpu; - - /* - * Check for affine wakeup and passive balancing possibilities. - */ - if (this_sd) { - int idx = this_sd->wake_idx; - unsigned int imbalance; - - imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; - - load = source_load(cpu, idx); - this_load = target_load(this_cpu, idx); - - new_cpu = this_cpu; /* Wake to this CPU if we can */ - - if (this_sd->flags & SD_WAKE_AFFINE) { - unsigned long tl = this_load; - unsigned long tl_per_task; - - /* - * Attract cache-cold tasks on sync wakeups: - */ - if (sync && !task_hot(p, rq->clock, this_sd)) - goto out_set_cpu; - - schedstat_inc(p, se.nr_wakeups_affine_attempts); - tl_per_task = cpu_avg_load_per_task(this_cpu); - - /* - * If sync wakeup then subtract the (maximum possible) - * effect of the currently running task from the load - * of the current CPU: - */ - if (sync) - tl -= current->se.load.weight; - - if ((tl <= load && - tl + target_load(cpu, idx) <= tl_per_task) || - 100*(tl + p->se.load.weight) <= imbalance*load) { - /* - * This domain has SD_WAKE_AFFINE and - * p is cache cold in this domain, and - * there is no bad imbalance. - */ - schedstat_inc(this_sd, ttwu_move_affine); - schedstat_inc(p, se.nr_wakeups_affine); - goto out_set_cpu; - } - } - - /* - * Start passive balancing when half the imbalance_pct - * limit is reached. - */ - if (this_sd->flags & SD_WAKE_BALANCE) { - if (imbalance*this_load <= 100*load) { - schedstat_inc(this_sd, ttwu_move_balance); - schedstat_inc(p, se.nr_wakeups_passive); - goto out_set_cpu; - } - } - } - - new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */ -out_set_cpu: - new_cpu = wake_idle(new_cpu, p); + new_cpu = p->sched_class->select_task_rq(p, sync); if (new_cpu != cpu) { set_task_cpu(p, new_cpu); task_rq_unlock(rq, &flags); @@ -1693,6 +1563,23 @@ out_set_cpu: cpu = task_cpu(p); } +#ifdef CONFIG_SCHEDSTATS + schedstat_inc(rq, ttwu_count); + if (cpu == this_cpu) + schedstat_inc(rq, ttwu_local); + else { + struct sched_domain *sd; + for_each_domain(this_cpu, sd) { + if (cpu_isset(cpu, sd->span)) { + schedstat_inc(sd, ttwu_wake_remote); + break; + } + } + } + +#endif + + out_activate: #endif /* CONFIG_SMP */ schedstat_inc(p, se.nr_wakeups); |