summaryrefslogtreecommitdiff
path: root/kernel/sched/fair.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--kernel/sched/fair.c139
1 files changed, 79 insertions, 60 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c798d2795243..e43993a4e580 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -74,12 +74,12 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
/*
* Minimal preemption granularity for CPU-bound tasks:
*
- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
-unsigned int sysctl_sched_base_slice = 750000ULL;
-static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
+unsigned int sysctl_sched_base_slice = 700000ULL;
+static unsigned int normalized_sysctl_sched_base_slice = 700000ULL;
-const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+__read_mostly unsigned int sysctl_sched_migration_cost = 500000UL;
static int __init setup_sched_thermal_decay_shift(char *str)
{
@@ -399,7 +399,7 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
static inline void assert_list_leaf_cfs_rq(struct rq *rq)
{
- SCHED_WARN_ON(rq->tmp_alone_branch != &rq->leaf_cfs_rq_list);
+ WARN_ON_ONCE(rq->tmp_alone_branch != &rq->leaf_cfs_rq_list);
}
/* Iterate through all leaf cfs_rq's on a runqueue */
@@ -696,7 +696,7 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
s64 vlag, limit;
- SCHED_WARN_ON(!se->on_rq);
+ WARN_ON_ONCE(!se->on_rq);
vlag = avg_vruntime(cfs_rq) - se->vruntime;
limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
@@ -884,6 +884,26 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
}
/*
+ * HACK, stash a copy of deadline at the point of pick in vlag,
+ * which isn't used until dequeue.
+ */
+static inline void set_protect_slice(struct sched_entity *se)
+{
+ se->vlag = se->deadline;
+}
+
+static inline bool protect_slice(struct sched_entity *se)
+{
+ return se->vlag == se->deadline;
+}
+
+static inline void cancel_protect_slice(struct sched_entity *se)
+{
+ if (protect_slice(se))
+ se->vlag = se->deadline + 1;
+}
+
+/*
* Earliest Eligible Virtual Deadline First
*
* In order to provide latency guarantees for different request sizes
@@ -919,11 +939,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
curr = NULL;
- /*
- * Once selected, run a task until it either becomes non-eligible or
- * until it gets a new slice. See the HACK in set_next_entity().
- */
- if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
+ if (sched_feat(RUN_TO_PARITY) && curr && protect_slice(curr))
return curr;
/* Pick the leftmost entity if it's eligible */
@@ -967,7 +983,6 @@ found:
return best;
}
-#ifdef CONFIG_SCHED_DEBUG
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root);
@@ -994,7 +1009,6 @@ int sched_update_scaling(void)
return 0;
}
#endif
-#endif
static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
@@ -3301,7 +3315,7 @@ static void task_numa_work(struct callback_head *work)
bool vma_pids_skipped;
bool vma_pids_forced = false;
- SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work));
+ WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
work->next = work;
/*
@@ -4020,7 +4034,7 @@ static inline bool load_avg_is_decayed(struct sched_avg *sa)
* Make sure that rounding and/or propagation of PELT values never
* break this.
*/
- SCHED_WARN_ON(sa->load_avg ||
+ WARN_ON_ONCE(sa->load_avg ||
sa->util_avg ||
sa->runnable_avg);
@@ -5444,7 +5458,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
clear_buddies(cfs_rq, se);
if (flags & DEQUEUE_DELAYED) {
- SCHED_WARN_ON(!se->sched_delayed);
+ WARN_ON_ONCE(!se->sched_delayed);
} else {
bool delay = sleep;
/*
@@ -5454,7 +5468,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (flags & DEQUEUE_SPECIAL)
delay = false;
- SCHED_WARN_ON(delay && se->sched_delayed);
+ WARN_ON_ONCE(delay && se->sched_delayed);
if (sched_feat(DELAY_DEQUEUE) && delay &&
!entity_eligible(cfs_rq, se)) {
@@ -5530,15 +5544,12 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
update_stats_wait_end_fair(cfs_rq, se);
__dequeue_entity(cfs_rq, se);
update_load_avg(cfs_rq, se, UPDATE_TG);
- /*
- * HACK, stash a copy of deadline at the point of pick in vlag,
- * which isn't used until dequeue.
- */
- se->vlag = se->deadline;
+
+ set_protect_slice(se);
}
update_stats_curr_start(cfs_rq, se);
- SCHED_WARN_ON(cfs_rq->curr);
+ WARN_ON_ONCE(cfs_rq->curr);
cfs_rq->curr = se;
/*
@@ -5579,7 +5590,7 @@ pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq)
if (sched_feat(PICK_BUDDY) &&
cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next)) {
/* ->next will never be delayed */
- SCHED_WARN_ON(cfs_rq->next->sched_delayed);
+ WARN_ON_ONCE(cfs_rq->next->sched_delayed);
return cfs_rq->next;
}
@@ -5615,7 +5626,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
/* in !on_rq case, update occurred at dequeue */
update_load_avg(cfs_rq, prev, 0);
}
- SCHED_WARN_ON(cfs_rq->curr != prev);
+ WARN_ON_ONCE(cfs_rq->curr != prev);
cfs_rq->curr = NULL;
}
@@ -5838,7 +5849,7 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
cfs_rq->throttled_clock_self = 0;
- if (SCHED_WARN_ON((s64)delta < 0))
+ if (WARN_ON_ONCE((s64)delta < 0))
delta = 0;
cfs_rq->throttled_clock_self_time += delta;
@@ -5858,7 +5869,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
list_del_leaf_cfs_rq(cfs_rq);
- SCHED_WARN_ON(cfs_rq->throttled_clock_self);
+ WARN_ON_ONCE(cfs_rq->throttled_clock_self);
if (cfs_rq->nr_queued)
cfs_rq->throttled_clock_self = rq_clock(rq);
}
@@ -5967,7 +5978,7 @@ done:
* throttled-list. rq->lock protects completion.
*/
cfs_rq->throttled = 1;
- SCHED_WARN_ON(cfs_rq->throttled_clock);
+ WARN_ON_ONCE(cfs_rq->throttled_clock);
if (cfs_rq->nr_queued)
cfs_rq->throttled_clock = rq_clock(rq);
return true;
@@ -6123,7 +6134,7 @@ static inline void __unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq)
}
/* Already enqueued */
- if (SCHED_WARN_ON(!list_empty(&cfs_rq->throttled_csd_list)))
+ if (WARN_ON_ONCE(!list_empty(&cfs_rq->throttled_csd_list)))
return;
first = list_empty(&rq->cfsb_csd_list);
@@ -6142,7 +6153,7 @@ static void unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq)
{
lockdep_assert_rq_held(rq_of(cfs_rq));
- if (SCHED_WARN_ON(!cfs_rq_throttled(cfs_rq) ||
+ if (WARN_ON_ONCE(!cfs_rq_throttled(cfs_rq) ||
cfs_rq->runtime_remaining <= 0))
return;
@@ -6178,7 +6189,7 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
goto next;
/* By the above checks, this should never be true */
- SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);
+ WARN_ON_ONCE(cfs_rq->runtime_remaining > 0);
raw_spin_lock(&cfs_b->lock);
runtime = -cfs_rq->runtime_remaining + 1;
@@ -6199,7 +6210,7 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
* We currently only expect to be unthrottling
* a single cfs_rq locally.
*/
- SCHED_WARN_ON(!list_empty(&local_unthrottle));
+ WARN_ON_ONCE(!list_empty(&local_unthrottle));
list_add_tail(&cfs_rq->throttled_csd_list,
&local_unthrottle);
}
@@ -6224,7 +6235,7 @@ next:
rq_unlock_irqrestore(rq, &rf);
}
- SCHED_WARN_ON(!list_empty(&local_unthrottle));
+ WARN_ON_ONCE(!list_empty(&local_unthrottle));
rcu_read_unlock();
@@ -6541,14 +6552,14 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *paren
cfs_b->hierarchical_quota = parent ? parent->hierarchical_quota : RUNTIME_INF;
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
- hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
- cfs_b->period_timer.function = sched_cfs_period_timer;
+ hrtimer_setup(&cfs_b->period_timer, sched_cfs_period_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS_PINNED);
/* Add a random offset so that timers interleave */
hrtimer_set_expires(&cfs_b->period_timer,
get_random_u32_below(cfs_b->period));
- hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- cfs_b->slack_timer.function = sched_cfs_slack_timer;
+ hrtimer_setup(&cfs_b->slack_timer, sched_cfs_slack_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
cfs_b->slack_started = false;
}
@@ -6776,7 +6787,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
{
struct sched_entity *se = &p->se;
- SCHED_WARN_ON(task_rq(p) != rq);
+ WARN_ON_ONCE(task_rq(p) != rq);
if (rq->cfs.h_nr_queued > 1) {
u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
@@ -6887,8 +6898,8 @@ requeue_delayed_entity(struct sched_entity *se)
* Because a delayed entity is one that is still on
* the runqueue competing until elegibility.
*/
- SCHED_WARN_ON(!se->sched_delayed);
- SCHED_WARN_ON(!se->on_rq);
+ WARN_ON_ONCE(!se->sched_delayed);
+ WARN_ON_ONCE(!se->on_rq);
if (sched_feat(DELAY_ZERO)) {
update_entity_lag(cfs_rq, se);
@@ -6991,6 +7002,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
update_cfs_group(se);
se->slice = slice;
+ if (se != cfs_rq->curr)
+ min_vruntime_cb_propagate(&se->run_node, NULL);
slice = cfs_rq_min_slice(cfs_rq);
cfs_rq->h_nr_runnable += h_nr_runnable;
@@ -7120,6 +7133,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
update_cfs_group(se);
se->slice = slice;
+ if (se != cfs_rq->curr)
+ min_vruntime_cb_propagate(&se->run_node, NULL);
slice = cfs_rq_min_slice(cfs_rq);
cfs_rq->h_nr_runnable -= h_nr_runnable;
@@ -7144,8 +7159,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
rq->next_balance = jiffies;
if (p && task_delayed) {
- SCHED_WARN_ON(!task_sleep);
- SCHED_WARN_ON(p->on_rq != 1);
+ WARN_ON_ONCE(!task_sleep);
+ WARN_ON_ONCE(p->on_rq != 1);
/* Fix-up what dequeue_task_fair() skipped */
hrtick_update(rq);
@@ -8723,7 +8738,7 @@ static inline void set_task_max_allowed_capacity(struct task_struct *p) {}
static void set_next_buddy(struct sched_entity *se)
{
for_each_sched_entity(se) {
- if (SCHED_WARN_ON(!se->on_rq))
+ if (WARN_ON_ONCE(!se->on_rq))
return;
if (se_is_idle(se))
return;
@@ -8783,8 +8798,15 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
* Preempt an idle entity in favor of a non-idle entity (and don't preempt
* in the inverse case).
*/
- if (cse_is_idle && !pse_is_idle)
+ if (cse_is_idle && !pse_is_idle) {
+ /*
+ * When non-idle entity preempt an idle entity,
+ * don't give idle entity slice protection.
+ */
+ cancel_protect_slice(se);
goto preempt;
+ }
+
if (cse_is_idle != pse_is_idle)
return;
@@ -8803,8 +8825,8 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
* Note that even if @p does not turn out to be the most eligible
* task at this moment, current's slice protection will be lost.
*/
- if (do_preempt_short(cfs_rq, pse, se) && se->vlag == se->deadline)
- se->vlag = se->deadline + 1;
+ if (do_preempt_short(cfs_rq, pse, se))
+ cancel_protect_slice(se);
/*
* If @p has become the most eligible task, force preemption.
@@ -9417,12 +9439,11 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
return 0;
/* Prevent to re-select dst_cpu via env's CPUs: */
- for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
- if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
- env->flags |= LBF_DST_PINNED;
- env->new_dst_cpu = cpu;
- break;
- }
+ cpu = cpumask_first_and_and(env->dst_grpmask, env->cpus, p->cpus_ptr);
+
+ if (cpu < nr_cpu_ids) {
+ env->flags |= LBF_DST_PINNED;
+ env->new_dst_cpu = cpu;
}
return 0;
@@ -12461,7 +12482,7 @@ unlock:
void nohz_balance_exit_idle(struct rq *rq)
{
- SCHED_WARN_ON(rq != this_rq());
+ WARN_ON_ONCE(rq != this_rq());
if (likely(!rq->nohz_tick_stopped))
return;
@@ -12497,7 +12518,7 @@ void nohz_balance_enter_idle(int cpu)
{
struct rq *rq = cpu_rq(cpu);
- SCHED_WARN_ON(cpu != smp_processor_id());
+ WARN_ON_ONCE(cpu != smp_processor_id());
/* If this CPU is going down, then nothing needs to be done: */
if (!cpu_active(cpu))
@@ -12580,7 +12601,7 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags)
int balance_cpu;
struct rq *rq;
- SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
+ WARN_ON_ONCE((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
/*
* We assume there will be no idle load after this update and clear
@@ -13020,7 +13041,7 @@ bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,
struct cfs_rq *cfs_rqb;
s64 delta;
- SCHED_WARN_ON(task_rq(b)->core != rq->core);
+ WARN_ON_ONCE(task_rq(b)->core != rq->core);
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
@@ -13223,7 +13244,7 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
static void switched_to_fair(struct rq *rq, struct task_struct *p)
{
- SCHED_WARN_ON(p->se.sched_delayed);
+ WARN_ON_ONCE(p->se.sched_delayed);
attach_task_cfs_rq(p);
@@ -13258,7 +13279,7 @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs
if (!first)
return;
- SCHED_WARN_ON(se->sched_delayed);
+ WARN_ON_ONCE(se->sched_delayed);
if (hrtick_enabled_fair(rq))
hrtick_start_fair(rq, p);
@@ -13645,7 +13666,6 @@ DEFINE_SCHED_CLASS(fair) = {
#endif
};
-#ifdef CONFIG_SCHED_DEBUG
void print_cfs_stats(struct seq_file *m, int cpu)
{
struct cfs_rq *cfs_rq, *pos;
@@ -13679,7 +13699,6 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
rcu_read_unlock();
}
#endif /* CONFIG_NUMA_BALANCING */
-#endif /* CONFIG_SCHED_DEBUG */
__init void init_sched_fair_class(void)
{