diff options
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r-- | kernel/sched/sched.h | 128 |
1 files changed, 106 insertions, 22 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1d4e94c1e5fe..3fd283892761 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -67,6 +67,7 @@ #include <linux/tsacct_kern.h> #include <asm/tlb.h> +#include <asm-generic/vmlinux.lds.h> #ifdef CONFIG_PARAVIRT # include <asm/paravirt.h> @@ -75,6 +76,8 @@ #include "cpupri.h" #include "cpudeadline.h" +#include <trace/events/sched.h> + #ifdef CONFIG_SCHED_DEBUG # define SCHED_WARN_ON(x) WARN_ONCE(x, #x) #else @@ -96,6 +99,7 @@ extern atomic_long_t calc_load_tasks; extern void calc_global_load_tick(struct rq *this_rq); extern long calc_load_fold_active(struct rq *this_rq, long adjust); +extern void call_trace_sched_update_nr_running(struct rq *rq, int count); /* * Helpers for converting nanosecond timing to jiffy resolution */ @@ -310,11 +314,26 @@ void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus) __dl_update(dl_b, -((s32)tsk_bw / cpus)); } -static inline -bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw) +static inline bool __dl_overflow(struct dl_bw *dl_b, unsigned long cap, + u64 old_bw, u64 new_bw) { return dl_b->bw != -1 && - dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw; + cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw; +} + +/* + * Verify the fitness of task @p to run on @cpu taking into account the + * CPU original capacity and the runtime/deadline ratio of the task. + * + * The function will return true if the CPU original capacity of the + * @cpu scaled by SCHED_CAPACITY_SCALE >= runtime/deadline ratio of the + * task and false otherwise. + */ +static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu) +{ + unsigned long cap = arch_scale_cpu_capacity(cpu); + + return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime; } extern void init_dl_bw(struct dl_bw *dl_b); @@ -862,6 +881,8 @@ struct uclamp_rq { unsigned int value; struct uclamp_bucket bucket[UCLAMP_BUCKETS]; }; + +DECLARE_STATIC_KEY_FALSE(sched_uclamp_used); #endif /* CONFIG_UCLAMP_TASK */ /* @@ -1182,6 +1203,16 @@ struct rq_flags { #endif }; +/* + * Lockdep annotation that avoids accidental unlocks; it's like a + * sticky/continuous lockdep_assert_held(). + * + * This avoids code that has access to 'struct rq *rq' (basically everything in + * the scheduler) from accidentally unlocking the rq if they do not also have a + * copy of the (on-stack) 'struct rq_flags rf'. + * + * Also see Documentation/locking/lockdep-design.rst. + */ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) { rf->cookie = lockdep_pin_lock(&rq->lock); @@ -1682,7 +1713,7 @@ static inline int task_on_rq_migrating(struct task_struct *p) #define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */ #define WF_FORK 0x02 /* Child wakeup after fork */ #define WF_MIGRATED 0x04 /* Internal use, task got migrated */ -#define WF_ON_RQ 0x08 /* Wakee is on_rq */ +#define WF_ON_CPU 0x08 /* Wakee is on_cpu */ /* * To aid in avoiding the subversion of "niceness" due to uneven distribution @@ -1739,7 +1770,6 @@ extern const u32 sched_prio_to_wmult[40]; #define RETRY_TASK ((void *)-1UL) struct sched_class { - const struct sched_class *next; #ifdef CONFIG_UCLAMP_TASK int uclamp_enabled; @@ -1748,7 +1778,7 @@ struct sched_class { void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); void (*yield_task) (struct rq *rq); - bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt); + bool (*yield_to_task)(struct rq *rq, struct task_struct *p); void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags); @@ -1796,7 +1826,7 @@ struct sched_class { #ifdef CONFIG_FAIR_GROUP_SCHED void (*task_change_group)(struct task_struct *p, int type); #endif -}; +} __aligned(STRUCT_ALIGNMENT); /* STRUCT_ALIGN(), vmlinux.lds.h */ static inline void put_prev_task(struct rq *rq, struct task_struct *prev) { @@ -1810,17 +1840,18 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) next->sched_class->set_next_task(rq, next, false); } -#ifdef CONFIG_SMP -#define sched_class_highest (&stop_sched_class) -#else -#define sched_class_highest (&dl_sched_class) -#endif +/* Defined in include/asm-generic/vmlinux.lds.h */ +extern struct sched_class __begin_sched_classes[]; +extern struct sched_class __end_sched_classes[]; + +#define sched_class_highest (__end_sched_classes - 1) +#define sched_class_lowest (__begin_sched_classes - 1) #define for_class_range(class, _from, _to) \ - for (class = (_from); class != (_to); class = class->next) + for (class = (_from); class != (_to); class--) #define for_each_class(class) \ - for_class_range(class, sched_class_highest, NULL) + for_class_range(class, sched_class_highest, sched_class_lowest) extern const struct sched_class stop_sched_class; extern const struct sched_class dl_sched_class; @@ -1930,12 +1961,7 @@ extern int __init sched_tick_offload_init(void); */ static inline void sched_update_tick_dependency(struct rq *rq) { - int cpu; - - if (!tick_nohz_full_enabled()) - return; - - cpu = cpu_of(rq); + int cpu = cpu_of(rq); if (!tick_nohz_full_cpu(cpu)) return; @@ -1955,6 +1981,9 @@ static inline void add_nr_running(struct rq *rq, unsigned count) unsigned prev_nr = rq->nr_running; rq->nr_running = prev_nr + count; + if (trace_sched_update_nr_running_tp_enabled()) { + call_trace_sched_update_nr_running(rq, count); + } #ifdef CONFIG_SMP if (prev_nr < 2 && rq->nr_running >= 2) { @@ -1969,6 +1998,10 @@ static inline void add_nr_running(struct rq *rq, unsigned count) static inline void sub_nr_running(struct rq *rq, unsigned count) { rq->nr_running -= count; + if (trace_sched_update_nr_running_tp_enabled()) { + call_trace_sched_update_nr_running(rq, count); + } + /* Check if we still need preemption */ sched_update_tick_dependency(rq); } @@ -2016,6 +2049,16 @@ void arch_scale_freq_tick(void) #endif #ifndef arch_scale_freq_capacity +/** + * arch_scale_freq_capacity - get the frequency scale factor of a given CPU. + * @cpu: the CPU in question. + * + * Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e. + * + * f_curr + * ------ * SCHED_CAPACITY_SCALE + * f_max + */ static __always_inline unsigned long arch_scale_freq_capacity(int cpu) { @@ -2349,12 +2392,35 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} #ifdef CONFIG_UCLAMP_TASK unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id); +/** + * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values. + * @rq: The rq to clamp against. Must not be NULL. + * @util: The util value to clamp. + * @p: The task to clamp against. Can be NULL if you want to clamp + * against @rq only. + * + * Clamps the passed @util to the max(@rq, @p) effective uclamp values. + * + * If sched_uclamp_used static key is disabled, then just return the util + * without any clamping since uclamp aggregation at the rq level in the fast + * path is disabled, rendering this operation a NOP. + * + * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It + * will return the correct effective uclamp value of the task even if the + * static key is disabled. + */ static __always_inline unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, struct task_struct *p) { - unsigned long min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value); - unsigned long max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value); + unsigned long min_util; + unsigned long max_util; + + if (!static_branch_likely(&sched_uclamp_used)) + return util; + + min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value); + max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value); if (p) { min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN)); @@ -2371,6 +2437,19 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, return clamp(util, min_util, max_util); } + +/* + * When uclamp is compiled in, the aggregation at rq level is 'turned off' + * by default in the fast path and only gets turned on once userspace performs + * an operation that requires it. + * + * Returns true if userspace opted-in to use uclamp and aggregation at rq level + * hence is active. + */ +static inline bool uclamp_is_used(void) +{ + return static_branch_likely(&sched_uclamp_used); +} #else /* CONFIG_UCLAMP_TASK */ static inline unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, @@ -2378,6 +2457,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, { return util; } + +static inline bool uclamp_is_used(void) +{ + return false; +} #endif /* CONFIG_UCLAMP_TASK */ #ifdef arch_scale_freq_capacity |