diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/preempt.h | 2 | ||||
-rw-r--r-- | include/linux/delayacct.h | 22 | ||||
-rw-r--r-- | include/linux/energy_model.h | 16 | ||||
-rw-r--r-- | include/linux/kthread.h | 2 | ||||
-rw-r--r-- | include/linux/sched.h | 50 | ||||
-rw-r--r-- | include/linux/sched/cpufreq.h | 2 | ||||
-rw-r--r-- | include/linux/sched/debug.h | 2 | ||||
-rw-r--r-- | include/linux/sched/sd_flags.h | 10 | ||||
-rw-r--r-- | include/linux/sched/signal.h | 2 | ||||
-rw-r--r-- | include/linux/sched/stat.h | 16 | ||||
-rw-r--r-- | include/linux/sched_clock.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/prctl.h | 8 |
12 files changed, 91 insertions, 43 deletions
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index d683f5e6d791..b4d43a4af5f7 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -29,7 +29,7 @@ static __always_inline void preempt_count_set(int pc) } while (0) #define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ } while (0) static __always_inline void set_preempt_need_resched(void) diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 21651f946751..af7e6eb50283 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -58,16 +58,22 @@ struct task_delay_info { #include <linux/sched.h> #include <linux/slab.h> +#include <linux/jump_label.h> #ifdef CONFIG_TASK_DELAY_ACCT +DECLARE_STATIC_KEY_FALSE(delayacct_key); extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); + +extern int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); + extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(struct task_struct *); -extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); +extern int delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); extern void __delayacct_freepages_start(void); extern void __delayacct_freepages_end(void); @@ -114,6 +120,9 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) static inline void delayacct_blkio_start(void) { + if (!static_branch_unlikely(&delayacct_key)) + return; + delayacct_set_flag(current, DELAYACCT_PF_BLKIO); if (current->delays) __delayacct_blkio_start(); @@ -121,19 +130,14 @@ static inline void delayacct_blkio_start(void) static inline void delayacct_blkio_end(struct task_struct *p) { + if (!static_branch_unlikely(&delayacct_key)) + return; + if (p->delays) __delayacct_blkio_end(p); delayacct_clear_flag(p, DELAYACCT_PF_BLKIO); } -static inline int delayacct_add_tsk(struct taskstats *d, - struct task_struct *tsk) -{ - if (!delayacct_on || !tsk->delays) - return 0; - return __delayacct_add_tsk(d, tsk); -} - static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { if (tsk->delays) diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 757fc60658fa..3f221dbf5f95 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -91,6 +91,8 @@ void em_dev_unregister_perf_domain(struct device *dev); * @pd : performance domain for which energy has to be estimated * @max_util : highest utilization among CPUs of the domain * @sum_util : sum of the utilization of all CPUs in the domain + * @allowed_cpu_cap : maximum allowed CPU capacity for the @pd, which + might reflect reduced frequency (due to thermal) * * This function must be used only for CPU devices. There is no validation, * i.e. if the EM is a CPU type and has cpumask allocated. It is called from @@ -100,7 +102,8 @@ void em_dev_unregister_perf_domain(struct device *dev); * a capacity state satisfying the max utilization of the domain. */ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, - unsigned long max_util, unsigned long sum_util) + unsigned long max_util, unsigned long sum_util, + unsigned long allowed_cpu_cap) { unsigned long freq, scale_cpu; struct em_perf_state *ps; @@ -112,11 +115,17 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, /* * In order to predict the performance state, map the utilization of * the most utilized CPU of the performance domain to a requested - * frequency, like schedutil. + * frequency, like schedutil. Take also into account that the real + * frequency might be set lower (due to thermal capping). Thus, clamp + * max utilization to the allowed CPU capacity before calculating + * effective frequency. */ cpu = cpumask_first(to_cpumask(pd->cpus)); scale_cpu = arch_scale_cpu_capacity(cpu); ps = &pd->table[pd->nr_perf_states - 1]; + + max_util = map_util_perf(max_util); + max_util = min(max_util, allowed_cpu_cap); freq = map_util_freq(max_util, ps->frequency, scale_cpu); /* @@ -209,7 +218,8 @@ static inline struct em_perf_domain *em_pd_get(struct device *dev) return NULL; } static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, - unsigned long max_util, unsigned long sum_util) + unsigned long max_util, unsigned long sum_util, + unsigned long allowed_cpu_cap) { return 0; } diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2484ed97e72f..d9133d6db308 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -33,6 +33,8 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), unsigned int cpu, const char *namefmt); +void set_kthread_struct(struct task_struct *p); + void kthread_set_per_cpu(struct task_struct *k, int cpu); bool kthread_is_per_cpu(struct task_struct *k); diff --git a/include/linux/sched.h b/include/linux/sched.h index 32813c345115..31aaf9d7b3e9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -113,11 +113,13 @@ struct task_group; __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ TASK_PARKED) -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) +#define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) -#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) +#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0) -#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) +#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0) + +#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP @@ -132,14 +134,14 @@ struct task_group; do { \ WARN_ON_ONCE(is_special_task_state(state_value));\ current->task_state_change = _THIS_IP_; \ - current->state = (state_value); \ + WRITE_ONCE(current->__state, (state_value)); \ } while (0) #define set_current_state(state_value) \ do { \ WARN_ON_ONCE(is_special_task_state(state_value));\ current->task_state_change = _THIS_IP_; \ - smp_store_mb(current->state, (state_value)); \ + smp_store_mb(current->__state, (state_value)); \ } while (0) #define set_special_state(state_value) \ @@ -148,7 +150,7 @@ struct task_group; WARN_ON_ONCE(!is_special_task_state(state_value)); \ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ current->task_state_change = _THIS_IP_; \ - current->state = (state_value); \ + WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ } while (0) #else @@ -190,10 +192,10 @@ struct task_group; * Also see the comments of try_to_wake_up(). */ #define __set_current_state(state_value) \ - current->state = (state_value) + WRITE_ONCE(current->__state, (state_value)) #define set_current_state(state_value) \ - smp_store_mb(current->state, (state_value)) + smp_store_mb(current->__state, (state_value)) /* * set_special_state() should be used for those states when the blocking task @@ -205,12 +207,14 @@ struct task_group; do { \ unsigned long flags; /* may shadow */ \ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ - current->state = (state_value); \ + WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ } while (0) #endif +#define get_current_state() READ_ONCE(current->__state) + /* Task command name length: */ #define TASK_COMM_LEN 16 @@ -662,8 +666,7 @@ struct task_struct { */ struct thread_info thread_info; #endif - /* -1 unrunnable, 0 runnable, >0 stopped: */ - volatile long state; + unsigned int __state; /* * This begins the randomizable portion of task_struct. Only @@ -708,10 +711,17 @@ struct task_struct { const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; + struct sched_dl_entity dl; + +#ifdef CONFIG_SCHED_CORE + struct rb_node core_node; + unsigned long core_cookie; + unsigned int core_occupation; +#endif + #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif - struct sched_dl_entity dl; #ifdef CONFIG_UCLAMP_TASK /* @@ -1520,7 +1530,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) static inline unsigned int task_state_index(struct task_struct *tsk) { - unsigned int tsk_state = READ_ONCE(tsk->state); + unsigned int tsk_state = READ_ONCE(tsk->__state); unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); @@ -1828,10 +1838,10 @@ static __always_inline void scheduler_ipi(void) */ preempt_fold_need_resched(); } -extern unsigned long wait_task_inactive(struct task_struct *, long match_state); +extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state); #else static inline void scheduler_ipi(void) { } -static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) +static inline unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) { return 1; } @@ -2179,4 +2189,14 @@ int sched_trace_rq_nr_running(struct rq *rq); const struct cpumask *sched_trace_rd_span(struct root_domain *rd); +#ifdef CONFIG_SCHED_CORE +extern void sched_core_free(struct task_struct *tsk); +extern void sched_core_fork(struct task_struct *p); +extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, + unsigned long uaddr); +#else +static inline void sched_core_free(struct task_struct *tsk) { } +static inline void sched_core_fork(struct task_struct *p) { } +#endif + #endif diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 6205578ab6ee..bdd31ab93bc5 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -26,7 +26,7 @@ bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy); static inline unsigned long map_util_freq(unsigned long util, unsigned long freq, unsigned long cap) { - return (freq + (freq >> 2)) * util / cap; + return freq * util / cap; } static inline unsigned long map_util_perf(unsigned long util) diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index ae51f4529fc9..b5035afa2396 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -14,7 +14,7 @@ extern void dump_cpu_task(int cpu); /* * Only dump TASK_* tasks. (0 for all tasks) */ -extern void show_state_filter(unsigned long state_filter); +extern void show_state_filter(unsigned int state_filter); static inline void show_state(void) { diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index 34b21e971d77..57bde66d95f7 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -91,6 +91,16 @@ SD_FLAG(SD_WAKE_AFFINE, SDF_SHARED_CHILD) SD_FLAG(SD_ASYM_CPUCAPACITY, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) /* + * Domain members have different CPU capacities spanning all unique CPU + * capacity values. + * + * SHARED_PARENT: Set from the topmost domain down to the first domain where + * all available CPU capacities are visible + * NEEDS_GROUPS: Per-CPU capacity is asymmetric between groups. + */ +SD_FLAG(SD_ASYM_CPUCAPACITY_FULL, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) + +/* * Domain members share CPU capacity (i.e. SMT) * * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7f4278fa21fe..c9cf678c347d 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -382,7 +382,7 @@ static inline int fatal_signal_pending(struct task_struct *p) return task_sigpending(p) && __fatal_signal_pending(p); } -static inline int signal_pending_state(long state, struct task_struct *p) +static inline int signal_pending_state(unsigned int state, struct task_struct *p) { if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) return 0; diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index 568286411b43..0108a38bb64d 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -3,6 +3,7 @@ #define _LINUX_SCHED_STAT_H #include <linux/percpu.h> +#include <linux/kconfig.h> /* * Various counters maintained by the scheduler and fork(), @@ -16,21 +17,14 @@ extern unsigned long total_forks; extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); -extern unsigned long nr_running(void); +extern unsigned int nr_running(void); extern bool single_task_running(void); -extern unsigned long nr_iowait(void); -extern unsigned long nr_iowait_cpu(int cpu); +extern unsigned int nr_iowait(void); +extern unsigned int nr_iowait_cpu(int cpu); static inline int sched_info_on(void) { -#ifdef CONFIG_SCHEDSTATS - return 1; -#elif defined(CONFIG_TASK_DELAY_ACCT) - extern int delayacct_on; - return delayacct_on; -#else - return 0; -#endif + return IS_ENABLED(CONFIG_SCHED_INFO); } #ifdef CONFIG_SCHEDSTATS diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index 528718e4ed52..835ee87ed792 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -14,7 +14,7 @@ * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit * clocks. * @read_sched_clock: Current clock source (or dummy source when suspended). - * @mult: Multipler for scaled math conversion. + * @mult: Multiplier for scaled math conversion. * @shift: Shift value for scaled math conversion. * * Care must be taken when updating this structure; it is read by diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 18a9f59dc067..967d9c55323d 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -259,4 +259,12 @@ struct prctl_mm_map { #define PR_PAC_SET_ENABLED_KEYS 60 #define PR_PAC_GET_ENABLED_KEYS 61 +/* Request the scheduler to share a core */ +#define PR_SCHED_CORE 62 +# define PR_SCHED_CORE_GET 0 +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +# define PR_SCHED_CORE_MAX 4 + #endif /* _LINUX_PRCTL_H */ |