diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/cpuhotplug.h | 1 | ||||
-rw-r--r-- | include/linux/cpumask.h | 6 | ||||
-rw-r--r-- | include/linux/preempt.h | 69 | ||||
-rw-r--r-- | include/linux/sched.h | 5 | ||||
-rw-r--r-- | include/linux/sched/hotplug.h | 2 | ||||
-rw-r--r-- | include/linux/stop_machine.h | 5 |
6 files changed, 88 insertions, 0 deletions
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index bc56287a1ed1..0042ef362511 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -152,6 +152,7 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f0d895d6ac39..383684e30f12 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -199,6 +199,11 @@ static inline int cpumask_any_and_distribute(const struct cpumask *src1p, return cpumask_next_and(-1, src1p, src2p); } +static inline int cpumask_any_distribute(const struct cpumask *srcp) +{ + return cpumask_first(srcp); +} + #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_not(cpu, mask) \ @@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); unsigned int cpumask_local_spread(unsigned int i, int node); int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); +int cpumask_any_distribute(const struct cpumask *srcp); /** * for_each_cpu - iterate over every cpu in a mask diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7d9c1c0e149c..8b43922e65df 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -322,6 +322,73 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, #endif +#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) + +/* + * Migrate-Disable and why it is undesired. + * + * When a preempted task becomes elegible to run under the ideal model (IOW it + * becomes one of the M highest priority tasks), it might still have to wait + * for the preemptee's migrate_disable() section to complete. Thereby suffering + * a reduction in bandwidth in the exact duration of the migrate_disable() + * section. + * + * Per this argument, the change from preempt_disable() to migrate_disable() + * gets us: + * + * - a higher priority tasks gains reduced wake-up latency; with preempt_disable() + * it would have had to wait for the lower priority task. + * + * - a lower priority tasks; which under preempt_disable() could've instantly + * migrated away when another CPU becomes available, is now constrained + * by the ability to push the higher priority task away, which might itself be + * in a migrate_disable() section, reducing it's available bandwidth. + * + * IOW it trades latency / moves the interference term, but it stays in the + * system, and as long as it remains unbounded, the system is not fully + * deterministic. + * + * + * The reason we have it anyway. + * + * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a + * number of primitives into becoming preemptible, they would also allow + * migration. This turns out to break a bunch of per-cpu usage. To this end, + * all these primitives employ migirate_disable() to restore this implicit + * assumption. + * + * This is a 'temporary' work-around at best. The correct solution is getting + * rid of the above assumptions and reworking the code to employ explicit + * per-cpu locking or short preempt-disable regions. + * + * The end goal must be to get rid of migrate_disable(), alternatively we need + * a schedulability theory that does not depend on abritrary migration. + * + * + * Notes on the implementation. + * + * The implementation is particularly tricky since existing code patterns + * dictate neither migrate_disable() nor migrate_enable() is allowed to block. + * This means that it cannot use cpus_read_lock() to serialize against hotplug, + * nor can it easily migrate itself into a pending affinity mask change on + * migrate_enable(). + * + * + * Note: even non-work-conserving schedulers like semi-partitioned depends on + * migration, so migrate_disable() is not only a problem for + * work-conserving schedulers. + * + */ +extern void migrate_disable(void); +extern void migrate_enable(void); + +#elif defined(CONFIG_PREEMPT_RT) + +static inline void migrate_disable(void) { } +static inline void migrate_enable(void) { } + +#else /* !CONFIG_PREEMPT_RT */ + /** * migrate_disable - Prevent migration of the current task * @@ -352,4 +419,6 @@ static __always_inline void migrate_enable(void) preempt_enable(); } +#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */ + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 063cd120b459..3af9d52fe093 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -714,6 +714,11 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t cpus_mask; + void *migration_pending; +#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) + unsigned short migration_disabled; +#endif + unsigned short migration_flags; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index 9a62ffdd296f..412cdaba33eb 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu); extern int sched_cpu_deactivate(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_wait_empty(unsigned int cpu); extern int sched_cpu_dying(unsigned int cpu); #else +# define sched_cpu_wait_empty NULL # define sched_cpu_dying NULL #endif diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 76d8b09384a7..30577c3aecf8 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg); struct cpu_stop_work { struct list_head list; /* cpu_stopper->works */ cpu_stop_fn_t fn; + unsigned long caller; void *arg; struct cpu_stop_done *done; }; @@ -36,6 +37,8 @@ void stop_machine_park(int cpu); void stop_machine_unpark(int cpu); void stop_machine_yield(const struct cpumask *cpumask); +extern void print_stop_info(const char *log_lvl, struct task_struct *task); + #else /* CONFIG_SMP */ #include <linux/workqueue.h> @@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu, return false; } +static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { } + #endif /* CONFIG_SMP */ /* |