diff options
Diffstat (limited to 'kernel/kthread.c')
| -rw-r--r-- | kernel/kthread.c | 324 |
1 files changed, 250 insertions, 74 deletions
diff --git a/kernel/kthread.c b/kernel/kthread.c index a5ac612b1609..63beb59b7a3d 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -35,6 +35,9 @@ static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; +static LIST_HEAD(kthread_affinity_list); +static DEFINE_MUTEX(kthread_affinity_lock); + struct kthread_create_info { /* Information passed to kthread() from kthreadd. */ @@ -53,6 +56,8 @@ struct kthread_create_info struct kthread { unsigned long flags; unsigned int cpu; + unsigned int node; + int started; int result; int (*threadfn)(void *); void *data; @@ -63,6 +68,9 @@ struct kthread { #endif /* To store the full name if task comm is truncated. */ char *full_name; + struct task_struct *task; + struct list_head affinity_node; + struct cpumask *preferred_affinity; }; enum KTHREAD_BITS { @@ -77,25 +85,6 @@ static inline struct kthread *to_kthread(struct task_struct *k) return k->worker_private; } -/* - * Variant of to_kthread() that doesn't assume @p is a kthread. - * - * Per construction; when: - * - * (p->flags & PF_KTHREAD) && p->worker_private - * - * the task is both a kthread and struct kthread is persistent. However - * PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and - * begin_new_exec()). - */ -static inline struct kthread *__to_kthread(struct task_struct *p) -{ - void *kthread = p->worker_private; - if (kthread && !(p->flags & PF_KTHREAD)) - kthread = NULL; - return kthread; -} - void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk) { struct kthread *kthread = to_kthread(tsk); @@ -115,14 +104,17 @@ bool set_kthread_struct(struct task_struct *p) if (WARN_ON_ONCE(to_kthread(p))) return false; - kthread = kzalloc(sizeof(*kthread), GFP_KERNEL); + kthread = kzalloc_obj(*kthread); if (!kthread) return false; init_completion(&kthread->exited); init_completion(&kthread->parked); + INIT_LIST_HEAD(&kthread->affinity_node); p->vfork_done = &kthread->exited; + kthread->task = p; + kthread->node = tsk_fork_get_node(current); p->worker_private = kthread; return true; } @@ -183,7 +175,7 @@ EXPORT_SYMBOL_GPL(kthread_should_park); bool kthread_should_stop_or_park(void) { - struct kthread *kthread = __to_kthread(current); + struct kthread *kthread = tsk_is_kthread(current); if (!kthread) return false; @@ -224,7 +216,7 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); */ void *kthread_func(struct task_struct *task) { - struct kthread *kthread = __to_kthread(task); + struct kthread *kthread = tsk_is_kthread(task); if (kthread) return kthread->threadfn; return NULL; @@ -256,7 +248,7 @@ EXPORT_SYMBOL_GPL(kthread_data); */ void *kthread_probe_data(struct task_struct *task) { - struct kthread *kthread = __to_kthread(task); + struct kthread *kthread = tsk_is_kthread(task); void *data = NULL; if (kthread) @@ -299,23 +291,20 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); -/** - * kthread_exit - Cause the current kthread return @result to kthread_stop(). - * @result: The integer value to return to kthread_stop(). - * - * While kthread_exit can be called directly, it exists so that - * functions which do some additional work in non-modular code such as - * module_put_and_kthread_exit can be implemented. - * - * Does not return. - */ -void __noreturn kthread_exit(long result) +void kthread_do_exit(struct kthread *kthread, long result) { - struct kthread *kthread = to_kthread(current); kthread->result = result; - do_exit(0); + if (!list_empty(&kthread->affinity_node)) { + mutex_lock(&kthread_affinity_lock); + list_del(&kthread->affinity_node); + mutex_unlock(&kthread_affinity_lock); + + if (kthread->preferred_affinity) { + kfree(kthread->preferred_affinity); + kthread->preferred_affinity = NULL; + } + } } -EXPORT_SYMBOL(kthread_exit); /** * kthread_complete_and_exit - Exit the current kthread. @@ -338,6 +327,56 @@ void __noreturn kthread_complete_and_exit(struct completion *comp, long code) } EXPORT_SYMBOL(kthread_complete_and_exit); +static void kthread_fetch_affinity(struct kthread *kthread, struct cpumask *cpumask) +{ + const struct cpumask *pref; + + guard(rcu)(); + + if (kthread->preferred_affinity) { + pref = kthread->preferred_affinity; + } else { + if (kthread->node == NUMA_NO_NODE) + pref = housekeeping_cpumask(HK_TYPE_DOMAIN); + else + pref = cpumask_of_node(kthread->node); + } + + cpumask_and(cpumask, pref, housekeeping_cpumask(HK_TYPE_DOMAIN)); + if (cpumask_empty(cpumask)) + cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN)); +} + +static void kthread_affine_node(void) +{ + struct kthread *kthread = to_kthread(current); + cpumask_var_t affinity; + + if (WARN_ON_ONCE(kthread_is_per_cpu(current))) + return; + + if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) { + WARN_ON_ONCE(1); + return; + } + + mutex_lock(&kthread_affinity_lock); + WARN_ON_ONCE(!list_empty(&kthread->affinity_node)); + list_add_tail(&kthread->affinity_node, &kthread_affinity_list); + /* + * The node cpumask is racy when read from kthread() but: + * - a racing CPU going down will either fail on the subsequent + * call to set_cpus_allowed_ptr() or be migrated to housekeepers + * afterwards by the scheduler. + * - a racing CPU going up will be handled by kthreads_online_cpu() + */ + kthread_fetch_affinity(kthread, affinity); + set_cpus_allowed_ptr(current, affinity); + mutex_unlock(&kthread_affinity_lock); + + free_cpumask_var(affinity); +} + static int kthread(void *_create) { static const struct sched_param param = { .sched_priority = 0 }; @@ -368,7 +407,6 @@ static int kthread(void *_create) * back to default in case they have been changed. */ sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m); - set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD)); /* OK, tell user we're spawned, wait for stop or wakeup */ __set_current_state(TASK_UNINTERRUPTIBLE); @@ -382,6 +420,15 @@ static int kthread(void *_create) schedule_preempt_disabled(); preempt_enable(); + self->started = 1; + + /* + * Apply default node affinity if no call to kthread_bind[_mask]() nor + * kthread_affine_preferred() was issued before the first wake-up. + */ + if (!(current->flags & PF_NO_SETAFFINITY) && !self->preferred_affinity) + kthread_affine_node(); + ret = -EINTR; if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) { cgroup_kthread_ready(); @@ -433,8 +480,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), { DECLARE_COMPLETION_ONSTACK(done); struct task_struct *task; - struct kthread_create_info *create = kmalloc(sizeof(*create), - GFP_KERNEL); + struct kthread_create_info *create = kmalloc_obj(*create); if (!create) return ERR_PTR(-ENOMEM); @@ -519,18 +565,16 @@ EXPORT_SYMBOL(kthread_create_on_node); static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, unsigned int state) { - unsigned long flags; - if (!wait_task_inactive(p, state)) { WARN_ON(1); return; } + scoped_guard (raw_spinlock_irqsave, &p->pi_lock) + set_cpus_allowed_force(p, mask); + /* It's safe because the task is inactive. */ - raw_spin_lock_irqsave(&p->pi_lock, flags); - do_set_cpus_allowed(p, mask); p->flags |= PF_NO_SETAFFINITY; - raw_spin_unlock_irqrestore(&p->pi_lock, flags); } static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int state) @@ -540,7 +584,9 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask) { + struct kthread *kthread = to_kthread(p); __kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE); + WARN_ON_ONCE(kthread->started); } /** @@ -554,7 +600,9 @@ void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask) */ void kthread_bind(struct task_struct *p, unsigned int cpu) { + struct kthread *kthread = to_kthread(p); __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE); + WARN_ON_ONCE(kthread->started); } EXPORT_SYMBOL(kthread_bind); @@ -604,7 +652,7 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu) bool kthread_is_per_cpu(struct task_struct *p) { - struct kthread *kthread = __to_kthread(p); + struct kthread *kthread = tsk_is_kthread(p); if (!kthread) return false; @@ -738,17 +786,19 @@ EXPORT_SYMBOL(kthread_stop_put); int kthreadd(void *unused) { + static const char comm[TASK_COMM_LEN] = "kthreadd"; struct task_struct *tsk = current; /* Setup a clean context for our children to inherit. */ - set_task_comm(tsk, "kthreadd"); + set_task_comm(tsk, comm); ignore_signals(tsk); - set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD)); set_mems_allowed(node_states[N_MEMORY]); current->flags |= PF_NOFREEZE; cgroup_init_kthreadd(); + kthread_affine_node(); + for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (list_empty(&kthread_create_list)) @@ -774,6 +824,137 @@ int kthreadd(void *unused) return 0; } +/** + * kthread_affine_preferred - Define a kthread's preferred affinity + * @p: thread created by kthread_create(). + * @mask: preferred mask of CPUs (might not be online, must be possible) for @p + * to run on. + * + * Similar to kthread_bind_mask() except that the affinity is not a requirement + * but rather a preference that can be constrained by CPU isolation or CPU hotplug. + * Must be called before the first wakeup of the kthread. + * + * Returns 0 if the affinity has been applied. + */ +int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) +{ + struct kthread *kthread = to_kthread(p); + cpumask_var_t affinity; + int ret = 0; + + if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) { + WARN_ON(1); + return -EINVAL; + } + + WARN_ON_ONCE(kthread->preferred_affinity); + + if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) + return -ENOMEM; + + kthread->preferred_affinity = kzalloc(sizeof(struct cpumask), GFP_KERNEL); + if (!kthread->preferred_affinity) { + ret = -ENOMEM; + goto out; + } + + mutex_lock(&kthread_affinity_lock); + cpumask_copy(kthread->preferred_affinity, mask); + WARN_ON_ONCE(!list_empty(&kthread->affinity_node)); + list_add_tail(&kthread->affinity_node, &kthread_affinity_list); + kthread_fetch_affinity(kthread, affinity); + + scoped_guard (raw_spinlock_irqsave, &p->pi_lock) + set_cpus_allowed_force(p, affinity); + + mutex_unlock(&kthread_affinity_lock); +out: + free_cpumask_var(affinity); + + return ret; +} +EXPORT_SYMBOL_GPL(kthread_affine_preferred); + +static int kthreads_update_affinity(bool force) +{ + cpumask_var_t affinity; + struct kthread *k; + int ret; + + guard(mutex)(&kthread_affinity_lock); + + if (list_empty(&kthread_affinity_list)) + return 0; + + if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) + return -ENOMEM; + + ret = 0; + + list_for_each_entry(k, &kthread_affinity_list, affinity_node) { + if (WARN_ON_ONCE((k->task->flags & PF_NO_SETAFFINITY) || + kthread_is_per_cpu(k->task))) { + ret = -EINVAL; + continue; + } + + /* + * Unbound kthreads without preferred affinity are already affine + * to housekeeping, whether those CPUs are online or not. So no need + * to handle newly online CPUs for them. However housekeeping changes + * have to be applied. + * + * But kthreads with a preferred affinity or node are different: + * if none of their preferred CPUs are online and part of + * housekeeping at the same time, they must be affine to housekeeping. + * But as soon as one of their preferred CPU becomes online, they must + * be affine to them. + */ + if (force || k->preferred_affinity || k->node != NUMA_NO_NODE) { + kthread_fetch_affinity(k, affinity); + set_cpus_allowed_ptr(k->task, affinity); + } + } + + free_cpumask_var(affinity); + + return ret; +} + +/** + * kthreads_update_housekeeping - Update kthreads affinity on cpuset change + * + * When cpuset changes a partition type to/from "isolated" or updates related + * cpumasks, propagate the housekeeping cpumask change to preferred kthreads + * affinity. + * + * Returns 0 if successful, -ENOMEM if temporary mask couldn't + * be allocated or -EINVAL in case of internal error. + */ +int kthreads_update_housekeeping(void) +{ + return kthreads_update_affinity(true); +} + +/* + * Re-affine kthreads according to their preferences + * and the newly online CPU. The CPU down part is handled + * by select_fallback_rq() which default re-affines to + * housekeepers from other nodes in case the preferred + * affinity doesn't apply anymore. + */ +static int kthreads_online_cpu(unsigned int cpu) +{ + return kthreads_update_affinity(false); +} + +static int kthreads_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_KTHREADS_ONLINE, "kthreads:online", + kthreads_online_cpu, NULL); +} +early_initcall(kthreads_init); + void __kthread_init_worker(struct kthread_worker *worker, const char *name, struct lock_class_key *key) @@ -865,33 +1046,26 @@ repeat: EXPORT_SYMBOL_GPL(kthread_worker_fn); static __printf(3, 0) struct kthread_worker * -__kthread_create_worker(int cpu, unsigned int flags, - const char namefmt[], va_list args) +__kthread_create_worker_on_node(unsigned int flags, int node, + const char namefmt[], va_list args) { struct kthread_worker *worker; struct task_struct *task; - int node = NUMA_NO_NODE; - worker = kzalloc(sizeof(*worker), GFP_KERNEL); + worker = kzalloc_obj(*worker); if (!worker) return ERR_PTR(-ENOMEM); kthread_init_worker(worker); - if (cpu >= 0) - node = cpu_to_node(cpu); - task = __kthread_create_on_node(kthread_worker_fn, worker, - node, namefmt, args); + node, namefmt, args); if (IS_ERR(task)) goto fail_task; - if (cpu >= 0) - kthread_bind(task, cpu); - worker->flags = flags; worker->task = task; - wake_up_process(task); + return worker; fail_task: @@ -900,8 +1074,9 @@ fail_task: } /** - * kthread_create_worker - create a kthread worker + * kthread_create_worker_on_node - create a kthread worker * @flags: flags modifying the default behavior of the worker + * @node: task structure for the thread is allocated on this node * @namefmt: printf-style name for the kthread worker (task). * * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM) @@ -909,25 +1084,26 @@ fail_task: * when the caller was killed by a fatal signal. */ struct kthread_worker * -kthread_create_worker(unsigned int flags, const char namefmt[], ...) +kthread_create_worker_on_node(unsigned int flags, int node, const char namefmt[], ...) { struct kthread_worker *worker; va_list args; va_start(args, namefmt); - worker = __kthread_create_worker(-1, flags, namefmt, args); + worker = __kthread_create_worker_on_node(flags, node, namefmt, args); va_end(args); return worker; } -EXPORT_SYMBOL(kthread_create_worker); +EXPORT_SYMBOL(kthread_create_worker_on_node); /** * kthread_create_worker_on_cpu - create a kthread worker and bind it * to a given CPU and the associated NUMA node. * @cpu: CPU number * @flags: flags modifying the default behavior of the worker - * @namefmt: printf-style name for the kthread worker (task). + * @namefmt: printf-style name for the thread. Format is restricted + * to "name.*%u". Code fills in cpu number. * * Use a valid CPU number if you want to bind the kthread worker * to the given CPU and the associated NUMA node. @@ -959,14 +1135,13 @@ EXPORT_SYMBOL(kthread_create_worker); */ struct kthread_worker * kthread_create_worker_on_cpu(int cpu, unsigned int flags, - const char namefmt[], ...) + const char namefmt[]) { struct kthread_worker *worker; - va_list args; - va_start(args, namefmt); - worker = __kthread_create_worker(cpu, flags, namefmt, args); - va_end(args); + worker = kthread_create_worker_on_node(flags, cpu_to_node(cpu), namefmt, cpu); + if (!IS_ERR(worker)) + kthread_bind(worker->task, cpu); return worker; } @@ -1015,7 +1190,7 @@ static void kthread_insert_work(struct kthread_worker *worker, * @work: kthread_work to queue * * Queue @work to work processor @task for async execution. @task - * must have been created with kthread_worker_create(). Returns %true + * must have been created with kthread_create_worker(). Returns %true * if @work was successfully queued, %false if it was already pending. * * Reinitialize the work if it needs to be used by another worker. @@ -1047,7 +1222,8 @@ EXPORT_SYMBOL_GPL(kthread_queue_work); */ void kthread_delayed_work_timer_fn(struct timer_list *t) { - struct kthread_delayed_work *dwork = from_timer(dwork, t, timer); + struct kthread_delayed_work *dwork = timer_container_of(dwork, t, + timer); struct kthread_work *work = &dwork->work; struct kthread_worker *worker = work->worker; unsigned long flags; @@ -1202,14 +1378,14 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work, struct kthread_worker *worker = work->worker; /* - * del_timer_sync() must be called to make sure that the timer + * timer_delete_sync() must be called to make sure that the timer * callback is not running. The lock must be temporary released * to avoid a deadlock with the callback. In the meantime, * any queuing is blocked by setting the canceling counter. */ work->canceling++; raw_spin_unlock_irqrestore(&worker->lock, *flags); - del_timer_sync(&dwork->timer); + timer_delete_sync(&dwork->timer); raw_spin_lock_irqsave(&worker->lock, *flags); work->canceling--; } |
