summaryrefslogtreecommitdiff
path: root/kernel/kthread.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/kthread.c')
-rw-r--r--kernel/kthread.c324
1 files changed, 250 insertions, 74 deletions
diff --git a/kernel/kthread.c b/kernel/kthread.c
index a5ac612b1609..63beb59b7a3d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -35,6 +35,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
static LIST_HEAD(kthread_create_list);
struct task_struct *kthreadd_task;
+static LIST_HEAD(kthread_affinity_list);
+static DEFINE_MUTEX(kthread_affinity_lock);
+
struct kthread_create_info
{
/* Information passed to kthread() from kthreadd. */
@@ -53,6 +56,8 @@ struct kthread_create_info
struct kthread {
unsigned long flags;
unsigned int cpu;
+ unsigned int node;
+ int started;
int result;
int (*threadfn)(void *);
void *data;
@@ -63,6 +68,9 @@ struct kthread {
#endif
/* To store the full name if task comm is truncated. */
char *full_name;
+ struct task_struct *task;
+ struct list_head affinity_node;
+ struct cpumask *preferred_affinity;
};
enum KTHREAD_BITS {
@@ -77,25 +85,6 @@ static inline struct kthread *to_kthread(struct task_struct *k)
return k->worker_private;
}
-/*
- * Variant of to_kthread() that doesn't assume @p is a kthread.
- *
- * Per construction; when:
- *
- * (p->flags & PF_KTHREAD) && p->worker_private
- *
- * the task is both a kthread and struct kthread is persistent. However
- * PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and
- * begin_new_exec()).
- */
-static inline struct kthread *__to_kthread(struct task_struct *p)
-{
- void *kthread = p->worker_private;
- if (kthread && !(p->flags & PF_KTHREAD))
- kthread = NULL;
- return kthread;
-}
-
void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk)
{
struct kthread *kthread = to_kthread(tsk);
@@ -115,14 +104,17 @@ bool set_kthread_struct(struct task_struct *p)
if (WARN_ON_ONCE(to_kthread(p)))
return false;
- kthread = kzalloc(sizeof(*kthread), GFP_KERNEL);
+ kthread = kzalloc_obj(*kthread);
if (!kthread)
return false;
init_completion(&kthread->exited);
init_completion(&kthread->parked);
+ INIT_LIST_HEAD(&kthread->affinity_node);
p->vfork_done = &kthread->exited;
+ kthread->task = p;
+ kthread->node = tsk_fork_get_node(current);
p->worker_private = kthread;
return true;
}
@@ -183,7 +175,7 @@ EXPORT_SYMBOL_GPL(kthread_should_park);
bool kthread_should_stop_or_park(void)
{
- struct kthread *kthread = __to_kthread(current);
+ struct kthread *kthread = tsk_is_kthread(current);
if (!kthread)
return false;
@@ -224,7 +216,7 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
*/
void *kthread_func(struct task_struct *task)
{
- struct kthread *kthread = __to_kthread(task);
+ struct kthread *kthread = tsk_is_kthread(task);
if (kthread)
return kthread->threadfn;
return NULL;
@@ -256,7 +248,7 @@ EXPORT_SYMBOL_GPL(kthread_data);
*/
void *kthread_probe_data(struct task_struct *task)
{
- struct kthread *kthread = __to_kthread(task);
+ struct kthread *kthread = tsk_is_kthread(task);
void *data = NULL;
if (kthread)
@@ -299,23 +291,20 @@ void kthread_parkme(void)
}
EXPORT_SYMBOL_GPL(kthread_parkme);
-/**
- * kthread_exit - Cause the current kthread return @result to kthread_stop().
- * @result: The integer value to return to kthread_stop().
- *
- * While kthread_exit can be called directly, it exists so that
- * functions which do some additional work in non-modular code such as
- * module_put_and_kthread_exit can be implemented.
- *
- * Does not return.
- */
-void __noreturn kthread_exit(long result)
+void kthread_do_exit(struct kthread *kthread, long result)
{
- struct kthread *kthread = to_kthread(current);
kthread->result = result;
- do_exit(0);
+ if (!list_empty(&kthread->affinity_node)) {
+ mutex_lock(&kthread_affinity_lock);
+ list_del(&kthread->affinity_node);
+ mutex_unlock(&kthread_affinity_lock);
+
+ if (kthread->preferred_affinity) {
+ kfree(kthread->preferred_affinity);
+ kthread->preferred_affinity = NULL;
+ }
+ }
}
-EXPORT_SYMBOL(kthread_exit);
/**
* kthread_complete_and_exit - Exit the current kthread.
@@ -338,6 +327,56 @@ void __noreturn kthread_complete_and_exit(struct completion *comp, long code)
}
EXPORT_SYMBOL(kthread_complete_and_exit);
+static void kthread_fetch_affinity(struct kthread *kthread, struct cpumask *cpumask)
+{
+ const struct cpumask *pref;
+
+ guard(rcu)();
+
+ if (kthread->preferred_affinity) {
+ pref = kthread->preferred_affinity;
+ } else {
+ if (kthread->node == NUMA_NO_NODE)
+ pref = housekeeping_cpumask(HK_TYPE_DOMAIN);
+ else
+ pref = cpumask_of_node(kthread->node);
+ }
+
+ cpumask_and(cpumask, pref, housekeeping_cpumask(HK_TYPE_DOMAIN));
+ if (cpumask_empty(cpumask))
+ cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
+}
+
+static void kthread_affine_node(void)
+{
+ struct kthread *kthread = to_kthread(current);
+ cpumask_var_t affinity;
+
+ if (WARN_ON_ONCE(kthread_is_per_cpu(current)))
+ return;
+
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ mutex_lock(&kthread_affinity_lock);
+ WARN_ON_ONCE(!list_empty(&kthread->affinity_node));
+ list_add_tail(&kthread->affinity_node, &kthread_affinity_list);
+ /*
+ * The node cpumask is racy when read from kthread() but:
+ * - a racing CPU going down will either fail on the subsequent
+ * call to set_cpus_allowed_ptr() or be migrated to housekeepers
+ * afterwards by the scheduler.
+ * - a racing CPU going up will be handled by kthreads_online_cpu()
+ */
+ kthread_fetch_affinity(kthread, affinity);
+ set_cpus_allowed_ptr(current, affinity);
+ mutex_unlock(&kthread_affinity_lock);
+
+ free_cpumask_var(affinity);
+}
+
static int kthread(void *_create)
{
static const struct sched_param param = { .sched_priority = 0 };
@@ -368,7 +407,6 @@ static int kthread(void *_create)
* back to default in case they have been changed.
*/
sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
- set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE);
@@ -382,6 +420,15 @@ static int kthread(void *_create)
schedule_preempt_disabled();
preempt_enable();
+ self->started = 1;
+
+ /*
+ * Apply default node affinity if no call to kthread_bind[_mask]() nor
+ * kthread_affine_preferred() was issued before the first wake-up.
+ */
+ if (!(current->flags & PF_NO_SETAFFINITY) && !self->preferred_affinity)
+ kthread_affine_node();
+
ret = -EINTR;
if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
cgroup_kthread_ready();
@@ -433,8 +480,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
{
DECLARE_COMPLETION_ONSTACK(done);
struct task_struct *task;
- struct kthread_create_info *create = kmalloc(sizeof(*create),
- GFP_KERNEL);
+ struct kthread_create_info *create = kmalloc_obj(*create);
if (!create)
return ERR_PTR(-ENOMEM);
@@ -519,18 +565,16 @@ EXPORT_SYMBOL(kthread_create_on_node);
static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, unsigned int state)
{
- unsigned long flags;
-
if (!wait_task_inactive(p, state)) {
WARN_ON(1);
return;
}
+ scoped_guard (raw_spinlock_irqsave, &p->pi_lock)
+ set_cpus_allowed_force(p, mask);
+
/* It's safe because the task is inactive. */
- raw_spin_lock_irqsave(&p->pi_lock, flags);
- do_set_cpus_allowed(p, mask);
p->flags |= PF_NO_SETAFFINITY;
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
}
static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int state)
@@ -540,7 +584,9 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int
void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
{
+ struct kthread *kthread = to_kthread(p);
__kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
+ WARN_ON_ONCE(kthread->started);
}
/**
@@ -554,7 +600,9 @@ void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
*/
void kthread_bind(struct task_struct *p, unsigned int cpu)
{
+ struct kthread *kthread = to_kthread(p);
__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
+ WARN_ON_ONCE(kthread->started);
}
EXPORT_SYMBOL(kthread_bind);
@@ -604,7 +652,7 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu)
bool kthread_is_per_cpu(struct task_struct *p)
{
- struct kthread *kthread = __to_kthread(p);
+ struct kthread *kthread = tsk_is_kthread(p);
if (!kthread)
return false;
@@ -738,17 +786,19 @@ EXPORT_SYMBOL(kthread_stop_put);
int kthreadd(void *unused)
{
+ static const char comm[TASK_COMM_LEN] = "kthreadd";
struct task_struct *tsk = current;
/* Setup a clean context for our children to inherit. */
- set_task_comm(tsk, "kthreadd");
+ set_task_comm(tsk, comm);
ignore_signals(tsk);
- set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
cgroup_init_kthreadd();
+ kthread_affine_node();
+
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (list_empty(&kthread_create_list))
@@ -774,6 +824,137 @@ int kthreadd(void *unused)
return 0;
}
+/**
+ * kthread_affine_preferred - Define a kthread's preferred affinity
+ * @p: thread created by kthread_create().
+ * @mask: preferred mask of CPUs (might not be online, must be possible) for @p
+ * to run on.
+ *
+ * Similar to kthread_bind_mask() except that the affinity is not a requirement
+ * but rather a preference that can be constrained by CPU isolation or CPU hotplug.
+ * Must be called before the first wakeup of the kthread.
+ *
+ * Returns 0 if the affinity has been applied.
+ */
+int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask)
+{
+ struct kthread *kthread = to_kthread(p);
+ cpumask_var_t affinity;
+ int ret = 0;
+
+ if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ WARN_ON_ONCE(kthread->preferred_affinity);
+
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+ return -ENOMEM;
+
+ kthread->preferred_affinity = kzalloc(sizeof(struct cpumask), GFP_KERNEL);
+ if (!kthread->preferred_affinity) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_lock(&kthread_affinity_lock);
+ cpumask_copy(kthread->preferred_affinity, mask);
+ WARN_ON_ONCE(!list_empty(&kthread->affinity_node));
+ list_add_tail(&kthread->affinity_node, &kthread_affinity_list);
+ kthread_fetch_affinity(kthread, affinity);
+
+ scoped_guard (raw_spinlock_irqsave, &p->pi_lock)
+ set_cpus_allowed_force(p, affinity);
+
+ mutex_unlock(&kthread_affinity_lock);
+out:
+ free_cpumask_var(affinity);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kthread_affine_preferred);
+
+static int kthreads_update_affinity(bool force)
+{
+ cpumask_var_t affinity;
+ struct kthread *k;
+ int ret;
+
+ guard(mutex)(&kthread_affinity_lock);
+
+ if (list_empty(&kthread_affinity_list))
+ return 0;
+
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = 0;
+
+ list_for_each_entry(k, &kthread_affinity_list, affinity_node) {
+ if (WARN_ON_ONCE((k->task->flags & PF_NO_SETAFFINITY) ||
+ kthread_is_per_cpu(k->task))) {
+ ret = -EINVAL;
+ continue;
+ }
+
+ /*
+ * Unbound kthreads without preferred affinity are already affine
+ * to housekeeping, whether those CPUs are online or not. So no need
+ * to handle newly online CPUs for them. However housekeeping changes
+ * have to be applied.
+ *
+ * But kthreads with a preferred affinity or node are different:
+ * if none of their preferred CPUs are online and part of
+ * housekeeping at the same time, they must be affine to housekeeping.
+ * But as soon as one of their preferred CPU becomes online, they must
+ * be affine to them.
+ */
+ if (force || k->preferred_affinity || k->node != NUMA_NO_NODE) {
+ kthread_fetch_affinity(k, affinity);
+ set_cpus_allowed_ptr(k->task, affinity);
+ }
+ }
+
+ free_cpumask_var(affinity);
+
+ return ret;
+}
+
+/**
+ * kthreads_update_housekeeping - Update kthreads affinity on cpuset change
+ *
+ * When cpuset changes a partition type to/from "isolated" or updates related
+ * cpumasks, propagate the housekeeping cpumask change to preferred kthreads
+ * affinity.
+ *
+ * Returns 0 if successful, -ENOMEM if temporary mask couldn't
+ * be allocated or -EINVAL in case of internal error.
+ */
+int kthreads_update_housekeeping(void)
+{
+ return kthreads_update_affinity(true);
+}
+
+/*
+ * Re-affine kthreads according to their preferences
+ * and the newly online CPU. The CPU down part is handled
+ * by select_fallback_rq() which default re-affines to
+ * housekeepers from other nodes in case the preferred
+ * affinity doesn't apply anymore.
+ */
+static int kthreads_online_cpu(unsigned int cpu)
+{
+ return kthreads_update_affinity(false);
+}
+
+static int kthreads_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_KTHREADS_ONLINE, "kthreads:online",
+ kthreads_online_cpu, NULL);
+}
+early_initcall(kthreads_init);
+
void __kthread_init_worker(struct kthread_worker *worker,
const char *name,
struct lock_class_key *key)
@@ -865,33 +1046,26 @@ repeat:
EXPORT_SYMBOL_GPL(kthread_worker_fn);
static __printf(3, 0) struct kthread_worker *
-__kthread_create_worker(int cpu, unsigned int flags,
- const char namefmt[], va_list args)
+__kthread_create_worker_on_node(unsigned int flags, int node,
+ const char namefmt[], va_list args)
{
struct kthread_worker *worker;
struct task_struct *task;
- int node = NUMA_NO_NODE;
- worker = kzalloc(sizeof(*worker), GFP_KERNEL);
+ worker = kzalloc_obj(*worker);
if (!worker)
return ERR_PTR(-ENOMEM);
kthread_init_worker(worker);
- if (cpu >= 0)
- node = cpu_to_node(cpu);
-
task = __kthread_create_on_node(kthread_worker_fn, worker,
- node, namefmt, args);
+ node, namefmt, args);
if (IS_ERR(task))
goto fail_task;
- if (cpu >= 0)
- kthread_bind(task, cpu);
-
worker->flags = flags;
worker->task = task;
- wake_up_process(task);
+
return worker;
fail_task:
@@ -900,8 +1074,9 @@ fail_task:
}
/**
- * kthread_create_worker - create a kthread worker
+ * kthread_create_worker_on_node - create a kthread worker
* @flags: flags modifying the default behavior of the worker
+ * @node: task structure for the thread is allocated on this node
* @namefmt: printf-style name for the kthread worker (task).
*
* Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
@@ -909,25 +1084,26 @@ fail_task:
* when the caller was killed by a fatal signal.
*/
struct kthread_worker *
-kthread_create_worker(unsigned int flags, const char namefmt[], ...)
+kthread_create_worker_on_node(unsigned int flags, int node, const char namefmt[], ...)
{
struct kthread_worker *worker;
va_list args;
va_start(args, namefmt);
- worker = __kthread_create_worker(-1, flags, namefmt, args);
+ worker = __kthread_create_worker_on_node(flags, node, namefmt, args);
va_end(args);
return worker;
}
-EXPORT_SYMBOL(kthread_create_worker);
+EXPORT_SYMBOL(kthread_create_worker_on_node);
/**
* kthread_create_worker_on_cpu - create a kthread worker and bind it
* to a given CPU and the associated NUMA node.
* @cpu: CPU number
* @flags: flags modifying the default behavior of the worker
- * @namefmt: printf-style name for the kthread worker (task).
+ * @namefmt: printf-style name for the thread. Format is restricted
+ * to "name.*%u". Code fills in cpu number.
*
* Use a valid CPU number if you want to bind the kthread worker
* to the given CPU and the associated NUMA node.
@@ -959,14 +1135,13 @@ EXPORT_SYMBOL(kthread_create_worker);
*/
struct kthread_worker *
kthread_create_worker_on_cpu(int cpu, unsigned int flags,
- const char namefmt[], ...)
+ const char namefmt[])
{
struct kthread_worker *worker;
- va_list args;
- va_start(args, namefmt);
- worker = __kthread_create_worker(cpu, flags, namefmt, args);
- va_end(args);
+ worker = kthread_create_worker_on_node(flags, cpu_to_node(cpu), namefmt, cpu);
+ if (!IS_ERR(worker))
+ kthread_bind(worker->task, cpu);
return worker;
}
@@ -1015,7 +1190,7 @@ static void kthread_insert_work(struct kthread_worker *worker,
* @work: kthread_work to queue
*
* Queue @work to work processor @task for async execution. @task
- * must have been created with kthread_worker_create(). Returns %true
+ * must have been created with kthread_create_worker(). Returns %true
* if @work was successfully queued, %false if it was already pending.
*
* Reinitialize the work if it needs to be used by another worker.
@@ -1047,7 +1222,8 @@ EXPORT_SYMBOL_GPL(kthread_queue_work);
*/
void kthread_delayed_work_timer_fn(struct timer_list *t)
{
- struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
+ struct kthread_delayed_work *dwork = timer_container_of(dwork, t,
+ timer);
struct kthread_work *work = &dwork->work;
struct kthread_worker *worker = work->worker;
unsigned long flags;
@@ -1202,14 +1378,14 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
struct kthread_worker *worker = work->worker;
/*
- * del_timer_sync() must be called to make sure that the timer
+ * timer_delete_sync() must be called to make sure that the timer
* callback is not running. The lock must be temporary released
* to avoid a deadlock with the callback. In the meantime,
* any queuing is blocked by setting the canceling counter.
*/
work->canceling++;
raw_spin_unlock_irqrestore(&worker->lock, *flags);
- del_timer_sync(&dwork->timer);
+ timer_delete_sync(&dwork->timer);
raw_spin_lock_irqsave(&worker->lock, *flags);
work->canceling--;
}