summary | refs | log | tree | commit | diff
path: root/kernel/kthread.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/kthread.c')
-rw-r--r-- kernel/kthread.c 233
1 files changed, 125 insertions, 108 deletions
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5dc5b0d7238e..791210daf8b4 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -35,8 +35,8 @@ static DEFINE_SPINLOCK(kthread_create_lock);
static LIST_HEAD(kthread_create_list);
struct task_struct *kthreadd_task;
-static LIST_HEAD(kthreads_hotplug);
-static DEFINE_MUTEX(kthreads_hotplug_lock);
+static LIST_HEAD(kthread_affinity_list);
+static DEFINE_MUTEX(kthread_affinity_lock);
struct kthread_create_info
{
@@ -69,7 +69,7 @@ struct kthread {
/* To store the full name if task comm is truncated. */
char *full_name;
struct task_struct *task;
- struct list_head hotplug_node;
+ struct list_head affinity_node;
struct cpumask *preferred_affinity;
};
@@ -85,25 +85,6 @@ static inline struct kthread *to_kthread(struct task_struct *k)
return k->worker_private;
}
-/*
- * Variant of to_kthread() that doesn't assume @p is a kthread.
- *
- * Per construction; when:
- *
- * (p->flags & PF_KTHREAD) && p->worker_private
- *
- * the task is both a kthread and struct kthread is persistent. However
- * PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and
- * begin_new_exec()).
- */
-static inline struct kthread *__to_kthread(struct task_struct *p)
-{
- void *kthread = p->worker_private;
- if (kthread && !(p->flags & PF_KTHREAD))
- kthread = NULL;
- return kthread;
-}
-
void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk)
{
struct kthread *kthread = to_kthread(tsk);
@@ -123,13 +104,13 @@ bool set_kthread_struct(struct task_struct *p)
if (WARN_ON_ONCE(to_kthread(p)))
return false;
- kthread = kzalloc(sizeof(*kthread), GFP_KERNEL);
+ kthread = kzalloc_obj(*kthread);
if (!kthread)
return false;
init_completion(&kthread->exited);
init_completion(&kthread->parked);
- INIT_LIST_HEAD(&kthread->hotplug_node);
+ INIT_LIST_HEAD(&kthread->affinity_node);
p->vfork_done = &kthread->exited;
kthread->task = p;
@@ -194,7 +175,7 @@ EXPORT_SYMBOL_GPL(kthread_should_park);
bool kthread_should_stop_or_park(void)
{
- struct kthread *kthread = __to_kthread(current);
+ struct kthread *kthread = tsk_is_kthread(current);
if (!kthread)
return false;
@@ -235,7 +216,7 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
*/
void *kthread_func(struct task_struct *task)
{
- struct kthread *kthread = __to_kthread(task);
+ struct kthread *kthread = tsk_is_kthread(task);
if (kthread)
return kthread->threadfn;
return NULL;
@@ -267,7 +248,7 @@ EXPORT_SYMBOL_GPL(kthread_data);
*/
void *kthread_probe_data(struct task_struct *task)
{
- struct kthread *kthread = __to_kthread(task);
+ struct kthread *kthread = tsk_is_kthread(task);
void *data = NULL;
if (kthread)
@@ -310,33 +291,20 @@ void kthread_parkme(void)
}
EXPORT_SYMBOL_GPL(kthread_parkme);
-/**
- * kthread_exit - Cause the current kthread return @result to kthread_stop().
- * @result: The integer value to return to kthread_stop().
- *
- * While kthread_exit can be called directly, it exists so that
- * functions which do some additional work in non-modular code such as
- * module_put_and_kthread_exit can be implemented.
- *
- * Does not return.
- */
-void __noreturn kthread_exit(long result)
+void kthread_do_exit(struct kthread *kthread, long result)
{
- struct kthread *kthread = to_kthread(current);
kthread->result = result;
- if (!list_empty(&kthread->hotplug_node)) {
- mutex_lock(&kthreads_hotplug_lock);
- list_del(&kthread->hotplug_node);
- mutex_unlock(&kthreads_hotplug_lock);
+ if (!list_empty(&kthread->affinity_node)) {
+ mutex_lock(&kthread_affinity_lock);
+ list_del(&kthread->affinity_node);
+ mutex_unlock(&kthread_affinity_lock);
if (kthread->preferred_affinity) {
kfree(kthread->preferred_affinity);
kthread->preferred_affinity = NULL;
}
}
- do_exit(0);
}
-EXPORT_SYMBOL(kthread_exit);
/**
* kthread_complete_and_exit - Exit the current kthread.
@@ -363,17 +331,20 @@ static void kthread_fetch_affinity(struct kthread *kthread, struct cpumask *cpum
{
const struct cpumask *pref;
+ guard(rcu)();
+
if (kthread->preferred_affinity) {
pref = kthread->preferred_affinity;
} else {
- if (WARN_ON_ONCE(kthread->node == NUMA_NO_NODE))
- return;
- pref = cpumask_of_node(kthread->node);
+ if (kthread->node == NUMA_NO_NODE)
+ pref = housekeeping_cpumask(HK_TYPE_DOMAIN);
+ else
+ pref = cpumask_of_node(kthread->node);
}
- cpumask_and(cpumask, pref, housekeeping_cpumask(HK_TYPE_KTHREAD));
+ cpumask_and(cpumask, pref, housekeeping_cpumask(HK_TYPE_DOMAIN));
if (cpumask_empty(cpumask))
- cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_KTHREAD));
+ cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
}
static void kthread_affine_node(void)
@@ -381,32 +352,29 @@ static void kthread_affine_node(void)
struct kthread *kthread = to_kthread(current);
cpumask_var_t affinity;
- WARN_ON_ONCE(kthread_is_per_cpu(current));
+ if (WARN_ON_ONCE(kthread_is_per_cpu(current)))
+ return;
- if (kthread->node == NUMA_NO_NODE) {
- housekeeping_affine(current, HK_TYPE_KTHREAD);
- } else {
- if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) {
- WARN_ON_ONCE(1);
- return;
- }
+ if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
- mutex_lock(&kthreads_hotplug_lock);
- WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
- list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
- /*
- * The node cpumask is racy when read from kthread() but:
- * - a racing CPU going down will either fail on the subsequent
- * call to set_cpus_allowed_ptr() or be migrated to housekeepers
- * afterwards by the scheduler.
- * - a racing CPU going up will be handled by kthreads_online_cpu()
- */
- kthread_fetch_affinity(kthread, affinity);
- set_cpus_allowed_ptr(current, affinity);
- mutex_unlock(&kthreads_hotplug_lock);
+ mutex_lock(&kthread_affinity_lock);
+ WARN_ON_ONCE(!list_empty(&kthread->affinity_node));
+ list_add_tail(&kthread->affinity_node, &kthread_affinity_list);
+ /*
+ * The node cpumask is racy when read from kthread() but:
+ * - a racing CPU going down will either fail on the subsequent
+ * call to set_cpus_allowed_ptr() or be migrated to housekeepers
+ * afterwards by the scheduler.
+ * - a racing CPU going up will be handled by kthreads_online_cpu()
+ */
+ kthread_fetch_affinity(kthread, affinity);
+ set_cpus_allowed_ptr(current, affinity);
+ mutex_unlock(&kthread_affinity_lock);
- free_cpumask_var(affinity);
- }
+ free_cpumask_var(affinity);
}
static int kthread(void *_create)
@@ -454,6 +422,10 @@ static int kthread(void *_create)
self->started = 1;
+ /*
+ * Apply default node affinity if no call to kthread_bind[_mask]() nor
+ * kthread_affine_preferred() was issued before the first wake-up.
+ */
if (!(current->flags & PF_NO_SETAFFINITY) && !self->preferred_affinity)
kthread_affine_node();
@@ -508,8 +480,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
{
DECLARE_COMPLETION_ONSTACK(done);
struct task_struct *task;
- struct kthread_create_info *create = kmalloc(sizeof(*create),
- GFP_KERNEL);
+ struct kthread_create_info *create = kmalloc_obj(*create);
if (!create)
return ERR_PTR(-ENOMEM);
@@ -594,18 +565,16 @@ EXPORT_SYMBOL(kthread_create_on_node);
static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, unsigned int state)
{
- unsigned long flags;
-
if (!wait_task_inactive(p, state)) {
WARN_ON(1);
return;
}
+ scoped_guard (raw_spinlock_irqsave, &p->pi_lock)
+ set_cpus_allowed_force(p, mask);
+
/* It's safe because the task is inactive. */
- raw_spin_lock_irqsave(&p->pi_lock, flags);
- do_set_cpus_allowed(p, mask);
p->flags |= PF_NO_SETAFFINITY;
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
}
static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int state)
@@ -683,7 +652,7 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu)
bool kthread_is_per_cpu(struct task_struct *p)
{
- struct kthread *kthread = __to_kthread(p);
+ struct kthread *kthread = tsk_is_kthread(p);
if (!kthread)
return false;
@@ -823,12 +792,13 @@ int kthreadd(void *unused)
/* Setup a clean context for our children to inherit. */
set_task_comm(tsk, comm);
ignore_signals(tsk);
- set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
cgroup_init_kthreadd();
+ kthread_affine_node();
+
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (list_empty(&kthread_create_list))
@@ -854,11 +824,22 @@ int kthreadd(void *unused)
return 0;
}
+/**
+ * kthread_affine_preferred - Define a kthread's preferred affinity
+ * @p: thread created by kthread_create().
+ * @mask: preferred mask of CPUs (might not be online, must be possible) for @p
+ * to run on.
+ *
+ * Similar to kthread_bind_mask() except that the affinity is not a requirement
+ * but rather a preference that can be constrained by CPU isolation or CPU hotplug.
+ * Must be called before the first wakeup of the kthread.
+ *
+ * Returns 0 if the affinity has been applied.
+ */
int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask)
{
struct kthread *kthread = to_kthread(p);
cpumask_var_t affinity;
- unsigned long flags;
int ret = 0;
if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) {
@@ -877,40 +858,32 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask)
goto out;
}
- mutex_lock(&kthreads_hotplug_lock);
+ mutex_lock(&kthread_affinity_lock);
cpumask_copy(kthread->preferred_affinity, mask);
- WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
- list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
+ WARN_ON_ONCE(!list_empty(&kthread->affinity_node));
+ list_add_tail(&kthread->affinity_node, &kthread_affinity_list);
kthread_fetch_affinity(kthread, affinity);
- /* It's safe because the task is inactive. */
- raw_spin_lock_irqsave(&p->pi_lock, flags);
- do_set_cpus_allowed(p, affinity);
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ scoped_guard (raw_spinlock_irqsave, &p->pi_lock)
+ set_cpus_allowed_force(p, affinity);
- mutex_unlock(&kthreads_hotplug_lock);
+ mutex_unlock(&kthread_affinity_lock);
out:
free_cpumask_var(affinity);
return ret;
}
+EXPORT_SYMBOL_GPL(kthread_affine_preferred);
-/*
- * Re-affine kthreads according to their preferences
- * and the newly online CPU. The CPU down part is handled
- * by select_fallback_rq() which default re-affines to
- * housekeepers from other nodes in case the preferred
- * affinity doesn't apply anymore.
- */
-static int kthreads_online_cpu(unsigned int cpu)
+static int kthreads_update_affinity(bool force)
{
cpumask_var_t affinity;
struct kthread *k;
int ret;
- guard(mutex)(&kthreads_hotplug_lock);
+ guard(mutex)(&kthread_affinity_lock);
- if (list_empty(&kthreads_hotplug))
+ if (list_empty(&kthread_affinity_list))
return 0;
if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
@@ -918,14 +891,29 @@ static int kthreads_online_cpu(unsigned int cpu)
ret = 0;
- list_for_each_entry(k, &kthreads_hotplug, hotplug_node) {
+ list_for_each_entry(k, &kthread_affinity_list, affinity_node) {
if (WARN_ON_ONCE((k->task->flags & PF_NO_SETAFFINITY) ||
kthread_is_per_cpu(k->task))) {
ret = -EINVAL;
continue;
}
- kthread_fetch_affinity(k, affinity);
- set_cpus_allowed_ptr(k->task, affinity);
+
+ /*
+ * Unbound kthreads without preferred affinity are already affine
+ * to housekeeping, whether those CPUs are online or not. So no need
+ * to handle newly online CPUs for them. However housekeeping changes
+ * have to be applied.
+ *
+ * But kthreads with a preferred affinity or node are different:
+ * if none of their preferred CPUs are online and part of
+ * housekeeping at the same time, they must be affine to housekeeping.
+ * But as soon as one of their preferred CPUs becomes online, they must
+ * be affine to them.
+ */
+ if (force || k->preferred_affinity || k->node != NUMA_NO_NODE) {
+ kthread_fetch_affinity(k, affinity);
+ set_cpus_allowed_ptr(k->task, affinity);
+ }
}
free_cpumask_var(affinity);
@@ -933,6 +921,33 @@ static int kthreads_online_cpu(unsigned int cpu)
return ret;
}
+/**
+ * kthreads_update_housekeeping - Update kthreads affinity on cpuset change
+ *
+ * When cpuset changes a partition type to/from "isolated" or updates related
+ * cpumasks, propagate the housekeeping cpumask change to the affinity of
+ * every kthread tracked on kthread_affinity_list.
+ *
+ * Returns 0 if successful, -ENOMEM if temporary mask couldn't
+ * be allocated or -EINVAL in case of internal error.
+ */
+int kthreads_update_housekeeping(void)
+{
+ return kthreads_update_affinity(true);
+}
+
+/*
+ * Re-affine kthreads according to their preferences
+ * and the newly online CPU. The CPU down part is handled
+ * by select_fallback_rq() which by default re-affines to
+ * housekeepers from other nodes in case the preferred
+ * affinity doesn't apply anymore.
+ */
+static int kthreads_online_cpu(unsigned int cpu)
+{
+ return kthreads_update_affinity(false);
+}
+
static int kthreads_init(void)
{
return cpuhp_setup_state(CPUHP_AP_KTHREADS_ONLINE, "kthreads:online",
@@ -1037,7 +1052,7 @@ __kthread_create_worker_on_node(unsigned int flags, int node,
struct kthread_worker *worker;
struct task_struct *task;
- worker = kzalloc(sizeof(*worker), GFP_KERNEL);
+ worker = kzalloc_obj(*worker);
if (!worker)
return ERR_PTR(-ENOMEM);
@@ -1207,7 +1222,8 @@ EXPORT_SYMBOL_GPL(kthread_queue_work);
*/
void kthread_delayed_work_timer_fn(struct timer_list *t)
{
- struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
+ struct kthread_delayed_work *dwork = timer_container_of(dwork, t,
+ timer);
struct kthread_work *work = &dwork->work;
struct kthread_worker *worker = work->worker;
unsigned long flags;
@@ -1362,14 +1378,14 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
struct kthread_worker *worker = work->worker;
/*
- * del_timer_sync() must be called to make sure that the timer
+ * timer_delete_sync() must be called to make sure that the timer
* callback is not running. The lock must be temporary released
* to avoid a deadlock with the callback. In the meantime,
* any queuing is blocked by setting the canceling counter.
*/
work->canceling++;
raw_spin_unlock_irqrestore(&worker->lock, *flags);
- del_timer_sync(&dwork->timer);
+ timer_delete_sync(&dwork->timer);
raw_spin_lock_irqsave(&worker->lock, *flags);
work->canceling--;
}
@@ -1603,6 +1619,7 @@ void kthread_use_mm(struct mm_struct *mm)
WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD));
WARN_ON_ONCE(tsk->mm);
+ WARN_ON_ONCE(!mm->user_ns);
/*
* It is possible for mm to be the same as tsk->active_mm, but