diff options
author | Tejun Heo <tj@kernel.org> | 2024-02-14 08:33:55 -1000 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2024-02-14 08:33:55 -1000 |
commit | 2f34d7337d98f3eae7bd3d1270efaf9d8a17cfc6 (patch) | |
tree | ba618a794f4b55650672325b5c76d2d8e1cb0f05 /kernel/workqueue.c | |
parent | bf52b1ac6ab41a060511d56d0f2da12f3a2486db (diff) | |
download | lwn-2f34d7337d98f3eae7bd3d1270efaf9d8a17cfc6.tar.gz lwn-2f34d7337d98f3eae7bd3d1270efaf9d8a17cfc6.zip |
workqueue: Fix queue_work_on() with BH workqueues
When queue_work_on() is used to queue a BH work item on a remote CPU, the
work item is queued on that CPU but kick_pool() raises softirq on the local
CPU. This leads to stalls as the work item won't be executed until something
else on the remote CPU schedules a BH work item or tasklet locally.
Fix it by bouncing the softirq raise to the target CPU using a per-cpu irq_work.
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: 4cb1ef64609f ("workqueue: Implement BH workqueues to eventually replace tasklets")
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r-- | kernel/workqueue.c | 41 |
1 files changed, 36 insertions, 5 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4950bfc2cdcc..04e35dbe6799 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -54,6 +54,7 @@
 #include <linux/nmi.h>
 #include <linux/kvm_para.h>
 #include <linux/delay.h>
+#include <linux/irq_work.h>
 
 #include "workqueue_internal.h"
 
@@ -457,6 +458,10 @@ static bool wq_debug_force_rr_cpu = false;
 #endif
 module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
+/* to raise softirq for the BH worker pools on other CPUs */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_work [NR_STD_WORKER_POOLS],
+				     bh_pool_irq_works);
+
 /* the BH worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
 				     bh_worker_pools);
@@ -1197,6 +1202,13 @@ static bool assign_work(struct work_struct *work, struct worker *worker,
 	return true;
 }
 
+static struct irq_work *bh_pool_irq_work(struct worker_pool *pool)
+{
+	int high = pool->attrs->nice == HIGHPRI_NICE_LEVEL ? 1 : 0;
+
+	return &per_cpu(bh_pool_irq_works, pool->cpu)[high];
+}
+
 /**
  * kick_pool - wake up an idle worker if necessary
  * @pool: pool to kick
@@ -1215,10 +1227,15 @@ static bool kick_pool(struct worker_pool *pool)
 		return false;
 
 	if (pool->flags & POOL_BH) {
-		if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
-			raise_softirq_irqoff(HI_SOFTIRQ);
-		else
-			raise_softirq_irqoff(TASKLET_SOFTIRQ);
+		if (likely(pool->cpu == smp_processor_id())) {
+			if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
+				raise_softirq_irqoff(HI_SOFTIRQ);
+			else
+				raise_softirq_irqoff(TASKLET_SOFTIRQ);
+		} else {
+			irq_work_queue_on(bh_pool_irq_work(pool), pool->cpu);
+		}
+
 		return true;
 	}
 
@@ -7367,6 +7384,16 @@ static inline void wq_watchdog_init(void) { }
 
 #endif /* CONFIG_WQ_WATCHDOG */
 
+static void bh_pool_kick_normal(struct irq_work *irq_work)
+{
+	raise_softirq_irqoff(TASKLET_SOFTIRQ);
+}
+
+static void bh_pool_kick_highpri(struct irq_work *irq_work)
+{
+	raise_softirq_irqoff(HI_SOFTIRQ);
+}
+
 static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask)
 {
 	if (!cpumask_intersects(wq_unbound_cpumask, mask)) {
@@ -7408,6 +7435,8 @@ void __init workqueue_init_early(void)
 {
 	struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_SYSTEM];
 	int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
+	void (*irq_work_fns[2])(struct irq_work *) = { bh_pool_kick_normal,
+						       bh_pool_kick_highpri };
 	int i, cpu;
 
 	BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
@@ -7455,8 +7484,10 @@ void __init workqueue_init_early(void)
 
 	i = 0;
 	for_each_bh_worker_pool(pool, cpu) {
-		init_cpu_worker_pool(pool, cpu, std_nice[i++]);
+		init_cpu_worker_pool(pool, cpu, std_nice[i]);
 		pool->flags |= POOL_BH;
+		init_irq_work(bh_pool_irq_work(pool), irq_work_fns[i]);
+		i++;
 	}
 
 	i = 0;