summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2024-09-27 10:02:40 -1000
committerTejun Heo <tj@kernel.org>2024-09-27 10:02:40 -1000
commit4269c603cc26df154e0db303a9347e6ec3cc805e (patch)
treefc9197e0068cb6a7b0506906b2a81c099c446557 /kernel
parent9753358a6a2b011478e8efdabbb489216252426f (diff)
downloadlwn-4269c603cc26df154e0db303a9347e6ec3cc805e.tar.gz
lwn-4269c603cc26df154e0db303a9347e6ec3cc805e.zip
sched_ext: Enable scx_ops_init_task() separately
scx_ops_init_task() and the follow-up scx_ops_enable_task() in the fork path were gated by scx_enabled() test and thus __scx_ops_enabled had to be turned on before the first scx_ops_init_task() loop in scx_ops_enable(). However, if an external entity causes sched_class switch before the loop is complete, tasks which are not initialized could be switched to SCX. The following can be reproduced by running a program which keeps toggling a process between SCHED_OTHER and SCHED_EXT using sched_setscheduler(2). sched_ext: Invalid task state transition 0 -> 3 for fish[1623] WARNING: CPU: 1 PID: 1650 at kernel/sched/ext.c:3392 scx_ops_enable_task+0x1a1/0x200 ... Sched_ext: simple (enabling) RIP: 0010:scx_ops_enable_task+0x1a1/0x200 ... switching_to_scx+0x13/0xa0 __sched_setscheduler+0x850/0xa50 do_sched_setscheduler+0x104/0x1c0 __x64_sys_sched_setscheduler+0x18/0x30 do_syscall_64+0x7b/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix it by gating scx_ops_init_task() separately using scx_ops_init_task_enabled. __scx_ops_enabled is now set after all tasks are finished with scx_ops_init_task(). Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched/ext.c14
1 files changed, 10 insertions, 4 deletions
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index e83af19de59d..7729594882d9 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -853,6 +853,7 @@ DEFINE_STATIC_KEY_FALSE(__scx_ops_enabled);
DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
static atomic_t scx_ops_enable_state_var = ATOMIC_INIT(SCX_OPS_DISABLED);
static atomic_t scx_ops_bypass_depth = ATOMIC_INIT(0);
+static bool scx_ops_init_task_enabled;
static bool scx_switching_all;
DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
@@ -3565,7 +3566,7 @@ int scx_fork(struct task_struct *p)
{
percpu_rwsem_assert_held(&scx_fork_rwsem);
- if (scx_enabled())
+ if (scx_ops_init_task_enabled)
return scx_ops_init_task(p, task_group(p), true);
else
return 0;
@@ -3573,7 +3574,7 @@ int scx_fork(struct task_struct *p)
void scx_post_fork(struct task_struct *p)
{
- if (scx_enabled()) {
+ if (scx_ops_init_task_enabled) {
scx_set_task_state(p, SCX_TASK_READY);
/*
@@ -4453,6 +4454,8 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
cpus_read_lock();
scx_cgroup_lock();
+ scx_ops_init_task_enabled = false;
+
spin_lock_irq(&scx_tasks_lock);
scx_task_iter_init(&sti);
/*
@@ -5132,7 +5135,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (ret)
goto err_disable_unlock_all;
- static_branch_enable_cpuslocked(&__scx_ops_enabled);
+ WARN_ON_ONCE(scx_ops_init_task_enabled);
+ scx_ops_init_task_enabled = true;
/*
* Enable ops for every task. Fork is excluded by scx_fork_rwsem
@@ -5175,9 +5179,11 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
spin_unlock_irq(&scx_tasks_lock);
/*
- * All tasks are prepped but the tasks are not enabled. Switch everyone.
+ * All tasks are READY. It's safe to turn on scx_enabled() and switch
+ * all eligible tasks.
*/
WRITE_ONCE(scx_switching_all, !(ops->flags & SCX_OPS_SWITCH_PARTIAL));
+ static_branch_enable_cpuslocked(&__scx_ops_enabled);
/*
* We're fully committed and can't fail. The task READY -> ENABLED