diff options
| author | Juri Lelli <juri.lelli@redhat.com> | 2025-03-13 18:10:21 +0100 |
|---|---|---|
| committer | Peter Zijlstra <peterz@infradead.org> | 2025-03-17 11:23:42 +0100 |
| commit | 2ff899e3516437354204423ef0a94994717b8e6a (patch) | |
| tree | 1e9fc392a4f839351bf9575c84ff1ece95c41cdf /kernel | |
| parent | 45007c6fb5860cf63556a9cadc87c8984927e23d (diff) | |
| download | lwn-2ff899e3516437354204423ef0a94994717b8e6a.tar.gz lwn-2ff899e3516437354204423ef0a94994717b8e6a.zip | |
sched/deadline: Rebuild root domain accounting after every update
Rebuilding of root domains accounting information (total_bw) is
currently broken on some cases, e.g. suspend/resume on aarch64. Problem
is that the way we keep track of domain changes and try to add bandwidth
back is convoluted and fragile.
Fix it by simplify things by making sure bandwidth accounting is cleared
and completely restored after root domains changes (after root domains
are again stable).
To be sure we always call dl_rebuild_rd_accounting while holding
cpuset_mutex we also add cpuset_reset_sched_domains() wrapper.
Fixes: 53916d5fd3c0 ("sched/deadline: Check bandwidth overflow earlier for hotplug")
Reported-by: Jon Hunter <jonathanh@nvidia.com>
Co-developed-by: Waiman Long <llong@redhat.com>
Signed-off-by: Waiman Long <llong@redhat.com>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/Z9MRfeJKJUOyUSto@jlelli-thinkpadt14gen4.remote.csb
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cgroup/cpuset.c | 23 | ||||
| -rw-r--r-- | kernel/sched/core.c | 4 | ||||
| -rw-r--r-- | kernel/sched/deadline.c | 16 | ||||
| -rw-r--r-- | kernel/sched/topology.c | 1 |
4 files changed, 29 insertions, 15 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f87526edb2a4..1892dc8cd211 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -954,10 +954,12 @@ static void dl_update_tasks_root_domain(struct cpuset *cs) css_task_iter_end(&it); } -static void dl_rebuild_rd_accounting(void) +void dl_rebuild_rd_accounting(void) { struct cpuset *cs = NULL; struct cgroup_subsys_state *pos_css; + int cpu; + u64 cookie = ++dl_cookie; lockdep_assert_held(&cpuset_mutex); lockdep_assert_cpus_held(); @@ -965,11 +967,12 @@ static void dl_rebuild_rd_accounting(void) rcu_read_lock(); - /* - * Clear default root domain DL accounting, it will be computed again - * if a task belongs to it. - */ - dl_clear_root_domain(&def_root_domain); + for_each_possible_cpu(cpu) { + if (dl_bw_visited(cpu, cookie)) + continue; + + dl_clear_root_domain_cpu(cpu); + } cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { @@ -996,7 +999,6 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], { sched_domains_mutex_lock(); partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); - dl_rebuild_rd_accounting(); sched_domains_mutex_unlock(); } @@ -1083,6 +1085,13 @@ void rebuild_sched_domains(void) cpus_read_unlock(); } +void cpuset_reset_sched_domains(void) +{ + mutex_lock(&cpuset_mutex); + partition_sched_domains(1, NULL, NULL); + mutex_unlock(&cpuset_mutex); +} + /** * cpuset_update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 84f68007e08f..affa99f56f65 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8229,7 +8229,7 @@ static void cpuset_cpu_active(void) * operation in the resume sequence, just build a single sched * domain, ignoring cpusets. */ - partition_sched_domains(1, NULL, NULL); + cpuset_reset_sched_domains(); if (--num_cpus_frozen) return; /* @@ -8248,7 +8248,7 @@ static void cpuset_cpu_inactive(unsigned int cpu) cpuset_update_active_cpus(); } else { num_cpus_frozen++; - partition_sched_domains(1, NULL, NULL); + cpuset_reset_sched_domains(); } } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 3e05032e9e0e..5dca336cdd7c 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -166,7 +166,7 @@ static inline unsigned long dl_bw_capacity(int i) } } -static inline bool dl_bw_visited(int cpu, u64 cookie) +bool dl_bw_visited(int cpu, u64 cookie) { struct root_domain *rd = cpu_rq(cpu)->rd; @@ -207,7 +207,7 @@ static inline unsigned long dl_bw_capacity(int i) return SCHED_CAPACITY_SCALE; } -static inline bool dl_bw_visited(int cpu, u64 cookie) +bool dl_bw_visited(int cpu, u64 cookie) { return false; } @@ -2981,18 +2981,22 @@ void dl_clear_root_domain(struct root_domain *rd) rd->dl_bw.total_bw = 0; /* - * dl_server bandwidth is only restored when CPUs are attached to root - * domains (after domains are created or CPUs moved back to the - * default root doamin). + * dl_servers are not tasks. Since dl_add_task_root_domain ignores + * them, we need to account for them here explicitly. */ for_each_cpu(i, rd->span) { struct sched_dl_entity *dl_se = &cpu_rq(i)->fair_server; if (dl_server(dl_se) && cpu_active(i)) - rd->dl_bw.total_bw += dl_se->dl_bw; + __dl_add(&rd->dl_bw, dl_se->dl_bw, dl_bw_cpus(i)); } } +void dl_clear_root_domain_cpu(int cpu) +{ + dl_clear_root_domain(cpu_rq(cpu)->rd); +} + #endif /* CONFIG_SMP */ static void switched_from_dl(struct rq *rq, struct task_struct *p) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 44093339761c..363ad268a25b 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -2791,6 +2791,7 @@ match3: ndoms_cur = ndoms_new; update_sched_domain_debugfs(); + dl_rebuild_rd_accounting(); } /* |
