summaryrefslogtreecommitdiff
path: root/include/linux/sched
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-04-14 13:33:36 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-04-14 13:33:36 -0700
commit 1c3b68f0d55b5932eb38eda602a61aec6d6f5e5e (patch)
tree ec22e8344526e4f2968507472f3f578429392dd4 /include/linux/sched
parent 33c66eb5e9844429911bf5478c96c60f9f8af9d0 (diff)
parent 78cde54ea5f03398f1cf6656de2472068f6da966 (diff)
download lwn-1c3b68f0d55b5932eb38eda602a61aec6d6f5e5e.tar.gz
lwn-1c3b68f0d55b5932eb38eda602a61aec6d6f5e5e.zip
Merge tag 'sched-core-2026-04-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:

"Fair scheduling updates:
  - Skip SCHED_IDLE rq for SCHED_IDLE tasks (Christian Loehle)
  - Remove superfluous rcu_read_lock() in the wakeup path (K Prateek Nayak)
  - Simplify the entry condition for update_idle_cpu_scan() (K Prateek Nayak)
  - Simplify SIS_UTIL handling in select_idle_cpu() (K Prateek Nayak)
  - Avoid overflow in enqueue_entity() (K Prateek Nayak)
  - Update overutilized detection (Vincent Guittot)
  - Prevent negative lag increase during delayed dequeue (Vincent Guittot)
  - Clear buddies for preempt_short (Vincent Guittot)
  - Implement more complex proportional newidle balance (Peter Zijlstra)
  - Increase weight bits for avg_vruntime (Peter Zijlstra)
  - Use full weight to __calc_delta() (Peter Zijlstra)

 RT and DL scheduling updates:
  - Fix incorrect schedstats for rt and dl thread (Dengjun Su)
  - Skip group schedulable check with rt_group_sched=0 (Michal Koutný)
  - Move group schedulability check to sched_rt_global_validate() (Michal Koutný)
  - Add reporting of runtime left & abs deadline to sched_getattr() for DEADLINE tasks (Tommaso Cucinotta)

 Scheduling topology updates by K Prateek Nayak:
  - Compute sd_weight considering cpuset partitions
  - Extract "imb_numa_nr" calculation into a separate helper
  - Allocate per-CPU sched_domain_shared in s_data
  - Switch to assigning "sd->shared" from s_data
  - Remove sched_domain_shared allocation with sd_data

 Energy-aware scheduling updates:
  - Filter false overloaded_group case for EAS (Vincent Guittot)
  - PM: EM: Switch to rcu_dereference_all() in wakeup path (Dietmar Eggemann)

 Infrastructure updates:
  - Replace use of system_unbound_wq with system_dfl_wq (Marco Crivellari)

 Proxy scheduling updates by John Stultz:
  - Make class_schedulers avoid pushing current, and get rid of proxy_tag_curr()
  - Minimise repeated sched_proxy_exec() checking
  - Fix potentially missing balancing with Proxy Exec
  - Fix and improve task::blocked_on et al handling
  - Add assert_balance_callbacks_empty() helper
  - Add logic to zap balancing callbacks if we pick again
  - Move attach_one_task() and attach_task() helpers to sched.h
  - Handle blocked-waiter migration (and return migration)
  - Add K Prateek Nayak to scheduler reviewers for proxy execution

 Misc cleanups and fixes by John Stultz, Joseph Salisbury, Peter Zijlstra, K Prateek Nayak, Michal Koutný, Randy Dunlap, Shrikanth Hegde, Vincent Guittot, Zhan Xusheng, Xie Yuanbin and Vincent Guittot"

* tag 'sched-core-2026-04-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (46 commits)
  sched/eevdf: Clear buddies for preempt_short
  sched/rt: Cleanup global RT bandwidth functions
  sched/rt: Move group schedulability check to sched_rt_global_validate()
  sched/rt: Skip group schedulable check with rt_group_sched=0
  sched/fair: Avoid overflow in enqueue_entity()
  sched: Use u64 for bandwidth ratio calculations
  sched/fair: Prevent negative lag increase during delayed dequeue
  sched/fair: Use sched_energy_enabled()
  sched: Handle blocked-waiter migration (and return migration)
  sched: Move attach_one_task and attach_task helpers to sched.h
  sched: Add logic to zap balance callbacks if we pick again
  sched: Add assert_balance_callbacks_empty helper
  sched/locking: Add special p->blocked_on==PROXY_WAKING value for proxy return-migration
  sched: Fix modifying donor->blocked on without proper locking
  locking: Add task::blocked_lock to serialize blocked_on state
  sched: Fix potentially missing balancing with Proxy Exec
  sched: Minimise repeated sched_proxy_exec() checking
  sched: Make class_schedulers avoid pushing current, and get rid of proxy_tag_curr()
  MAINTAINERS: Add K Prateek Nayak to scheduler reviewers
  sched/core: Get this cpu once in ttwu_queue_cond()
  ...
Diffstat (limited to 'include/linux/sched')
-rw-r--r-- include/linux/sched/topology.h 26
1 files changed, 19 insertions, 7 deletions
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 45c0022b91ce..36553e14866d 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -95,6 +95,7 @@ struct sched_domain {
unsigned int newidle_call;
unsigned int newidle_success;
unsigned int newidle_ratio;
+ u64 newidle_stamp;
u64 max_newidle_lb_cost;
unsigned long last_decay_max_lb_cost;
@@ -141,18 +142,30 @@ struct sched_domain {
unsigned int span_weight;
/*
- * Span of all CPUs in this domain.
+ * See sched_domain_span(), on why flex arrays are broken.
*
- * NOTE: this field is variable length. (Allocated dynamically
- * by attaching extra space to the end of the structure,
- * depending on how many CPUs the kernel has booted up with)
- */
unsigned long span[];
+ */
};
static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
{
- return to_cpumask(sd->span);
+ /*
+ * Turns out that C flexible arrays are fundamentally broken since it
+ * is allowed for offsetof(*sd, span) < sizeof(*sd), this means that
+ * structure initialization *sd = { ... }; which writes every byte
+ * inside sizeof(*type), will over-write the start of the flexible
+ * array.
+ *
+ * Luckily, the way we allocate sched_domain is by:
+ *
+ * sizeof(*sd) + cpumask_size()
+ *
+ * this means that we have sufficient space for the whole flex array
+ * *outside* of sizeof(*sd). So use that, and avoid using sd->span.
+ */
+ unsigned long *bitmap = (void *)sd + sizeof(*sd);
+ return to_cpumask(bitmap);
}
extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
@@ -171,7 +184,6 @@ typedef int (*sched_domain_flags_f)(void);
struct sd_data {
struct sched_domain *__percpu *sd;
- struct sched_domain_shared *__percpu *sds;
struct sched_group *__percpu *sg;
struct sched_group_capacity *__percpu *sgc;
};