diff options
| author | Peter Zijlstra (Intel) <peterz@infradead.org> | 2026-04-01 14:52:13 -0700 |
|---|---|---|
| committer | Peter Zijlstra <peterz@infradead.org> | 2026-04-09 15:49:47 +0200 |
| commit | df0d98475954d655571979aa061ecb07d7e00392 (patch) | |
| tree | cc5ae8bea7965016d76ed8ce1e70d059e8f6bd8a /include/linux | |
| parent | abb12b9b52cfe272c03a859b43a658f0d9cbf285 (diff) | |
| download | lwn-df0d98475954d655571979aa061ecb07d7e00392.tar.gz lwn-df0d98475954d655571979aa061ecb07d7e00392.zip | |
sched/cache: Introduce infrastructure for cache-aware load balancing
Adds infrastructure to enable cache-aware load balancing,
which improves cache locality by grouping tasks that share resources
within the same cache domain. This reduces cache misses and improves
overall data access efficiency.
In this initial implementation, threads belonging to the same process
are treated as entities that likely share working sets. The mechanism
tracks per-process CPU occupancy across cache domains and attempts to
migrate threads toward cache-hot domains where their process already
has active threads, thereby enhancing locality.
This provides a basic model for cache affinity. While the current code
targets the last-level cache (LLC), the approach could be extended to
other domain types such as clusters (L2) or node-internal groupings.
At present, the mechanism selects the CPU within an LLC that has the
highest recent runtime. Subsequent patches in this series will use this
information in the load-balancing path to guide task placement toward
preferred LLCs.
In the future, more advanced policies could be integrated through NUMA
balancing-for example, migrating a task to its preferred LLC when spare
capacity exists, or swapping tasks across LLCs to improve cache affinity.
Grouping of tasks could also be generalized from that of a process
to be that of a NUMA group, or be user configurable.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/6269a53221b9439b9ca00d18a9d1946fb64d8cff.1775065312.git.tim.c.chen@linux.intel.com
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/mm_types.h | 32 | ||||
| -rw-r--r-- | include/linux/sched.h | 24 |
2 files changed, 56 insertions, 0 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3cc8ae722886..67b2dfcc71ea 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1173,6 +1173,8 @@ struct mm_struct { /* MM CID related storage */ struct mm_mm_cid mm_cid; + /* sched_cache related statistics */ + struct sched_cache_stat sc_stat; #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* size of all page tables */ #endif @@ -1575,6 +1577,36 @@ static inline unsigned int mm_cid_size(void) # define MM_CID_STATIC_SIZE 0 #endif /* CONFIG_SCHED_MM_CID */ +#ifdef CONFIG_SCHED_CACHE +void mm_init_sched(struct mm_struct *mm, + struct sched_cache_time __percpu *pcpu_sched); + +static inline int mm_alloc_sched_noprof(struct mm_struct *mm) +{ + struct sched_cache_time __percpu *pcpu_sched = + alloc_percpu_noprof(struct sched_cache_time); + + if (!pcpu_sched) + return -ENOMEM; + + mm_init_sched(mm, pcpu_sched); + return 0; +} + +#define mm_alloc_sched(...) alloc_hooks(mm_alloc_sched_noprof(__VA_ARGS__)) + +static inline void mm_destroy_sched(struct mm_struct *mm) +{ + free_percpu(mm->sc_stat.pcpu_sched); + mm->sc_stat.pcpu_sched = NULL; +} +#else /* !CONFIG_SCHED_CACHE */ + +static inline int mm_alloc_sched(struct mm_struct *mm) { return 0; } +static inline void mm_destroy_sched(struct mm_struct *mm) { } + +#endif /* CONFIG_SCHED_CACHE */ + struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); diff --git a/include/linux/sched.h b/include/linux/sched.h index 8ec3b6d7d718..2bf261bcd7b6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1407,6 +1407,10 @@ struct task_struct { unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_SCHED_CACHE + struct callback_head cache_work; +#endif + struct rseq_data rseq; struct sched_mm_cid mm_cid; @@ -2407,6 +2411,26 @@ static __always_inline int task_mm_cid(struct task_struct *t) } #endif +#ifdef CONFIG_SCHED_CACHE + +struct sched_cache_time { + u64 runtime; + unsigned long epoch; +}; + +struct sched_cache_stat { + struct sched_cache_time __percpu *pcpu_sched; + raw_spinlock_t lock; + unsigned long epoch; + int cpu; +} ____cacheline_aligned_in_smp; + +#else + +struct sched_cache_stat { }; + +#endif + #ifndef MODULE #ifndef COMPILE_OFFSETS |
