sched/cache: Introduce infrastructure for cache-aware load balancing

Adds infrastructure to enable cache-aware load balancing, which improves cache locality by grouping tasks that share resources within the same cache domain. This reduces cache misses and improves overall data access efficiency. In this initial implementation, threads belonging to the same process are treated as entities that likely share working sets. The mechanism tracks per-process CPU occupancy across cache domains and attempts to migrate threads toward cache-hot domains where their process already has active threads, thereby enhancing locality. This provides a basic model for cache affinity. While the current code targets the last-level cache (LLC), the approach could be extended to other domain types such as clusters (L2) or node-internal groupings. At present, the mechanism selects the CPU within an LLC that has the highest recent runtime. Subsequent patches in this series will use this information in the load-balancing path to guide task placement toward preferred LLCs. In the future, more advanced policies could be integrated through NUMA balancing-for example, migrating a task to its preferred LLC when spare capacity exists, or swapping tasks across LLCs to improve cache affinity. Grouping of tasks could also be generalized from that of a process to be that of a NUMA group, or be user configurable. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Chen Yu <yu.c.chen@intel.com> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://patch.msgid.link/6269a53221b9439b9ca00d18a9d1946fb64d8cff.1775065312.git.tim.c.chen@linux.intel.com
author: Peter Zijlstra (Intel) <peterz@infradead.org> 2026-04-01 14:52:13 -0700
committer: Peter Zijlstra <peterz@infradead.org> 2026-04-09 15:49:47 +0200
commit: df0d98475954d655571979aa061ecb07d7e00392 (patch)
tree: cc5ae8bea7965016d76ed8ce1e70d059e8f6bd8a /include/linux
parent: abb12b9b52cfe272c03a859b43a658f0d9cbf285 (diff)
download: lwn-df0d98475954d655571979aa061ecb07d7e00392.tar.gz
lwn-df0d98475954d655571979aa061ecb07d7e00392.zip
2 files changed, 56 insertions, 0 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3cc8ae722886..67b2dfcc71ea 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1173,6 +1173,8 @@ struct mm_struct {
 		/* MM CID related storage */
 		struct mm_mm_cid mm_cid;
 
+		/* sched_cache related statistics */
+		struct sched_cache_stat sc_stat;
 #ifdef CONFIG_MMU
 		atomic_long_t pgtables_bytes;	/* size of all page tables */
 #endif
@@ -1575,6 +1577,36 @@ static inline unsigned int mm_cid_size(void)
 # define MM_CID_STATIC_SIZE	0
 #endif /* CONFIG_SCHED_MM_CID */
 
+#ifdef CONFIG_SCHED_CACHE
+void mm_init_sched(struct mm_struct *mm,
+		   struct sched_cache_time __percpu *pcpu_sched);
+
+static inline int mm_alloc_sched_noprof(struct mm_struct *mm)
+{
+	struct sched_cache_time __percpu *pcpu_sched =
+		alloc_percpu_noprof(struct sched_cache_time);
+
+	if (!pcpu_sched)
+		return -ENOMEM;
+
+	mm_init_sched(mm, pcpu_sched);
+	return 0;
+}
+
+#define mm_alloc_sched(...)	alloc_hooks(mm_alloc_sched_noprof(__VA_ARGS__))
+
+static inline void mm_destroy_sched(struct mm_struct *mm)
+{
+	free_percpu(mm->sc_stat.pcpu_sched);
+	mm->sc_stat.pcpu_sched = NULL;
+}
+#else /* !CONFIG_SCHED_CACHE */
+
+static inline int mm_alloc_sched(struct mm_struct *mm) { return 0; }
+static inline void mm_destroy_sched(struct mm_struct *mm) { }
+
+#endif /* CONFIG_SCHED_CACHE */
+
 struct mmu_gather;
 extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8ec3b6d7d718..2bf261bcd7b6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1407,6 +1407,10 @@ struct task_struct {
 	unsigned long			numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
+#ifdef CONFIG_SCHED_CACHE
+	struct callback_head		cache_work;
+#endif
+
 	struct rseq_data		rseq;
 	struct sched_mm_cid		mm_cid;
 
@@ -2407,6 +2411,26 @@ static __always_inline int task_mm_cid(struct task_struct *t)
 }
 #endif
 
+#ifdef CONFIG_SCHED_CACHE
+
+struct sched_cache_time {
+	u64 runtime;
+	unsigned long epoch;
+};
+
+struct sched_cache_stat {
+	struct sched_cache_time __percpu *pcpu_sched;
+	raw_spinlock_t lock;
+	unsigned long epoch;
+	int cpu;
+} ____cacheline_aligned_in_smp;
+
+#else
+
+struct sched_cache_stat { };
+
+#endif
+
 #ifndef MODULE
 #ifndef COMPILE_OFFSETS
author	Peter Zijlstra (Intel) <peterz@infradead.org>	2026-04-01 14:52:13 -0700
committer	Peter Zijlstra <peterz@infradead.org>	2026-04-09 15:49:47 +0200
commit	df0d98475954d655571979aa061ecb07d7e00392 (patch)
tree	cc5ae8bea7965016d76ed8ce1e70d059e8f6bd8a /include/linux
parent	abb12b9b52cfe272c03a859b43a658f0d9cbf285 (diff)
download	lwn-df0d98475954d655571979aa061ecb07d7e00392.tar.gz lwn-df0d98475954d655571979aa061ecb07d7e00392.zip