sched: Basic tracking of matching tasks

Introduce task_struct::core_cookie as an opaque identifier for core scheduling. When enabled; core scheduling will only allow matching task to be on the core; where idle matches everything. When task_struct::core_cookie is set (and core scheduling is enabled) these tasks are indexed in a second RB-tree, first on cookie value then on scheduling function, such that matching task selection always finds the most elegible match. NOTE: *shudder* at the overhead... NOTE: *sigh*, a 3rd copy of the scheduling function; the alternative is per class tracking of cookies and that just duplicates a lot of stuff for no raisin (the 2nd copy lives in the rt-mutex PI code). [Joel: folded fixes] Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: Don Hiatt <dhiatt@digitalocean.com> Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com> Tested-by: Vincent Guittot <vincent.guittot@linaro.org> Link: https://lkml.kernel.org/r/20210422123308.496975854@infradead.org
author: Peter Zijlstra <peterz@infradead.org> 2020-11-17 18:19:36 -0500
committer: Peter Zijlstra <peterz@infradead.org> 2021-05-12 11:43:28 +0200
commit: 8a311c740b53324ec584e0e3bb7077d56b123c28 (patch)
tree: b39bff43b1eb21f18c94272c2ad9118ca9727021 /kernel/sched/core.c
parent: 21f56ffe4482e501b9e83737612493eeaac21f5a (diff)
download: lwn-8a311c740b53324ec584e0e3bb7077d56b123c28.tar.gz
lwn-8a311c740b53324ec584e0e3bb7077d56b123c28.zip
1 files changed, 148 insertions, 4 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 85147bea9d93..c057d471c025 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -88,6 +88,133 @@ __read_mostly int scheduler_running;
 
 DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
 
+/* kernel prio, less is more */
+static inline int __task_prio(struct task_struct *p)
+{
+	if (p->sched_class == &stop_sched_class) /* trumps deadline */
+		return -2;
+
+	if (rt_prio(p->prio)) /* includes deadline */
+		return p->prio; /* [-1, 99] */
+
+	if (p->sched_class == &idle_sched_class)
+		return MAX_RT_PRIO + NICE_WIDTH; /* 140 */
+
+	return MAX_RT_PRIO + MAX_NICE; /* 120, squash fair */
+}
+
+/*
+ * l(a,b)
+ * le(a,b) := !l(b,a)
+ * g(a,b)  := l(b,a)
+ * ge(a,b) := !l(a,b)
+ */
+
+/* real prio, less is less */
+static inline bool prio_less(struct task_struct *a, struct task_struct *b)
+{
+
+	int pa = __task_prio(a), pb = __task_prio(b);
+
+	if (-pa < -pb)
+		return true;
+
+	if (-pb < -pa)
+		return false;
+
+	if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
+		return !dl_time_before(a->dl.deadline, b->dl.deadline);
+
+	if (pa == MAX_RT_PRIO + MAX_NICE)  { /* fair */
+		u64 vruntime = b->se.vruntime;
+
+		/*
+		 * Normalize the vruntime if tasks are in different cpus.
+		 */
+		if (task_cpu(a) != task_cpu(b)) {
+			vruntime -= task_cfs_rq(b)->min_vruntime;
+			vruntime += task_cfs_rq(a)->min_vruntime;
+		}
+
+		return !((s64)(a->se.vruntime - vruntime) <= 0);
+	}
+
+	return false;
+}
+
+static inline bool __sched_core_less(struct task_struct *a, struct task_struct *b)
+{
+	if (a->core_cookie < b->core_cookie)
+		return true;
+
+	if (a->core_cookie > b->core_cookie)
+		return false;
+
+	/* flip prio, so high prio is leftmost */
+	if (prio_less(b, a))
+		return true;
+
+	return false;
+}
+
+#define __node_2_sc(node) rb_entry((node), struct task_struct, core_node)
+
+static inline bool rb_sched_core_less(struct rb_node *a, const struct rb_node *b)
+{
+	return __sched_core_less(__node_2_sc(a), __node_2_sc(b));
+}
+
+static inline int rb_sched_core_cmp(const void *key, const struct rb_node *node)
+{
+	const struct task_struct *p = __node_2_sc(node);
+	unsigned long cookie = (unsigned long)key;
+
+	if (cookie < p->core_cookie)
+		return -1;
+
+	if (cookie > p->core_cookie)
+		return 1;
+
+	return 0;
+}
+
+static void sched_core_enqueue(struct rq *rq, struct task_struct *p)
+{
+	rq->core->core_task_seq++;
+
+	if (!p->core_cookie)
+		return;
+
+	rb_add(&p->core_node, &rq->core_tree, rb_sched_core_less);
+}
+
+static void sched_core_dequeue(struct rq *rq, struct task_struct *p)
+{
+	rq->core->core_task_seq++;
+
+	if (!p->core_cookie)
+		return;
+
+	rb_erase(&p->core_node, &rq->core_tree);
+}
+
+/*
+ * Find left-most (aka, highest priority) task matching @cookie.
+ */
+static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
+{
+	struct rb_node *node;
+
+	node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp);
+	/*
+	 * The idle task always matches any cookie!
+	 */
+	if (!node)
+		return idle_sched_class.pick_task(rq);
+
+	return __node_2_sc(node);
+}
+
 /*
  * Magic required such that:
  *
@@ -147,10 +274,16 @@ static void __sched_core_flip(bool enabled)
 	cpus_read_unlock();
 }
 
-static void __sched_core_enable(void)
+static void sched_core_assert_empty(void)
 {
-	// XXX verify there are no cookie tasks (yet)
+	int cpu;
 
+	for_each_possible_cpu(cpu)
+		WARN_ON_ONCE(!RB_EMPTY_ROOT(&cpu_rq(cpu)->core_tree));
+}
+
+static void __sched_core_enable(void)
+{
 	static_branch_enable(&__sched_core_enabled);
 	/*
 	 * Ensure all previous instances of raw_spin_rq_*lock() have finished
@@ -158,12 +291,12 @@ static void __sched_core_enable(void)
 	 */
 	synchronize_rcu();
 	__sched_core_flip(true);
+	sched_core_assert_empty();
 }
 
 static void __sched_core_disable(void)
 {
-	// XXX verify there are no cookie tasks (left)
-
+	sched_core_assert_empty();
 	__sched_core_flip(false);
 	static_branch_disable(&__sched_core_enabled);
 }
@@ -205,6 +338,11 @@ void sched_core_put(void)
 		schedule_work(&_work);
 }
 
+#else /* !CONFIG_SCHED_CORE */
+
+static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
+static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
+
 #endif /* CONFIG_SCHED_CORE */
 
 /*
@@ -1797,10 +1935,16 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 
 	uclamp_rq_inc(rq, p);
 	p->sched_class->enqueue_task(rq, p, flags);
+
+	if (sched_core_enabled(rq))
+		sched_core_enqueue(rq, p);
 }
 
 static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 {
+	if (sched_core_enabled(rq))
+		sched_core_dequeue(rq, p);
+
 	if (!(flags & DEQUEUE_NOCLOCK))
 		update_rq_clock(rq);
author	Peter Zijlstra <peterz@infradead.org>	2020-11-17 18:19:36 -0500
committer	Peter Zijlstra <peterz@infradead.org>	2021-05-12 11:43:28 +0200
commit	8a311c740b53324ec584e0e3bb7077d56b123c28 (patch)
tree	b39bff43b1eb21f18c94272c2ad9118ca9727021 /kernel/sched/core.c
parent	21f56ffe4482e501b9e83737612493eeaac21f5a (diff)
download	lwn-8a311c740b53324ec584e0e3bb7077d56b123c28.tar.gz lwn-8a311c740b53324ec584e0e3bb7077d56b123c28.zip