 Documentation/scheduler/sched-ext.rst | 10 ++++++++++
 kernel/sched/core.c                   |  3 ++-
 kernel/sched/ext.c                    | 24 +++++++++++++++++++++---
 kernel/sched/sched.h                  | 10 ++++++----
 tools/sched_ext/scx_show_state.py     |  1 +
 5 files changed, 40 insertions(+), 8 deletions(-)
diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst
index a707d2181a77..6c0d70e2e27d 100644
--- a/Documentation/scheduler/sched-ext.rst
+++ b/Documentation/scheduler/sched-ext.rst
@@ -83,6 +83,15 @@ The current status of the BPF scheduler can be determined as follows:
# cat /sys/kernel/sched_ext/root/ops
simple
+You can check if any BPF scheduler has ever been loaded since boot by examining
+this monotonically incrementing counter (a value of zero indicates that no BPF
+scheduler has been loaded):
+
+.. code-block:: none
+
+ # cat /sys/kernel/sched_ext/enable_seq
+ 1
+
``tools/sched_ext/scx_show_state.py`` is a drgn script which shows more
detailed information:
@@ -96,6 +105,7 @@ detailed information:
enable_state : enabled (2)
bypass_depth : 0
nr_rejected : 0
+ enable_seq : 1
If ``CONFIG_SCHED_DEBUG`` is set, whether a given task is on sched_ext can
be determined as follows:
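
As a rough userspace illustration of the interface documented above (a hypothetical sketch, not part of this patch; only the sysfs path is taken from the documentation), a C program could distinguish "no BPF scheduler ever loaded" from "loaded at least once":

/* Hypothetical sketch: a zero enable_seq means no scheduler was ever loaded. */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/kernel/sched_ext/enable_seq", "r");
        long seq;

        if (!f || fscanf(f, "%ld", &seq) != 1) {
                fprintf(stderr, "sched_ext sysfs interface unavailable\n");
                return 1;
        }
        fclose(f);

        if (seq)
                printf("a BPF scheduler has been loaded since boot (enable_seq=%ld)\n", seq);
        else
                printf("no BPF scheduler has been loaded since boot\n");
        return 0;
}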
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b6cc1cf499d6..43e453ab7e20 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6591,7 +6591,8 @@ static void __sched notrace __schedule(int sched_mode)
*/
prev_state = READ_ONCE(prev->__state);
if (sched_mode == SM_IDLE) {
- if (!rq->nr_running) {
+ /* SCX must consult the BPF scheduler to tell if rq is empty */
+ if (!rq->nr_running && !scx_enabled()) {
next = prev;
goto picked;
}
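
The one-line core.c change above encodes the key invariant: with sched_ext enabled, rq->nr_running == 0 does not mean the CPU has nothing to run, because the BPF scheduler may still hold runnable tasks in its own dispatch queues. A simplified, annotated sketch of the resulting SM_IDLE path (not verbatim kernel code):

        if (sched_mode == SM_IDLE) {
                /*
                 * Short-circuit to the idle task only when the rq is truly
                 * empty AND no BPF scheduler is active. Under sched_ext,
                 * tasks can sit in BPF-side dispatch queues without being
                 * counted in rq->nr_running, so the full pick path must run
                 * and give the BPF scheduler's dispatch callback a chance
                 * to supply work.
                 */
                if (!rq->nr_running && !scx_enabled()) {
                        next = prev;
                        goto picked;
                }
        }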
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 9ee5a9a261cc..c09e3dc38c34 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -875,6 +875,13 @@ static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
/*
+ * A monotonically increasing sequence number that is incremented every time a
+ * scheduler is enabled. This can be used to check if any custom sched_ext
+ * scheduler has ever been used in the system.
+ */
+static atomic_long_t scx_enable_seq = ATOMIC_LONG_INIT(0);
+
+/*
* The maximum amount of time in jiffies that a task may be runnable without
* being scheduled on a CPU. If this timeout is exceeded, it will trigger
* scx_ops_error().
@@ -4154,11 +4161,19 @@ static ssize_t scx_attr_hotplug_seq_show(struct kobject *kobj,
}
SCX_ATTR(hotplug_seq);
+static ssize_t scx_attr_enable_seq_show(struct kobject *kobj,
+ struct kobj_attribute *ka, char *buf)
+{
+ return sysfs_emit(buf, "%ld\n", atomic_long_read(&scx_enable_seq));
+}
+SCX_ATTR(enable_seq);
+
static struct attribute *scx_global_attrs[] = {
&scx_attr_state.attr,
&scx_attr_switch_all.attr,
&scx_attr_nr_rejected.attr,
&scx_attr_hotplug_seq.attr,
+ &scx_attr_enable_seq.attr,
NULL,
};
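
For readers unfamiliar with the boilerplate above: SCX_ATTR() appears to be a local wrapper for declaring these read-only attributes. The underlying kernel idiom, sketched here with hypothetical names, is a struct kobj_attribute whose ->show() callback formats the value via sysfs_emit(), which bounds output to one page:

/* Hypothetical sketch of the generic read-only kobject attribute idiom. */
static atomic_long_t example_seq = ATOMIC_LONG_INIT(0);

static ssize_t example_seq_show(struct kobject *kobj,
                                struct kobj_attribute *ka, char *buf)
{
        return sysfs_emit(buf, "%ld\n", atomic_long_read(&example_seq));
}
/* __ATTR_RO(example_seq) wires .show to example_seq_show with mode 0444. */
static struct kobj_attribute example_seq_attr = __ATTR_RO(example_seq);

The attribute is then placed in an attribute array such as scx_global_attrs[] above and registered against a kobject.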
@@ -4469,8 +4484,9 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
if (ei->msg[0] != '\0')
pr_err("sched_ext: %s: %s\n", scx_ops.name, ei->msg);
-
+#ifdef CONFIG_STACKTRACE
stack_trace_print(ei->bt, ei->bt_len, 2);
+#endif
} else {
pr_info("sched_ext: BPF scheduler \"%s\" disabled (%s)\n",
scx_ops.name, ei->reason);
@@ -4847,10 +4863,10 @@ static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind,
return;
ei->exit_code = exit_code;
-
+#ifdef CONFIG_STACKTRACE
if (kind >= SCX_EXIT_ERROR)
ei->bt_len = stack_trace_save(ei->bt, SCX_EXIT_BT_LEN, 1);
-
+#endif
va_start(args, fmt);
vscnprintf(ei->msg, SCX_EXIT_MSG_LEN, fmt, args);
va_end(args);
@@ -5176,6 +5192,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
kobject_uevent(scx_root_kobj, KOBJ_ADD);
mutex_unlock(&scx_ops_enable_mutex);
+ atomic_long_inc(&scx_enable_seq);
+
return 0;
err_del:
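
Since the atomic_long_inc() above sits on the success path, after scx_ops_enable() has fully completed, a loader could use the counter as confirmation that enabling actually happened. A hypothetical userspace sketch (not part of this patch):

/* Hypothetical helper: returns enable_seq, or -1 if unreadable. */
#include <stdio.h>

static long read_enable_seq(void)
{
        FILE *f = fopen("/sys/kernel/sched_ext/enable_seq", "r");
        long seq = -1;

        if (f) {
                if (fscanf(f, "%ld", &seq) != 1)
                        seq = -1;
                fclose(f);
        }
        return seq;
}

/*
 * Usage sketch: snapshot the counter before attaching a scheduler;
 * if it has advanced afterwards, scx_ops_enable() succeeded at least once:
 *
 *     long before = read_enable_seq();
 *     ... attach the BPF scheduler ...
 *     int enabled = read_enable_seq() > before;
 */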
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8063db62b027..b1c3588a8f00 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -432,16 +432,17 @@ struct cfs_bandwidth {
struct task_group {
struct cgroup_subsys_state css;
+#ifdef CONFIG_GROUP_SCHED_WEIGHT
+ /* A positive value indicates that this is a SCHED_IDLE group. */
+ int idle;
+#endif
+
#ifdef CONFIG_FAIR_GROUP_SCHED
/* schedulable entities of this group on each CPU */
struct sched_entity **se;
/* runqueue "owned" by this group on each CPU */
struct cfs_rq **cfs_rq;
unsigned long shares;
-
- /* A positive value indicates that this is a SCHED_IDLE group. */
- int idle;
-
#ifdef CONFIG_SMP
/*
* load_avg can be heavily contended at clock tick time, so put
@@ -582,6 +583,7 @@ static inline void set_task_rq_fair(struct sched_entity *se,
#endif /* CONFIG_SMP */
#else /* !CONFIG_FAIR_GROUP_SCHED */
static inline int sched_group_set_shares(struct task_group *tg, unsigned long shares) { return 0; }
+static inline int sched_group_set_idle(struct task_group *tg, long idle) { return 0; }
#endif /* CONFIG_FAIR_GROUP_SCHED */
#else /* CONFIG_CGROUP_SCHED */
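
The new sched_group_set_idle() stub follows a standard kernel header idiom: when an operation only exists under a config option, the disabled branch provides a no-op static inline with the same signature so call sites need no #ifdef. Schematically, with hypothetical names:

/* Schematic of the config-gated API + inline-stub idiom (hypothetical names). */
#ifdef CONFIG_FEATURE_X
extern int feature_x_set(struct foo *f, long val);      /* real implementation */
#else
static inline int feature_x_set(struct foo *f, long val)
{
        return 0;       /* feature compiled out; report success and do nothing */
}
#endif

Callers then invoke feature_x_set() unconditionally, and the stub compiles away.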
diff --git a/tools/sched_ext/scx_show_state.py b/tools/sched_ext/scx_show_state.py
index d457d2a74e1e..8bc626ede1c4 100644
--- a/tools/sched_ext/scx_show_state.py
+++ b/tools/sched_ext/scx_show_state.py
@@ -37,3 +37,4 @@ print(f'switched_all : {read_static_key("__scx_switched_all")}')
print(f'enable_state : {ops_state_str(enable_state)} ({enable_state})')
print(f'bypass_depth : {read_atomic("scx_ops_bypass_depth")}')
print(f'nr_rejected : {read_atomic("scx_nr_rejected")}')
+print(f'enable_seq : {read_atomic("scx_enable_seq")}')