// SPDX-License-Identifier: GPL-2.0
/*
* A scheduler that validates the behavior of the NUMA-aware
* functionalities.
*
* The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
* are exclusively processed by CPUs within their respective nodes. Idle
* CPUs are selected only within the same node, so task migration can only
 * occur between CPUs belonging to the same node.
*
* Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
*/
#include <scx/common.bpf.h>
char _license[] SEC("license") = "GPL";
UEI_DEFINE(uei);
const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;
/*
 * Return true if @cpu is currently marked idle in @node's idle cpumask.
 *
 * The idle cpumask reference must be released with scx_bpf_put_cpumask()
 * before returning, so the test result is saved first.
 */
static bool is_cpu_idle(s32 cpu, int node)
{
	const struct cpumask *mask;
	bool ret;

	mask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
	ret = bpf_cpumask_test_cpu(cpu, mask);
	scx_bpf_put_cpumask(mask);

	return ret;
}
/*
 * Pick a target CPU for @p, restricted to the NUMA node of the CPU the
 * task last ran on, and sanity-check the result: the picked CPU must have
 * been claimed (no longer idle) and must belong to the expected node.
 */
s32 BPF_STRUCT_OPS(numa_select_cpu,
		   struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
	s32 cpu;

	/*
	 * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
	 * since it already tries to pick an idle CPU within the node
	 * first, but let's use both functions for better testing coverage.
	 */
	cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
					__COMPAT_SCX_PICK_IDLE_IN_NODE);
	if (cpu < 0)
		cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
					__COMPAT_SCX_PICK_IDLE_IN_NODE);

	/*
	 * Both pickers can fail (negative errno), e.g. if @p's cpumask has
	 * no CPU in @node. Previously the negative value was passed on to
	 * is_cpu_idle() and returned from .select_cpu(), which is not a
	 * valid CPU. Report the error and fall back to @prev_cpu instead.
	 */
	if (cpu < 0) {
		scx_bpf_error("failed to pick a CPU in node %d", node);
		return prev_cpu;
	}

	if (is_cpu_idle(cpu, node))
		scx_bpf_error("CPU %d should be marked as busy", cpu);

	if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
		scx_bpf_error("CPU %d should be in node %d", cpu, node);

	return cpu;
}
/*
 * Queue @p on the per-node DSQ matching the node of the CPU the task is
 * currently associated with (DSQ ids equal node ids, see numa_init()).
 */
void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
{
	int dsq_id = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));

	scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, enq_flags);
}
/*
 * Refill @cpu's local DSQ by consuming from the DSQ of @cpu's NUMA node,
 * so CPUs only ever run tasks queued on their own node.
 */
void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
{
	scx_bpf_dsq_move_to_local(__COMPAT_scx_bpf_cpu_node(cpu));
}
/*
 * Create one DSQ per NUMA node, using the node id as the DSQ id.
 * Returns 0 on success or the error from scx_bpf_create_dsq().
 */
s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
{
	int nr_nodes = __COMPAT_scx_bpf_nr_node_ids();
	int node, ret;

	bpf_for(node, 0, nr_nodes) {
		ret = scx_bpf_create_dsq(node, node);
		if (ret)
			return ret;
	}

	return 0;
}
/* Record the exit reason/info so userspace can report it via UEI. */
void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}
/* Scheduler registration: wire the NUMA test callbacks into sched_ext. */
SEC(".struct_ops.link")
struct sched_ext_ops numa_ops = {
	.name		= "numa",
	.init		= (void *)numa_init,
	.exit		= (void *)numa_exit,
	.select_cpu	= (void *)numa_select_cpu,
	.enqueue	= (void *)numa_enqueue,
	.dispatch	= (void *)numa_dispatch,
};