diff options
Diffstat (limited to 'tools/testing/selftests/rseq')
18 files changed, 748 insertions, 90 deletions
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore index 16496de5f6ce..ec01d164c1f0 100644 --- a/tools/testing/selftests/rseq/.gitignore +++ b/tools/testing/selftests/rseq/.gitignore @@ -9,3 +9,5 @@ param_test_compare_twice param_test_mm_cid param_test_mm_cid_benchmark param_test_mm_cid_compare_twice +syscall_errors_test +slice_test diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 5a3432fceb58..50d69e22ee7a 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -14,13 +14,20 @@ LDLIBS += -lpthread -ldl # still track changes to header files and depend on shared object. OVERRIDE_TARGETS = 1 -TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ - param_test_benchmark param_test_compare_twice param_test_mm_cid \ - param_test_mm_cid_benchmark param_test_mm_cid_compare_twice +TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test \ + param_test_benchmark param_test_mm_cid_benchmark -TEST_GEN_PROGS_EXTENDED = librseq.so +TEST_GEN_PROGS_EXTENDED = librseq.so \ + param_test \ + param_test_compare_twice \ + param_test_mm_cid \ + param_test_mm_cid_compare_twice \ + syscall_errors_test \ + legacy_check \ + slice_test \ + check_optimized -TEST_PROGS = run_param_test.sh +TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh run_legacy_check.sh run_timeslice_test.sh TEST_FILES := settings @@ -54,3 +61,13 @@ $(OUTPUT)/param_test_mm_cid_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ $(OUTPUT)/param_test_mm_cid_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ rseq.h rseq-*.h $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/syscall_errors_test: syscall_errors_test.c $(TEST_GEN_PROGS_EXTENDED) \ + rseq.h rseq-*.h + $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/slice_test: slice_test.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h + $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/check_optimized: check_optimized.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h + $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index 2348d2c20d0a..1193612bf327 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -9,7 +9,7 @@ #include <string.h> #include <stddef.h> -#include "../kselftest.h" +#include "kselftest.h" #include "rseq.h" #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID diff --git a/tools/testing/selftests/rseq/check_optimized.c b/tools/testing/selftests/rseq/check_optimized.c new file mode 100644 index 000000000000..a13e3f2c8fc6 --- /dev/null +++ b/tools/testing/selftests/rseq/check_optimized.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include <assert.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <sys/time.h> + +#include "rseq.h" + +int main(int argc, char **argv) +{ + if (__rseq_register_current_thread(true, false)) + return -1; + return 0; +} diff --git a/tools/testing/selftests/rseq/legacy_check.c b/tools/testing/selftests/rseq/legacy_check.c new file mode 100644 index 000000000000..3f7de4e28303 --- /dev/null +++ b/tools/testing/selftests/rseq/legacy_check.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <errno.h> +#include <signal.h> +#include <stdint.h> +#include <unistd.h> + +#include "rseq.h" + +#include "../kselftest_harness.h" + +FIXTURE(legacy) +{ +}; + +static int cpu_id_in_sigfn = -1; + +static void sigfn(int sig) +{ + struct rseq_abi *rs = rseq_get_abi(); + + cpu_id_in_sigfn = rs->cpu_id_start; +} + +FIXTURE_SETUP(legacy) +{ + int res = __rseq_register_current_thread(true, true); + + switch (res) { + case -ENOSYS: + SKIP(return, "RSEQ not enabled\n"); + case -EBUSY: + SKIP(return, "GLIBC owns RSEQ. Disable GLIBC RSEQ registration\n"); + default: + ASSERT_EQ(res, 0); + } + + ASSERT_NE(signal(SIGUSR1, sigfn), SIG_ERR); +} + +FIXTURE_TEARDOWN(legacy) +{ +} + +TEST_F(legacy, legacy_test) +{ + struct rseq_abi *rs = rseq_get_abi(); + + ASSERT_NE(rs, NULL); + + /* Overwrite rs::cpu_id_start */ + rs->cpu_id_start = -1; + sleep(1); + ASSERT_NE(rs->cpu_id_start, -1); + + rs->cpu_id_start = -1; + ASSERT_EQ(raise(SIGUSR1), 0); + ASSERT_NE(rs->cpu_id_start, -1); + ASSERT_NE(cpu_id_in_sigfn, -1); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 05d03e679e06..e1e98dbabe4b 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -38,7 +38,7 @@ static int opt_modulo, verbose; static int opt_yield, opt_signal, opt_sleep, opt_disable_rseq, opt_threads = 200, opt_disable_mod = 0, opt_test = 's'; - +static bool opt_rseq_legacy; static long long opt_reps = 5000; static __thread __attribute__((tls_model("initial-exec"))) @@ -281,9 +281,12 @@ unsigned int yield_mod_cnt, nr_abort; } \ } +#define rseq_no_glibc true + #else #define printf_verbose(fmt, ...) +#define rseq_no_glibc false #endif /* BENCHMARK */ @@ -481,7 +484,7 @@ void *test_percpu_spinlock_thread(void *arg) long long i, reps; if (!opt_disable_rseq && thread_data->reg && - rseq_register_current_thread()) + __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = thread_data->reps; for (i = 0; i < reps; i++) { @@ -558,7 +561,7 @@ void *test_percpu_inc_thread(void *arg) long long i, reps; if (!opt_disable_rseq && thread_data->reg && - rseq_register_current_thread()) + __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = thread_data->reps; for (i = 0; i < reps; i++) { @@ -712,7 +715,7 @@ void *test_percpu_list_thread(void *arg) long long i, reps; struct percpu_list *list = (struct percpu_list *)arg; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = opt_reps; @@ -895,7 +898,7 @@ void *test_percpu_buffer_thread(void *arg) long long i, reps; struct percpu_buffer *buffer = (struct percpu_buffer *)arg; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = opt_reps; @@ -1105,7 +1108,7 @@ void *test_percpu_memcpy_buffer_thread(void *arg) long long i, reps; struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = opt_reps; @@ -1258,7 +1261,7 @@ void *test_membarrier_worker_thread(void *arg) const int iters = opt_reps; int i; - if (rseq_register_current_thread()) { + if (__rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) { fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", errno, strerror(errno)); abort(); @@ -1323,7 +1326,7 @@ void *test_membarrier_manager_thread(void *arg) intptr_t expect_a = 0, expect_b = 0; int cpu_a = 0, cpu_b = 0; - if (rseq_register_current_thread()) { + if (__rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) { fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", errno, strerror(errno)); abort(); @@ -1475,6 +1478,7 @@ static void show_usage(int argc, char **argv) printf(" [-D M] Disable rseq for each M threads\n"); printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); + printf(" [-O] Test with optimized RSEQ\n"); printf(" [-v] Verbose output.\n"); printf(" [-h] Show this help.\n"); printf("\n"); @@ -1602,6 +1606,9 @@ int main(int argc, char **argv) case 'M': opt_mo = RSEQ_MO_RELEASE; break; + case 'L': + opt_rseq_legacy = true; + break; default: show_usage(argc, argv); goto error; @@ -1618,7 +1625,7 @@ int main(int argc, char **argv) if (set_signal_handler()) goto error; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) goto error; if (!opt_disable_rseq && !rseq_validate_cpu_id()) { fprintf(stderr, "Error: cpu id getter unavailable\n"); diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h index fb4ec8a75dd4..5f4ea2152c2f 100644 --- a/tools/testing/selftests/rseq/rseq-abi.h +++ b/tools/testing/selftests/rseq/rseq-abi.h @@ -53,6 +53,27 @@ struct rseq_abi_cs { __u64 abort_ip; } __attribute__((aligned(4 * sizeof(__u64)))); +/** + * rseq_abi_slice_ctrl - Time slice extension control structure + * @all: Compound value + * @request: Request for a time slice extension + * @granted: Granted time slice extension + * + * @request is set by user space and can be cleared by user space or kernel + * space. @granted is set and cleared by the kernel and must only be read + * by user space. + */ +struct rseq_abi_slice_ctrl { + union { + __u32 all; + struct { + __u8 request; + __u8 granted; + __u16 __reserved; + }; + }; +}; + /* * struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always * contained within a single cache-line. @@ -165,9 +186,20 @@ struct rseq_abi { __u32 mm_cid; /* + * Time slice extension control structure. CPU local updates from + * kernel and user space. + */ + struct rseq_abi_slice_ctrl slice_ctrl; + + /* + * Place holder to push the size above 32 bytes. + */ + __u8 __reserved; + + /* * Flexible array member at end of structure, after last feature field. */ char end[]; -} __attribute__((aligned(4 * sizeof(__u64)))); +} __attribute__((aligned(256))); #endif /* _RSEQ_ABI_H */ diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h index 67d544aaa9a3..06c840e81c8b 100644 --- a/tools/testing/selftests/rseq/rseq-riscv.h +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -8,6 +8,7 @@ * exception when executed in all modes. */ #include <endian.h> +#include <asm/fence.h> #if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __LITTLE_ENDIAN) : defined(__LITTLE_ENDIAN) #define RSEQ_SIG 0xf1401073 /* csrr mhartid, x0 */ @@ -24,8 +25,6 @@ #define REG_L __REG_SEL("ld ", "lw ") #define REG_S __REG_SEL("sd ", "sw ") -#define RISCV_FENCE(p, s) \ - __asm__ __volatile__ ("fence " #p "," #s : : : "memory") #define rseq_smp_mb() RISCV_FENCE(rw, rw) #define rseq_smp_rmb() RISCV_FENCE(r, r) #define rseq_smp_wmb() RISCV_FENCE(w, w) diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index 33baaa9f9997..e7b858cd3736 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -28,8 +28,6 @@ do { \ RSEQ_WRITE_ONCE(*(p), v); \ } while (0) -#ifdef __s390x__ - #define LONG_L "lg" #define LONG_S "stg" #define LONG_LT_R "ltgr" @@ -63,43 +61,6 @@ do { \ ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n\t" \ ".popsection\n\t" -#elif __s390__ - -#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ - start_ip, post_commit_offset, abort_ip) \ - ".pushsection __rseq_cs, \"aw\"\n\t" \ - ".balign 32\n\t" \ - __rseq_str(label) ":\n\t" \ - ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ - ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \ - ".popsection\n\t" \ - ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \ - ".long 0x0, " __rseq_str(label) "b\n\t" \ - ".popsection\n\t" - -/* - * Exit points of a rseq critical section consist of all instructions outside - * of the critical section where a critical section can either branch to or - * reach through the normal course of its execution. The abort IP and the - * post-commit IP are already part of the __rseq_cs section and should not be - * explicitly defined as additional exit points. Knowing all exit points is - * useful to assist debuggers stepping over the critical section. - */ -#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \ - ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \ - ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) "\n\t" \ - ".popsection\n\t" - -#define LONG_L "l" -#define LONG_S "st" -#define LONG_LT_R "ltr" -#define LONG_CMP "c" -#define LONG_CMP_R "cr" -#define LONG_ADDI "ahi" -#define LONG_ADD_R "ar" - -#endif - #define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ (post_commit_ip - start_ip), abort_ip) diff --git a/tools/testing/selftests/rseq/rseq-slice-hist.py b/tools/testing/selftests/rseq/rseq-slice-hist.py new file mode 100644 index 000000000000..b7933eeaefb9 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-slice-hist.py @@ -0,0 +1,132 @@ +#!/usr/bin/python3 + +# +# trace-cmd record -e hrtimer_start -e hrtimer_cancel -e hrtimer_expire_entry -- $cmd +# + +from tracecmd import * + +def load_kallsyms(file_path='/proc/kallsyms'): + """ + Parses /proc/kallsyms into a dictionary. + Returns: { address_int: symbol_name } + """ + kallsyms_map = {} + + try: + with open(file_path, 'r') as f: + for line in f: + # The format is: [address] [type] [name] [module] + parts = line.split() + if len(parts) < 3: + continue + + addr = int(parts[0], 16) + name = parts[2] + + kallsyms_map[addr] = name + + except PermissionError: + print(f"Error: Permission denied reading {file_path}. Try running with sudo.") + except FileNotFoundError: + print(f"Error: {file_path} not found.") + + return kallsyms_map + +ksyms = load_kallsyms() + +# pending[timer_ptr] = {'ts': timestamp, 'comm': comm} +pending = {} + +# histograms[comm][bucket] = count +histograms = {} + +class OnlineHarmonicMean: + def __init__(self): + self.n = 0 # Count of elements + self.S = 0.0 # Cumulative sum of reciprocals + + def update(self, x): + if x == 0: + raise ValueError("Harmonic mean is undefined for zero.") + + self.n += 1 + self.S += 1.0 / x + return self.n / self.S + + @property + def mean(self): + return self.n / self.S if self.n > 0 else 0 + +ohms = {} + +def handle_start(record): + func_name = ksyms[record.num_field("function")] + if "rseq_slice_expired" in func_name: + timer_ptr = record.num_field("hrtimer") + pending[timer_ptr] = { + 'ts': record.ts, + 'comm': record.comm + } + return None + +def handle_cancel(record): + timer_ptr = record.num_field("hrtimer") + + if timer_ptr in pending: + start_data = pending.pop(timer_ptr) + duration_ns = record.ts - start_data['ts'] + duration_us = duration_ns // 1000 + + comm = start_data['comm'] + + if comm not in ohms: + ohms[comm] = OnlineHarmonicMean() + + ohms[comm].update(duration_ns) + + if comm not in histograms: + histograms[comm] = {} + + histograms[comm][duration_us] = histograms[comm].get(duration_us, 0) + 1 + return None + +def handle_expire(record): + timer_ptr = record.num_field("hrtimer") + + if timer_ptr in pending: + start_data = pending.pop(timer_ptr) + comm = start_data['comm'] + + if comm not in histograms: + histograms[comm] = {} + + # Record -1 bucket for expired (failed to cancel) + histograms[comm][-1] = histograms[comm].get(-1, 0) + 1 + return None + +if __name__ == "__main__": + t = Trace("trace.dat") + for cpu in range(0, t.cpus): + ev = t.read_event(cpu) + while ev: + if "hrtimer_start" in ev.name: + handle_start(ev) + if "hrtimer_cancel" in ev.name: + handle_cancel(ev) + if "hrtimer_expire_entry" in ev.name: + handle_expire(ev) + + ev = t.read_event(cpu) + + print("\n" + "="*40) + print("RSEQ SLICE HISTOGRAM (us)") + print("="*40) + for comm, buckets in histograms.items(): + print(f"\nTask: {comm} Mean: {ohms[comm].mean:.3f} ns") + print(f" {'Latency (us)':<15} | {'Count'}") + print(f" {'-'*30}") + # Sort buckets numerically, putting -1 at the top + for bucket in sorted(buckets.keys()): + label = "EXPIRED" if bucket == -1 else f"{bucket} us" + print(f" {label:<15} | {buckets[bucket]}") diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index f6156790c3b4..be0d0a97031e 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -33,16 +33,16 @@ #include <linux/compiler.h> -#include "../kselftest.h" +#include "kselftest.h" #include "rseq.h" /* * Define weak versions to play nice with binaries that are statically linked * against a libc that doesn't support registering its own rseq. */ -__weak ptrdiff_t __rseq_offset; -__weak unsigned int __rseq_size; -__weak unsigned int __rseq_flags; +extern __weak ptrdiff_t __rseq_offset; +extern __weak unsigned int __rseq_size; +extern __weak unsigned int __rseq_flags; static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; static const unsigned int *libc_rseq_size_p = &__rseq_size; @@ -56,6 +56,7 @@ ptrdiff_t rseq_offset; * unsuccessful. */ unsigned int rseq_size = -1U; +static unsigned int rseq_alloc_size; /* Flags used during rseq registration. */ unsigned int rseq_flags; @@ -71,9 +72,20 @@ static int rseq_ownership; /* Original struct rseq allocation size is 32 bytes. */ #define ORIG_RSEQ_ALLOC_SIZE 32 +/* + * Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an + * rseq registration that is larger than the current rseq ABI. + */ +union rseq_tls { + struct rseq_abi abi; + char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE]; +}; + static -__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { - .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, +__thread union rseq_tls __rseq __attribute__((tls_model("initial-exec"))) = { + .abi = { + .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, + }, }; static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, @@ -87,7 +99,7 @@ static int sys_getcpu(unsigned *cpu, unsigned *node) return syscall(__NR_getcpu, cpu, node, NULL); } -int rseq_available(void) +bool rseq_available(void) { int rc; @@ -96,37 +108,25 @@ int rseq_available(void) abort(); switch (errno) { case ENOSYS: - return 0; + return false; case EINVAL: - return 1; + return true; default: abort(); } } -/* The rseq areas need to be at least 32 bytes. */ -static -unsigned int get_rseq_min_alloc_size(void) -{ - unsigned int alloc_size = rseq_size; - - if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) - alloc_size = ORIG_RSEQ_ALLOC_SIZE; - return alloc_size; -} - /* * Return the feature size supported by the kernel. * * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): * - * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). * * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. */ -static -unsigned int get_rseq_kernel_feature_size(void) +static unsigned int get_rseq_kernel_feature_size(void) { unsigned long auxv_rseq_feature_size, auxv_rseq_align; @@ -141,15 +141,24 @@ unsigned int get_rseq_kernel_feature_size(void) return ORIG_RSEQ_FEATURE_SIZE; } -int rseq_register_current_thread(void) +int __rseq_register_current_thread(bool nolibc, bool legacy) { + unsigned int size; int rc; if (!rseq_ownership) { /* Treat libc's ownership as a successful registration. */ - return 0; + return nolibc ? -EBUSY : 0; } - rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); + + /* The minimal allocation size is 32, which is the legacy allocation size */ + size = get_rseq_kernel_feature_size(); + if (legacy || size < ORIG_RSEQ_ALLOC_SIZE) + rseq_alloc_size = ORIG_RSEQ_ALLOC_SIZE; + else + rseq_alloc_size = size; + + rc = sys_rseq(&__rseq.abi, rseq_alloc_size, 0, RSEQ_SIG); if (rc) { /* * After at least one thread has registered successfully @@ -168,9 +177,8 @@ int rseq_register_current_thread(void) * The first thread to register sets the rseq_size to mimic the libc * behavior. */ - if (RSEQ_READ_ONCE(rseq_size) == 0) { - RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size()); - } + if (RSEQ_READ_ONCE(rseq_size) == 0) + RSEQ_WRITE_ONCE(rseq_size, size); return 0; } @@ -183,7 +191,7 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq.abi, rseq_alloc_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; @@ -198,7 +206,7 @@ void rseq_init(void) * libc not having registered a restartable sequence. Try to find the * symbols if that's the case. */ - if (!*libc_rseq_size_p) { + if (!libc_rseq_size_p || !*libc_rseq_size_p) { libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); @@ -249,7 +257,7 @@ void rseq_init(void) rseq_ownership = 1; /* Calculate the offset of the rseq area from the thread pointer. */ - rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); + rseq_offset = (void *)&__rseq.abi - rseq_thread_pointer(); /* rseq flags are deprecated, always set to 0. */ rseq_flags = 0; diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index ba424ce80a71..c62ebb9290c0 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -8,6 +8,7 @@ #ifndef RSEQ_H #define RSEQ_H +#include <assert.h> #include <stdint.h> #include <stdbool.h> #include <pthread.h> @@ -142,7 +143,12 @@ static inline struct rseq_abi *rseq_get_abi(void) * succeed. A restartable sequence executed from a non-registered * thread will always fail. */ -int rseq_register_current_thread(void); +int __rseq_register_current_thread(bool nolibc, bool legacy); + +static inline int rseq_register_current_thread(void) +{ + return __rseq_register_current_thread(false, false); +} /* * Unregister rseq for current thread. @@ -160,6 +166,11 @@ int32_t rseq_fallback_current_cpu(void); int32_t rseq_fallback_current_node(void); /* + * Returns true if rseq is supported. + */ +bool rseq_available(void); + +/* * Values returned can be either the current CPU number, -1 (rseq is * uninitialized), or -2 (rseq initialization has failed). */ diff --git a/tools/testing/selftests/rseq/run_legacy_check.sh b/tools/testing/selftests/rseq/run_legacy_check.sh new file mode 100755 index 000000000000..5577b46ea092 --- /dev/null +++ b/tools/testing/selftests/rseq/run_legacy_check.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ./legacy_check diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh index 8d31426ab41f..69a3fa049929 100755 --- a/tools/testing/selftests/rseq/run_param_test.sh +++ b/tools/testing/selftests/rseq/run_param_test.sh @@ -34,6 +34,11 @@ REPS=1000 SLOW_REPS=100 NR_THREADS=$((6*${NR_CPUS})) +# Prevent GLIBC from registering RSEQ so the selftest can run in legacy and +# performance optimized mode. +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" +export GLIBC_TUNABLES + function do_tests() { local i=0 @@ -103,6 +108,40 @@ function inject_blocking() NR_LOOPS= } +echo "Testing in legacy RSEQ mode" +echo "Yield injection (25%)" +inject_blocking -m 4 -y -L + +echo "Yield injection (50%)" +inject_blocking -m 2 -y -L + +echo "Yield injection (100%)" +inject_blocking -m 1 -y -L + +echo "Kill injection (25%)" +inject_blocking -m 4 -k -L + +echo "Kill injection (50%)" +inject_blocking -m 2 -k -L + +echo "Kill injection (100%)" +inject_blocking -m 1 -k -L + +echo "Sleep injection (1ms, 25%)" +inject_blocking -m 4 -s 1 -L + +echo "Sleep injection (1ms, 50%)" +inject_blocking -m 2 -s 1 -L + +echo "Sleep injection (1ms, 100%)" +inject_blocking -m 1 -s 1 -L + +./check_optimized || { + echo "Skipping optimized RSEQ mode test. Not supported"; + exit 0 +} + +echo "Testing in optimized RSEQ mode" echo "Yield injection (25%)" inject_blocking -m 4 -y diff --git a/tools/testing/selftests/rseq/run_syscall_errors_test.sh b/tools/testing/selftests/rseq/run_syscall_errors_test.sh new file mode 100755 index 000000000000..9272246b39f2 --- /dev/null +++ b/tools/testing/selftests/rseq/run_syscall_errors_test.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: 2024 Michael Jeanson <mjeanson@efficios.com> + +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ./syscall_errors_test diff --git a/tools/testing/selftests/rseq/run_timeslice_test.sh b/tools/testing/selftests/rseq/run_timeslice_test.sh new file mode 100755 index 000000000000..551ebed71ec6 --- /dev/null +++ b/tools/testing/selftests/rseq/run_timeslice_test.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ + +# Prevent GLIBC from registering RSEQ so the selftest can run in legacy +# and performance optimized mode. +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" +export GLIBC_TUNABLES + +./check_optimized || { + echo "Skipping optimized RSEQ mode test. Not supported"; + exit 0 +} + +./slice_test diff --git a/tools/testing/selftests/rseq/slice_test.c b/tools/testing/selftests/rseq/slice_test.c new file mode 100644 index 000000000000..e402d4440bc2 --- /dev/null +++ b/tools/testing/selftests/rseq/slice_test.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include <assert.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <syscall.h> +#include <unistd.h> + +#include <linux/prctl.h> +#include <sys/prctl.h> +#include <sys/time.h> + +#include "rseq.h" + +#include "../kselftest_harness.h" + +#ifndef __NR_rseq_slice_yield +# define __NR_rseq_slice_yield 471 +#endif + +#define BITS_PER_INT 32 +#define BITS_PER_BYTE 8 + +#ifndef PR_RSEQ_SLICE_EXTENSION +# define PR_RSEQ_SLICE_EXTENSION 79 +# define PR_RSEQ_SLICE_EXTENSION_GET 1 +# define PR_RSEQ_SLICE_EXTENSION_SET 2 +# define PR_RSEQ_SLICE_EXT_ENABLE 0x01 +#endif + +#ifndef RSEQ_SLICE_EXT_REQUEST_BIT +# define RSEQ_SLICE_EXT_REQUEST_BIT 0 +# define RSEQ_SLICE_EXT_GRANTED_BIT 1 +#endif + +#ifndef asm_inline +# define asm_inline asm __inline +#endif + +#define NSEC_PER_SEC 1000000000L +#define NSEC_PER_USEC 1000L + +struct noise_params { + int64_t noise_nsecs; + int64_t sleep_nsecs; + int64_t run; +}; + +FIXTURE(slice_ext) +{ + pthread_t noise_thread; + struct noise_params noise_params; +}; + +FIXTURE_VARIANT(slice_ext) +{ + int64_t total_nsecs; + int64_t slice_nsecs; + int64_t noise_nsecs; + int64_t sleep_nsecs; + bool no_yield; +}; + +FIXTURE_VARIANT_ADD(slice_ext, n2_2_50) +{ + .total_nsecs = 5LL * NSEC_PER_SEC, + .slice_nsecs = 2LL * NSEC_PER_USEC, + .noise_nsecs = 2LL * NSEC_PER_USEC, + .sleep_nsecs = 50LL * NSEC_PER_USEC, +}; + +FIXTURE_VARIANT_ADD(slice_ext, n50_2_50) +{ + .total_nsecs = 5LL * NSEC_PER_SEC, + .slice_nsecs = 50LL * NSEC_PER_USEC, + .noise_nsecs = 2LL * NSEC_PER_USEC, + .sleep_nsecs = 50LL * NSEC_PER_USEC, +}; + +FIXTURE_VARIANT_ADD(slice_ext, n2_2_50_no_yield) +{ + .total_nsecs = 5LL * NSEC_PER_SEC, + .slice_nsecs = 2LL * NSEC_PER_USEC, + .noise_nsecs = 2LL * NSEC_PER_USEC, + .sleep_nsecs = 50LL * NSEC_PER_USEC, + .no_yield = true, +}; + + +static inline bool elapsed(struct timespec *start, struct timespec *now, + int64_t span) +{ + int64_t delta = now->tv_sec - start->tv_sec; + + delta *= NSEC_PER_SEC; + delta += now->tv_nsec - start->tv_nsec; + return delta >= span; +} + +static void *noise_thread(void *arg) +{ + struct noise_params *p = arg; + + while (RSEQ_READ_ONCE(p->run)) { + struct timespec ts_start, ts_now; + + clock_gettime(CLOCK_MONOTONIC, &ts_start); + do { + clock_gettime(CLOCK_MONOTONIC, &ts_now); + } while (!elapsed(&ts_start, &ts_now, p->noise_nsecs)); + + ts_start.tv_sec = 0; + ts_start.tv_nsec = p->sleep_nsecs; + clock_nanosleep(CLOCK_MONOTONIC, 0, &ts_start, NULL); + } + return NULL; +} + +FIXTURE_SETUP(slice_ext) +{ + cpu_set_t affinity; + + if (__rseq_register_current_thread(true, false)) + SKIP(return, "RSEQ not supported\n"); + + if (prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET, + PR_RSEQ_SLICE_EXT_ENABLE, 0, 0)) + SKIP(return, "Time slice extension not supported\n"); + + ASSERT_EQ(sched_getaffinity(0, sizeof(affinity), &affinity), 0); + + /* Pin it on a single CPU. Avoid CPU 0 */ + for (int i = 1; i < CPU_SETSIZE; i++) { + if (!CPU_ISSET(i, &affinity)) + continue; + + CPU_ZERO(&affinity); + CPU_SET(i, &affinity); + ASSERT_EQ(sched_setaffinity(0, sizeof(affinity), &affinity), 0); + break; + } + + self->noise_params.noise_nsecs = variant->noise_nsecs; + self->noise_params.sleep_nsecs = variant->sleep_nsecs; + self->noise_params.run = 1; + + ASSERT_EQ(pthread_create(&self->noise_thread, NULL, noise_thread, &self->noise_params), 0); +} + +FIXTURE_TEARDOWN(slice_ext) +{ + self->noise_params.run = 0; + pthread_join(self->noise_thread, NULL); +} + +TEST_F(slice_ext, slice_test) +{ + unsigned long success = 0, yielded = 0, scheduled = 0, raced = 0; + unsigned long total = 0, aborted = 0; + struct rseq_abi *rs = rseq_get_abi(); + struct timespec ts_start, ts_now; + + ASSERT_NE(rs, NULL); + + clock_gettime(CLOCK_MONOTONIC, &ts_start); + do { + struct timespec ts_cs; + bool req = false; + + clock_gettime(CLOCK_MONOTONIC, &ts_cs); + + total++; + RSEQ_WRITE_ONCE(rs->slice_ctrl.request, 1); + do { + clock_gettime(CLOCK_MONOTONIC, &ts_now); + } while (!elapsed(&ts_cs, &ts_now, variant->slice_nsecs)); + + /* + * request can be cleared unconditionally, but for making + * the stats work this is actually checking it first + */ + if (RSEQ_READ_ONCE(rs->slice_ctrl.request)) { + RSEQ_WRITE_ONCE(rs->slice_ctrl.request, 0); + /* Race between check and clear! */ + req = true; + success++; + } + + if (RSEQ_READ_ONCE(rs->slice_ctrl.granted)) { + /* The above raced against a late grant */ + if (req) + success--; + if (variant->no_yield) { + syscall(__NR_getpid); + aborted++; + } else { + yielded++; + if (!syscall(__NR_rseq_slice_yield)) + raced++; + } + } else { + if (!req) + scheduled++; + } + + clock_gettime(CLOCK_MONOTONIC, &ts_now); + } while (!elapsed(&ts_start, &ts_now, variant->total_nsecs)); + + printf("# Total %12ld\n", total); + printf("# Success %12ld\n", success); + printf("# Yielded %12ld\n", yielded); + printf("# Aborted %12ld\n", aborted); + printf("# Scheduled %12ld\n", scheduled); + printf("# Raced %12ld\n", raced); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/rseq/syscall_errors_test.c b/tools/testing/selftests/rseq/syscall_errors_test.c new file mode 100644 index 000000000000..a5d9e1f8a2dc --- /dev/null +++ b/tools/testing/selftests/rseq/syscall_errors_test.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: 2024 Michael Jeanson <mjeanson@efficios.com> + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <assert.h> +#include <stdint.h> +#include <syscall.h> +#include <string.h> +#include <unistd.h> + +#include "rseq.h" + +static int sys_rseq(void *rseq_abi, uint32_t rseq_len, + int flags, uint32_t sig) +{ + return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); +} + +/* + * Check the value of errno on some expected failures of the rseq syscall. + */ + +int main(void) +{ + struct rseq_abi *global_rseq = rseq_get_abi(); + int ret; + int errno_copy; + + if (!rseq_available()) { + fprintf(stderr, "rseq syscall unavailable"); + goto error; + } + + /* The current thread is NOT registered. */ + + /* EINVAL */ + errno = 0; + ret = sys_rseq(global_rseq, 32, -1, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Registration with invalid flag fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EINVAL) + goto error; + + errno = 0; + ret = sys_rseq((char *) global_rseq + 1, 32, 0, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Registration with unaligned rseq_abi fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EINVAL) + goto error; + + errno = 0; + ret = sys_rseq(global_rseq, 31, 0, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Registration with invalid size fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EINVAL) + goto error; + + +#if defined(__LP64__) && (!defined(__s390__) && !defined(__s390x__)) + /* + * We haven't found a reliable way to find an invalid address when + * running a 32bit userspace on a 64bit kernel, so only run this test + * on 64bit builds for the moment. + * + * Also exclude architectures that select + * CONFIG_ALTERNATE_USER_ADDRESS_SPACE where the kernel and userspace + * have their own address space and this failure can't happen. + */ + + /* EFAULT */ + errno = 0; + ret = sys_rseq((void *) -4096UL, 32, 0, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Registration with invalid address fails with errno set to EFAULT (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EFAULT) + goto error; +#endif + + errno = 0; + ret = sys_rseq(global_rseq, 32, 0, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Registration succeeds for the current thread (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret != 0 && errno != 0) + goto error; + + /* The current thread is registered. */ + + /* EBUSY */ + errno = 0; + ret = sys_rseq(global_rseq, 32, 0, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Double registration fails with errno set to EBUSY (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EBUSY) + goto error; + + /* EPERM */ + errno = 0; + ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG + 1); + errno_copy = errno; + fprintf(stderr, "Unregistration with wrong RSEQ_SIG fails with errno to EPERM (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EPERM) + goto error; + + errno = 0; + ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Unregistration succeeds for the current thread (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret != 0) + goto error; + + errno = 0; + ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + errno_copy = errno; + fprintf(stderr, "Double unregistration fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy)); + if (ret == 0 || errno_copy != EINVAL) + goto error; + + return 0; +error: + return -1; +} |
