diff options
Diffstat (limited to 'tools/testing/selftests')
56 files changed, 3090 insertions, 1008 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 0aedcd76cf0f..de11992dc577 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -11,6 +11,7 @@ TARGETS += cpufreq TARGETS += cpu-hotplug TARGETS += damon TARGETS += drivers/dma-buf +TARGETS += drivers/s390x/uvdevice TARGETS += efivarfs TARGETS += exec TARGETS += filesystems diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index eb2213540fe3..a38b89c28030 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -27,7 +27,7 @@ #include "../kselftest.h" -#define TESTS_PER_CONTROL 6 +#define TESTS_PER_CONTROL 7 struct card_data { snd_ctl_t *handle; @@ -456,6 +456,44 @@ out: ctl->card->card, ctl->elem); } +static bool strend(const char *haystack, const char *needle) +{ + size_t haystack_len = strlen(haystack); + size_t needle_len = strlen(needle); + + if (needle_len > haystack_len) + return false; + return strcmp(haystack + haystack_len - needle_len, needle) == 0; +} + +static void test_ctl_name(struct ctl_data *ctl) +{ + bool name_ok = true; + bool check; + + /* Only boolean controls should end in Switch */ + if (strend(ctl->name, " Switch")) { + if (snd_ctl_elem_info_get_type(ctl->info) != SND_CTL_ELEM_TYPE_BOOLEAN) { + ksft_print_msg("%d.%d %s ends in Switch but is not boolean\n", + ctl->card->card, ctl->elem, ctl->name); + name_ok = false; + } + } + + /* Writeable boolean controls should end in Switch */ + if (snd_ctl_elem_info_get_type(ctl->info) == SND_CTL_ELEM_TYPE_BOOLEAN && + snd_ctl_elem_info_is_writable(ctl->info)) { + if (!strend(ctl->name, " Switch")) { + ksft_print_msg("%d.%d %s is a writeable boolean but not a Switch\n", + ctl->card->card, ctl->elem, ctl->name); + name_ok = false; + } + } + + ksft_test_result(name_ok, "name.%d.%d\n", + ctl->card->card, ctl->elem); +} + static bool show_mismatch(struct ctl_data *ctl, int index, snd_ctl_elem_value_t *read_val, snd_ctl_elem_value_t *expected_val) @@ -1062,6 +1100,7 @@ int main(void) * test stores the default value for later cleanup. */ test_ctl_get_value(ctl); + test_ctl_name(ctl); test_ctl_write_default(ctl); test_ctl_write_valid(ctl); test_ctl_write_invalid(ctl); diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 4297d580e3f8..4c52cc6f2f9c 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -19,6 +19,7 @@ #include "cgroup_util.h" #include "../clone3/clone3_selftests.h" +/* Returns read len on success, or -errno on failure. */ static ssize_t read_text(const char *path, char *buf, size_t max_len) { ssize_t len; @@ -26,35 +27,29 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len) fd = open(path, O_RDONLY); if (fd < 0) - return fd; + return -errno; len = read(fd, buf, max_len - 1); - if (len < 0) - goto out; - buf[len] = 0; -out: + if (len >= 0) + buf[len] = 0; + close(fd); - return len; + return len < 0 ? -errno : len; } +/* Returns written len on success, or -errno on failure. */ static ssize_t write_text(const char *path, char *buf, ssize_t len) { int fd; fd = open(path, O_WRONLY | O_APPEND); if (fd < 0) - return fd; + return -errno; len = write(fd, buf, len); - if (len < 0) { - close(fd); - return len; - } - close(fd); - - return len; + return len < 0 ? -errno : len; } char *cg_name(const char *root, const char *name) @@ -87,16 +82,16 @@ char *cg_control(const char *cgroup, const char *control) return ret; } +/* Returns 0 on success, or -errno on failure. */ int cg_read(const char *cgroup, const char *control, char *buf, size_t len) { char path[PATH_MAX]; + ssize_t ret; snprintf(path, sizeof(path), "%s/%s", cgroup, control); - if (read_text(path, buf, len) >= 0) - return 0; - - return -1; + ret = read_text(path, buf, len); + return ret >= 0 ? 0 : ret; } int cg_read_strcmp(const char *cgroup, const char *control, @@ -177,17 +172,15 @@ long cg_read_lc(const char *cgroup, const char *control) return cnt; } +/* Returns 0 on success, or -errno on failure. */ int cg_write(const char *cgroup, const char *control, char *buf) { char path[PATH_MAX]; - ssize_t len = strlen(buf); + ssize_t len = strlen(buf), ret; snprintf(path, sizeof(path), "%s/%s", cgroup, control); - - if (write_text(path, buf, len) == len) - return 0; - - return -1; + ret = write_text(path, buf, len); + return ret == len ? 0 : ret; } int cg_write_numeric(const char *cgroup, const char *control, long value) @@ -547,6 +540,18 @@ int set_oom_adj_score(int pid, int score) return 0; } +int proc_mount_contains(const char *option) +{ + char buf[4 * PAGE_SIZE]; + ssize_t read; + + read = read_text("/proc/mounts", buf, sizeof(buf)); + if (read < 0) + return read; + + return strstr(buf, option) != NULL; +} + ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) { char path[PATH_MAX]; @@ -557,7 +562,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t else snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); - return read_text(path, buf, size); + size = read_text(path, buf, size); + return size < 0 ? -1 : size; } int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 2ee2119281d7..c92df4e5d395 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -52,6 +52,7 @@ extern int is_swap_enabled(void); extern int set_oom_adj_score(int pid, int score); extern int cg_wait_for_proc_count(const char *cgroup, int count); extern int cg_killall(const char *cgroup); +int proc_mount_contains(const char *option); extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size); extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle); extern pid_t clone_into_cgroup(int cgroup_fd); diff --git a/tools/testing/selftests/cgroup/memcg_protection.m b/tools/testing/selftests/cgroup/memcg_protection.m new file mode 100644 index 000000000000..051daa3477b6 --- /dev/null +++ b/tools/testing/selftests/cgroup/memcg_protection.m @@ -0,0 +1,89 @@ +% SPDX-License-Identifier: GPL-2.0 +% +% run as: octave-cli memcg_protection.m +% +% This script simulates reclaim protection behavior on a single level of memcg +% hierarchy to illustrate how overcommitted protection spreads among siblings +% (as it depends also on their current consumption). +% +% Simulation assumes siblings consumed the initial amount of memory (w/out +% reclaim) and then the reclaim starts, all memory is reclaimable, i.e. treated +% same. It simulates only non-low reclaim and assumes all memory.min = 0. +% +% Input configurations +% -------------------- +% E number parent effective protection +% n vector nominal protection of siblings set at the given level (memory.low) +% c vector current consumption -,,- (memory.current) + +% example from testcase (values in GB) +E = 50 / 1024; +n = [75 25 0 500 ] / 1024; +c = [50 50 50 0] / 1024; + +% Reclaim parameters +% ------------------ + +% Minimal reclaim amount (GB) +cluster = 32*4 / 2**20; + +% Reclaim coefficient (think as 0.5^sc->priority) +alpha = .1 + +% Simulation parameters +% --------------------- +epsilon = 1e-7; +timeout = 1000; + +% Simulation loop +% --------------- + +ch = []; +eh = []; +rh = []; + +for t = 1:timeout + % low_usage + u = min(c, n); + siblings = sum(u); + + % effective_protection() + protected = min(n, c); % start with nominal + e = protected * min(1, E / siblings); % normalize overcommit + + % recursive protection + unclaimed = max(0, E - siblings); + parent_overuse = sum(c) - siblings; + if (unclaimed > 0 && parent_overuse > 0) + overuse = max(0, c - protected); + e += unclaimed * (overuse / parent_overuse); + endif + + % get_scan_count() + r = alpha * c; % assume all memory is in a single LRU list + + % commit 1bc63fb1272b ("mm, memcg: make scan aggression always exclude protection") + sz = max(e, c); + r .*= (1 - (e+epsilon) ./ (sz+epsilon)); + + % uncomment to debug prints + % e, c, r + + % nothing to reclaim, reached equilibrium + if max(r) < epsilon + break; + endif + + % SWAP_CLUSTER_MAX roundup + r = max(r, (r > epsilon) .* cluster); + % XXX here I do parallel reclaim of all siblings + % in reality reclaim is serialized and each sibling recalculates own residual + c = max(c - r, 0); + + ch = [ch ; c]; + eh = [eh ; e]; + rh = [rh ; r]; +endfor + +t +c, e diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 36ccf2322e21..8833359556f3 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -21,6 +21,9 @@ #include "../kselftest.h" #include "cgroup_util.h" +static bool has_localevents; +static bool has_recursiveprot; + /* * This test creates two nested cgroups with and without enabling * the memory controller. @@ -187,13 +190,6 @@ cleanup: return ret; } -static int alloc_pagecache_50M(const char *cgroup, void *arg) -{ - int fd = (long)arg; - - return alloc_pagecache(fd, MB(50)); -} - static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) { int fd = (long)arg; @@ -211,13 +207,17 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) static int alloc_anon_noexit(const char *cgroup, void *arg) { int ppid = getppid(); + size_t size = (unsigned long)arg; + char *buf, *ptr; - if (alloc_anon(cgroup, arg)) - return -1; + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; while (getppid() == ppid) sleep(1); + free(buf); return 0; } @@ -240,33 +240,39 @@ static int cg_test_proc_killed(const char *cgroup) /* * First, this test creates the following hierarchy: - * A memory.min = 50M, memory.max = 200M - * A/B memory.min = 50M, memory.current = 50M + * A memory.min = 0, memory.max = 200M + * A/B memory.min = 50M * A/B/C memory.min = 75M, memory.current = 50M * A/B/D memory.min = 25M, memory.current = 50M - * A/B/E memory.min = 500M, memory.current = 0 - * A/B/F memory.min = 0, memory.current = 50M + * A/B/E memory.min = 0, memory.current = 50M + * A/B/F memory.min = 500M, memory.current = 0 + * + * (or memory.low if we test soft protection) * - * Usages are pagecache, but the test keeps a running + * Usages are pagecache and the test keeps a running * process in every leaf cgroup. * Then it creates A/G and creates a significant - * memory pressure in it. + * memory pressure in A. * + * Then it checks actual memory usages and expects that: * A/B memory.current ~= 50M - * A/B/C memory.current ~= 33M - * A/B/D memory.current ~= 17M + * A/B/C memory.current ~= 29M + * A/B/D memory.current ~= 21M * A/B/E memory.current ~= 0 + * A/B/F memory.current = 0 + * (for origin of the numbers, see model in memcg_protection.m.) * * After that it tries to allocate more than there is - * unprotected memory in A available, and checks - * checks that memory.min protects pagecache even - * in this case. + * unprotected memory in A available, and checks that: + * a) memory.min protects pagecache even in this case, + * b) memory.low allows reclaiming page cache with low events. */ -static int test_memcg_min(const char *root) +static int test_memcg_protection(const char *root, bool min) { - int ret = KSFT_FAIL; + int ret = KSFT_FAIL, rc; char *parent[3] = {NULL}; char *children[4] = {NULL}; + const char *attribute = min ? "memory.min" : "memory.low"; long c[4]; int i, attempts; int fd; @@ -290,8 +296,10 @@ static int test_memcg_min(const char *root) if (cg_create(parent[0])) goto cleanup; - if (cg_read_long(parent[0], "memory.min")) { - ret = KSFT_SKIP; + if (cg_read_long(parent[0], attribute)) { + /* No memory.min on older kernels is fine */ + if (min) + ret = KSFT_SKIP; goto cleanup; } @@ -321,24 +329,22 @@ static int test_memcg_min(const char *root) if (cg_create(children[i])) goto cleanup; - if (i == 2) + if (i > 2) continue; cg_run_nowait(children[i], alloc_pagecache_50M_noexit, (void *)(long)fd); } - if (cg_write(parent[0], "memory.min", "50M")) + if (cg_write(parent[1], attribute, "50M")) goto cleanup; - if (cg_write(parent[1], "memory.min", "50M")) + if (cg_write(children[0], attribute, "75M")) goto cleanup; - if (cg_write(children[0], "memory.min", "75M")) + if (cg_write(children[1], attribute, "25M")) goto cleanup; - if (cg_write(children[1], "memory.min", "25M")) + if (cg_write(children[2], attribute, "0")) goto cleanup; - if (cg_write(children[2], "memory.min", "500M")) - goto cleanup; - if (cg_write(children[3], "memory.min", "0")) + if (cg_write(children[3], attribute, "500M")) goto cleanup; attempts = 0; @@ -358,178 +364,46 @@ static int test_memcg_min(const char *root) for (i = 0; i < ARRAY_SIZE(children); i++) c[i] = cg_read_long(children[i], "memory.current"); - if (!values_close(c[0], MB(33), 10)) - goto cleanup; - - if (!values_close(c[1], MB(17), 10)) - goto cleanup; - - if (!values_close(c[2], 0, 1)) - goto cleanup; - - if (!cg_run(parent[2], alloc_anon, (void *)MB(170))) - goto cleanup; - - if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) - goto cleanup; - - ret = KSFT_PASS; - -cleanup: - for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { - if (!children[i]) - continue; - - cg_destroy(children[i]); - free(children[i]); - } - - for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { - if (!parent[i]) - continue; - - cg_destroy(parent[i]); - free(parent[i]); - } - close(fd); - return ret; -} - -/* - * First, this test creates the following hierarchy: - * A memory.low = 50M, memory.max = 200M - * A/B memory.low = 50M, memory.current = 50M - * A/B/C memory.low = 75M, memory.current = 50M - * A/B/D memory.low = 25M, memory.current = 50M - * A/B/E memory.low = 500M, memory.current = 0 - * A/B/F memory.low = 0, memory.current = 50M - * - * Usages are pagecache. - * Then it creates A/G an creates a significant - * memory pressure in it. - * - * Then it checks actual memory usages and expects that: - * A/B memory.current ~= 50M - * A/B/ memory.current ~= 33M - * A/B/D memory.current ~= 17M - * A/B/E memory.current ~= 0 - * - * After that it tries to allocate more than there is - * unprotected memory in A available, - * and checks low and oom events in memory.events. - */ -static int test_memcg_low(const char *root) -{ - int ret = KSFT_FAIL; - char *parent[3] = {NULL}; - char *children[4] = {NULL}; - long low, oom; - long c[4]; - int i; - int fd; - - fd = get_temp_fd(); - if (fd < 0) - goto cleanup; - - parent[0] = cg_name(root, "memcg_test_0"); - if (!parent[0]) - goto cleanup; - - parent[1] = cg_name(parent[0], "memcg_test_1"); - if (!parent[1]) - goto cleanup; - - parent[2] = cg_name(parent[0], "memcg_test_2"); - if (!parent[2]) - goto cleanup; - - if (cg_create(parent[0])) - goto cleanup; - - if (cg_read_long(parent[0], "memory.low")) - goto cleanup; - - if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) - goto cleanup; - - if (cg_write(parent[0], "memory.max", "200M")) + if (!values_close(c[0], MB(29), 10)) goto cleanup; - if (cg_write(parent[0], "memory.swap.max", "0")) + if (!values_close(c[1], MB(21), 10)) goto cleanup; - if (cg_create(parent[1])) + if (c[3] != 0) goto cleanup; - if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) + rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); + if (min && !rc) goto cleanup; - - if (cg_create(parent[2])) + else if (!min && rc) { + fprintf(stderr, + "memory.low prevents from allocating anon memory\n"); goto cleanup; - - for (i = 0; i < ARRAY_SIZE(children); i++) { - children[i] = cg_name_indexed(parent[1], "child_memcg", i); - if (!children[i]) - goto cleanup; - - if (cg_create(children[i])) - goto cleanup; - - if (i == 2) - continue; - - if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd)) - goto cleanup; } - if (cg_write(parent[0], "memory.low", "50M")) - goto cleanup; - if (cg_write(parent[1], "memory.low", "50M")) - goto cleanup; - if (cg_write(children[0], "memory.low", "75M")) - goto cleanup; - if (cg_write(children[1], "memory.low", "25M")) - goto cleanup; - if (cg_write(children[2], "memory.low", "500M")) - goto cleanup; - if (cg_write(children[3], "memory.low", "0")) - goto cleanup; - - if (cg_run(parent[2], alloc_anon, (void *)MB(148))) - goto cleanup; - if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) goto cleanup; - for (i = 0; i < ARRAY_SIZE(children); i++) - c[i] = cg_read_long(children[i], "memory.current"); - - if (!values_close(c[0], MB(33), 10)) - goto cleanup; - - if (!values_close(c[1], MB(17), 10)) - goto cleanup; - - if (!values_close(c[2], 0, 1)) - goto cleanup; - - if (cg_run(parent[2], alloc_anon, (void *)MB(166))) { - fprintf(stderr, - "memory.low prevents from allocating anon memory\n"); + if (min) { + ret = KSFT_PASS; goto cleanup; } for (i = 0; i < ARRAY_SIZE(children); i++) { + int no_low_events_index = 1; + long low, oom; + oom = cg_read_key_long(children[i], "memory.events", "oom "); low = cg_read_key_long(children[i], "memory.events", "low "); if (oom) goto cleanup; - if (i < 2 && low <= 0) + if (i <= no_low_events_index && low <= 0) goto cleanup; - if (i >= 2 && low) + if (i > no_low_events_index && low) goto cleanup; + } ret = KSFT_PASS; @@ -554,13 +428,28 @@ cleanup: return ret; } +static int test_memcg_min(const char *root) +{ + return test_memcg_protection(root, true); +} + +static int test_memcg_low(const char *root) +{ + return test_memcg_protection(root, false); +} + static int alloc_pagecache_max_30M(const char *cgroup, void *arg) { size_t size = MB(50); int ret = -1; - long current; + long current, high, max; int fd; + high = cg_read_long(cgroup, "memory.high"); + max = cg_read_long(cgroup, "memory.max"); + if (high != MB(30) && max != MB(30)) + return -1; + fd = get_temp_fd(); if (fd < 0) return -1; @@ -569,7 +458,7 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg) goto cleanup; current = cg_read_long(cgroup, "memory.current"); - if (current <= MB(29) || current > MB(30)) + if (!values_close(current, MB(30), 5)) goto cleanup; ret = 0; @@ -607,7 +496,7 @@ static int test_memcg_high(const char *root) if (cg_write(memcg, "memory.high", "30M")) goto cleanup; - if (cg_run(memcg, alloc_anon, (void *)MB(100))) + if (cg_run(memcg, alloc_anon, (void *)MB(31))) goto cleanup; if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) @@ -756,6 +645,111 @@ cleanup: return ret; } +/* + * This test checks that memory.reclaim reclaims the given + * amount of memory (from both anon and file, if possible). + */ +static int test_memcg_reclaim(const char *root) +{ + int ret = KSFT_FAIL, fd, retries; + char *memcg; + long current, expected_usage, to_reclaim; + char buf[64]; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + current = cg_read_long(memcg, "memory.current"); + if (current != 0) + goto cleanup; + + fd = get_temp_fd(); + if (fd < 0) + goto cleanup; + + cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); + + /* + * If swap is enabled, try to reclaim from both anon and file, else try + * to reclaim from file only. + */ + if (is_swap_enabled()) { + cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); + expected_usage = MB(100); + } else + expected_usage = MB(50); + + /* + * Wait until current usage reaches the expected usage (or we run out of + * retries). + */ + retries = 5; + while (!values_close(cg_read_long(memcg, "memory.current"), + expected_usage, 10)) { + if (retries--) { + sleep(1); + continue; + } else { + fprintf(stderr, + "failed to allocate %ld for memcg reclaim test\n", + expected_usage); + goto cleanup; + } + } + + /* + * Reclaim until current reaches 30M, this makes sure we hit both anon + * and file if swap is enabled. + */ + retries = 5; + while (true) { + int err; + + current = cg_read_long(memcg, "memory.current"); + to_reclaim = current - MB(30); + + /* + * We only keep looping if we get EAGAIN, which means we could + * not reclaim the full amount. + */ + if (to_reclaim <= 0) + goto cleanup; + + + snprintf(buf, sizeof(buf), "%ld", to_reclaim); + err = cg_write(memcg, "memory.reclaim", buf); + if (!err) { + /* + * If writing succeeds, then the written amount should have been + * fully reclaimed (and maybe more). + */ + current = cg_read_long(memcg, "memory.current"); + if (!values_close(current, MB(30), 3) && current > MB(30)) + goto cleanup; + break; + } + + /* The kernel could not reclaim the full amount, try again. */ + if (err == -EAGAIN && retries--) + continue; + + /* We got an unexpected error or ran out of retries. */ + goto cleanup; + } + + ret = KSFT_PASS; +cleanup: + cg_destroy(memcg); + free(memcg); + close(fd); + + return ret; +} + static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) { long mem_max = (long)arg; @@ -987,9 +981,6 @@ static int tcp_client(const char *cgroup, unsigned short port) if (current < 0 || sock < 0) goto close_sk; - if (current < sock) - goto close_sk; - if (values_close(current, sock, 10)) { ret = KSFT_PASS; break; @@ -1079,12 +1070,14 @@ cleanup: /* * This test disables swapping and tries to allocate anonymous memory * up to OOM with memory.group.oom set. Then it checks that all - * processes in the leaf (but not the parent) were killed. + * processes in the leaf were killed. It also checks that oom_events + * were propagated to the parent level. */ static int test_memcg_oom_group_leaf_events(const char *root) { int ret = KSFT_FAIL; char *parent, *child; + long parent_oom_events; parent = cg_name(root, "memcg_test_0"); child = cg_name(root, "memcg_test_0/memcg_test_1"); @@ -1122,7 +1115,16 @@ static int test_memcg_oom_group_leaf_events(const char *root) if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) goto cleanup; - if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0) + parent_oom_events = cg_read_key_long( + parent, "memory.events", "oom_kill "); + /* + * If memory_localevents is not enabled (the default), the parent should + * count OOM events in its children groups. Otherwise, it should not + * have observed any events. + */ + if (has_localevents && parent_oom_events != 0) + goto cleanup; + else if (!has_localevents && parent_oom_events <= 0) goto cleanup; ret = KSFT_PASS; @@ -1246,7 +1248,6 @@ cleanup: return ret; } - #define T(x) { x, #x } struct memcg_test { int (*fn)(const char *root); @@ -1259,6 +1260,7 @@ struct memcg_test { T(test_memcg_high), T(test_memcg_high_sync), T(test_memcg_max), + T(test_memcg_reclaim), T(test_memcg_oom_events), T(test_memcg_swap_max), T(test_memcg_sock), @@ -1271,7 +1273,7 @@ struct memcg_test { int main(int argc, char **argv) { char root[PATH_MAX]; - int i, ret = EXIT_SUCCESS; + int i, proc_status, ret = EXIT_SUCCESS; if (cg_find_unified_root(root, sizeof(root))) ksft_exit_skip("cgroup v2 isn't mounted\n"); @@ -1287,6 +1289,16 @@ int main(int argc, char **argv) if (cg_write(root, "cgroup.subtree_control", "+memory")) ksft_exit_skip("Failed to set memory controller\n"); + proc_status = proc_mount_contains("memory_recursiveprot"); + if (proc_status < 0) + ksft_exit_skip("Failed to query cgroup mount option\n"); + has_recursiveprot = proc_status; + + proc_status = proc_mount_contains("memory_localevents"); + if (proc_status < 0) + ksft_exit_skip("Failed to query cgroup mount option\n"); + has_localevents = proc_status; + for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index 2e3ae77cb6db..89592c64462f 100644 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -231,6 +231,7 @@ test_context() { context_dir=$1 ensure_dir "$context_dir" "exist" + ensure_file "$context_dir/avail_operations" "exit" 400 ensure_file "$context_dir/operations" "exist" 600 test_monitoring_attrs "$context_dir/monitoring_attrs" test_targets "$context_dir/targets" diff --git a/tools/testing/selftests/drivers/.gitignore b/tools/testing/selftests/drivers/.gitignore index ca74f2e1c719..09e23b5afa96 100644 --- a/tools/testing/selftests/drivers/.gitignore +++ b/tools/testing/selftests/drivers/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only /dma-buf/udmabuf +/s390x/uvdevice/test_uvdevice diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile new file mode 100644 index 000000000000..5e701d2708d4 --- /dev/null +++ b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile @@ -0,0 +1,22 @@ +include ../../../../../build/Build.include + +UNAME_M := $(shell uname -m) + +ifneq ($(UNAME_M),s390x) +nothing: +.PHONY: all clean run_tests install +.SILENT: +else + +TEST_GEN_PROGS := test_uvdevice + +top_srcdir ?= ../../../../../.. +KSFT_KHDR_INSTALL := 1 +khdr_dir = $(top_srcdir)/usr/include +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include + +CFLAGS += -Wall -Werror -static -I$(khdr_dir) -I$(LINUX_TOOL_ARCH_INCLUDE) + +include ../../../lib.mk + +endif diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/config b/tools/testing/selftests/drivers/s390x/uvdevice/config new file mode 100644 index 000000000000..f28a04b99eff --- /dev/null +++ b/tools/testing/selftests/drivers/s390x/uvdevice/config @@ -0,0 +1 @@ +CONFIG_S390_UV_UAPI=y diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c new file mode 100644 index 000000000000..ea0cdc37b44f --- /dev/null +++ b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * selftest for the Ultravisor UAPI device + * + * Copyright IBM Corp. 2022 + * Author(s): Steffen Eiden <seiden@linux.ibm.com> + */ + +#include <stdint.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include <asm/uvdevice.h> + +#include "../../../kselftest_harness.h" + +#define UV_PATH "/dev/uv" +#define BUFFER_SIZE 0x200 +FIXTURE(uvio_fixture) { + int uv_fd; + struct uvio_ioctl_cb uvio_ioctl; + uint8_t buffer[BUFFER_SIZE]; + __u64 fault_page; +}; + +FIXTURE_VARIANT(uvio_fixture) { + unsigned long ioctl_cmd; + uint32_t arg_size; +}; + +FIXTURE_VARIANT_ADD(uvio_fixture, att) { + .ioctl_cmd = UVIO_IOCTL_ATT, + .arg_size = sizeof(struct uvio_attest), +}; + +FIXTURE_SETUP(uvio_fixture) +{ + self->uv_fd = open(UV_PATH, O_ACCMODE); + + self->uvio_ioctl.argument_addr = (__u64)self->buffer; + self->uvio_ioctl.argument_len = variant->arg_size; + self->fault_page = + (__u64)mmap(NULL, (size_t)getpagesize(), PROT_NONE, MAP_ANONYMOUS, -1, 0); +} + +FIXTURE_TEARDOWN(uvio_fixture) +{ + if (self->uv_fd) + close(self->uv_fd); + munmap((void *)self->fault_page, (size_t)getpagesize()); +} + +TEST_F(uvio_fixture, fault_ioctl_arg) +{ + int rc, errno_cache; + + rc = ioctl(self->uv_fd, variant->ioctl_cmd, NULL); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); + + rc = ioctl(self->uv_fd, variant->ioctl_cmd, self->fault_page); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); +} + +TEST_F(uvio_fixture, fault_uvio_arg) +{ + int rc, errno_cache; + + self->uvio_ioctl.argument_addr = 0; + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); + + self->uvio_ioctl.argument_addr = self->fault_page; + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); +} + +/* + * Test to verify that IOCTLs with invalid values in the ioctl_control block + * are rejected. + */ +TEST_F(uvio_fixture, inval_ioctl_cb) +{ + int rc, errno_cache; + + self->uvio_ioctl.argument_len = 0; + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + + self->uvio_ioctl.argument_len = (uint32_t)-1; + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + self->uvio_ioctl.argument_len = variant->arg_size; + + self->uvio_ioctl.flags = (uint32_t)-1; + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + self->uvio_ioctl.flags = 0; + + memset(self->uvio_ioctl.reserved14, 0xff, sizeof(self->uvio_ioctl.reserved14)); + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + + memset(&self->uvio_ioctl, 0x11, sizeof(self->uvio_ioctl)); + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + ASSERT_EQ(rc, -1); +} + +TEST_F(uvio_fixture, inval_ioctl_cmd) +{ + int rc, errno_cache; + uint8_t nr = _IOC_NR(variant->ioctl_cmd); + unsigned long cmds[] = { + _IOWR('a', nr, struct uvio_ioctl_cb), + _IOWR(UVIO_TYPE_UVC, nr, int), + _IO(UVIO_TYPE_UVC, nr), + _IOR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb), + _IOW(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb), + }; + + for (size_t i = 0; i < ARRAY_SIZE(cmds); i++) { + rc = ioctl(self->uv_fd, cmds[i], &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, ENOTTY); + } +} + +struct test_attest_buffer { + uint8_t arcb[0x180]; + uint8_t meas[64]; + uint8_t add[32]; +}; + +FIXTURE(attest_fixture) { + int uv_fd; + struct uvio_ioctl_cb uvio_ioctl; + struct uvio_attest uvio_attest; + struct test_attest_buffer attest_buffer; + __u64 fault_page; +}; + +FIXTURE_SETUP(attest_fixture) +{ + self->uv_fd = open(UV_PATH, O_ACCMODE); + + self->uvio_ioctl.argument_addr = (__u64)&self->uvio_attest; + self->uvio_ioctl.argument_len = sizeof(self->uvio_attest); + + self->uvio_attest.arcb_addr = (__u64)&self->attest_buffer.arcb; + self->uvio_attest.arcb_len = sizeof(self->attest_buffer.arcb); + + self->uvio_attest.meas_addr = (__u64)&self->attest_buffer.meas; + self->uvio_attest.meas_len = sizeof(self->attest_buffer.meas); + + self->uvio_attest.add_data_addr = (__u64)&self->attest_buffer.add; + self->uvio_attest.add_data_len = sizeof(self->attest_buffer.add); + self->fault_page = + (__u64)mmap(NULL, (size_t)getpagesize(), PROT_NONE, MAP_ANONYMOUS, -1, 0); +} + +FIXTURE_TEARDOWN(attest_fixture) +{ + if (self->uv_fd) + close(self->uv_fd); + munmap((void *)self->fault_page, (size_t)getpagesize()); +} + +static void att_inval_sizes_test(uint32_t *size, uint32_t max_size, bool test_zero, + struct __test_metadata *_metadata, + FIXTURE_DATA(attest_fixture) *self) +{ + int rc, errno_cache; + uint32_t tmp = *size; + + if (test_zero) { + *size = 0; + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + } + *size = max_size + 1; + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + *size = tmp; +} + +/* + * Test to verify that attestation IOCTLs with invalid values in the UVIO + * attestation control block are rejected. + */ +TEST_F(attest_fixture, att_inval_request) +{ + int rc, errno_cache; + + att_inval_sizes_test(&self->uvio_attest.add_data_len, UVIO_ATT_ADDITIONAL_MAX_LEN, + false, _metadata, self); + att_inval_sizes_test(&self->uvio_attest.meas_len, UVIO_ATT_MEASUREMENT_MAX_LEN, + true, _metadata, self); + att_inval_sizes_test(&self->uvio_attest.arcb_len, UVIO_ATT_ARCB_MAX_LEN, + true, _metadata, self); + + self->uvio_attest.reserved136 = (uint16_t)-1; + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + + memset(&self->uvio_attest, 0x11, sizeof(self->uvio_attest)); + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + ASSERT_EQ(rc, -1); +} + +static void att_inval_addr_test(__u64 *addr, struct __test_metadata *_metadata, + FIXTURE_DATA(attest_fixture) *self) +{ + int rc, errno_cache; + __u64 tmp = *addr; + + *addr = 0; + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); + *addr = self->fault_page; + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); + *addr = tmp; +} + +TEST_F(attest_fixture, att_inval_addr) +{ + att_inval_addr_test(&self->uvio_attest.arcb_addr, _metadata, self); + att_inval_addr_test(&self->uvio_attest.add_data_addr, _metadata, self); + att_inval_addr_test(&self->uvio_attest.meas_addr, _metadata, self); +} + +static void __attribute__((constructor)) __constructor_order_last(void) +{ + if (!__constructor_order) + __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; +} + +int main(int argc, char **argv) +{ + int fd = open(UV_PATH, O_ACCMODE); + + if (fd < 0) + ksft_exit_skip("No uv-device or cannot access " UV_PATH "\n" + "Enable CONFIG_S390_UV_UAPI and check the access rights on " + UV_PATH ".\n"); + close(fd); + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index 312d23780096..be754f5bcf79 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -25,6 +25,8 @@ if [ $L -ne 256 ]; then exit_fail fi +cat kprobe_events >> $testlog + echo 1 > events/kprobes/enable echo 0 > events/kprobes/enable echo > kprobe_events diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 0b0e4402bba6..4509a3a7eeae 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -2,7 +2,8 @@ /aarch64/arch_timer /aarch64/debug-exceptions /aarch64/get-reg-list -/aarch64/psci_cpu_on_test +/aarch64/hypercalls +/aarch64/psci_test /aarch64/vcpu_width_config /aarch64/vgic_init /aarch64/vgic_irq @@ -16,6 +17,7 @@ /x86_64/debug_regs /x86_64/evmcs_test /x86_64/emulator_error_test +/x86_64/fix_hypercall_test /x86_64/get_msr_index_features /x86_64/kvm_clock_test /x86_64/kvm_pv_test @@ -53,7 +55,7 @@ /x86_64/xen_shinfo_test /x86_64/xen_vmcall_test /x86_64/xss_msr_test -/x86_64/vmx_pmu_msrs_test +/x86_64/vmx_pmu_caps_test /access_tracking_perf_test /demand_paging_test /dirty_log_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 681b173aa87c..81470a99ed1c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -48,6 +48,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test +TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features @@ -65,6 +66,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test +TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test @@ -81,7 +83,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests @@ -105,7 +107,8 @@ TEST_GEN_PROGS_x86_64 += system_counter_offset_test TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list -TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test +TEST_GEN_PROGS_aarch64 += aarch64/hypercalls +TEST_GEN_PROGS_aarch64 += aarch64/psci_test TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 0b571f3fe64c..d3a7dbfcbb3d 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -294,6 +294,11 @@ static void print_reg(struct vcpu_config *c, __u64 id) "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id); printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); break; + case KVM_REG_ARM_FW_FEAT_BMAP: + TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff), + "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", config_name(c), id); + printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff); + break; case KVM_REG_ARM64_SVE: if (has_cap(c, KVM_CAP_ARM_SVE)) printf("\t%s,\n", sve_id_to_str(c, id)); @@ -692,6 +697,9 @@ static __u64 base_regs[] = { KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */ KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */ KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */ + KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */ + KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */ + KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */ ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ ARM64_SYS_REG(3, 3, 14, 0, 2), diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c new file mode 100644 index 000000000000..41e0210b7a5e --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* hypercalls: Check the ARM64's psuedo-firmware bitmap register interface. + * + * The test validates the basic hypercall functionalities that are exposed + * via the psuedo-firmware bitmap register. This includes the registers' + * read/write behavior before and after the VM has started, and if the + * hypercalls are properly masked or unmasked to the guest when disabled or + * enabled from the KVM userspace, respectively. + */ + +#include <errno.h> +#include <linux/arm-smccc.h> +#include <asm/kvm.h> +#include <kvm_util.h> + +#include "processor.h" + +#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0)) + +/* Last valid bits of the bitmapped firmware registers */ +#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0 +#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0 +#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1 + +struct kvm_fw_reg_info { + uint64_t reg; /* Register definition */ + uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */ +}; + +#define FW_REG_INFO(r) \ + { \ + .reg = r, \ + .max_feat_bit = r##_BIT_MAX, \ + } + +static const struct kvm_fw_reg_info fw_reg_info[] = { + FW_REG_INFO(KVM_REG_ARM_STD_BMAP), + FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP), + FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP), +}; + +enum test_stage { + TEST_STAGE_REG_IFACE, + TEST_STAGE_HVC_IFACE_FEAT_DISABLED, + TEST_STAGE_HVC_IFACE_FEAT_ENABLED, + TEST_STAGE_HVC_IFACE_FALSE_INFO, + TEST_STAGE_END, +}; + +static int stage = TEST_STAGE_REG_IFACE; + +struct test_hvc_info { + uint32_t func_id; + uint64_t arg1; +}; + +#define TEST_HVC_INFO(f, a1) \ + { \ + .func_id = f, \ + .arg1 = a1, \ + } + +static const struct test_hvc_info hvc_info[] = { + /* KVM_REG_ARM_STD_BMAP */ + TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0), + TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64), + TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0), + TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0), + TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0), + + /* KVM_REG_ARM_STD_HYP_BMAP */ + TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES), + TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST), + TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0), + + /* KVM_REG_ARM_VENDOR_HYP_BMAP */ + TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID, + ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID), + TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0), + TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER), +}; + +/* Feed false hypercall info to test the KVM behavior */ +static const struct test_hvc_info false_hvc_info[] = { + /* Feature support check against a different family of hypercalls */ + TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID), + TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64), + TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64), +}; + +static void guest_test_hvc(const struct test_hvc_info *hc_info) +{ + unsigned int i; + struct arm_smccc_res res; + unsigned int hvc_info_arr_sz; + + hvc_info_arr_sz = + hc_info == hvc_info ? ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info); + + for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) { + memset(&res, 0, sizeof(res)); + smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); + + switch (stage) { + case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: + case TEST_STAGE_HVC_IFACE_FALSE_INFO: + GUEST_ASSERT_3(res.a0 == SMCCC_RET_NOT_SUPPORTED, + res.a0, hc_info->func_id, hc_info->arg1); + break; + case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: + GUEST_ASSERT_3(res.a0 != SMCCC_RET_NOT_SUPPORTED, + res.a0, hc_info->func_id, hc_info->arg1); + break; + default: + GUEST_ASSERT_1(0, stage); + } + } +} + +static void guest_code(void) +{ + while (stage != TEST_STAGE_END) { + switch (stage) { + case TEST_STAGE_REG_IFACE: + break; + case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: + case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: + guest_test_hvc(hvc_info); + break; + case TEST_STAGE_HVC_IFACE_FALSE_INFO: + guest_test_hvc(false_hvc_info); + break; + default: + GUEST_ASSERT_1(0, stage); + } + + GUEST_SYNC(stage); + } + + GUEST_DONE(); +} + +static int set_fw_reg(struct kvm_vm *vm, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg = { + .id = id, + .addr = (uint64_t)&val, + }; + + return _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); +} + +static void get_fw_reg(struct kvm_vm *vm, uint64_t id, uint64_t *addr) +{ + struct kvm_one_reg reg = { + .id = id, + .addr = (uint64_t)addr, + }; + + vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, ®); +} + +struct st_time { + uint32_t rev; + uint32_t attr; + uint64_t st_time; +}; + +#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63) +#define ST_GPA_BASE (1 << 30) + +static void steal_time_init(struct kvm_vm *vm) +{ + uint64_t st_ipa = (ulong)ST_GPA_BASE; + unsigned int gpages; + struct kvm_device_attr dev = { + .group = KVM_ARM_VCPU_PVTIME_CTRL, + .attr = KVM_ARM_VCPU_PVTIME_IPA, + .addr = (uint64_t)&st_ipa, + }; + + gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); + + vcpu_ioctl(vm, 0, KVM_SET_DEVICE_ATTR, &dev); +} + +static void test_fw_regs_before_vm_start(struct kvm_vm *vm) +{ + uint64_t val; + unsigned int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) { + const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; + + /* First 'read' should be an upper limit of the features supported */ + get_fw_reg(vm, reg_info->reg, &val); + TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), + "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx\n", + reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); + + /* Test a 'write' by disabling all the features of the register map */ + ret = set_fw_reg(vm, reg_info->reg, 0); + TEST_ASSERT(ret == 0, + "Failed to clear all the features of reg: 0x%lx; ret: %d\n", + reg_info->reg, errno); + + get_fw_reg(vm, reg_info->reg, &val); + TEST_ASSERT(val == 0, + "Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg); + + /* + * Test enabling a feature that's not supported. + * Avoid this check if all the bits are occupied. + */ + if (reg_info->max_feat_bit < 63) { + ret = set_fw_reg(vm, reg_info->reg, BIT(reg_info->max_feat_bit + 1)); + TEST_ASSERT(ret != 0 && errno == EINVAL, + "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx\n", + errno, reg_info->reg); + } + } +} + +static void test_fw_regs_after_vm_start(struct kvm_vm *vm) +{ + uint64_t val; + unsigned int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) { + const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; + + /* + * Before starting the VM, the test clears all the bits. + * Check if that's still the case. + */ + get_fw_reg(vm, reg_info->reg, &val); + TEST_ASSERT(val == 0, + "Expected all the features to be cleared for reg: 0x%lx\n", + reg_info->reg); + + /* + * Since the VM has run at least once, KVM shouldn't allow modification of + * the registers and should return EBUSY. Set the registers and check for + * the expected errno. + */ + ret = set_fw_reg(vm, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit)); + TEST_ASSERT(ret != 0 && errno == EBUSY, + "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx\n", + errno, reg_info->reg); + } +} + +static struct kvm_vm *test_vm_create(void) +{ + struct kvm_vm *vm; + + vm = vm_create_default(0, 0, guest_code); + + ucall_init(vm, NULL); + steal_time_init(vm); + + return vm; +} + +static struct kvm_vm *test_guest_stage(struct kvm_vm *vm) +{ + struct kvm_vm *ret_vm = vm; + + pr_debug("Stage: %d\n", stage); + + switch (stage) { + case TEST_STAGE_REG_IFACE: + test_fw_regs_after_vm_start(vm); + break; + case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: + /* Start a new VM so that all the features are now enabled by default */ + kvm_vm_free(vm); + ret_vm = test_vm_create(); + break; + case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: + case TEST_STAGE_HVC_IFACE_FALSE_INFO: + break; + default: + TEST_FAIL("Unknown test stage: %d\n", stage); + } + + stage++; + sync_global_to_guest(vm, stage); + + return ret_vm; +} + +static void test_run(void) +{ + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + vm = test_vm_create(); + + test_fw_regs_before_vm_start(vm); + + while (!guest_done) { + vcpu_run(vm, 0); + + switch (get_ucall(vm, 0, &uc)) { + case UCALL_SYNC: + vm = test_guest_stage(vm); + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld\n\tvalues: 0x%lx, 0x%lx; 0x%lx, stage: %u", + (const char *)uc.args[0], __FILE__, uc.args[1], + uc.args[2], uc.args[3], uc.args[4], stage); + break; + default: + TEST_FAIL("Unexpected guest exit\n"); + } + } + + kvm_vm_free(vm); +} + +int main(void) +{ + setbuf(stdout, NULL); + + test_run(); + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c deleted file mode 100644 index 4c5f6814030f..000000000000 --- a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c +++ /dev/null @@ -1,121 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the - * CPU_ON PSCI call matches what the caller requested. - * - * Copyright (c) 2021 Google LLC. - * - * This is a regression test for a race between KVM servicing the PSCI call and - * userspace reading the vCPUs registers. - */ - -#define _GNU_SOURCE - -#include <linux/psci.h> - -#include "kvm_util.h" -#include "processor.h" -#include "test_util.h" - -#define VCPU_ID_SOURCE 0 -#define VCPU_ID_TARGET 1 - -#define CPU_ON_ENTRY_ADDR 0xfeedf00dul -#define CPU_ON_CONTEXT_ID 0xdeadc0deul - -static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, - uint64_t context_id) -{ - register uint64_t x0 asm("x0") = PSCI_0_2_FN64_CPU_ON; - register uint64_t x1 asm("x1") = target_cpu; - register uint64_t x2 asm("x2") = entry_addr; - register uint64_t x3 asm("x3") = context_id; - - asm("hvc #0" - : "=r"(x0) - : "r"(x0), "r"(x1), "r"(x2), "r"(x3) - : "memory"); - - return x0; -} - -static uint64_t psci_affinity_info(uint64_t target_affinity, - uint64_t lowest_affinity_level) -{ - register uint64_t x0 asm("x0") = PSCI_0_2_FN64_AFFINITY_INFO; - register uint64_t x1 asm("x1") = target_affinity; - register uint64_t x2 asm("x2") = lowest_affinity_level; - - asm("hvc #0" - : "=r"(x0) - : "r"(x0), "r"(x1), "r"(x2) - : "memory"); - - return x0; -} - -static void guest_main(uint64_t target_cpu) -{ - GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID)); - uint64_t target_state; - - do { - target_state = psci_affinity_info(target_cpu, 0); - - GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) || - (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF)); - } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON); - - GUEST_DONE(); -} - -int main(void) -{ - uint64_t target_mpidr, obs_pc, obs_x0; - struct kvm_vcpu_init init; - struct kvm_vm *vm; - struct ucall uc; - - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name); - ucall_init(vm, NULL); - - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); - init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); - - aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_main); - - /* - * make sure the target is already off when executing the test. - */ - init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF); - aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main); - - get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr); - vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK); - vcpu_run(vm, VCPU_ID_SOURCE); - - switch (get_ucall(vm, VCPU_ID_SOURCE, &uc)) { - case UCALL_DONE: - break; - case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__, - uc.args[1]); - break; - default: - TEST_FAIL("Unhandled ucall: %lu", uc.cmd); - } - - get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.pc), &obs_pc); - get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.regs[0]), &obs_x0); - - TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, - "unexpected target cpu pc: %lx (expected: %lx)", - obs_pc, CPU_ON_ENTRY_ADDR); - TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID, - "unexpected target context id: %lx (expected: %lx)", - obs_x0, CPU_ON_CONTEXT_ID); - - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c new file mode 100644 index 000000000000..88541de21c41 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the + * CPU_ON PSCI call matches what the caller requested. + * + * Copyright (c) 2021 Google LLC. + * + * This is a regression test for a race between KVM servicing the PSCI call and + * userspace reading the vCPUs registers. + */ + +#define _GNU_SOURCE + +#include <linux/psci.h> + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +#define VCPU_ID_SOURCE 0 +#define VCPU_ID_TARGET 1 + +#define CPU_ON_ENTRY_ADDR 0xfeedf00dul +#define CPU_ON_CONTEXT_ID 0xdeadc0deul + +static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, + uint64_t context_id) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, + 0, 0, 0, 0, &res); + + return res.a0; +} + +static uint64_t psci_affinity_info(uint64_t target_affinity, + uint64_t lowest_affinity_level) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, + 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, + 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +static uint64_t psci_features(uint32_t func_id) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +static void vcpu_power_off(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_mp_state mp_state = { + .mp_state = KVM_MP_STATE_STOPPED, + }; + + vcpu_set_mp_state(vm, vcpuid, &mp_state); +} + +static struct kvm_vm *setup_vm(void *guest_code) +{ + struct kvm_vcpu_init init; + struct kvm_vm *vm; + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name); + ucall_init(vm, NULL); + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); + + aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_code); + aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_code); + + return vm; +} + +static void enter_guest(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct ucall uc; + + vcpu_run(vm, vcpuid); + if (get_ucall(vm, vcpuid, &uc) == UCALL_ABORT) + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__, + uc.args[1]); +} + +static void assert_vcpu_reset(struct kvm_vm *vm, uint32_t vcpuid) +{ + uint64_t obs_pc, obs_x0; + + get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &obs_pc); + get_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[0]), &obs_x0); + + TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, + "unexpected target cpu pc: %lx (expected: %lx)", + obs_pc, CPU_ON_ENTRY_ADDR); + TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID, + "unexpected target context id: %lx (expected: %lx)", + obs_x0, CPU_ON_CONTEXT_ID); +} + +static void guest_test_cpu_on(uint64_t target_cpu) +{ + uint64_t target_state; + + GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID)); + + do { + target_state = psci_affinity_info(target_cpu, 0); + + GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) || + (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF)); + } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON); + + GUEST_DONE(); +} + +static void host_test_cpu_on(void) +{ + uint64_t target_mpidr; + struct kvm_vm *vm; + struct ucall uc; + + vm = setup_vm(guest_test_cpu_on); + + /* + * make sure the target is already off when executing the test. + */ + vcpu_power_off(vm, VCPU_ID_TARGET); + + get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr); + vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK); + enter_guest(vm, VCPU_ID_SOURCE); + + if (get_ucall(vm, VCPU_ID_SOURCE, &uc) != UCALL_DONE) + TEST_FAIL("Unhandled ucall: %lu", uc.cmd); + + assert_vcpu_reset(vm, VCPU_ID_TARGET); + kvm_vm_free(vm); +} + +static void enable_system_suspend(struct kvm_vm *vm) +{ + struct kvm_enable_cap cap = { + .cap = KVM_CAP_ARM_SYSTEM_SUSPEND, + }; + + vm_enable_cap(vm, &cap); +} + +static void guest_test_system_suspend(void) +{ + uint64_t ret; + + /* assert that SYSTEM_SUSPEND is discoverable */ + GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND)); + GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND)); + + ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID); + GUEST_SYNC(ret); +} + +static void host_test_system_suspend(void) +{ + struct kvm_run *run; + struct kvm_vm *vm; + + vm = setup_vm(guest_test_system_suspend); + enable_system_suspend(vm); + + vcpu_power_off(vm, VCPU_ID_TARGET); + run = vcpu_state(vm, VCPU_ID_SOURCE); + + enter_guest(vm, VCPU_ID_SOURCE); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_SYSTEM_EVENT, + "Unhandled exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND, + "Unhandled system event: %u (expected: %u)", + run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND); + + kvm_vm_free(vm); +} + +int main(void) +{ + if (!kvm_check_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)) { + print_skip("KVM_CAP_ARM_SYSTEM_SUSPEND not supported"); + exit(KSFT_SKIP); + } + + host_test_cpu_on(); + host_test_system_suspend(); + return 0; +} diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 8f9f46979a00..59ece9d4e0d1 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -185,4 +185,26 @@ static inline void local_irq_disable(void) asm volatile("msr daifset, #3" : : : "memory"); } +/** + * struct arm_smccc_res - Result from SMC/HVC call + * @a0-a3 result values from registers 0 to 3 + */ +struct arm_smccc_res { + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; +}; + +/** + * smccc_hvc - Invoke a SMCCC function using the hvc conduit + * @function_id: the SMCCC function to be called + * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7 + * @res: pointer to write the return values from registers x0-x3 + * + */ +void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, + uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, struct arm_smccc_res *res); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index eca5c622efd2..4fcfd1c0389d 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -119,10 +119,12 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, #define SATP_ASID_SHIFT 44 #define SATP_ASID_MASK _AC(0xFFFF, UL) -#define SBI_EXT_EXPERIMENTAL_START 0x08000000 -#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF +#define SBI_EXT_EXPERIMENTAL_START 0x08000000 +#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF -#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END +#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END +#define KVM_RISCV_SELFTESTS_SBI_UCALL 0 +#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1 struct sbiret { long error; diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 9343d82519b4..6a041289fa80 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -500,3 +500,28 @@ void __attribute__((constructor)) init_guest_modes(void) { guest_modes_append_default(); } + +void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, + uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, struct arm_smccc_res *res) +{ + asm volatile("mov w0, %w[function_id]\n" + "mov x1, %[arg0]\n" + "mov x2, %[arg1]\n" + "mov x3, %[arg2]\n" + "mov x4, %[arg3]\n" + "mov x5, %[arg4]\n" + "mov x6, %[arg5]\n" + "mov x7, %[arg6]\n" + "hvc #0\n" + "mov %[res0], x0\n" + "mov %[res1], x1\n" + "mov %[res2], x2\n" + "mov %[res3], x3\n" + : [res0] "=r"(res->a0), [res1] "=r"(res->a1), + [res2] "=r"(res->a2), [res3] "=r"(res->a3) + : [function_id] "r"(function_id), [arg0] "r"(arg0), + [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), + [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) + : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7"); +} diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 3961487a4870..abc0ae5a4fe1 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -268,10 +268,11 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); } -static void __aligned(16) guest_hang(void) +static void __aligned(16) guest_unexp_trap(void) { - while (1) - ; + sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, + KVM_RISCV_SELFTESTS_SBI_UNEXP, + 0, 0, 0, 0, 0, 0); } void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) @@ -310,7 +311,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) /* Setup default exception vector of guest */ set_reg(vm, vcpuid, RISCV_CSR_REG(stvec), - (unsigned long)guest_hang); + (unsigned long)guest_unexp_trap); } void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) @@ -350,7 +351,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) case 7: id = RISCV_CORE_REG(regs.a7); break; - }; + } set_reg(vm, vcpuid, id, va_arg(ap, uint64_t)); } diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 9e42d8248fa6..8550f424d093 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -60,8 +60,9 @@ void ucall(uint64_t cmd, int nargs, ...) uc.args[i] = va_arg(va, uint64_t); va_end(va); - sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 0, (vm_vaddr_t)&uc, - 0, 0, 0, 0, 0); + sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, + KVM_RISCV_SELFTESTS_SBI_UCALL, + (vm_vaddr_t)&uc, 0, 0, 0, 0, 0); } uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) @@ -73,14 +74,24 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_RISCV_SBI && - run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT && - run->riscv_sbi.function_id == 0) { - memcpy(&ucall, addr_gva2hva(vm, run->riscv_sbi.args[0]), - sizeof(ucall)); - - vcpu_run_complete_io(vm, vcpu_id); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) { + switch (run->riscv_sbi.function_id) { + case KVM_RISCV_SELFTESTS_SBI_UCALL: + memcpy(&ucall, addr_gva2hva(vm, + run->riscv_sbi.args[0]), sizeof(ucall)); + + vcpu_run_complete_io(vm, vcpu_id); + if (uc) + memcpy(uc, &ucall, sizeof(ucall)); + + break; + case KVM_RISCV_SELFTESTS_SBI_UNEXP: + vcpu_dump(stderr, vm, vcpu_id, 2); + TEST_ASSERT(0, "Unexpected trap taken by guest"); + break; + default: + break; + } } return ucall.cmd; diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index b04c2c1b3c30..49f26f544127 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -10,6 +10,8 @@ #include <string.h> #include <sys/ioctl.h> +#include <linux/bits.h> + #include "test_util.h" #include "kvm_util.h" @@ -194,6 +196,7 @@ static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo) #define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o) #define AR(a) ._ar = 1, .ar = (a) #define KEY(a) .f_key = 1, .key = (a) +#define INJECT .f_inject = 1 #define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) @@ -430,9 +433,18 @@ static void test_copy_key_fetch_prot(void) TEST_ASSERT(rv == 4, "Should result in protection exception"); \ }) +static void guest_error_key(void) +{ + GUEST_SYNC(STAGE_INITED); + set_storage_key_range(mem1, PAGE_SIZE, 0x18); + set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98); + GUEST_SYNC(STAGE_SKEYS_SET); + GUEST_SYNC(STAGE_IDLED); +} + static void test_errors_key(void) { - struct test_default t = test_default_init(guest_copy_key_fetch_prot); + struct test_default t = test_default_init(guest_error_key); HOST_SYNC(t.vcpu, STAGE_INITED); HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); @@ -446,6 +458,37 @@ static void test_errors_key(void) kvm_vm_free(t.kvm_vm); } +static void test_termination(void) +{ + struct test_default t = test_default_init(guest_error_key); + uint64_t prefix; + uint64_t teid; + uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61); + uint64_t psw[2]; + + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vcpu, mismatching keys after first page */ + ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT); + /* + * The memop injected a program exception and the test needs to check the + * Translation-Exception Identification (TEID). It is necessary to run + * the guest in order to be able to read the TEID from guest memory. + * Set the guest program new PSW, so the guest state is not clobbered. + */ + prefix = t.run->s.regs.prefix; + psw[0] = t.run->psw_mask; + psw[1] = t.run->psw_addr; + MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464)); + HOST_SYNC(t.vcpu, STAGE_IDLED); + MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168)); + /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */ + ASSERT_EQ(teid & teid_mask, 0); + + kvm_vm_free(t.kvm_vm); +} + static void test_errors_key_storage_prot_override(void) { struct test_default t = test_default_init(guest_copy_key_fetch_prot); @@ -668,6 +711,7 @@ int main(int argc, char *argv[]) test_copy_key_fetch_prot(); test_copy_key_fetch_prot_override(); test_errors_key(); + test_termination(); test_errors_key_storage_prot_override(); test_errors_key_fetch_prot_override_not_enabled(); test_errors_key_fetch_prot_override_enabled(); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index 62f2eb9ee3d5..8c4e811bd586 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -118,17 +118,10 @@ struct st_time { static int64_t smccc(uint32_t func, uint64_t arg) { - unsigned long ret; + struct arm_smccc_res res; - asm volatile( - "mov w0, %w1\n" - "mov x1, %2\n" - "hvc #0\n" - "mov %0, x0\n" - : "=r" (ret) : "r" (func), "r" (arg) : - "x0", "x1", "x2", "x3"); - - return ret; + smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res); + return res.a0; } static void check_status(struct st_time *st) diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c new file mode 100644 index 000000000000..1f5c32146f3d --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for KVM paravirtual feature disablement + */ +#include <asm/kvm_para.h> +#include <linux/kvm_para.h> +#include <linux/stringify.h> +#include <stdint.h> + +#include "apic.h" +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 0 + +static bool ud_expected; + +static void guest_ud_handler(struct ex_regs *regs) +{ + GUEST_ASSERT(ud_expected); + GUEST_DONE(); +} + +extern unsigned char svm_hypercall_insn; +static uint64_t svm_do_sched_yield(uint8_t apic_id) +{ + uint64_t ret; + + asm volatile("mov %1, %%rax\n\t" + "mov %2, %%rbx\n\t" + "svm_hypercall_insn:\n\t" + "vmmcall\n\t" + "mov %%rax, %0\n\t" + : "=r"(ret) + : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id) + : "rax", "rbx", "memory"); + + return ret; +} + +extern unsigned char vmx_hypercall_insn; +static uint64_t vmx_do_sched_yield(uint8_t apic_id) +{ + uint64_t ret; + + asm volatile("mov %1, %%rax\n\t" + "mov %2, %%rbx\n\t" + "vmx_hypercall_insn:\n\t" + "vmcall\n\t" + "mov %%rax, %0\n\t" + : "=r"(ret) + : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id) + : "rax", "rbx", "memory"); + + return ret; +} + +static void assert_hypercall_insn(unsigned char *exp_insn, unsigned char *obs_insn) +{ + uint32_t exp = 0, obs = 0; + + memcpy(&exp, exp_insn, sizeof(exp)); + memcpy(&obs, obs_insn, sizeof(obs)); + + GUEST_ASSERT_EQ(exp, obs); +} + +static void guest_main(void) +{ + unsigned char *native_hypercall_insn, *hypercall_insn; + uint8_t apic_id; + + apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); + + if (is_intel_cpu()) { + native_hypercall_insn = &vmx_hypercall_insn; + hypercall_insn = &svm_hypercall_insn; + svm_do_sched_yield(apic_id); + } else if (is_amd_cpu()) { + native_hypercall_insn = &svm_hypercall_insn; + hypercall_insn = &vmx_hypercall_insn; + vmx_do_sched_yield(apic_id); + } else { + GUEST_ASSERT(0); + /* unreachable */ + return; + } + + GUEST_ASSERT(!ud_expected); + assert_hypercall_insn(native_hypercall_insn, hypercall_insn); + GUEST_DONE(); +} + +static void setup_ud_vector(struct kvm_vm *vm) +{ + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); +} + +static void enter_guest(struct kvm_vm *vm) +{ + struct kvm_run *run; + struct ucall uc; + + run = vcpu_state(vm, VCPU_ID); + + vcpu_run(vm, VCPU_ID); + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]); + break; + case UCALL_DONE: + return; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__, uc.args[1]); + default: + TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)", + uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason)); + } +} + +static void test_fix_hypercall(void) +{ + struct kvm_vm *vm; + + vm = vm_create_default(VCPU_ID, 0, guest_main); + setup_ud_vector(vm); + + ud_expected = false; + sync_global_to_guest(vm, ud_expected); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + enter_guest(vm); +} + +static void test_fix_hypercall_disabled(void) +{ + struct kvm_enable_cap cap = {0}; + struct kvm_vm *vm; + + vm = vm_create_default(VCPU_ID, 0, guest_main); + setup_ud_vector(vm); + + cap.cap = KVM_CAP_DISABLE_QUIRKS2; + cap.args[0] = KVM_X86_QUIRK_FIX_HYPERCALL_INSN; + vm_enable_cap(vm, &cap); + + ud_expected = true; + sync_global_to_guest(vm, ud_expected); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + enter_guest(vm); +} + +int main(void) +{ + if (!(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN)) { + print_skip("KVM_X86_QUIRK_HYPERCALL_INSN not supported"); + exit(KSFT_SKIP); + } + + test_fix_hypercall(); + test_fix_hypercall_disabled(); +} diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c new file mode 100644 index 000000000000..f0083d8cfe98 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_vmcall_test + * + * Copyright © 2021 Amazon.com, Inc. or its affiliates. + * + * Xen shared_info / pvclock testing + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#include <stdint.h> +#include <time.h> +#include <sched.h> +#include <signal.h> +#include <pthread.h> + +#define NR_TEST_VCPUS 20 + +static struct kvm_vm *vm; +pthread_spinlock_t create_lock; + +#define TEST_TSC_KHZ 2345678UL +#define TEST_TSC_OFFSET 200000000 + +uint64_t tsc_sync; +static void guest_code(void) +{ + uint64_t start_tsc, local_tsc, tmp; + + start_tsc = rdtsc(); + do { + tmp = READ_ONCE(tsc_sync); + local_tsc = rdtsc(); + WRITE_ONCE(tsc_sync, local_tsc); + if (unlikely(local_tsc < tmp)) + GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0); + + } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ); + + GUEST_DONE(); +} + + +static void *run_vcpu(void *_cpu_nr) +{ + unsigned long cpu = (unsigned long)_cpu_nr; + unsigned long failures = 0; + static bool first_cpu_done; + + /* The kernel is fine, but vm_vcpu_add_default() needs locking */ + pthread_spin_lock(&create_lock); + + vm_vcpu_add_default(vm, cpu, guest_code); + + if (!first_cpu_done) { + first_cpu_done = true; + vcpu_set_msr(vm, cpu, MSR_IA32_TSC, TEST_TSC_OFFSET); + } + + pthread_spin_unlock(&create_lock); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, cpu); + struct ucall uc; + + vcpu_run(vm, cpu); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, cpu, &uc)) { + case UCALL_DONE: + goto out; + + case UCALL_SYNC: + printf("Guest %ld sync %lx %lx %ld\n", cpu, uc.args[2], uc.args[3], uc.args[2] - uc.args[3]); + failures++; + break; + + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + out: + return (void *)failures; +} + +int main(int argc, char *argv[]) +{ + if (!kvm_check_cap(KVM_CAP_VM_TSC_CONTROL)) { + print_skip("KVM_CAP_VM_TSC_CONTROL not available"); + exit(KSFT_SKIP); + } + + vm = vm_create_default_with_vcpus(0, DEFAULT_STACK_PGS * NR_TEST_VCPUS, 0, guest_code, NULL); + vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ); + + pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE); + pthread_t cpu_threads[NR_TEST_VCPUS]; + unsigned long cpu; + for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) + pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu); + + unsigned long failures = 0; + for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) { + void *this_cpu_failures; + pthread_join(cpu_threads[cpu], &this_cpu_failures); + failures += (unsigned long)this_cpu_failures; + } + + TEST_ASSERT(!failures, "TSC sync failed"); + pthread_spin_destroy(&create_lock); + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 2454a1f2ca0c..97b7fd4a9a3d 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -1,15 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 /* - * VMX-pmu related msrs test + * Test for VMX-pmu perf capability msr * * Copyright (C) 2021 Intel Corporation * - * Test to check the effect of various CPUID settings - * on the MSR_IA32_PERF_CAPABILITIES MSR, and check that - * whatever we write with KVM_SET_MSR is _not_ modified - * in the guest and test it can be retrieved with KVM_GET_MSR. - * - * Test to check that invalid LBR formats are rejected. + * Test to check the effect of various CPUID settings on + * MSR_IA32_PERF_CAPABILITIES MSR, and check that what + * we write with KVM_SET_MSR is _not_ modified by the guest + * and check it can be retrieved with KVM_GET_MSR, also test + * the invalid LBR formats are rejected. */ #define _GNU_SOURCE /* for program_invocation_short_name */ @@ -107,8 +106,11 @@ int main(int argc, char *argv[]) ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format); /* testcase 3, check invalid LBR format is rejected */ - ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT); + /* Note, on Arch LBR capable platforms, LBR_FMT in perf capability msr is 0x3f, + * to avoid the failure, use a true invalid format 0x30 for the test. */ + ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0x30); TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); + printf("Completed perf capability tests.\n"); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index bcd370827859..7a51bb648fbb 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -38,12 +38,36 @@ #define EVTCHN_VECTOR 0x10 +#define EVTCHN_TEST1 15 +#define EVTCHN_TEST2 66 +#define EVTCHN_TIMER 13 + static struct kvm_vm *vm; #define XEN_HYPERCALL_MSR 0x40000000 #define MIN_STEAL_TIME 50000 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_event_channel_op 32 + +#define SCHEDOP_poll 3 + +#define EVTCHNOP_send 4 + +#define EVTCHNSTAT_interdomain 2 + +struct evtchn_send { + u32 port; +}; + +struct sched_poll { + u32 *ports; + unsigned int nr_ports; + u64 timeout; +}; + struct pvclock_vcpu_time_info { u32 version; u32 pad0; @@ -106,15 +130,25 @@ struct { struct kvm_irq_routing_entry entries[2]; } irq_routes; +bool guest_saw_irq; + static void evtchn_handler(struct ex_regs *regs) { struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; vi->evtchn_upcall_pending = 0; vi->evtchn_pending_sel = 0; + guest_saw_irq = true; GUEST_SYNC(0x20); } +static void guest_wait_for_irq(void) +{ + while (!guest_saw_irq) + __asm__ __volatile__ ("rep nop" : : : "memory"); + guest_saw_irq = false; +} + static void guest_code(void) { struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; @@ -127,6 +161,8 @@ static void guest_code(void) /* Trigger an interrupt injection */ GUEST_SYNC(0); + guest_wait_for_irq(); + /* Test having the host set runstates manually */ GUEST_SYNC(RUNSTATE_runnable); GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); @@ -167,14 +203,132 @@ static void guest_code(void) /* Now deliver an *unmasked* interrupt */ GUEST_SYNC(8); - while (!si->evtchn_pending[1]) - __asm__ __volatile__ ("rep nop" : : : "memory"); + guest_wait_for_irq(); /* Change memslots and deliver an interrupt */ GUEST_SYNC(9); - for (;;) - __asm__ __volatile__ ("rep nop" : : : "memory"); + guest_wait_for_irq(); + + /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */ + GUEST_SYNC(10); + + guest_wait_for_irq(); + + GUEST_SYNC(11); + + /* Our turn. Deliver event channel (to ourselves) with + * EVTCHNOP_send hypercall. */ + unsigned long rax; + struct evtchn_send s = { .port = 127 }; + __asm__ __volatile__ ("vmcall" : + "=a" (rax) : + "a" (__HYPERVISOR_event_channel_op), + "D" (EVTCHNOP_send), + "S" (&s)); + + GUEST_ASSERT(rax == 0); + + guest_wait_for_irq(); + + GUEST_SYNC(12); + + /* Deliver "outbound" event channel to an eventfd which + * happens to be one of our own irqfds. */ + s.port = 197; + __asm__ __volatile__ ("vmcall" : + "=a" (rax) : + "a" (__HYPERVISOR_event_channel_op), + "D" (EVTCHNOP_send), + "S" (&s)); + + GUEST_ASSERT(rax == 0); + + guest_wait_for_irq(); + + GUEST_SYNC(13); + + /* Set a timer 100ms in the future. */ + __asm__ __volatile__ ("vmcall" : + "=a" (rax) : + "a" (__HYPERVISOR_set_timer_op), + "D" (rs->state_entry_time + 100000000)); + GUEST_ASSERT(rax == 0); + + GUEST_SYNC(14); + + /* Now wait for the timer */ + guest_wait_for_irq(); + + GUEST_SYNC(15); + + /* The host has 'restored' the timer. Just wait for it. */ + guest_wait_for_irq(); + + GUEST_SYNC(16); + + /* Poll for an event channel port which is already set */ + u32 ports[1] = { EVTCHN_TIMER }; + struct sched_poll p = { + .ports = ports, + .nr_ports = 1, + .timeout = 0, + }; + + __asm__ __volatile__ ("vmcall" : + "=a" (rax) : + "a" (__HYPERVISOR_sched_op), + "D" (SCHEDOP_poll), + "S" (&p)); + + GUEST_ASSERT(rax == 0); + + GUEST_SYNC(17); + + /* Poll for an unset port and wait for the timeout. */ + p.timeout = 100000000; + __asm__ __volatile__ ("vmcall" : + "=a" (rax) : + "a" (__HYPERVISOR_sched_op), + "D" (SCHEDOP_poll), + "S" (&p)); + + GUEST_ASSERT(rax == 0); + + GUEST_SYNC(18); + + /* A timer will wake the masked port we're waiting on, while we poll */ + p.timeout = 0; + __asm__ __volatile__ ("vmcall" : + "=a" (rax) : + "a" (__HYPERVISOR_sched_op), + "D" (SCHEDOP_poll), + "S" (&p)); + + GUEST_ASSERT(rax == 0); + + GUEST_SYNC(19); + + /* A timer wake an *unmasked* port which should wake us with an + * actual interrupt, while we're polling on a different port. */ + ports[0]++; + p.timeout = 0; + __asm__ __volatile__ ("vmcall" : + "=a" (rax) : + "a" (__HYPERVISOR_sched_op), + "D" (SCHEDOP_poll), + "S" (&p)); + + GUEST_ASSERT(rax == 0); + + guest_wait_for_irq(); + + GUEST_SYNC(20); + + /* Timer should have fired already */ + guest_wait_for_irq(); + + GUEST_SYNC(21); } static int cmp_timespec(struct timespec *a, struct timespec *b) @@ -190,9 +344,13 @@ static int cmp_timespec(struct timespec *a, struct timespec *b) else return 0; } +struct vcpu_info *vinfo; static void handle_alrm(int sig) { + if (vinfo) + printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending); + vcpu_dump(stdout, vm, VCPU_ID, 0); TEST_FAIL("IRQ delivery timed out"); } @@ -212,6 +370,7 @@ int main(int argc, char *argv[]) bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); + bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); clock_gettime(CLOCK_REALTIME, &min_ts); @@ -232,6 +391,12 @@ int main(int argc, char *argv[]) .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, .msr = XEN_HYPERCALL_MSR, }; + + /* Let the kernel know that we *will* use it for sending all + * event channels, which lets it intercept SCHEDOP_poll */ + if (do_evtchn_tests) + hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND; + vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); struct kvm_xen_hvm_attr lm = { @@ -294,7 +459,7 @@ int main(int argc, char *argv[]) /* Unexpected, but not a KVM failure */ if (irq_fd[0] == -1 || irq_fd[1] == -1) - do_eventfd_tests = false; + do_evtchn_tests = do_eventfd_tests = false; } if (do_eventfd_tests) { @@ -302,13 +467,13 @@ int main(int argc, char *argv[]) irq_routes.entries[0].gsi = 32; irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; - irq_routes.entries[0].u.xen_evtchn.port = 15; + irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1; irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID; irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; irq_routes.entries[1].gsi = 33; irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; - irq_routes.entries[1].u.xen_evtchn.port = 66; + irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2; irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID; irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; @@ -329,7 +494,39 @@ int main(int argc, char *argv[]) sigaction(SIGALRM, &sa, NULL); } - struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); + struct kvm_xen_vcpu_attr tmr = { + .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, + .u.timer.port = EVTCHN_TIMER, + .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, + .u.timer.expires_ns = 0 + }; + + if (do_evtchn_tests) { + struct kvm_xen_hvm_attr inj = { + .type = KVM_XEN_ATTR_TYPE_EVTCHN, + .u.evtchn.send_port = 127, + .u.evtchn.type = EVTCHNSTAT_interdomain, + .u.evtchn.flags = 0, + .u.evtchn.deliver.port.port = EVTCHN_TEST1, + .u.evtchn.deliver.port.vcpu = VCPU_ID + 1, + .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); + + /* Test migration to a different vCPU */ + inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE; + inj.u.evtchn.deliver.port.vcpu = VCPU_ID; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); + + inj.u.evtchn.send_port = 197; + inj.u.evtchn.deliver.eventfd.port = 0; + inj.u.evtchn.deliver.eventfd.fd = irq_fd[1]; + inj.u.evtchn.flags = 0; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); + + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr); + } + vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); vinfo->evtchn_upcall_pending = 0; struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); @@ -422,7 +619,7 @@ int main(int argc, char *argv[]) goto done; if (verbose) printf("Testing masked event channel\n"); - shinfo->evtchn_mask[0] = 0x8000; + shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1; eventfd_write(irq_fd[0], 1UL); alarm(1); break; @@ -439,6 +636,9 @@ int main(int argc, char *argv[]) break; case 9: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[1] = 0; if (verbose) printf("Testing event channel after memslot change\n"); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -448,12 +648,153 @@ int main(int argc, char *argv[]) alarm(1); break; + case 10: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + if (!do_evtchn_tests) + goto done; + + shinfo->evtchn_pending[0] = 0; + if (verbose) + printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n"); + + struct kvm_irq_routing_xen_evtchn e; + e.port = EVTCHN_TEST2; + e.vcpu = VCPU_ID; + e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; + + vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e); + evtchn_irq_expected = true; + alarm(1); + break; + + case 11: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[1] = 0; + + if (verbose) + printf("Testing guest EVTCHNOP_send direct to evtchn\n"); + evtchn_irq_expected = true; + alarm(1); + break; + + case 12: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[0] = 0; + + if (verbose) + printf("Testing guest EVTCHNOP_send to eventfd\n"); + evtchn_irq_expected = true; + alarm(1); + break; + + case 13: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[1] = 0; + + if (verbose) + printf("Testing guest oneshot timer\n"); + break; + + case 14: + memset(&tmr, 0, sizeof(tmr)); + tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr); + TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER, + "Timer port not returned"); + TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, + "Timer priority not returned"); + TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time, + "Timer expiry not returned"); + evtchn_irq_expected = true; + alarm(1); + break; + + case 15: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + shinfo->evtchn_pending[0] = 0; + + if (verbose) + printf("Testing restored oneshot timer\n"); + + tmr.u.timer.expires_ns = rs->state_entry_time + 100000000, + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr); + evtchn_irq_expected = true; + alarm(1); + break; + + case 16: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + + if (verbose) + printf("Testing SCHEDOP_poll with already pending event\n"); + shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER; + alarm(1); + break; + + case 17: + if (verbose) + printf("Testing SCHEDOP_poll timeout\n"); + shinfo->evtchn_pending[0] = 0; + alarm(1); + break; + + case 18: + if (verbose) + printf("Testing SCHEDOP_poll wake on masked event\n"); + + tmr.u.timer.expires_ns = rs->state_entry_time + 100000000, + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr); + alarm(1); + break; + + case 19: + shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0; + if (verbose) + printf("Testing SCHEDOP_poll wake on unmasked event\n"); + + evtchn_irq_expected = true; + tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr); + + /* Read it back and check the pending time is reported correctly */ + tmr.u.timer.expires_ns = 0; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr); + TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000, + "Timer not reported pending"); + alarm(1); + break; + + case 20: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + /* Read timer and check it is no longer pending */ + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr); + TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending"); + + shinfo->evtchn_pending[0] = 0; + if (verbose) + printf("Testing timer in the past\n"); + + evtchn_irq_expected = true; + tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr); + alarm(1); + break; + + case 21: + TEST_ASSERT(!evtchn_irq_expected, + "Expected event channel IRQ but it didn't happen"); + goto done; + case 0x20: TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); evtchn_irq_expected = false; - if (shinfo->evtchn_pending[1] && - shinfo->evtchn_pending[0]) - goto done; break; } break; @@ -466,6 +807,7 @@ int main(int argc, char *argv[]) } done: + alarm(0); clock_gettime(CLOCK_REALTIME, &max_ts); /* diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index b7d188fc87c7..b9fa9cd709df 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -135,6 +135,11 @@ do { \ #define PPC_FEATURE2_ARCH_3_1 0x00040000 #endif +/* POWER10 features */ +#ifndef PPC_FEATURE2_MMA +#define PPC_FEATURE2_MMA 0x00020000 +#endif + #if defined(__powerpc64__) #define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP] #define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR] diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index fcc91c205984..3948f7c510aa 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt +TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt mma top_srcdir = ../../../../.. include ../../lib.mk @@ -17,3 +17,5 @@ $(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c $(OUTPUT)/vsx_preempt: CFLAGS += -mvsx $(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c + +$(OUTPUT)/mma: mma.c mma.S ../utils.c diff --git a/tools/testing/selftests/powerpc/math/mma.S b/tools/testing/selftests/powerpc/math/mma.S new file mode 100644 index 000000000000..8528c9849565 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/mma.S @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * + * Test basic matrix multiply assist (MMA) functionality if available. + * + * Copyright 2020, Alistair Popple, IBM Corp. + */ + .global test_mma +test_mma: + /* Load accumulator via VSX registers from image passed in r3 */ + lxvh8x 4,0,3 + lxvh8x 5,0,4 + + /* Clear and prime the accumulator (xxsetaccz) */ + .long 0x7c030162 + + /* Prime the accumulator with MMA VSX move to accumulator + * X-form (xxmtacc) (not needed due to above zeroing) */ + //.long 0x7c010162 + + /* xvi16ger2s */ + .long 0xec042958 + + /* Store result in image passed in r5 */ + stxvw4x 0,0,5 + addi 5,5,16 + stxvw4x 1,0,5 + addi 5,5,16 + stxvw4x 2,0,5 + addi 5,5,16 + stxvw4x 3,0,5 + addi 5,5,16 + + blr diff --git a/tools/testing/selftests/powerpc/math/mma.c b/tools/testing/selftests/powerpc/math/mma.c new file mode 100644 index 000000000000..3a71808c993f --- /dev/null +++ b/tools/testing/selftests/powerpc/math/mma.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test basic matrix multiply assist (MMA) functionality if available. + * + * Copyright 2020, Alistair Popple, IBM Corp. + */ +#include <stdio.h> +#include <stdint.h> + +#include "utils.h" + +extern void test_mma(uint16_t (*)[8], uint16_t (*)[8], uint32_t (*)[4*4]); + +static int mma(void) +{ + int i; + int rc = 0; + uint16_t x[] = {1, 0, 2, 0, 3, 0, 4, 0}; + uint16_t y[] = {1, 0, 2, 0, 3, 0, 4, 0}; + uint32_t z[4*4]; + uint32_t exp[4*4] = {1, 2, 3, 4, + 2, 4, 6, 8, + 3, 6, 9, 12, + 4, 8, 12, 16}; + + SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_3_1), "Need ISAv3.1"); + SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_MMA), "Need MMA"); + + test_mma(&x, &y, &z); + + for (i = 0; i < 16; i++) { + printf("MMA[%d] = %d ", i, z[i]); + + if (z[i] == exp[i]) { + printf(" (Correct)\n"); + } else { + printf(" (Incorrect)\n"); + rc = 1; + } + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(mma, "mma"); +} diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index aac4a59f9e28..4e1a294eec35 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -12,3 +12,4 @@ pkey_exec_prot pkey_siginfo stack_expansion_ldst stack_expansion_signal +large_vm_gpr_corruption diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 40253abc6208..27dc09d0bfee 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -4,7 +4,8 @@ noarg: TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ large_vm_fork_separation bad_accesses pkey_exec_prot \ - pkey_siginfo stack_expansion_signal stack_expansion_ldst + pkey_siginfo stack_expansion_signal stack_expansion_ldst \ + large_vm_gpr_corruption TEST_PROGS := stress_code_patching.sh TEST_GEN_PROGS_EXTENDED := tlbie_test @@ -19,6 +20,7 @@ $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 +$(OUTPUT)/large_vm_gpr_corruption: CFLAGS += -m64 $(OUTPUT)/bad_accesses: CFLAGS += -m64 $(OUTPUT)/pkey_exec_prot: CFLAGS += -m64 $(OUTPUT)/pkey_siginfo: CFLAGS += -m64 diff --git a/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c b/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c new file mode 100644 index 000000000000..927bfae99ed9 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright 2022, Michael Ellerman, IBM Corp. +// +// Test that the 4PB address space SLB handling doesn't corrupt userspace registers +// (r9-r13) due to a SLB fault while saving the PPR. +// +// The bug was introduced in f384796c4 ("powerpc/mm: Add support for handling > 512TB +// address in SLB miss") and fixed in 4c2de74cc869 ("powerpc/64: Interrupts save PPR on +// stack rather than thread_struct"). +// +// To hit the bug requires the task struct and kernel stack to be in different segments. +// Usually that requires more than 1TB of RAM, or if that's not practical, boot the kernel +// with "disable_1tb_segments". +// +// The test works by creating mappings above 512TB, to trigger the large address space +// support. It creates 64 mappings, double the size of the SLB, to cause SLB faults on +// each access (assuming naive replacement). It then loops over those mappings touching +// each, and checks that r9-r13 aren't corrupted. +// +// It then forks another child and tries again, because a new child process will get a new +// kernel stack and thread struct allocated, which may be more optimally placed to trigger +// the bug. It would probably be better to leave the previous child processes hanging +// around, so that kernel stack & thread struct allocations are not reused, but that would +// amount to a 30 second fork bomb. The current design reliably triggers the bug on +// unpatched kernels. + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils.h" + +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB +#endif + +#define BASE_ADDRESS (1ul << 50) // 1PB +#define STRIDE (2ul << 40) // 2TB +#define SLB_SIZE 32 +#define NR_MAPPINGS (SLB_SIZE * 2) + +static volatile sig_atomic_t signaled; + +static void signal_handler(int sig) +{ + signaled = 1; +} + +#define CHECK_REG(_reg) \ + if (_reg != _reg##_orig) { \ + printf(str(_reg) " corrupted! Expected 0x%lx != 0x%lx\n", _reg##_orig, \ + _reg); \ + _exit(1); \ + } + +static int touch_mappings(void) +{ + unsigned long r9_orig, r10_orig, r11_orig, r12_orig, r13_orig; + unsigned long r9, r10, r11, r12, r13; + unsigned long addr, *p; + int i; + + for (i = 0; i < NR_MAPPINGS; i++) { + addr = BASE_ADDRESS + (i * STRIDE); + p = (unsigned long *)addr; + + asm volatile("mr %0, %%r9 ;" // Read original GPR values + "mr %1, %%r10 ;" + "mr %2, %%r11 ;" + "mr %3, %%r12 ;" + "mr %4, %%r13 ;" + "std %10, 0(%11) ;" // Trigger SLB fault + "mr %5, %%r9 ;" // Save possibly corrupted values + "mr %6, %%r10 ;" + "mr %7, %%r11 ;" + "mr %8, %%r12 ;" + "mr %9, %%r13 ;" + "mr %%r9, %0 ;" // Restore original values + "mr %%r10, %1 ;" + "mr %%r11, %2 ;" + "mr %%r12, %3 ;" + "mr %%r13, %4 ;" + : "=&b"(r9_orig), "=&b"(r10_orig), "=&b"(r11_orig), + "=&b"(r12_orig), "=&b"(r13_orig), "=&b"(r9), "=&b"(r10), + "=&b"(r11), "=&b"(r12), "=&b"(r13) + : "b"(i), "b"(p) + : "r9", "r10", "r11", "r12", "r13"); + + CHECK_REG(r9); + CHECK_REG(r10); + CHECK_REG(r11); + CHECK_REG(r12); + CHECK_REG(r13); + } + + return 0; +} + +static int test(void) +{ + unsigned long page_size, addr, *p; + struct sigaction action; + bool hash_mmu; + int i, status; + pid_t pid; + + // This tests a hash MMU specific bug. + FAIL_IF(using_hash_mmu(&hash_mmu)); + SKIP_IF(!hash_mmu); + + page_size = sysconf(_SC_PAGESIZE); + + for (i = 0; i < NR_MAPPINGS; i++) { + addr = BASE_ADDRESS + (i * STRIDE); + + p = mmap((void *)addr, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0); + if (p == MAP_FAILED) { + perror("mmap"); + printf("Error: couldn't mmap(), confirm kernel has 4PB support?\n"); + return 1; + } + } + + action.sa_handler = signal_handler; + action.sa_flags = SA_RESTART; + FAIL_IF(sigaction(SIGALRM, &action, NULL) < 0); + + // Seen to always crash in under ~10s on affected kernels. + alarm(30); + + while (!signaled) { + // Fork new processes, to increase the chance that we hit the case where + // the kernel stack and task struct are in different segments. + pid = fork(); + if (pid == 0) + exit(touch_mappings()); + + FAIL_IF(waitpid(-1, &status, 0) == -1); + FAIL_IF(WIFSIGNALED(status)); + FAIL_IF(!WIFEXITED(status)); + FAIL_IF(WEXITSTATUS(status)); + } + + return 0; +} + +int main(void) +{ + return test_harness(test, "large_vm_gpr_corruption"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S deleted file mode 100644 index 08a7b5f133b9..000000000000 --- a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2014, Michael Ellerman, IBM Corp. - */ - -#include <ppc-asm.h> - - .text - -FUNC_START(thirty_two_instruction_loop) - cmpwi r3,0 - beqlr - addi r4,r3,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 # 28 addi's - subi r3,r3,1 - b FUNC_NAME(thirty_two_instruction_loop) -FUNC_END(thirty_two_instruction_loop) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index fca054bbc094..c01a31d5f4ee 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -274,7 +274,7 @@ u64 *get_intr_regs(struct event *event, void *sample_buff) return intr_regs; } -static const unsigned int __perf_reg_mask(const char *register_name) +static const int __perf_reg_mask(const char *register_name) { if (!strcmp(register_name, "R0")) return 0; diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index d42ca8c676c3..5b2abb719ef2 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -182,17 +182,23 @@ int spectre_v2_test(void) case COUNT_CACHE_FLUSH_HW: // These should all not affect userspace branch prediction if (miss_percent > 15) { - printf("Branch misses > 15%% unexpected in this configuration!\n"); - printf("Possible mis-match between reported & actual mitigation\n"); - /* - * Such a mismatch may be caused by a guest system - * reporting as vulnerable when the host is mitigated. - * Return skip code to avoid detecting this as an error. - * We are not vulnerable and reporting otherwise, so - * missing such a mismatch is safe. - */ - if (miss_percent > 95) + if (miss_percent > 95) { + /* + * Such a mismatch may be caused by a system being unaware + * the count cache is disabled. This may be to enable + * guest migration between hosts with different settings. + * Return skip code to avoid detecting this as an error. + * We are not vulnerable and reporting otherwise, so + * missing such a mismatch is safe. + */ + printf("Branch misses > 95%% unexpected in this configuration.\n"); + printf("Count cache likely disabled without Linux knowing.\n"); + if (state == COUNT_CACHE_FLUSH_SW) + printf("WARNING: Kernel performing unnecessary flushes.\n"); return 4; + } + printf("Branch misses > 15%% unexpected in this configuration!\n"); + printf("Possible mismatch between reported & actual mitigation\n"); return 1; } @@ -201,14 +207,14 @@ int spectre_v2_test(void) // This seems to affect userspace branch prediction a bit? if (miss_percent > 25) { printf("Branch misses > 25%% unexpected in this configuration!\n"); - printf("Possible mis-match between reported & actual mitigation\n"); + printf("Possible mismatch between reported & actual mitigation\n"); return 1; } break; case COUNT_CACHE_DISABLED: if (miss_percent < 95) { - printf("Branch misses < 20%% unexpected in this configuration!\n"); - printf("Possible mis-match between reported & actual mitigation\n"); + printf("Branch misses < 95%% unexpected in this configuration!\n"); + printf("Possible mismatch between reported & actual mitigation\n"); return 1; } break; diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index d7507f3c7c76..31e5eea2a9b9 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -9,7 +9,9 @@ map_hugetlb map_populate thuge-gen compaction_test +migration mlock2-tests +mrelease_test mremap_dontunmap mremap_test on-fault-limit @@ -29,5 +31,6 @@ write_to_hugetlbfs hmm-tests memfd_secret local_config.* +soft-dirty split_huge_page_test ksm_tests diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 5b1ecd00695b..44f25acfbeca 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -36,20 +36,23 @@ TEST_GEN_FILES += hugepage-mremap TEST_GEN_FILES += hugepage-shm TEST_GEN_FILES += hugepage-vmemmap TEST_GEN_FILES += khugepaged -TEST_GEN_FILES += madv_populate +TEST_GEN_PROGS = madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate TEST_GEN_FILES += memfd_secret +TEST_GEN_FILES += migration TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests +TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -TEST_GEN_FILES += split_huge_page_test +TEST_GEN_PROGS += soft-dirty +TEST_GEN_PROGS += split_huge_page_test TEST_GEN_FILES += ksm_tests ifeq ($(MACHINE),x86_64) @@ -89,10 +92,15 @@ endif TEST_PROGS := run_vmtests.sh TEST_FILES := test_vmalloc.sh +TEST_FILES += test_hmm.sh KSFT_KHDR_INSTALL := 1 include ../lib.mk +$(OUTPUT)/madv_populate: vm_util.c +$(OUTPUT)/soft-dirty: vm_util.c +$(OUTPUT)/split_huge_page_test: vm_util.c + ifeq ($(MACHINE),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) @@ -149,6 +157,8 @@ $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS) $(OUTPUT)/ksm_tests: LDLIBS += -lnuma +$(OUTPUT)/migration: LDLIBS += -lnuma + local_config.mk local_config.h: check_config.sh /bin/sh ./check_config.sh $(CC) diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config index 60e82da0de85..be087c4bc396 100644 --- a/tools/testing/selftests/vm/config +++ b/tools/testing/selftests/vm/config @@ -4,3 +4,5 @@ CONFIG_TEST_VMALLOC=m CONFIG_DEVICE_PRIVATE=y CONFIG_TEST_HMM=m CONFIG_GUP_TEST=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_MEM_SOFT_DIRTY=y diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index cda837a14736..6bb36ca71cb5 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -1,7 +1,9 @@ #include <fcntl.h> +#include <errno.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> +#include <dirent.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <sys/stat.h> @@ -9,6 +11,7 @@ #include <pthread.h> #include <assert.h> #include "../../../../mm/gup_test.h" +#include "../kselftest.h" #include "util.h" @@ -18,6 +21,8 @@ #define FOLL_WRITE 0x01 /* check pte is writable */ #define FOLL_TOUCH 0x02 /* mark page accessed */ +#define GUP_TEST_FILE "/sys/kernel/debug/gup_test" + static unsigned long cmd = GUP_FAST_BENCHMARK; static int gup_fd, repeats = 1; static unsigned long size = 128 * MB; @@ -206,8 +211,23 @@ int main(int argc, char **argv) gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); if (gup_fd == -1) { - perror("open"); - exit(1); + switch (errno) { + case EACCES: + if (getuid()) + printf("Please run this test as root\n"); + break; + case ENOENT: + if (opendir("/sys/kernel/debug") == NULL) { + printf("mount debugfs at /sys/kernel/debug\n"); + break; + } + printf("check if CONFIG_GUP_TEST is enabled in kernel config\n"); + break; + default: + perror("failed to open /sys/kernel/debug/gup_test"); + break; + } + exit(KSFT_SKIP); } p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c index 1d689084a54b..585978f181ed 100644 --- a/tools/testing/selftests/vm/hugepage-mremap.c +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -178,6 +178,12 @@ int main(int argc, char *argv[]) munmap(addr, length); + addr = mremap(addr, length, length, 0); + if (addr != MAP_FAILED) { + printf("mremap: Expected failure, but call succeeded\n"); + exit(1); + } + close(fd); unlink(argv[argc-1]); diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index fd85f15869d1..2fcf24312da8 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -221,7 +221,8 @@ static bool assert_ksm_pages_count(long dupl_page_count) static int ksm_save_def(struct ksm_sysfs *ksm_sysfs) { if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) || - ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) || + numa_available() ? 0 : + ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) || ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) || ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) || ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) || @@ -236,7 +237,8 @@ static int ksm_save_def(struct ksm_sysfs *ksm_sysfs) static int ksm_restore(struct ksm_sysfs *ksm_sysfs) { if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) || - ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) || + numa_available() ? 0 : + ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) || ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) || ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) || ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) || @@ -720,7 +722,8 @@ int main(int argc, char *argv[]) if (ksm_write_sysfs(KSM_FP("run"), 2) || ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) || - ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) || + numa_available() ? 0 : + ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) || ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count)) return KSFT_FAIL; diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c index 3ee0e8275600..715a42e8e2cd 100644 --- a/tools/testing/selftests/vm/madv_populate.c +++ b/tools/testing/selftests/vm/madv_populate.c @@ -18,6 +18,7 @@ #include <sys/mman.h> #include "../kselftest.h" +#include "vm_util.h" /* * For now, we're using 2 MiB of private anonymous memory for all tests. @@ -26,18 +27,6 @@ static size_t pagesize; -static uint64_t pagemap_get_entry(int fd, char *start) -{ - const unsigned long pfn = (unsigned long)start / pagesize; - uint64_t entry; - int ret; - - ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry)); - if (ret != sizeof(entry)) - ksft_exit_fail_msg("reading pagemap failed\n"); - return entry; -} - static bool pagemap_is_populated(int fd, char *start) { uint64_t entry = pagemap_get_entry(fd, start); @@ -46,13 +35,6 @@ static bool pagemap_is_populated(int fd, char *start) return entry & 0xc000000000000000ull; } -static bool pagemap_is_softdirty(int fd, char *start) -{ - uint64_t entry = pagemap_get_entry(fd, start); - - return entry & 0x0080000000000000ull; -} - static void sense_support(void) { char *addr; @@ -258,20 +240,6 @@ static bool range_is_not_softdirty(char *start, ssize_t size) return ret; } -static void clear_softdirty(void) -{ - int fd = open("/proc/self/clear_refs", O_WRONLY); - const char *ctrl = "4"; - int ret; - - if (fd < 0) - ksft_exit_fail_msg("opening clear_refs failed\n"); - ret = write(fd, ctrl, strlen(ctrl)); - if (ret != strlen(ctrl)) - ksft_exit_fail_msg("writing clear_refs failed\n"); - close(fd); -} - static void test_softdirty(void) { char *addr; diff --git a/tools/testing/selftests/vm/migration.c b/tools/testing/selftests/vm/migration.c new file mode 100644 index 000000000000..1cec8425e3ca --- /dev/null +++ b/tools/testing/selftests/vm/migration.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The main purpose of the tests here is to exercise the migration entry code + * paths in the kernel. + */ + +#include "../kselftest_harness.h" +#include <strings.h> +#include <pthread.h> +#include <numa.h> +#include <numaif.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <signal.h> +#include <time.h> + +#define TWOMEG (2<<20) +#define RUNTIME (60) + +#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) + +FIXTURE(migration) +{ + pthread_t *threads; + pid_t *pids; + int nthreads; + int n1; + int n2; +}; + +FIXTURE_SETUP(migration) +{ + int n; + + ASSERT_EQ(numa_available(), 0); + self->nthreads = numa_num_task_cpus() - 1; + self->n1 = -1; + self->n2 = -1; + + for (n = 0; n < numa_max_possible_node(); n++) + if (numa_bitmask_isbitset(numa_all_nodes_ptr, n)) { + if (self->n1 == -1) { + self->n1 = n; + } else { + self->n2 = n; + break; + } + } + + self->threads = malloc(self->nthreads * sizeof(*self->threads)); + ASSERT_NE(self->threads, NULL); + self->pids = malloc(self->nthreads * sizeof(*self->pids)); + ASSERT_NE(self->pids, NULL); +}; + +FIXTURE_TEARDOWN(migration) +{ + free(self->threads); + free(self->pids); +} + +int migrate(uint64_t *ptr, int n1, int n2) +{ + int ret, tmp; + int status = 0; + struct timespec ts1, ts2; + + if (clock_gettime(CLOCK_MONOTONIC, &ts1)) + return -1; + + while (1) { + if (clock_gettime(CLOCK_MONOTONIC, &ts2)) + return -1; + + if (ts2.tv_sec - ts1.tv_sec >= RUNTIME) + return 0; + + ret = move_pages(0, 1, (void **) &ptr, &n2, &status, + MPOL_MF_MOVE_ALL); + if (ret) { + if (ret > 0) + printf("Didn't migrate %d pages\n", ret); + else + perror("Couldn't migrate pages"); + return -2; + } + + tmp = n2; + n2 = n1; + n1 = tmp; + } + + return 0; +} + +void *access_mem(void *ptr) +{ + uint64_t y = 0; + volatile uint64_t *x = ptr; + + while (1) { + pthread_testcancel(); + y += *x; + } + + return NULL; +} + +/* + * Basic migration entry testing. One thread will move pages back and forth + * between nodes whilst other threads try and access them triggering the + * migration entry wait paths in the kernel. + */ +TEST_F_TIMEOUT(migration, private_anon, 2*RUNTIME) +{ + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) + if (pthread_create(&self->threads[i], NULL, access_mem, ptr)) + perror("Couldn't create thread"); + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(pthread_cancel(self->threads[i]), 0); +} + +/* + * Same as the previous test but with shared memory. + */ +TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME) +{ + pid_t pid; + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) { + pid = fork(); + if (!pid) + access_mem(ptr); + else + self->pids[i] = pid; + } + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(kill(self->pids[i], SIGTERM), 0); +} + +/* + * Tests the pmd migration entry paths. + */ +TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME) +{ + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, 2*TWOMEG, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG); + ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0); + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) + if (pthread_create(&self->threads[i], NULL, access_mem, ptr)) + perror("Couldn't create thread"); + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(pthread_cancel(self->threads[i]), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/vm/mrelease_test.c b/tools/testing/selftests/vm/mrelease_test.c new file mode 100644 index 000000000000..96671c2f7d48 --- /dev/null +++ b/tools/testing/selftests/vm/mrelease_test.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2022 Google LLC + */ +#define _GNU_SOURCE +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "util.h" + +#include "../kselftest.h" + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open -1 +#endif + +#ifndef __NR_process_mrelease +#define __NR_process_mrelease -1 +#endif + +#define MB(x) (x << 20) +#define MAX_SIZE_MB 1024 + +static int alloc_noexit(unsigned long nr_pages, int pipefd) +{ + int ppid = getppid(); + int timeout = 10; /* 10sec timeout to get killed */ + unsigned long i; + char *buf; + + buf = (char *)mmap(NULL, nr_pages * PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, 0, 0); + if (buf == MAP_FAILED) { + perror("mmap failed, halting the test"); + return KSFT_FAIL; + } + + for (i = 0; i < nr_pages; i++) + *((unsigned long *)(buf + (i * PAGE_SIZE))) = i; + + /* Signal the parent that the child is ready */ + if (write(pipefd, "", 1) < 0) { + perror("write"); + return KSFT_FAIL; + } + + /* Wait to be killed (when reparenting happens) */ + while (getppid() == ppid && timeout > 0) { + sleep(1); + timeout--; + } + + munmap(buf, nr_pages * PAGE_SIZE); + + return (timeout > 0) ? KSFT_PASS : KSFT_FAIL; +} + +/* The process_mrelease calls in this test are expected to fail */ +static void run_negative_tests(int pidfd) +{ + /* Test invalid flags. Expect to fail with EINVAL error code. */ + if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) || + errno != EINVAL) { + perror("process_mrelease with wrong flags"); + exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + } + /* + * Test reaping while process is alive with no pending SIGKILL. + * Expect to fail with EINVAL error code. + */ + if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL) { + perror("process_mrelease on a live process"); + exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + } +} + +static int child_main(int pipefd[], size_t size) +{ + int res; + + /* Allocate and fault-in memory and wait to be killed */ + close(pipefd[0]); + res = alloc_noexit(MB(size) / PAGE_SIZE, pipefd[1]); + close(pipefd[1]); + return res; +} + +int main(void) +{ + int pipefd[2], pidfd; + bool success, retry; + size_t size; + pid_t pid; + char byte; + int res; + + /* Test a wrong pidfd */ + if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) { + perror("process_mrelease with wrong pidfd"); + exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + } + + /* Start the test with 1MB child memory allocation */ + size = 1; +retry: + /* + * Pipe for the child to signal when it's done allocating + * memory + */ + if (pipe(pipefd)) { + perror("pipe"); + exit(KSFT_FAIL); + } + pid = fork(); + if (pid < 0) { + perror("fork"); + close(pipefd[0]); + close(pipefd[1]); + exit(KSFT_FAIL); + } + + if (pid == 0) { + /* Child main routine */ + res = child_main(pipefd, size); + exit(res); + } + + /* + * Parent main routine: + * Wait for the child to finish allocations, then kill and reap + */ + close(pipefd[1]); + /* Block until the child is ready */ + res = read(pipefd[0], &byte, 1); + close(pipefd[0]); + if (res < 0) { + perror("read"); + if (!kill(pid, SIGKILL)) + waitpid(pid, NULL, 0); + exit(KSFT_FAIL); + } + + pidfd = syscall(__NR_pidfd_open, pid, 0); + if (pidfd < 0) { + perror("pidfd_open"); + if (!kill(pid, SIGKILL)) + waitpid(pid, NULL, 0); + exit(KSFT_FAIL); + } + + /* Run negative tests which require a live child */ + run_negative_tests(pidfd); + + if (kill(pid, SIGKILL)) { + perror("kill"); + exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + } + + success = (syscall(__NR_process_mrelease, pidfd, 0) == 0); + if (!success) { + /* + * If we failed to reap because the child exited too soon, + * before we could call process_mrelease. Double child's memory + * which causes it to spend more time on cleanup and increases + * our chances of reaping its memory before it exits. + * Retry until we succeed or reach MAX_SIZE_MB. + */ + if (errno == ESRCH) { + retry = (size <= MAX_SIZE_MB); + } else { + perror("process_mrelease"); + waitpid(pid, NULL, 0); + exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + } + } + + /* Cleanup to prevent zombies */ + if (waitpid(pid, NULL, 0) < 0) { + perror("waitpid"); + exit(KSFT_FAIL); + } + close(pidfd); + + if (!success) { + if (retry) { + size *= 2; + goto retry; + } + printf("All process_mrelease attempts failed!\n"); + exit(KSFT_FAIL); + } + + printf("Success reaping a child with %zuMB of memory allocations\n", + size); + return KSFT_PASS; +} diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2d0ae88665db..291bc1e07842 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1523,7 +1523,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) /* * Reset the shadow, assuming that the above mprotect() * correctly changed PKRU, but to an unknown value since - * the actual alllocated pkey is unknown. + * the actual allocated pkey is unknown. */ shadow_pkey_reg = __read_pkey_reg(); diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 352ba00cf26b..41fce8bea929 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -9,12 +9,12 @@ mnt=./huge exitcode=0 #get huge pagesize and freepages from /proc/meminfo -while read name size unit; do +while read -r name size unit; do if [ "$name" = "HugePages_Free:" ]; then - freepgs=$size + freepgs="$size" fi if [ "$name" = "Hugepagesize:" ]; then - hpgsize_KB=$size + hpgsize_KB="$size" fi done < /proc/meminfo @@ -30,27 +30,26 @@ needmem_KB=$((half_ufd_size_MB * 2 * 1024)) #set proper nr_hugepages if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then - nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` + nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) needpgs=$((needmem_KB / hpgsize_KB)) tries=2 - while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do - lackpgs=$(( $needpgs - $freepgs )) + while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do + lackpgs=$((needpgs - freepgs)) echo 3 > /proc/sys/vm/drop_caches - echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages - if [ $? -ne 0 ]; then + if ! echo $((lackpgs + nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then echo "Please run this test as root" exit $ksft_skip fi - while read name size unit; do + while read -r name size unit; do if [ "$name" = "HugePages_Free:" ]; then freepgs=$size fi done < /proc/meminfo tries=$((tries - 1)) done - if [ $freepgs -lt $needpgs ]; then + if [ "$freepgs" -lt "$needpgs" ]; then printf "Not enough huge pages available (%d < %d)\n" \ - $freepgs $needpgs + "$freepgs" "$needpgs" exit 1 fi else @@ -60,458 +59,124 @@ fi #filter 64bit architectures ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64" -if [ -z $ARCH ]; then - ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'` +if [ -z "$ARCH" ]; then + ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/') fi VADDR64=0 -echo "$ARCH64STR" | grep $ARCH && VADDR64=1 - -mkdir $mnt -mount -t hugetlbfs none $mnt - -echo "---------------------" -echo "running hugepage-mmap" -echo "---------------------" -./hugepage-mmap -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +echo "$ARCH64STR" | grep "$ARCH" && VADDR64=1 + +# Usage: run_test [test binary] [arbitrary test arguments...] +run_test() { + local title="running $*" + local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) + printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" + + "$@" + local ret=$? + if [ $ret -eq 0 ]; then + echo "[PASS]" + elif [ $ret -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip + else + echo "[FAIL]" + exitcode=1 + fi +} -shmmax=`cat /proc/sys/kernel/shmmax` -shmall=`cat /proc/sys/kernel/shmall` +mkdir "$mnt" +mount -t hugetlbfs none "$mnt" + +run_test ./hugepage-mmap + +shmmax=$(cat /proc/sys/kernel/shmmax) +shmall=$(cat /proc/sys/kernel/shmall) echo 268435456 > /proc/sys/kernel/shmmax echo 4194304 > /proc/sys/kernel/shmall -echo "--------------------" -echo "running hugepage-shm" -echo "--------------------" -./hugepage-shm -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi -echo $shmmax > /proc/sys/kernel/shmmax -echo $shmall > /proc/sys/kernel/shmall - -echo "-------------------" -echo "running map_hugetlb" -echo "-------------------" -./map_hugetlb -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./hugepage-shm +echo "$shmmax" > /proc/sys/kernel/shmmax +echo "$shmall" > /proc/sys/kernel/shmall -echo "-----------------------" -echo "running hugepage-mremap" -echo "-----------------------" -./hugepage-mremap $mnt/huge_mremap -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi -rm -f $mnt/huge_mremap - -echo "------------------------" -echo "running hugepage-vmemmap" -echo "------------------------" -./hugepage-vmemmap -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./map_hugetlb -echo "-----------------------" -echo "running hugetlb-madvise" -echo "-----------------------" -./hugetlb-madvise $mnt/madvise-test -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi -rm -f $mnt/madvise-test +run_test ./hugepage-mremap "$mnt"/huge_mremap +rm -f "$mnt"/huge_mremap + +run_test ./hugepage-vmemmap + +run_test ./hugetlb-madvise "$mnt"/madvise-test +rm -f "$mnt"/madvise-test echo "NOTE: The above hugetlb tests provide minimal coverage. Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" echo " hugetlb regression testing." -echo "---------------------------" -echo "running map_fixed_noreplace" -echo "---------------------------" -./map_fixed_noreplace -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./map_fixed_noreplace -echo "------------------------------------------------------" -echo "running: gup_test -u # get_user_pages_fast() benchmark" -echo "------------------------------------------------------" -./gup_test -u -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +# get_user_pages_fast() benchmark +run_test ./gup_test -u +# pin_user_pages_fast() benchmark +run_test ./gup_test -a +# Dump pages 0, 19, and 4096, using pin_user_pages: +run_test ./gup_test -ct -F 0x1 0 19 0x1000 -echo "------------------------------------------------------" -echo "running: gup_test -a # pin_user_pages_fast() benchmark" -echo "------------------------------------------------------" -./gup_test -a -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "------------------------------------------------------------" -echo "# Dump pages 0, 19, and 4096, using pin_user_pages:" -echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test" -echo "------------------------------------------------------------" -./gup_test -ct -F 0x1 0 19 0x1000 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-------------------" -echo "running userfaultfd" -echo "-------------------" -./userfaultfd anon 20 16 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "---------------------------" -echo "running userfaultfd_hugetlb" -echo "---------------------------" +run_test ./userfaultfd anon 20 16 # Test requires source and destination huge pages. Size of source # (half_ufd_size_MB) is passed as argument to test. -./userfaultfd hugetlb $half_ufd_size_MB 32 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-------------------------" -echo "running userfaultfd_shmem" -echo "-------------------------" -./userfaultfd shmem 20 16 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./userfaultfd hugetlb "$half_ufd_size_MB" 32 +run_test ./userfaultfd shmem 20 16 #cleanup -umount $mnt -rm -rf $mnt -echo $nr_hugepgs > /proc/sys/vm/nr_hugepages - -echo "-----------------------" -echo "running compaction_test" -echo "-----------------------" -./compaction_test -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "----------------------" -echo "running on-fault-limit" -echo "----------------------" -sudo -u nobody ./on-fault-limit -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "--------------------" -echo "running map_populate" -echo "--------------------" -./map_populate -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +umount "$mnt" +rm -rf "$mnt" +echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages -echo "-------------------------" -echo "running mlock-random-test" -echo "-------------------------" -./mlock-random-test -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./compaction_test -echo "--------------------" -echo "running mlock2-tests" -echo "--------------------" -./mlock2-tests -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test sudo -u nobody ./on-fault-limit -echo "-------------------" -echo "running mremap_test" -echo "-------------------" -./mremap_test -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "-----------------" -echo "running thuge-gen" -echo "-----------------" -./thuge-gen -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -if [ $VADDR64 -ne 0 ]; then -echo "-----------------------------" -echo "running virtual_address_range" -echo "-----------------------------" -./virtual_address_range -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./map_populate -echo "-----------------------------" -echo "running virtual address 128TB switch test" -echo "-----------------------------" -./va_128TBswitch -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi -fi # VADDR64 +run_test ./mlock-random-test -echo "------------------------------------" -echo "running vmalloc stability smoke test" -echo "------------------------------------" -./test_vmalloc.sh smoke -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./mlock2-tests -echo "------------------------------------" -echo "running MREMAP_DONTUNMAP smoke test" -echo "------------------------------------" -./mremap_dontunmap -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./mrelease_test -echo "running HMM smoke test" -echo "------------------------------------" -./test_hmm.sh smoke -ret_val=$? +run_test ./mremap_test -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./thuge-gen -echo "--------------------------------------------------------" -echo "running MADV_POPULATE_READ and MADV_POPULATE_WRITE tests" -echo "--------------------------------------------------------" -./madv_populate -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +if [ $VADDR64 -ne 0 ]; then + run_test ./virtual_address_range -echo "running memfd_secret test" -echo "------------------------------------" -./memfd_secret -ret_val=$? + # virtual address 128TB switch test + run_test ./va_128TBswitch +fi # VADDR64 -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +# vmalloc stability smoke test +run_test ./test_vmalloc.sh smoke -echo "-------------------------------------------------------" -echo "running KSM MADV_MERGEABLE test with 10 identical pages" -echo "-------------------------------------------------------" -./ksm_tests -M -p 10 -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./mremap_dontunmap -echo "------------------------" -echo "running KSM unmerge test" -echo "------------------------" -./ksm_tests -U -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./test_hmm.sh smoke -echo "----------------------------------------------------------" -echo "running KSM test with 10 zero pages and use_zero_pages = 0" -echo "----------------------------------------------------------" -./ksm_tests -Z -p 10 -z 0 -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "----------------------------------------------------------" -echo "running KSM test with 10 zero pages and use_zero_pages = 1" -echo "----------------------------------------------------------" -./ksm_tests -Z -p 10 -z 1 -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "-------------------------------------------------------------" -echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 1" -echo "-------------------------------------------------------------" -./ksm_tests -N -m 1 -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests +run_test ./madv_populate -echo "-------------------------------------------------------------" -echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 0" -echo "-------------------------------------------------------------" -./ksm_tests -N -m 0 -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./memfd_secret -exit $exitcode +# KSM MADV_MERGEABLE test with 10 identical pages +run_test ./ksm_tests -M -p 10 +# KSM unmerge test +run_test ./ksm_tests -U +# KSM test with 10 zero pages and use_zero_pages = 0 +run_test ./ksm_tests -Z -p 10 -z 0 +# KSM test with 10 zero pages and use_zero_pages = 1 +run_test ./ksm_tests -Z -p 10 -z 1 +# KSM test with 2 NUMA nodes and merge_across_nodes = 1 +run_test ./ksm_tests -N -m 1 +# KSM test with 2 NUMA nodes and merge_across_nodes = 0 +run_test ./ksm_tests -N -m 0 exit $exitcode diff --git a/tools/testing/selftests/vm/settings b/tools/testing/selftests/vm/settings new file mode 100644 index 000000000000..9abfc60e9e6f --- /dev/null +++ b/tools/testing/selftests/vm/settings @@ -0,0 +1 @@ +timeout=45 diff --git a/tools/testing/selftests/vm/soft-dirty.c b/tools/testing/selftests/vm/soft-dirty.c new file mode 100644 index 000000000000..08ab62a4a9d0 --- /dev/null +++ b/tools/testing/selftests/vm/soft-dirty.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <fcntl.h> +#include <stdint.h> +#include <malloc.h> +#include <sys/mman.h> +#include "../kselftest.h" +#include "vm_util.h" + +#define PAGEMAP_FILE_PATH "/proc/self/pagemap" +#define TEST_ITERATIONS 10000 + +static void test_simple(int pagemap_fd, int pagesize) +{ + int i; + char *map; + + map = aligned_alloc(pagesize, pagesize); + if (!map) + ksft_exit_fail_msg("mmap failed\n"); + + clear_softdirty(); + + for (i = 0 ; i < TEST_ITERATIONS; i++) { + if (pagemap_is_softdirty(pagemap_fd, map) == 1) { + ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i); + break; + } + + clear_softdirty(); + // Write something to the page to get the dirty bit enabled on the page + map[0]++; + + if (pagemap_is_softdirty(pagemap_fd, map) == 0) { + ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i); + break; + } + + clear_softdirty(); + } + free(map); + + ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__); +} + +static void test_vma_reuse(int pagemap_fd, int pagesize) +{ + char *map, *map2; + + map = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0); + if (map == MAP_FAILED) + ksft_exit_fail_msg("mmap failed"); + + // The kernel always marks new regions as soft dirty + ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1, + "Test %s dirty bit of allocated page\n", __func__); + + clear_softdirty(); + munmap(map, pagesize); + + map2 = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0); + if (map2 == MAP_FAILED) + ksft_exit_fail_msg("mmap failed"); + + // Dirty bit is set for new regions even if they are reused + if (map == map2) + ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1, + "Test %s dirty bit of reused address page\n", __func__); + else + ksft_test_result_skip("Test %s dirty bit of reused address page\n", __func__); + + munmap(map2, pagesize); +} + +static void test_hugepage(int pagemap_fd, int pagesize) +{ + char *map; + int i, ret; + size_t hpage_len = read_pmd_pagesize(); + + map = memalign(hpage_len, hpage_len); + if (!map) + ksft_exit_fail_msg("memalign failed\n"); + + ret = madvise(map, hpage_len, MADV_HUGEPAGE); + if (ret) + ksft_exit_fail_msg("madvise failed %d\n", ret); + + for (i = 0; i < hpage_len; i++) + map[i] = (char)i; + + if (check_huge(map)) { + ksft_test_result_pass("Test %s huge page allocation\n", __func__); + + clear_softdirty(); + for (i = 0 ; i < TEST_ITERATIONS ; i++) { + if (pagemap_is_softdirty(pagemap_fd, map) == 1) { + ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i); + break; + } + + clear_softdirty(); + // Write something to the page to get the dirty bit enabled on the page + map[0]++; + + if (pagemap_is_softdirty(pagemap_fd, map) == 0) { + ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i); + break; + } + clear_softdirty(); + } + + ksft_test_result(i == TEST_ITERATIONS, "Test %s huge page dirty bit\n", __func__); + } else { + // hugepage allocation failed. skip these tests + ksft_test_result_skip("Test %s huge page allocation\n", __func__); + ksft_test_result_skip("Test %s huge page dirty bit\n", __func__); + } + free(map); +} + +int main(int argc, char **argv) +{ + int pagemap_fd; + int pagesize; + + ksft_print_header(); + ksft_set_plan(5); + + pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH); + + pagesize = getpagesize(); + + test_simple(pagemap_fd, pagesize); + test_vma_reuse(pagemap_fd, pagesize); + test_hugepage(pagemap_fd, pagesize); + + close(pagemap_fd); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 52497b7b9f1d..6aa2b8253aed 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -16,14 +16,13 @@ #include <sys/mount.h> #include <malloc.h> #include <stdbool.h> +#include "vm_util.h" uint64_t pagesize; unsigned int pageshift; uint64_t pmd_pagesize; -#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" -#define SMAP_PATH "/proc/self/smaps" #define INPUT_MAX 80 #define PID_FMT "%d,0x%lx,0x%lx" @@ -51,30 +50,6 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) return 0; } - -static uint64_t read_pmd_pagesize(void) -{ - int fd; - char buf[20]; - ssize_t num_read; - - fd = open(PMD_SIZE_PATH, O_RDONLY); - if (fd == -1) { - perror("Open hpage_pmd_size failed"); - exit(EXIT_FAILURE); - } - num_read = read(fd, buf, 19); - if (num_read < 1) { - close(fd); - perror("Read hpage_pmd_size failed"); - exit(EXIT_FAILURE); - } - buf[num_read] = '\0'; - close(fd); - - return strtoul(buf, NULL, 10); -} - static int write_file(const char *path, const char *buf, size_t buflen) { int fd; @@ -113,58 +88,6 @@ static void write_debugfs(const char *fmt, ...) } } -#define MAX_LINE_LENGTH 500 - -static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) -{ - while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { - if (!strncmp(buf, pattern, strlen(pattern))) - return true; - } - return false; -} - -static uint64_t check_huge(void *addr) -{ - uint64_t thp = 0; - int ret; - FILE *fp; - char buffer[MAX_LINE_LENGTH]; - char addr_pattern[MAX_LINE_LENGTH]; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", - (unsigned long) addr); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); - exit(EXIT_FAILURE); - } - - - fp = fopen(SMAP_PATH, "r"); - if (!fp) { - printf("%s: Failed to open file %s\n", __func__, SMAP_PATH); - exit(EXIT_FAILURE); - } - if (!check_for_pattern(fp, addr_pattern, buffer)) - goto err_out; - - /* - * Fetch the AnonHugePages: in the same block and check the number of - * hugepages. - */ - if (!check_for_pattern(fp, "AnonHugePages:", buffer)) - goto err_out; - - if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) { - printf("Reading smap error\n"); - exit(EXIT_FAILURE); - } - -err_out: - fclose(fp); - return thp; -} - void split_pmd_thp(void) { char *one_page; diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 92a4516f8f0d..0bdfc1955229 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -82,7 +82,7 @@ static int test_type; static volatile bool test_uffdio_copy_eexist = true; static volatile bool test_uffdio_zeropage_eexist = true; /* Whether to test uffd write-protection */ -static bool test_uffdio_wp = false; +static bool test_uffdio_wp = true; /* Whether to test uffd minor faults */ static bool test_uffdio_minor = false; @@ -1422,7 +1422,6 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize) static int userfaultfd_stress(void) { void *area; - char *tmp_area; unsigned long nr; struct uffdio_register uffdio_register; struct uffd_stats uffd_stats[nr_cpus]; @@ -1533,13 +1532,9 @@ static int userfaultfd_stress(void) count_verify[nr], nr); /* prepare next bounce */ - tmp_area = area_src; - area_src = area_dst; - area_dst = tmp_area; + swap(area_src, area_dst); - tmp_area = area_src_alias; - area_src_alias = area_dst_alias; - area_dst_alias = tmp_area; + swap(area_src_alias, area_dst_alias); uffd_stats_report(uffd_stats, nr_cpus); } @@ -1594,8 +1589,6 @@ static void set_test_type(const char *type) if (!strcmp(type, "anon")) { test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; - /* Only enable write-protect test for anonymous test */ - test_uffdio_wp = true; } else if (!strcmp(type, "hugetlb")) { test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c new file mode 100644 index 000000000000..b58ab11a7a30 --- /dev/null +++ b/tools/testing/selftests/vm/vm_util.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include <fcntl.h> +#include "../kselftest.h" +#include "vm_util.h" + +#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" +#define SMAP_FILE_PATH "/proc/self/smaps" +#define MAX_LINE_LENGTH 500 + +uint64_t pagemap_get_entry(int fd, char *start) +{ + const unsigned long pfn = (unsigned long)start / getpagesize(); + uint64_t entry; + int ret; + + ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry)); + if (ret != sizeof(entry)) + ksft_exit_fail_msg("reading pagemap failed\n"); + return entry; +} + +bool pagemap_is_softdirty(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + // Check if dirty bit (55th bit) is set + return entry & 0x0080000000000000ull; +} + +void clear_softdirty(void) +{ + int ret; + const char *ctrl = "4"; + int fd = open("/proc/self/clear_refs", O_WRONLY); + + if (fd < 0) + ksft_exit_fail_msg("opening clear_refs failed\n"); + ret = write(fd, ctrl, strlen(ctrl)); + close(fd); + if (ret != strlen(ctrl)) + ksft_exit_fail_msg("writing clear_refs failed\n"); +} + +static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) +{ + while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { + if (!strncmp(buf, pattern, strlen(pattern))) + return true; + } + return false; +} + +uint64_t read_pmd_pagesize(void) +{ + int fd; + char buf[20]; + ssize_t num_read; + + fd = open(PMD_SIZE_FILE_PATH, O_RDONLY); + if (fd == -1) + ksft_exit_fail_msg("Open hpage_pmd_size failed\n"); + + num_read = read(fd, buf, 19); + if (num_read < 1) { + close(fd); + ksft_exit_fail_msg("Read hpage_pmd_size failed\n"); + } + buf[num_read] = '\0'; + close(fd); + + return strtoul(buf, NULL, 10); +} + +uint64_t check_huge(void *addr) +{ + uint64_t thp = 0; + int ret; + FILE *fp; + char buffer[MAX_LINE_LENGTH]; + char addr_pattern[MAX_LINE_LENGTH]; + + ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", + (unsigned long) addr); + if (ret >= MAX_LINE_LENGTH) + ksft_exit_fail_msg("%s: Pattern is too long\n", __func__); + + fp = fopen(SMAP_FILE_PATH, "r"); + if (!fp) + ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); + + if (!check_for_pattern(fp, addr_pattern, buffer)) + goto err_out; + + /* + * Fetch the AnonHugePages: in the same block and check the number of + * hugepages. + */ + if (!check_for_pattern(fp, "AnonHugePages:", buffer)) + goto err_out; + + if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) + ksft_exit_fail_msg("Reading smap error\n"); + +err_out: + fclose(fp); + return thp; +} diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h new file mode 100644 index 000000000000..2e512bd57ae1 --- /dev/null +++ b/tools/testing/selftests/vm/vm_util.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <stdint.h> +#include <stdbool.h> + +uint64_t pagemap_get_entry(int fd, char *start); +bool pagemap_is_softdirty(int fd, char *start); +void clear_softdirty(void); +uint64_t read_pmd_pagesize(void); +uint64_t check_huge(void *addr); |