diff options
Diffstat (limited to 'tools/testing/selftests/cgroup')
17 files changed, 1836 insertions, 455 deletions
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 1b897152bab6..e01584c2189a 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -21,14 +21,15 @@ TEST_GEN_PROGS += test_zswap LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h include ../lib.mk +include lib/libcgroup.mk -$(OUTPUT)/test_core: cgroup_util.c -$(OUTPUT)/test_cpu: cgroup_util.c -$(OUTPUT)/test_cpuset: cgroup_util.c -$(OUTPUT)/test_freezer: cgroup_util.c -$(OUTPUT)/test_hugetlb_memcg: cgroup_util.c -$(OUTPUT)/test_kill: cgroup_util.c -$(OUTPUT)/test_kmem: cgroup_util.c -$(OUTPUT)/test_memcontrol: cgroup_util.c -$(OUTPUT)/test_pids: cgroup_util.c -$(OUTPUT)/test_zswap: cgroup_util.c +$(OUTPUT)/test_core: $(LIBCGROUP_O) +$(OUTPUT)/test_cpu: $(LIBCGROUP_O) +$(OUTPUT)/test_cpuset: $(LIBCGROUP_O) +$(OUTPUT)/test_freezer: $(LIBCGROUP_O) +$(OUTPUT)/test_hugetlb_memcg: $(LIBCGROUP_O) +$(OUTPUT)/test_kill: $(LIBCGROUP_O) +$(OUTPUT)/test_kmem: $(LIBCGROUP_O) +$(OUTPUT)/test_memcontrol: $(LIBCGROUP_O) +$(OUTPUT)/test_pids: $(LIBCGROUP_O) +$(OUTPUT)/test_zswap: $(LIBCGROUP_O) diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index 1e2d46636a0c..42f54936f4bb 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -17,10 +17,12 @@ #include <unistd.h> #include "cgroup_util.h" -#include "../clone3/clone3_selftests.h" +#include "../../clone3/clone3_selftests.h" + +bool cg_test_v1_named; /* Returns read len on success, or -errno on failure. */ -static ssize_t read_text(const char *path, char *buf, size_t max_len) +ssize_t read_text(const char *path, char *buf, size_t max_len) { ssize_t len; int fd; @@ -39,7 +41,7 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len) } /* Returns written len on success, or -errno on failure. 
*/ -static ssize_t write_text(const char *path, char *buf, ssize_t len) +ssize_t write_text(const char *path, char *buf, ssize_t len) { int fd; @@ -104,8 +106,9 @@ int cg_read_strcmp(const char *cgroup, const char *control, /* Handle the case of comparing against empty string */ if (!expected) return -1; - else - size = strlen(expected) + 1; + + /* needs size > 1, otherwise cg_read() reads 0 bytes */ + size = (expected[0] == '\0') ? 2 : strlen(expected) + 1; buf = malloc(size); if (!buf) @@ -121,6 +124,21 @@ int cg_read_strcmp(const char *cgroup, const char *control, return ret; } +int cg_read_strcmp_wait(const char *cgroup, const char *control, + const char *expected) +{ + int i, ret; + + for (i = 0; i < 100; i++) { + ret = cg_read_strcmp(cgroup, control, expected); + if (!ret) + return ret; + usleep(10000); + } + + return ret; +} + int cg_read_strstr(const char *cgroup, const char *control, const char *needle) { char buf[PAGE_SIZE]; @@ -166,6 +184,27 @@ long cg_read_key_long(const char *cgroup, const char *control, const char *key) return atol(ptr + strlen(key)); } +long cg_read_key_long_poll(const char *cgroup, const char *control, + const char *key, long expected, int retries, + useconds_t wait_interval_us) +{ + long val = -1; + int i; + + for (i = 0; i < retries; i++) { + val = cg_read_key_long(cgroup, control, key); + if (val < 0) + return val; + + if (val == expected) + break; + + usleep(wait_interval_us); + } + + return val; +} + long cg_read_lc(const char *cgroup, const char *control) { char buf[PAGE_SIZE]; @@ -217,7 +256,8 @@ int cg_write_numeric(const char *cgroup, const char *control, long value) return cg_write(cgroup, control, buf); } -int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) +static int cg_find_root(char *root, size_t len, const char *controller, + bool *nsdelegate) { char buf[10 * PAGE_SIZE]; char *fs, *mount, *type, *options; @@ -236,18 +276,37 @@ int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) options = 
strtok(NULL, delim); strtok(NULL, delim); strtok(NULL, delim); - - if (strcmp(type, "cgroup2") == 0) { - strncpy(root, mount, len); - if (nsdelegate) - *nsdelegate = !!strstr(options, "nsdelegate"); - return 0; + if (strcmp(type, "cgroup") == 0) { + if (!controller || !strstr(options, controller)) + continue; + } else if (strcmp(type, "cgroup2") == 0) { + if (controller && + cg_read_strstr(mount, "cgroup.controllers", controller)) + continue; + } else { + continue; } + strncpy(root, mount, len); + + if (nsdelegate) + *nsdelegate = !!strstr(options, "nsdelegate"); + return 0; + } return -1; } +int cg_find_controller_root(char *root, size_t len, const char *controller) +{ + return cg_find_root(root, len, controller, NULL); +} + +int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) +{ + return cg_find_root(root, len, NULL, nsdelegate); +} + int cg_create(const char *cgroup) { return mkdir(cgroup, 0755); @@ -341,7 +400,7 @@ int cg_enter_current(const char *cgroup) int cg_enter_current_thread(const char *cgroup) { - return cg_write(cgroup, "cgroup.threads", "0"); + return cg_write(cgroup, CG_THREADS_FILE, "0"); } int cg_run(const char *cgroup, @@ -488,94 +547,28 @@ int cg_run_nowait(const char *cgroup, return pid; } -int get_temp_fd(void) -{ - return open(".", O_TMPFILE | O_RDWR | O_EXCL); -} - -int alloc_pagecache(int fd, size_t size) -{ - char buf[PAGE_SIZE]; - struct stat st; - int i; - - if (fstat(fd, &st)) - goto cleanup; - - size += st.st_size; - - if (ftruncate(fd, size)) - goto cleanup; - - for (i = 0; i < size; i += sizeof(buf)) - read(fd, buf, sizeof(buf)); - - return 0; - -cleanup: - return -1; -} - -int alloc_anon(const char *cgroup, void *arg) +int proc_mount_contains(const char *option) { - size_t size = (unsigned long)arg; - char *buf, *ptr; + char buf[4 * PAGE_SIZE]; + ssize_t read; - buf = malloc(size); - for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) - *ptr = 0; + read = read_text("/proc/mounts", buf, sizeof(buf)); + if (read < 0) + 
return read; - free(buf); - return 0; + return strstr(buf, option) != NULL; } -int is_swap_enabled(void) +int cgroup_feature(const char *feature) { char buf[PAGE_SIZE]; - const char delim[] = "\n"; - int cnt = 0; - char *line; - - if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) - return -1; - - for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) - cnt++; - - return cnt > 1; -} - -int set_oom_adj_score(int pid, int score) -{ - char path[PATH_MAX]; - int fd, len; - - sprintf(path, "/proc/%d/oom_score_adj", pid); - - fd = open(path, O_WRONLY | O_APPEND); - if (fd < 0) - return fd; - - len = dprintf(fd, "%d", score); - if (len < 0) { - close(fd); - return len; - } - - close(fd); - return 0; -} - -int proc_mount_contains(const char *option) -{ - char buf[4 * PAGE_SIZE]; ssize_t read; - read = read_text("/proc/mounts", buf, sizeof(buf)); + read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf)); if (read < 0) return read; - return strstr(buf, option) != NULL; + return strstr(buf, feature) != NULL; } ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index 19b131ee7707..567b1082974c 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h @@ -2,9 +2,9 @@ #include <stdbool.h> #include <stdlib.h> -#include "../kselftest.h" - +#ifndef PAGE_SIZE #define PAGE_SIZE 4096 +#endif #define MB(x) (x << 20) @@ -13,6 +13,12 @@ #define TEST_UID 65534 /* usually nobody, any !root is fine */ +#define CG_THREADS_FILE (!cg_test_v1_named ? "cgroup.threads" : "tasks") +#define CG_NAMED_NAME "selftest" +#define CG_PATH_FORMAT (!cg_test_v1_named ? "0::%s" : (":name=" CG_NAMED_NAME ":%s")) + +#define DEFAULT_WAIT_INTERVAL_US (100 * 1000) /* 100 ms */ + /* * Checks if two given values differ by less than err% of their sum. 
*/ @@ -21,6 +27,30 @@ static inline int values_close(long a, long b, int err) return labs(a - b) <= (a + b) / 100 * err; } +/* + * Checks if two given values differ by less than err% of their sum and assert + * with detailed debug info if not. + */ +static inline int values_close_report(long a, long b, int err) +{ + long diff = labs(a - b); + long limit = (a + b) / 100 * err; + double actual_err = (a + b) ? (100.0 * diff / (a + b)) : 0.0; + int close = diff <= limit; + + if (!close) + fprintf(stderr, + "[FAIL] actual=%ld expected=%ld | diff=%ld | limit=%ld | " + "tolerance=%d%% | actual_error=%.2f%%\n", + a, b, diff, limit, err, actual_err); + + return close; +} + +extern ssize_t read_text(const char *path, char *buf, size_t max_len); +extern ssize_t write_text(const char *path, char *buf, ssize_t len); + +extern int cg_find_controller_root(char *root, size_t len, const char *controller); extern int cg_find_unified_root(char *root, size_t len, bool *nsdelegate); extern char *cg_name(const char *root, const char *name); extern char *cg_name_indexed(const char *root, const char *name, int index); @@ -31,11 +61,16 @@ extern int cg_read(const char *cgroup, const char *control, char *buf, size_t len); extern int cg_read_strcmp(const char *cgroup, const char *control, const char *expected); +extern int cg_read_strcmp_wait(const char *cgroup, const char *control, + const char *expected); extern int cg_read_strstr(const char *cgroup, const char *control, const char *needle); extern long cg_read_long(const char *cgroup, const char *control); extern long cg_read_long_fd(int fd); long cg_read_key_long(const char *cgroup, const char *control, const char *key); +long cg_read_key_long_poll(const char *cgroup, const char *control, + const char *key, long expected, int retries, + useconds_t wait_interval_us); extern long cg_read_lc(const char *cgroup, const char *control); extern int cg_write(const char *cgroup, const char *control, char *buf); extern int cg_open(const char 
*cgroup, const char *control, int flags); @@ -49,14 +84,10 @@ extern int cg_enter_current_thread(const char *cgroup); extern int cg_run_nowait(const char *cgroup, int (*fn)(const char *cgroup, void *arg), void *arg); -extern int get_temp_fd(void); -extern int alloc_pagecache(int fd, size_t size); -extern int alloc_anon(const char *cgroup, void *arg); -extern int is_swap_enabled(void); -extern int set_oom_adj_score(int pid, int score); extern int cg_wait_for_proc_count(const char *cgroup, int count); extern int cg_killall(const char *cgroup); int proc_mount_contains(const char *option); +int cgroup_feature(const char *feature); extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size); extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle); extern pid_t clone_into_cgroup(int cgroup_fd); @@ -66,3 +97,4 @@ extern int dirfd_open_opath(const char *dir); extern int cg_prepare_for_wait(const char *cgroup); extern int memcg_prepare_for_wait(const char *cgroup); extern int cg_wait_for(int fd); +extern bool cg_test_v1_named; diff --git a/tools/testing/selftests/cgroup/lib/libcgroup.mk b/tools/testing/selftests/cgroup/lib/libcgroup.mk new file mode 100644 index 000000000000..7a73007204c3 --- /dev/null +++ b/tools/testing/selftests/cgroup/lib/libcgroup.mk @@ -0,0 +1,19 @@ +CGROUP_DIR := $(selfdir)/cgroup + +LIBCGROUP_C := lib/cgroup_util.c + +LIBCGROUP_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBCGROUP_C)) + +LIBCGROUP_O_DIRS := $(shell dirname $(LIBCGROUP_O) | uniq) + +CFLAGS += -I$(CGROUP_DIR)/lib/include + +EXTRA_HDRS := $(selfdir)/clone3/clone3_selftests.h + +$(LIBCGROUP_O_DIRS): + mkdir -p $@ + +$(LIBCGROUP_O): $(OUTPUT)/%.o : $(CGROUP_DIR)/%.c $(EXTRA_HDRS) $(LIBCGROUP_O_DIRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +EXTRA_CLEAN += $(LIBCGROUP_O) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index a5672a91d273..7b83c7e7c9d4 100644 
--- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -5,6 +5,8 @@ #include <linux/sched.h> #include <sys/types.h> #include <sys/mman.h> +#include <sys/mount.h> +#include <sys/stat.h> #include <sys/wait.h> #include <unistd.h> #include <fcntl.h> @@ -15,10 +17,13 @@ #include <string.h> #include <pthread.h> -#include "../kselftest.h" +#include "kselftest.h" #include "cgroup_util.h" static bool nsdelegate; +#ifndef CLONE_NEWCGROUP +#define CLONE_NEWCGROUP 0 +#endif static int touch_anon(char *buf, size_t size) { @@ -148,6 +153,9 @@ static int test_cgcore_populated(const char *root) int cgroup_fd = -EBADF; pid_t pid; + if (cg_test_v1_named) + return KSFT_SKIP; + cg_test_a = cg_name(root, "cg_test_a"); cg_test_b = cg_name(root, "cg_test_a/cg_test_b"); cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c"); @@ -225,7 +233,8 @@ static int test_cgcore_populated(const char *root) if (err) goto cleanup; - if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n")) + if (cg_read_strcmp_wait(cg_test_d, "cgroup.events", + "populated 0\n")) goto cleanup; /* Remove cgroup. 
*/ @@ -277,6 +286,9 @@ static int test_cgcore_invalid_domain(const char *root) int ret = KSFT_FAIL; char *grandparent = NULL, *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + grandparent = cg_name(root, "cg_test_grandparent"); parent = cg_name(root, "cg_test_grandparent/cg_test_parent"); child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child"); @@ -339,6 +351,9 @@ static int test_cgcore_parent_becomes_threaded(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -378,7 +393,8 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; - if (cg_read_strstr(root, "cgroup.controllers", "cpu") || + if (cg_test_v1_named || + cg_read_strstr(root, "cgroup.controllers", "cpu") || cg_write(root, "cgroup.subtree_control", "+cpu")) { ret = KSFT_SKIP; goto cleanup; @@ -430,6 +446,9 @@ static int test_cgcore_top_down_constraint_enable(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -465,6 +484,9 @@ static int test_cgcore_top_down_constraint_disable(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -506,6 +528,9 @@ static int test_cgcore_internal_process_constraint(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if 
(!parent || !child) @@ -573,7 +598,7 @@ static int test_cgcore_proc_migration(const char *root) } cg_enter_current(dst); - if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1) + if (cg_read_lc(dst, CG_THREADS_FILE) != n_threads + 1) goto cleanup; ret = KSFT_PASS; @@ -605,7 +630,7 @@ static void *migrating_thread_fn(void *arg) char lines[3][PATH_MAX]; for (g = 1; g < 3; ++g) - snprintf(lines[g], sizeof(lines[g]), "0::%s", grps[g] + strlen(grps[0])); + snprintf(lines[g], sizeof(lines[g]), CG_PATH_FORMAT, grps[g] + strlen(grps[0])); for (i = 0; i < n_iterations; ++i) { cg_enter_current_thread(grps[(i % 2) + 1]); @@ -642,10 +667,12 @@ static int test_cgcore_thread_migration(const char *root) if (cg_create(grps[2])) goto cleanup; - if (cg_write(grps[1], "cgroup.type", "threaded")) - goto cleanup; - if (cg_write(grps[2], "cgroup.type", "threaded")) - goto cleanup; + if (!cg_test_v1_named) { + if (cg_write(grps[1], "cgroup.type", "threaded")) + goto cleanup; + if (cg_write(grps[2], "cgroup.type", "threaded")) + goto cleanup; + } if (cg_enter_current(grps[1])) goto cleanup; @@ -659,7 +686,7 @@ static int test_cgcore_thread_migration(const char *root) if (retval) goto cleanup; - snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0])); + snprintf(line, sizeof(line), CG_PATH_FORMAT, grps[1] + strlen(grps[0])); if (proc_read_strstr(0, 1, "cgroup", line)) goto cleanup; @@ -842,6 +869,38 @@ cleanup: return ret; } +static int setup_named_v1_root(char *root, size_t len, const char *name) +{ + char options[PATH_MAX]; + int r; + + r = snprintf(root, len, "/mnt/cg_selftest"); + if (r < 0) + return r; + + r = snprintf(options, sizeof(options), "none,name=%s", name); + if (r < 0) + return r; + + r = mkdir(root, 0755); + if (r < 0 && errno != EEXIST) + return r; + + r = mount("none", root, "cgroup", 0, options); + if (r < 0) + return r; + + return 0; +} + +static void cleanup_named_v1_root(char *root) +{ + if (!cg_test_v1_named) + return; + umount(root); + rmdir(root); +} + 
#define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -865,15 +924,22 @@ struct corecg_test { int main(int argc, char *argv[]) { char root[PATH_MAX]; - int i, ret = EXIT_SUCCESS; - - if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) - ksft_exit_skip("cgroup v2 isn't mounted\n"); + int i; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); + if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) { + if (setup_named_v1_root(root, sizeof(root), CG_NAMED_NAME)) + ksft_exit_skip("cgroup v2 isn't mounted and could not setup named v1 hierarchy\n"); + cg_test_v1_named = true; + goto post_v2_setup; + } if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) if (cg_write(root, "cgroup.subtree_control", "+memory")) ksft_exit_skip("Failed to set memory controller\n"); +post_v2_setup: for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: @@ -883,11 +949,11 @@ int main(int argc, char *argv[]) ksft_test_result_skip("%s\n", tests[i].name); break; default: - ret = EXIT_FAILURE; ksft_test_result_fail("%s\n", tests[i].name); break; } } - return ret; + cleanup_named_v1_root(root); + ksft_finished(); } diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index a2b50af8e9ee..c83f05438d7c 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -2,6 +2,7 @@ #define _GNU_SOURCE #include <linux/limits.h> +#include <sys/param.h> #include <sys/sysinfo.h> #include <sys/wait.h> #include <errno.h> @@ -10,7 +11,7 @@ #include <time.h> #include <unistd.h> -#include "../kselftest.h" +#include "kselftest.h" #include "cgroup_util.h" enum hog_clock_type { @@ -218,7 +219,7 @@ static int test_cpucg_stats(const char *root) if (user_usec <= 0) goto cleanup; - if (!values_close(usage_usec, expected_usage_usec, 1)) + if (!values_close_report(usage_usec, expected_usage_usec, 1)) goto cleanup; ret = KSFT_PASS; @@ -290,7 +291,7 @@ 
static int test_cpucg_nice(const char *root) user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec"); - if (!values_close(nice_usec, expected_nice_usec, 1)) + if (!values_close_report(nice_usec, expected_nice_usec, 1)) goto cleanup; ret = KSFT_PASS; @@ -403,7 +404,7 @@ overprovision_validate(const struct cpu_hogger *children, int num_children) goto cleanup; delta = children[i + 1].usage - children[i].usage; - if (!values_close(delta, children[0].usage, 35)) + if (!values_close_report(delta, children[0].usage, 35)) goto cleanup; } @@ -443,7 +444,7 @@ underprovision_validate(const struct cpu_hogger *children, int num_children) int ret = KSFT_FAIL, i; for (i = 0; i < num_children - 1; i++) { - if (!values_close(children[i + 1].usage, children[0].usage, 15)) + if (!values_close_report(children[i + 1].usage, children[0].usage, 15)) goto cleanup; } @@ -572,16 +573,16 @@ run_cpucg_nested_weight_test(const char *root, bool overprovisioned) nested_leaf_usage = leaf[1].usage + leaf[2].usage; if (overprovisioned) { - if (!values_close(leaf[0].usage, nested_leaf_usage, 15)) + if (!values_close_report(leaf[0].usage, nested_leaf_usage, 15)) goto cleanup; - } else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15)) + } else if (!values_close_report(leaf[0].usage * 2, nested_leaf_usage, 15)) goto cleanup; child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec"); if (child_usage <= 0) goto cleanup; - if (!values_close(child_usage, nested_leaf_usage, 1)) + if (!values_close_report(child_usage, nested_leaf_usage, 1)) goto cleanup; ret = KSFT_PASS; @@ -645,10 +646,16 @@ test_cpucg_nested_weight_underprovisioned(const char *root) static int test_cpucg_max(const char *root) { int ret = KSFT_FAIL; - long usage_usec, user_usec; - long usage_seconds = 1; - long expected_usage_usec = usage_seconds * USEC_PER_SEC; + long quota_usec = 1000; + long default_period_usec = 100000; /* cpu.max's default period */ + 
long duration_seconds = 1; + + long duration_usec = duration_seconds * USEC_PER_SEC; + long usage_usec, n_periods, remainder_usec, expected_usage_usec; char *cpucg; + char quota_buf[32]; + + snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec); cpucg = cg_name(root, "cpucg_test"); if (!cpucg) @@ -657,13 +664,13 @@ static int test_cpucg_max(const char *root) if (cg_create(cpucg)) goto cleanup; - if (cg_write(cpucg, "cpu.max", "1000")) + if (cg_write(cpucg, "cpu.max", quota_buf)) goto cleanup; struct cpu_hog_func_param param = { .nprocs = 1, .ts = { - .tv_sec = usage_seconds, + .tv_sec = duration_seconds, .tv_nsec = 0, }, .clock_type = CPU_HOG_CLOCK_WALL, @@ -672,14 +679,19 @@ static int test_cpucg_max(const char *root) goto cleanup; usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec"); - user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); - if (user_usec <= 0) + if (usage_usec <= 0) goto cleanup; - if (user_usec >= expected_usage_usec) - goto cleanup; + /* + * The following calculation applies only since + * the cpu hog is set to run as per wall-clock time + */ + n_periods = duration_usec / default_period_usec; + remainder_usec = duration_usec - n_periods * default_period_usec; + expected_usage_usec + = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (values_close(usage_usec, expected_usage_usec, 95)) + if (!values_close_report(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; @@ -698,10 +710,16 @@ cleanup: static int test_cpucg_max_nested(const char *root) { int ret = KSFT_FAIL; - long usage_usec, user_usec; - long usage_seconds = 1; - long expected_usage_usec = usage_seconds * USEC_PER_SEC; + long quota_usec = 1000; + long default_period_usec = 100000; /* cpu.max's default period */ + long duration_seconds = 1; + + long duration_usec = duration_seconds * USEC_PER_SEC; + long usage_usec, n_periods, remainder_usec, expected_usage_usec; char *parent, *child; + char quota_buf[32]; + + snprintf(quota_buf, 
sizeof(quota_buf), "%ld", quota_usec); parent = cg_name(root, "cpucg_parent"); child = cg_name(parent, "cpucg_child"); @@ -717,13 +735,13 @@ static int test_cpucg_max_nested(const char *root) if (cg_create(child)) goto cleanup; - if (cg_write(parent, "cpu.max", "1000")) + if (cg_write(parent, "cpu.max", quota_buf)) goto cleanup; struct cpu_hog_func_param param = { .nprocs = 1, .ts = { - .tv_sec = usage_seconds, + .tv_sec = duration_seconds, .tv_nsec = 0, }, .clock_type = CPU_HOG_CLOCK_WALL, @@ -732,14 +750,19 @@ static int test_cpucg_max_nested(const char *root) goto cleanup; usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec"); - user_usec = cg_read_key_long(child, "cpu.stat", "user_usec"); - if (user_usec <= 0) + if (usage_usec <= 0) goto cleanup; - if (user_usec >= expected_usage_usec) - goto cleanup; + /* + * The following calculation applies only since + * the cpu hog is set to run as per wall-clock time + */ + n_periods = duration_usec / default_period_usec; + remainder_usec = duration_usec - n_periods * default_period_usec; + expected_usage_usec + = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (values_close(usage_usec, expected_usage_usec, 95)) + if (!values_close_report(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; @@ -773,8 +796,10 @@ struct cpucg_test { int main(int argc, char *argv[]) { char root[PATH_MAX]; - int i, ret = EXIT_SUCCESS; + int i; + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); @@ -791,11 +816,10 @@ int main(int argc, char *argv[]) ksft_test_result_skip("%s\n", tests[i].name); break; default: - ret = EXIT_FAILURE; ksft_test_result_fail("%s\n", tests[i].name); break; } } - return ret; + ksft_finished(); } diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c index 4034d14ba69a..c5cf8b56ceb8 100644 --- 
a/tools/testing/selftests/cgroup/test_cpuset.c +++ b/tools/testing/selftests/cgroup/test_cpuset.c @@ -3,7 +3,7 @@ #include <linux/limits.h> #include <signal.h> -#include "../kselftest.h" +#include "kselftest.h" #include "cgroup_util.h" static int idle_process_fn(const char *cgroup, void *arg) @@ -247,8 +247,10 @@ struct cpuset_test { int main(int argc, char *argv[]) { char root[PATH_MAX]; - int i, ret = EXIT_SUCCESS; + int i; + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); @@ -265,11 +267,10 @@ int main(int argc, char *argv[]) ksft_test_result_skip("%s\n", tests[i].name); break; default: - ret = EXIT_FAILURE; ksft_test_result_fail("%s\n", tests[i].name); break; } } - return ret; + ksft_finished(); } diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 400a696a0d21..a56f4153c64d 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -88,22 +88,32 @@ echo "" > test/cpuset.cpus # If isolated CPUs have been reserved at boot time (as shown in # cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8 # that will be used by this script for testing purpose. If not, some of -# the tests may fail incorrectly. These pre-isolated CPUs should stay in -# an isolated state throughout the testing process for now. +# the tests may fail incorrectly. Wait a bit and retry again just in case +# these isolated CPUs are leftover from previous run and have just been +# cleaned up earlier in this script. +# +# These pre-isolated CPUs should stay in an isolated state throughout the +# testing process for now. 
# BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +[[ -n "$BOOT_ISOLCPUS" ]] && { + sleep 0.5 + BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +} if [[ -n "$BOOT_ISOLCPUS" ]] then [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] && skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" fi + cleanup() { online_cpus cd $CGROUP2 - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - rmdir test > /dev/null 2>&1 + rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1 + rmdir rtest/p1/c11 rtest/p1/c12 rtest/p2/c21 \ + rtest/p2/c22 rtest/p1 rtest/p2 rtest > /dev/null 2>&1 [[ -n "$SCHED_DEBUG" ]] && echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose } @@ -173,14 +183,21 @@ test_add_proc() # # Cgroup test hierarchy # -# root -- A1 -- A2 -- A3 -# +- B1 +# root +# | +# +------+------+ +# | | +# A1 B1 +# | +# A2 +# | +# A3 # # P<v> = set cpus.partition (0:member, 1:root, 2:isolated) # C<l> = add cpu-list to cpuset.cpus # X<l> = add cpu-list to cpuset.cpus.exclusive -# S<p> = use prefix in subtree_control # T = put a task into cgroup +# CX<l> = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive # O<c>=<v> = Write <v> to CPU online file of <c> # # ECPUs - effective CPUs of cpusets @@ -191,146 +208,146 @@ test_add_proc() # sched-debug matching which includes offline CPUs and single-CPU partitions # while the second one is for matching cpuset.cpus.isolated. # -SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1" +SETUP_A123_PARTITIONS="C1-3:P1 C2-3:P1 C3:P1" TEST_MATRIX=( # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- - " C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1" + " C0-1 . . C2-3 . C4-5 . . 0 A2:0-1" " C0-1 . . C2-3 P1 . . . 0 " - " C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 " - " C0-1 . . C2-3 P1:S+ C1:P1 . . 0 " - " C0-1:S+ . . C2-3 . . . P1 0 " - " C0-1:P1 . . C2-3 S+ C1 . . 
0 " - " C0-1:P1 . . C2-3 S+ C1:P1 . . 0 " - " C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 " + " C0-1 . . C2-3 P1 C0-1:P1 . . 0 " + " C0-1 . . C2-3 P1 C1:P1 . . 0 " + " C0-1 . . C2-3 . . . P1 0 " + " C0-1:P1 . . C2-3 . C1 . . 0 " + " C0-1:P1 . . C2-3 . C1:P1 . . 0 " + " C0-1:P1 . . C2-3 . C1:P1 . P1 0 " " C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5" - " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" " C0-1 . . C2-3:P1 . . . C2 0 " " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0" - "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2" - "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1" - "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + " C0-3:P1 C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3" + " C0-3:P1 C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3" + " C2-3:P1 C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" + " C2-3:P1 C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" + " C2-3:P1 C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" + "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # CPU offlining cases: - " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 
0 A1:3,A2:3 A1:P1,A2:P-1" - "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + " C0-1 . . C2-3 . C4-5 . O2=0 0 A1:0-1|B1:3" + " C0-3:P1 C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3" + " C0-3:P1 C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3" + " C0-3:P1 C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3" + " C0-3:P1 C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3" + " C2-3:P1 C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + " C2-3:P1 C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + " C2-3:P1 C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" + " C2-3:P1 C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" + " C2-3:P1 C3:P2 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-2" + " C1-3:P1 C3:P2 . . . T:O3=0 . . 0 A1:1-2|A2:1-2 A1:P1|A2:P-2 3|" + " C1-3:P1 C3:P2 . . . T:O3=0 O3=1 . 0 A1:1-2|A2:3 A1:P1|A2:P2 3" + "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . 
O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1|A2:3|A3:3 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1|A2:2|A3:2 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # # Remote partition and cpuset.cpus.exclusive tests # - " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2" - " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5" - " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3" - " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 
0 A1:4,A2:4,A3:2-3 A3:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3" + " C0-3 C1-3 C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C2-3 C2-3 . . . P2 0 A1:0-1|A2:1|A3:1|B1:2-3 A1:P0|A3:P0|B1:P2" + " C0-3 C1-3 C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3 C1-3 C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5" + " C4:X0-3 X1-3 X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3" + " C4:X0-3 X1-3 X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3" # Nested remote/local partition tests - " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P0,B1:P1" - " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ - A1:P0,A2:P2,A3:P1 2-4,2-3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \ - A1:P0,A2:P2,A3:P1 2" - " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ - A1:P0,A2:P-2,A3:P-1" - " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . . X1 . 
0 A1:0-1,A2:2-4,A3:2-4 \ - A1:P0,A2:P2,A3:P-1 2-4" + " C0-3 C1-3 C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-3" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P0|B1:P1" + " C0-3 C1-3 C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|3" + " C0-4 C1-4 C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \ + A1:P0|A2:P2|A3:P1 2-4|2-3" + " C0-4 C1-4 C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \ + A1:P0|A2:P2|A3:P1 2" + " C0-4:X2-4 C1-4:X2-4:P2 C2-4:X4:P1 \ + . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \ + A1:P0|A2:P-2|A3:P-1 ." + " C0-4:X2-4 C1-4:X2-4:P2 C2-4:X4:P1 \ + . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \ + A1:P0|A2:P2|A3:P-1 2-4" # Remote partition offline tests - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3," + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3" + " C0-3 C1-3 C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" # An invalidated remote partition cannot self-recover from hotplug - " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" + " C0-3 C1-3 C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ." # cpus.exclusive.effective clearing test - " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:" + " C0-3 C1-3 C2 . X2-3:X . . . 
0 A1:0-3|A2:1-3|A3:2|XA1:" # Invalid to valid remote partition transition test - " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2" - " C0-3:S+ C1-3:X3:P2 - . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3" + " C0-3 C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ." + " C0-3 C1-3:X3:P2 . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" # Invalid to valid local partition direct transition tests - " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3" - " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" - " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0" - " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3" - " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3" + " C1-3:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" + " C1-3:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" + " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:5-6 A1:P2|B1:P0" + " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" # Local partition invalidation tests - " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3" - " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" - " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \ + . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3" + " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \ + . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" + " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \ + . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" # Local partition CPU change tests - " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2" - " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3" + " C0-5:P2 C4-5:P1 . . . C3-5 . . 
0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2" + " C0-5:P2 C4-5:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3" # cpus_allowed/exclusive_cpus update tests - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ - A1:P0,A3:P-2" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ - A1:P0,A3:P-2" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P2 3" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ - A1:P0,A3:P-2" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \ - A1:P0,A3:P-2" + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \ + . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \ + A1:P0|A3:P-2 ." + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \ + . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \ + A1:P0|A3:P-2 ." + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \ + . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \ + A1:P0|A3:P2 3" + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3:P2 \ + . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \ + A1:P0|A3:P2 3" + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3:P2 \ + . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \ + A1:P0|A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- @@ -339,68 +356,142 @@ TEST_MATRIX=( # # Adding CPUs to partition root that are not in parent's # cpuset.cpus is allowed, but those extra CPUs are ignored. - "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1" + " C2-3:P1 C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1" # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. - "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1" - " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . 
T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + " C2-3:P1 C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1" + " C3:P1 C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # Changing a partition root to member makes child partitions invalid - "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1" - "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1" + " C2-3:P1 C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1" + "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1" # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long # as they overlap. - "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + " C2-3:P1 . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1" # Deletion of CPUs distributed to child cgroup is allowed. - "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5" + " C0-1:P1 C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5" # To become a valid partition root, cpuset.cpus must overlap parent's # cpuset.cpus. - " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1" + " C0-1:P1 . . C2-3 . C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1" # Enabling partition with child cpusets is allowed - " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1" + " C0-1 C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1" - # A partition root with non-partition root parent is invalid, but it + # A partition root with non-partition root parent is invalid| but it # can be made valid if its parent becomes a partition root too. - " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2" - " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2" + " C0-1 C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" + " C0-1 C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" - # A non-exclusive cpuset.cpus change will invalidate partition and its siblings - " C0-1:P1 . . C2-3 C0-2 . . . 
0 A1:0-2,B1:2-3 A1:P-1,B1:P0" - " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" - " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" + # A non-exclusive cpuset.cpus change will not invalidate its siblings partition. + " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:3 A1:P1|B1:P0" + " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-1|XA1:0-1|B1:2-3 A1:P1|B1:P1" + " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-1|B1:2-3 A1:P0|B1:P1" # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it - " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5" # Child partition root that try to take all CPUs from parent partition # with tasks will remain invalid. - " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" - " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1" - " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" + " C1-4:P1 P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" + " C1-4:P1 P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1" + " C1-4:P1 P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't # affect cpuset.cpus.exclusive.effective. - " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3" + " C1-4:X3 C1:X3 . . . C . . 0 A2:1-4|XA2:3" + + # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive + # but creating a local partition out of it is not allowed. Similarly and change + # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will + # invalidate it. + " CX1-4 CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \ + A1:P0|A2:P2:B1:P1 2-4" + " CX1-4 CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" + " CX1-4 CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" + + # When multiple partitions with conflicting cpuset.cpus are created, the + # latter created ones will only get what are left of the available exclusive + # CPUs. + " C1-3:P1 . . . . . . 
C3-5:P1 0 A1:1-3|B1:4-5:XB1:4-5 A1:P1|B1:P1" + + # cpuset.cpus can be set to a subset of sibling's cpuset.cpus.exclusive + " C1-3:X1-3 . . C4-5 . . . C1-2 0 A1:1-3|B1:1-2" + + # cpuset.cpus can become empty with task in it as it inherits parent's effective CPUs + " C1-3 C2 . . . T:C . . 0 A1:1-3|A2:1-3" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: # A task cannot be added to a partition with no cpu - "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1" + " C2-3:P1 C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1" # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected - " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5" - # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive - " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5" + # cpuset.cpus.exclusive cannot be set to a superset of sibling's cpuset.cpus + " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5" +) + +# +# Cpuset controller remote partition test matrix. +# +# Cgroup test hierarchy +# +# root +# | +# rtest (cpuset.cpus.exclusive=1-7) +# | +# +------+------+ +# | | +# p1 p2 +# +--+--+ +--+--+ +# | | | | +# c11 c12 c21 c22 +# +# REMOTE_TEST_MATRIX uses the same notational convention as TEST_MATRIX. +# Only CPUs 1-7 should be used. +# +REMOTE_TEST_MATRIX=( + # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22 + # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS + # ------ ------ ------- ------- ------- ------- ----- ------ -------- + " X1-3 X4-6 X1-2 X3 X4-5 X6 \ + . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \ + c11:P2|c12:P2|c21:P2|c22:P2 1-6" + " CX1-4 . X1-2:P2 C3 . . \ + . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \ + p1:P0|c11:P2|c12:P0 1-2" + " CX1-4 . X1-2:P2 . . . \ + X2-4 . . . . . p1:1,3-4|c11:2 \ + p1:P0|c11:P2 2" + " CX1-5 . X1-2:P2 X3-5:P1 . . \ + X2-4 . . . . . 
p1:1,5|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + " CX1-4 . X1-2:P2 X3-4:P1 . . \ + . . X2 . . . p1:1|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + # p1 as member, will get its effective CPUs from its parent rtest + " CX1-4 . X1-2:P2 X3-4:P1 . . \ + . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \ + p1:P0|c11:P2|c12:P1 1" + " CX1-4 X5-6:P1 . . . . \ + . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \ + p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \ + 1-2,4-6|1-2,5-6" + # c12 whose cpuset.cpus CPUs are all granted to c11 will become invalid partition + " C1-5:P1 . C1-4:P1 C2-3 . . \ + . . . P1 . . p1:5|c11:1-4|c12:5 \ + p1:P1|c11:P1|c12:P-1" ) # @@ -439,7 +530,6 @@ set_ctrl_state() CGRP=$1 STATE=$2 SHOWERR=${3} - CTRL=${CTRL:=$CONTROLLER} HASERR=0 REDIRECT="2> $TMPMSG" [[ -z "$STATE" || "$STATE" = '.' ]] && return 0 @@ -449,29 +539,31 @@ set_ctrl_state() for CMD in $(echo $STATE | sed -e "s/:/ /g") do TFILE=$CGRP/cgroup.procs - SFILE=$CGRP/cgroup.subtree_control PFILE=$CGRP/cpuset.cpus.partition CFILE=$CGRP/cpuset.cpus XFILE=$CGRP/cpuset.cpus.exclusive - S=$(expr substr $CMD 1 1) - if [[ $S = S ]] - then - PREFIX=${CMD#?} - COMM="echo ${PREFIX}${CTRL} > $SFILE" + + # Enable cpuset controller if not enabled yet + [[ -f $CFILE ]] || { + COMM="echo +cpuset > $CGRP/../cgroup.subtree_control" eval $COMM $REDIRECT - elif [[ $S = X ]] - then + } + case $CMD in + X*) CPUS=${CMD#?} COMM="echo $CPUS > $XFILE" eval $COMM $REDIRECT - elif [[ $S = C ]] - then - CPUS=${CMD#?} + ;; + CX*) + CPUS=${CMD#??} + COMM="echo $CPUS > $CFILE; echo $CPUS > $XFILE" + eval $COMM $REDIRECT + ;; + C*) CPUS=${CMD#?} COMM="echo $CPUS > $CFILE" eval $COMM $REDIRECT - elif [[ $S = P ]] - then - VAL=${CMD#?} + ;; + P*) VAL=${CMD#?} case $VAL in 0) VAL=member ;; @@ -486,15 +578,17 @@ set_ctrl_state() esac COMM="echo $VAL > $PFILE" eval $COMM $REDIRECT - elif [[ $S = O ]] - then - VAL=${CMD#?} + ;; + O*) VAL=${CMD#?} write_cpu_online $VAL - elif [[ $S = T ]] - then - COMM="echo 0 > $TFILE" + ;; + T*) COMM="echo 0 > 
$TFILE" eval $COMM $REDIRECT - fi + ;; + *) echo "Unknown command: $CMD" + exit 1 + ;; + esac RET=$? [[ $RET -ne 0 ]] && { [[ -n "$SHOWERR" ]] && { @@ -532,21 +626,18 @@ online_cpus() } # -# Return 1 if the list of effective cpus isn't the same as the initial list. +# Remove all the test cgroup directories # reset_cgroup_states() { echo 0 > $CGROUP2/cgroup.procs online_cpus - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - pause 0.02 - set_ctrl_state . R- - pause 0.01 + rmdir $RESET_LIST > /dev/null 2>&1 } dump_states() { - for DIR in . A1 A1/A2 A1/A2/A3 B1 + for DIR in $CGROUP_LIST do CPUS=$DIR/cpuset.cpus ECPUS=$DIR/cpuset.cpus.effective @@ -566,17 +657,33 @@ dump_states() } # +# Set the actual cgroup directory into $CGRP_DIR +# $1 - cgroup name +# +set_cgroup_dir() +{ + CGRP_DIR=$1 + [[ $CGRP_DIR = A2 ]] && CGRP_DIR=A1/A2 + [[ $CGRP_DIR = A3 ]] && CGRP_DIR=A1/A2/A3 + [[ $CGRP_DIR = c11 ]] && CGRP_DIR=p1/c11 + [[ $CGRP_DIR = c12 ]] && CGRP_DIR=p1/c12 + [[ $CGRP_DIR = c21 ]] && CGRP_DIR=p2/c21 + [[ $CGRP_DIR = c22 ]] && CGRP_DIR=p2/c22 +} + +# # Check effective cpus -# $1 - check string, format: <cgroup>:<cpu-list>[,<cgroup>:<cpu-list>]* +# $1 - check string, format: <cgroup>:<cpu-list>[|<cgroup>:<cpu-list>]* # check_effective_cpus() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CPUS=$2 + EXPECTED_CPUS=$2 + ACTUAL_CPUS= if [[ $CGRP = X* ]] then CGRP=${CGRP#X} @@ -584,41 +691,39 @@ check_effective_cpus() else FILE=cpuset.cpus.effective fi - [[ $CGRP = A2 ]] && CGRP=A1/A2 - [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 - [[ -e $CGRP/$FILE ]] || return 1 - [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1 + set_cgroup_dir $CGRP + [[ -e $CGRP_DIR/$FILE ]] || return 1 + ACTUAL_CPUS=$(cat $CGRP_DIR/$FILE) + [[ $EXPECTED_CPUS = $ACTUAL_CPUS ]] || return 1 done } # # Check cgroup states -# $1 - check string, format: <cgroup>:<state>[,<cgroup>:<state>]* +# $1 - check string, 
format: <cgroup>:<state>[|<cgroup>:<state>]* # check_cgroup_states() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CGRP_DIR=$CGRP - STATE=$2 + EXPECTED_STATE=$2 FILE= - EVAL=$(expr substr $STATE 2 2) - [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2 - [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3 + EVAL=$(expr substr $EXPECTED_STATE 2 2) - case $STATE in + set_cgroup_dir $CGRP + case $EXPECTED_STATE in P*) FILE=$CGRP_DIR/cpuset.cpus.partition ;; - *) echo "Unknown state: $STATE!" + *) echo "Unknown state: $EXPECTED_STATE!" exit 1 ;; esac - VAL=$(cat $FILE) + ACTUAL_STATE=$(cat $FILE) - case "$VAL" in + case "$ACTUAL_STATE" in member) VAL=0 ;; root) VAL=1 @@ -642,7 +747,7 @@ check_cgroup_states() [[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && { DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective) [[ -n "$DOMS" ]] && - echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE + echo " [$CGRP_DIR] sched-domain: $DOMS" > $CONSOLE } done return 0 @@ -659,28 +764,29 @@ check_cgroup_states() # only CPUs in isolated partitions as well as those that are isolated at # boot time. # -# $1 - expected isolated cpu list(s) <isolcpus1>{,<isolcpus2>} +# $1 - expected isolated cpu list(s) <isolcpus1>{|<isolcpus2>} # <isolcpus1> - expected sched/domains value # <isolcpus2> - cpuset.cpus.isolated value = <isolcpus1> if not defined # check_isolcpus() { - EXPECT_VAL=$1 - ISOLCPUS= + EXPECTED_ISOLCPUS=$1 + ISCPUS=${CGROUP2}/cpuset.cpus.isolated + ISOLCPUS=$(cat $ISCPUS) + HKICPUS=$(cat /sys/devices/system/cpu/isolated) LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains - ISCPUS=${CGROUP2}/cpuset.cpus.isolated - if [[ $EXPECT_VAL = . ]] + if [[ $EXPECTED_ISOLCPUS = . 
]] then - EXPECT_VAL= - EXPECT_VAL2= - elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]] + EXPECTED_ISOLCPUS= + EXPECTED_SDOMAIN= + elif [[ $(expr $EXPECTED_ISOLCPUS : ".*|.*") > 0 ]] then - set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g") - EXPECT_VAL=$1 - EXPECT_VAL2=$2 + set -- $(echo $EXPECTED_ISOLCPUS | sed -e "s/|/ /g") + EXPECTED_ISOLCPUS=$2 + EXPECTED_SDOMAIN=$1 else - EXPECT_VAL2=$EXPECT_VAL + EXPECTED_SDOMAIN=$EXPECTED_ISOLCPUS fi # @@ -689,20 +795,26 @@ check_isolcpus() # to make appending those CPUs easier. # [[ -n "$BOOT_ISOLCPUS" ]] && { - EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS} - EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS} + EXPECTED_ISOLCPUS=${EXPECTED_ISOLCPUS:+${EXPECTED_ISOLCPUS},}${BOOT_ISOLCPUS} + EXPECTED_SDOMAIN=${EXPECTED_SDOMAIN:+${EXPECTED_SDOMAIN},}${BOOT_ISOLCPUS} } # # Check cpuset.cpus.isolated cpumask # - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && { # Take a 50ms pause and try again pause 0.05 ISOLCPUS=$(cat $ISCPUS) } - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && return 1 ISOLCPUS= + EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN + + # + # The inverse of HK_TYPE_DOMAIN cpumask in $HKICPUS should match $ISOLCPUS + # + [[ "$ISOLCPUS" != "$HKICPUS" ]] && return 1 # # Use the sched domain in debugfs to check isolated CPUs, if available @@ -736,7 +848,7 @@ check_isolcpus() done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU - [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] + [[ "$EXPECTED_SDOMAIN" = "$ISOLCPUS" ]] } test_fail() @@ -774,6 +886,63 @@ null_isolcpus_check() } # +# Check state transition test result +# $1 - Test number +# $2 - Expected effective CPU values +# $3 - Expected partition states +# $4 - Expected isolated CPUs +# +check_test_results() +{ + _NR=$1 + _ECPUS="$2" + _PSTATES="$3" + _ISOLCPUS="$4" + + [[ -n "$_ECPUS" && "$_ECPUS" != . ]] && { + check_effective_cpus $_ECPUS + [[ $? 
-ne 0 ]] && test_fail $_NR "effective CPU" \ + "Cgroup $CGRP: expected $EXPECTED_CPUS, got $ACTUAL_CPUS" + } + + [[ -n "$_PSTATES" && "$_PSTATES" != . ]] && { + check_cgroup_states $_PSTATES + [[ $? -ne 0 ]] && test_fail $_NR states \ + "Cgroup $CGRP: expected $EXPECTED_STATE, got $ACTUAL_STATE" + } + + # Compare the expected isolated CPUs with the actual ones, + # if available + [[ -n "$_ISOLCPUS" ]] && { + check_isolcpus $_ISOLCPUS + [[ $? -ne 0 ]] && { + [[ -n "$BOOT_ISOLCPUS" ]] && _ISOLCPUS=${_ISOLCPUS},${BOOT_ISOLCPUS} + test_fail $_NR "isolated CPU" \ + "Expect $_ISOLCPUS, get $ISOLCPUS instead" + } + } + reset_cgroup_states + # + # Check to see if effective cpu list changes + # + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + RETRY=0 + while [[ $_NEWLIST != $CPULIST && $RETRY -lt 8 ]] + do + # Wait a bit longer & recheck a few times + pause 0.02 + ((RETRY++)) + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + done + [[ $_NEWLIST != $CPULIST ]] && { + echo "Effective cpus changed to $_NEWLIST after test $_NR!" + exit 1 + } + null_isolcpus_check + [[ $VERBOSE -gt 0 ]] && echo "Test $I done." +} + +# # Run cpuset state transition test # $1 - test matrix name # @@ -784,7 +953,8 @@ null_isolcpus_check() run_state_test() { TEST=$1 - CONTROLLER=cpuset + CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1" + RESET_LIST="A1/A2/A3 A1/A2 A1 B1" I=0 eval CNT="\${#$TEST[@]}" @@ -812,10 +982,11 @@ run_state_test() STATES=${11} ICPUS=${12} - set_ctrl_state_noerr B1 $OLD_B1 set_ctrl_state_noerr A1 $OLD_A1 set_ctrl_state_noerr A1/A2 $OLD_A2 set_ctrl_state_noerr A1/A2/A3 $OLD_A3 + set_ctrl_state_noerr B1 $OLD_B1 + RETVAL=0 set_ctrl_state A1 $NEW_A1; ((RETVAL += $?)) set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?)) @@ -824,47 +995,78 @@ run_state_test() [[ $RETVAL -ne $RESULT ]] && test_fail $I result - [[ -n "$ECPUS" && "$ECPUS" != . ]] && { - check_effective_cpus $ECPUS - [[ $? 
-ne 0 ]] && test_fail $I "effective CPU" - } + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" + ((I++)) + done + echo "All $I tests of $TEST PASSED." +} - [[ -n "$STATES" && "$STATES" != . ]] && { - check_cgroup_states $STATES - [[ $? -ne 0 ]] && test_fail $I states - } +# +# Run cpuset remote partition state transition test +# $1 - test matrix name +# +run_remote_state_test() +{ + TEST=$1 + [[ -d rtest ]] || mkdir rtest + cd rtest + echo +cpuset > cgroup.subtree_control + echo "1-7" > cpuset.cpus + echo "1-7" > cpuset.cpus.exclusive + CGROUP_LIST=".. . p1 p2 p1/c11 p1/c12 p2/c21 p2/c22" + RESET_LIST="p1/c11 p1/c12 p2/c21 p2/c22 p1 p2" + I=0 + eval CNT="\${#$TEST[@]}" - # Compare the expected isolated CPUs with the actual ones, - # if available - [[ -n "$ICPUS" ]] && { - check_isolcpus $ICPUS - [[ $? -ne 0 ]] && { - [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS} - test_fail $I "isolated CPU" \ - "Expect $ICPUS, get $ISOLCPUS instead" - } - } - reset_cgroup_states - # - # Check to see if effective cpu list changes - # - NEWLIST=$(cat cpuset.cpus.effective) - RETRY=0 - while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] - do - # Wait a bit longer & recheck a few times - pause 0.02 - ((RETRY++)) - NEWLIST=$(cat cpuset.cpus.effective) - done - [[ $NEWLIST != $CPULIST ]] && { - echo "Effective cpus changed to $NEWLIST after test $I!" - exit 1 + reset_cgroup_states + console_msg "Running remote partition state transition test ..." + + while [[ $I -lt $CNT ]] + do + echo "Running test $I ..." > $CONSOLE + [[ $VERBOSE -gt 1 ]] && { + echo "" + eval echo \${$TEST[$I]} } - null_isolcpus_check - [[ $VERBOSE -gt 0 ]] && echo "Test $I done." 
+ eval set -- "\${$TEST[$I]}" + OLD_p1=$1 + OLD_p2=$2 + OLD_c11=$3 + OLD_c12=$4 + OLD_c21=$5 + OLD_c22=$6 + NEW_p1=$7 + NEW_p2=$8 + NEW_c11=$9 + NEW_c12=${10} + NEW_c21=${11} + NEW_c22=${12} + ECPUS=${13} + STATES=${14} + ICPUS=${15} + + set_ctrl_state_noerr p1 $OLD_p1 + set_ctrl_state_noerr p2 $OLD_p2 + set_ctrl_state_noerr p1/c11 $OLD_c11 + set_ctrl_state_noerr p1/c12 $OLD_c12 + set_ctrl_state_noerr p2/c21 $OLD_c21 + set_ctrl_state_noerr p2/c22 $OLD_c22 + + RETVAL=0 + set_ctrl_state p1 $NEW_p1 ; ((RETVAL += $?)) + set_ctrl_state p2 $NEW_p2 ; ((RETVAL += $?)) + set_ctrl_state p1/c11 $NEW_c11; ((RETVAL += $?)) + set_ctrl_state p1/c12 $NEW_c12; ((RETVAL += $?)) + set_ctrl_state p2/c21 $NEW_c21; ((RETVAL += $?)) + set_ctrl_state p2/c22 $NEW_c22; ((RETVAL += $?)) + + [[ $RETVAL -ne 0 ]] && test_fail $I result + + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" ((I++)) done + cd .. + rmdir rtest echo "All $I tests of $TEST PASSED." } @@ -932,6 +1134,7 @@ test_isolated() echo $$ > $CGROUP2/cgroup.procs [[ -d A1 ]] && rmdir A1 null_isolcpus_check + pause 0.05 } # @@ -997,10 +1200,13 @@ test_inotify() else echo "Inotify test PASSED" fi + echo member > cpuset.cpus.partition + echo "" > cpuset.cpus } trap cleanup 0 2 3 6 run_state_test TEST_MATRIX +run_remote_state_test REMOTE_TEST_MATRIX test_isolated test_inotify echo "All tests PASSED." 
diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh index 42a6628fb8bc..1c0444729e70 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh @@ -18,7 +18,7 @@ write_test() { echo "testing $interface $value" echo $value > $dir/$interface new=$(cat $dir/$interface) - [[ $value -ne $(cat $dir/$interface) ]] && { + [[ "$value" != "$new" ]] && { echo "$interface write $value failed: new:$new" exit 1 } diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh index 3f45512fb512..7406c24be1ac 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Test the special cpuset v1 hotplug case where a cpuset become empty of diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index 8730645d363a..97fae92c8387 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -11,7 +11,7 @@ #include <string.h> #include <sys/wait.h> -#include "../kselftest.h" +#include "kselftest.h" #include "cgroup_util.h" #define DEBUG @@ -804,6 +804,662 @@ cleanup: return ret; } +/* + * Get the current frozen_usec for the cgroup. + */ +static long cg_check_freezetime(const char *cgroup) +{ + return cg_read_key_long(cgroup, "cgroup.stat.local", + "frozen_usec "); +} + +/* + * Test that the freeze time will behave as expected for an empty cgroup. + */ +static int test_cgfreezer_time_empty(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + long prev, curr; + + cgroup = cg_name(root, "cg_time_test_empty"); + if (!cgroup) + goto cleanup; + + /* + * 1) Create an empty cgroup and check that its freeze time + * is 0. 
+ */ + if (cg_create(cgroup)) + goto cleanup; + + curr = cg_check_freezetime(cgroup); + if (curr < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + if (curr > 0) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + if (cg_freeze_nowait(cgroup, true)) + goto cleanup; + + /* + * 2) Sleep for 1000 us. Check that the freeze time is at + * least 1000 us. + */ + usleep(1000); + curr = cg_check_freezetime(cgroup); + if (curr < 1000) { + debug("Expect time (%ld) to be at least 1000 us\n", + curr); + goto cleanup; + } + + /* + * 3) Unfreeze the cgroup. Check that the freeze time is + * larger than at 2). + */ + if (cg_freeze_nowait(cgroup, false)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 4) Check the freeze time again to ensure that it has not + * changed. + */ + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be unchanged from previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * A simple test for cgroup freezer time accounting. This test follows + * the same flow as test_cgfreezer_time_empty, but with a single process + * in the cgroup. + */ +static int test_cgfreezer_time_simple(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + long prev, curr; + + cgroup = cg_name(root, "cg_time_test_simple"); + if (!cgroup) + goto cleanup; + + /* + * 1) Create a cgroup and check that its freeze time is 0. 
+ */ + if (cg_create(cgroup)) + goto cleanup; + + curr = cg_check_freezetime(cgroup); + if (curr < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + if (curr > 0) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + /* + * 2) Populate the cgroup with one child and check that the + * freeze time is still 0. + */ + cg_run_nowait(cgroup, child_fn, NULL); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr > prev) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + if (cg_freeze_nowait(cgroup, true)) + goto cleanup; + + /* + * 3) Sleep for 1000 us. Check that the freeze time is at + * least 1000 us. + */ + usleep(1000); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr < 1000) { + debug("Expect time (%ld) to be at least 1000 us\n", + curr); + goto cleanup; + } + + /* + * 4) Unfreeze the cgroup. Check that the freeze time is + * larger than at 3). + */ + if (cg_freeze_nowait(cgroup, false)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 5) Sleep for 1000 us. Check that the freeze time is the + * same as at 4). + */ + usleep(1000); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be unchanged from previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * Test that freezer time accounting works as expected, even while we're + * populating a cgroup with processes. 
+ */
+static int test_cgfreezer_time_populate(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cgroup = NULL;
+	long prev, curr;
+	int i;
+
+	cgroup = cg_name(root, "cg_time_test_populate");
+	if (!cgroup)
+		goto cleanup;
+
+	if (cg_create(cgroup))
+		goto cleanup;
+
+	curr = cg_check_freezetime(cgroup);
+	if (curr < 0) { /* a negative reading is reported as SKIP, not FAIL */
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+	if (curr > 0) {
+		debug("Expect time (%ld) to be 0\n", curr);
+		goto cleanup;
+	}
+
+	/*
+	 * 1) Populate the cgroup with 100 processes. Check that
+	 * the freeze time is 0.
+	 *
+	 * The return values of cg_run_nowait() are not checked here;
+	 * step 2) waits until all 100 processes are counted.
+	 */
+	for (i = 0; i < 100; i++)
+		cg_run_nowait(cgroup, child_fn, NULL);
+	prev = curr;
+	curr = cg_check_freezetime(cgroup);
+	if (curr != prev) {
+		debug("Expect time (%ld) to be 0\n", curr);
+		goto cleanup;
+	}
+
+	/*
+	 * 2) Wait for the group to become fully populated. Check
+	 * that the freeze time is 0.
+	 */
+	if (cg_wait_for_proc_count(cgroup, 100))
+		goto cleanup;
+	prev = curr;
+	curr = cg_check_freezetime(cgroup);
+	if (curr != prev) {
+		debug("Expect time (%ld) to be 0\n", curr);
+		goto cleanup;
+	}
+
+	/*
+	 * 3) Freeze the cgroup and then populate it with 100 more
+	 * processes. Check that the freeze time continues to grow.
+	 *
+	 * The time is sampled twice: once right after the freeze
+	 * request and once after the extra children are spawned. Each
+	 * sample must be strictly larger than the one before it.
+	 */
+	if (cg_freeze_nowait(cgroup, true))
+		goto cleanup;
+	prev = curr;
+	curr = cg_check_freezetime(cgroup);
+	if (curr <= prev) {
+		debug("Expect time (%ld) to be more than previous check (%ld)\n",
+		      curr, prev);
+		goto cleanup;
+	}
+
+	for (i = 0; i < 100; i++)
+		cg_run_nowait(cgroup, child_fn, NULL);
+	prev = curr;
+	curr = cg_check_freezetime(cgroup);
+	if (curr <= prev) {
+		debug("Expect time (%ld) to be more than previous check (%ld)\n",
+		      curr, prev);
+		goto cleanup;
+	}
+
+	/*
+	 * 4) Wait for the group to become fully populated. Check
+	 * that the freeze time is larger than at 3).
+	 */
+	if (cg_wait_for_proc_count(cgroup, 200))
+		goto cleanup;
+	prev = curr;
+	curr = cg_check_freezetime(cgroup);
+	if (curr <= prev) {
+		debug("Expect time (%ld) to be more than previous check (%ld)\n",
+		      curr, prev);
+		goto cleanup;
+	}
+
+	/*
+	 * 5) Unfreeze the cgroup. Check that the freeze time is
+	 * larger than at 4).
+	 */
+	if (cg_freeze_nowait(cgroup, false))
+		goto cleanup;
+	prev = curr;
+	curr = cg_check_freezetime(cgroup);
+	if (curr <= prev) {
+		debug("Expect time (%ld) to be more than previous check (%ld)\n",
+		      curr, prev);
+		goto cleanup;
+	}
+
+	/*
+	 * 6) Kill the processes. Check that the freeze time is the
+	 * same as it was at 5).
+	 */
+	if (cg_killall(cgroup))
+		goto cleanup;
+	prev = curr;
+	curr = cg_check_freezetime(cgroup);
+	if (curr != prev) {
+		debug("Expect time (%ld) to be unchanged from previous check (%ld)\n",
+		      curr, prev);
+		goto cleanup;
+	}
+
+	/*
+	 * 7) Freeze and unfreeze the cgroup. Check that the freeze
+	 * time is larger than it was at 6).
+	 *
+	 * NOTE(review): the freeze and unfreeze requests are issued
+	 * back to back with no sleep in between, yet a strictly larger
+	 * reading is required - confirm this cannot be flaky.
+	 */
+	if (cg_freeze_nowait(cgroup, true))
+		goto cleanup;
+	if (cg_freeze_nowait(cgroup, false))
+		goto cleanup;
+	prev = curr;
+	curr = cg_check_freezetime(cgroup);
+	if (curr <= prev) {
+		debug("Expect time (%ld) to be more than previous check (%ld)\n",
+		      curr, prev);
+		goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cgroup)
+		cg_destroy(cgroup);
+	free(cgroup); /* free(NULL) is a no-op, so no guard is needed */
+	return ret;
+}
+
+/*
+ * Test that frozen time for a cgroup continues to work as expected,
+ * even as processes are migrated. Frozen cgroup A's freeze time should
+ * continue to increase and running cgroup B's should stay 0.
+ */
+static int test_cgfreezer_time_migrate(const char *root)
+{
+	long prev_A, curr_A, curr_B;
+	char *cgroup[2] = {0};
+	int ret = KSFT_FAIL;
+	int pid;
+
+	cgroup[0] = cg_name(root, "cg_time_test_migrate_A");
+	if (!cgroup[0])
+		goto cleanup;
+
+	cgroup[1] = cg_name(root, "cg_time_test_migrate_B");
+	if (!cgroup[1])
+		goto cleanup;
+
+	if (cg_create(cgroup[0]))
+		goto cleanup;
+
+	if (cg_check_freezetime(cgroup[0]) < 0) { /* negative reading: SKIP, not FAIL */
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (cg_create(cgroup[1]))
+		goto cleanup;
+
+	pid = cg_run_nowait(cgroup[0], child_fn, NULL);
+	if (pid < 0)
+		goto cleanup;
+
+	if (cg_wait_for_proc_count(cgroup[0], 1))
+		goto cleanup;
+
+	curr_A = cg_check_freezetime(cgroup[0]);
+	if (curr_A) { /* nothing has been frozen yet, so any non-zero value fails */
+		debug("Expect time (%ld) to be 0\n", curr_A);
+		goto cleanup;
+	}
+	curr_B = cg_check_freezetime(cgroup[1]);
+	if (curr_B) {
+		debug("Expect time (%ld) to be 0\n", curr_B);
+		goto cleanup;
+	}
+
+	/*
+	 * Freeze cgroup A and wait for the freeze to complete. Its
+	 * freeze time was 0 at the previous check, so the new reading
+	 * must be strictly positive.
+	 */
+	if (cg_freeze_wait(cgroup[0], true))
+		goto cleanup;
+	prev_A = curr_A;
+	curr_A = cg_check_freezetime(cgroup[0]);
+	if (curr_A <= prev_A) {
+		debug("Expect time (%ld) to be > 0\n", curr_A);
+		goto cleanup;
+	}
+
+	/*
+	 * Migrate from A (frozen) to B (running).
+	 *
+	 * B is never frozen in this test, so after the sleep its
+	 * freeze time must still read 0, while A's reading must have
+	 * grown since the previous sample.
+	 */
+	if (cg_enter(cgroup[1], pid))
+		goto cleanup;
+
+	usleep(1000);
+	curr_B = cg_check_freezetime(cgroup[1]);
+	if (curr_B) {
+		debug("Expect time (%ld) to be 0\n", curr_B);
+		goto cleanup;
+	}
+
+	prev_A = curr_A;
+	curr_A = cg_check_freezetime(cgroup[0]);
+	if (curr_A <= prev_A) {
+		debug("Expect time (%ld) to be more than previous check (%ld)\n",
+		      curr_A, prev_A);
+		goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cgroup[0])
+		cg_destroy(cgroup[0]);
+	free(cgroup[0]); /* free(NULL) is a no-op, so no guard is needed */
+	if (cgroup[1])
+		cg_destroy(cgroup[1]);
+	free(cgroup[1]);
+	return ret;
+}
+
+/*
+ * The test creates a cgroup and freezes it. Then it creates a child cgroup.
+ * After that it checks that the child cgroup has a non-zero freeze time
+ * that is less than the parent's.
Next, it freezes the child, unfreezes
+ * the parent, and sleeps. Finally, it checks that the child's freeze
+ * time has grown larger than the parent's.
+ */
+static int test_cgfreezer_time_parent(const char *root)
+{
+	char *parent, *child = NULL;
+	int ret = KSFT_FAIL;
+	long ptime, ctime;
+
+	parent = cg_name(root, "cg_test_parent_A");
+	if (!parent)
+		goto cleanup;
+
+	child = cg_name(parent, "cg_test_parent_B"); /* the child path is built under the parent's */
+	if (!child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_check_freezetime(parent) < 0) { /* negative reading: SKIP, not FAIL */
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (cg_freeze_wait(parent, true))
+		goto cleanup;
+
+	usleep(1000); /* the parent stays frozen for 1000 us before the child is created */
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_check_frozen(child, true))
+		goto cleanup;
+
+	/*
+	 * Since the parent was frozen the entire time the child cgroup
+	 * was being created, we expect the parent's freeze time to be
+	 * larger than the child's.
+	 *
+	 * Ideally, we would be able to check both times simultaneously,
+	 * but here we get the child's after we get the parent's.
+	 *
+	 * Only ptime > ctime is verified here; ctime itself is not
+	 * required to be non-zero by this check.
+	 */
+	ptime = cg_check_freezetime(parent);
+	ctime = cg_check_freezetime(child);
+	if (ptime <= ctime) {
+		debug("Expect ptime (%ld) > ctime (%ld)\n", ptime, ctime);
+		goto cleanup;
+	}
+
+	if (cg_freeze_nowait(child, true))
+		goto cleanup;
+
+	if (cg_freeze_wait(parent, false))
+		goto cleanup;
+
+	if (cg_check_frozen(child, true)) /* the child must still be frozen after the parent is thawed */
+		goto cleanup;
+
+	usleep(100000); /* 100 ms with the child frozen and the parent thawed */
+
+	ctime = cg_check_freezetime(child);
+	ptime = cg_check_freezetime(parent);
+
+	if (ctime <= ptime) {
+		debug("Expect ctime (%ld) > ptime (%ld)\n", ctime, ptime);
+		goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child) /* the child is destroyed before the parent that contains it */
+		cg_destroy(child);
+	free(child);
+	if (parent)
+		cg_destroy(parent);
+	free(parent);
+	return ret;
+}
+
+/*
+ * The test creates a parent cgroup and a child cgroup. Then, it freezes
+ * the child and checks that the child's freeze time is greater than the
+ * parent's, which should be zero.
+ */
+static int test_cgfreezer_time_child(const char *root)
+{
+	char *parent, *child = NULL;
+	int ret = KSFT_FAIL;
+	long ptime, ctime;
+
+	parent = cg_name(root, "cg_test_child_A");
+	if (!parent)
+		goto cleanup;
+
+	child = cg_name(parent, "cg_test_child_B");
+	if (!child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	/* A negative reading is reported as SKIP rather than FAIL. */
+	if (cg_check_freezetime(parent) < 0) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_freeze_wait(child, true))
+		goto cleanup;
+
+	/* Only the child was frozen, so the parent must still read 0. */
+	ctime = cg_check_freezetime(child);
+	ptime = cg_check_freezetime(parent);
+	if (ptime != 0) {
+		debug("Expect ptime (%ld) to be 0\n", ptime);
+		goto cleanup;
+	}
+
+	/*
+	 * The test fails when ctime <= ptime, so the expectation that
+	 * is reported must be the opposite relation: ctime > ptime.
+	 */
+	if (ctime <= ptime) {
+		debug("Expect ctime (%ld) > ptime (%ld)\n", ctime, ptime);
+		goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	free(child);
+	if (parent)
+		cg_destroy(parent);
+	free(parent);
+	return ret;
+}
+
+/*
+ * The test creates the following hierarchy:
+ *       A
+ *       |
+ *       B
+ *       |
+ *       C
+ *
+ * Then it freezes the cgroups in the order C, B, A.
+ * Then it unfreezes the cgroups in the order A, B, C.
+ * Then it checks that C's freeze time is larger than B's and
+ * that B's is larger than A's.
+ */
+static int test_cgfreezer_time_nested(const char *root)
+{
+	char *cgroup[3] = {0};
+	int ret = KSFT_FAIL;
+	long time[3] = {0};
+	int i;
+
+	cgroup[0] = cg_name(root, "cg_test_time_A");
+	if (!cgroup[0])
+		goto cleanup;
+
+	cgroup[1] = cg_name(cgroup[0], "B");
+	if (!cgroup[1])
+		goto cleanup;
+
+	cgroup[2] = cg_name(cgroup[1], "C");
+	if (!cgroup[2])
+		goto cleanup;
+
+	if (cg_create(cgroup[0]))
+		goto cleanup;
+
+	/* A negative reading is reported as SKIP rather than FAIL. */
+	if (cg_check_freezetime(cgroup[0]) < 0) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (cg_create(cgroup[1]))
+		goto cleanup;
+
+	if (cg_create(cgroup[2]))
+		goto cleanup;
+
+	/* Freeze bottom-up: C, then B, then A. */
+	if (cg_freeze_nowait(cgroup[2], true))
+		goto cleanup;
+
+	if (cg_freeze_nowait(cgroup[1], true))
+		goto cleanup;
+
+	if (cg_freeze_nowait(cgroup[0], true))
+		goto cleanup;
+
+	usleep(1000);
+
+	/* Unfreeze top-down: A, then B, then C. */
+	if (cg_freeze_nowait(cgroup[0], false))
+		goto cleanup;
+
+	if (cg_freeze_nowait(cgroup[1], false))
+		goto cleanup;
+
+	if (cg_freeze_nowait(cgroup[2], false))
+		goto cleanup;
+
+	time[2] = cg_check_freezetime(cgroup[2]);
+	time[1] = cg_check_freezetime(cgroup[1]);
+	time[0] = cg_check_freezetime(cgroup[0]);
+
+	if (time[2] <= time[1]) {
+		debug("Expect C's time (%ld) > B's time (%ld)\n", time[2], time[1]);
+		goto cleanup;
+	}
+
+	if (time[1] <= time[0]) {
+		debug("Expect B's time (%ld) > A's time (%ld)\n", time[1], time[0]);
+		goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+
+cleanup:
+	/*
+	 * Walk all three slots, deepest first. A NULL slot is skipped
+	 * instead of ending the loop, so names allocated before a
+	 * failed cg_name() call are still released.
+	 */
+	for (i = 2; i >= 0; i--) {
+		if (!cgroup[i])
+			continue;
+		cg_destroy(cgroup[i]);
+		free(cgroup[i]);
+	}
+
+	return ret;
+}
+
 #define T(x) { x, #x } struct cgfreezer_test { int (*fn)(const char *root); @@ -819,14 +1475,23 @@ struct cgfreezer_test { T(test_cgfreezer_stopped), T(test_cgfreezer_ptraced), T(test_cgfreezer_vfork), + T(test_cgfreezer_time_empty), + T(test_cgfreezer_time_simple), + T(test_cgfreezer_time_populate), + T(test_cgfreezer_time_migrate), + T(test_cgfreezer_time_parent), + T(test_cgfreezer_time_child), + T(test_cgfreezer_time_nested), }; #undef T int main(int argc, char *argv[]) { char root[PATH_MAX]; - int
i, ret = EXIT_SUCCESS; + int i; + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); for (i = 0; i < ARRAY_SIZE(tests); i++) { @@ -838,11 +1503,10 @@ int main(int argc, char *argv[]) ksft_test_result_skip("%s\n", tests[i].name); break; default: - ret = EXIT_FAILURE; ksft_test_result_fail("%s\n", tests[i].name); break; } } - return ret; + ksft_finished(); } diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c index 856f9508ea56..f451aa449be6 100644 --- a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c +++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c @@ -7,7 +7,7 @@ #include <stdlib.h> #include <string.h> #include <fcntl.h> -#include "../kselftest.h" +#include "kselftest.h" #include "cgroup_util.h" #define ADDR ((void *)(0x0UL)) diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c index 0e5bb6c7307a..f6cd23a8ecc7 100644 --- a/tools/testing/selftests/cgroup/test_kill.c +++ b/tools/testing/selftests/cgroup/test_kill.c @@ -9,7 +9,7 @@ #include <sys/types.h> #include <unistd.h> -#include "../kselftest.h" +#include "kselftest.h" #include "../pidfd/pidfd.h" #include "cgroup_util.h" @@ -86,7 +86,7 @@ cleanup: wait_for_pid(pids[i]); if (ret == KSFT_PASS && - cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n")) + cg_read_strcmp_wait(cgroup, "cgroup.events", "populated 0\n")) ret = KSFT_FAIL; if (cgroup) @@ -190,7 +190,8 @@ cleanup: wait_for_pid(pids[i]); if (ret == KSFT_PASS && - cg_read_strcmp(cgroup[0], "cgroup.events", "populated 0\n")) + cg_read_strcmp_wait(cgroup[0], "cgroup.events", + "populated 0\n")) ret = KSFT_FAIL; for (i = 9; i >= 0 && cgroup[i]; i--) { @@ -251,7 +252,7 @@ cleanup: wait_for_pid(pid); if (ret == KSFT_PASS && - cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n")) + cg_read_strcmp_wait(cgroup, "cgroup.events", 
"populated 0\n")) ret = KSFT_FAIL; if (cgroup) @@ -274,8 +275,10 @@ struct cgkill_test { int main(int argc, char *argv[]) { char root[PATH_MAX]; - int i, ret = EXIT_SUCCESS; + int i; + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); for (i = 0; i < ARRAY_SIZE(tests); i++) { @@ -287,11 +290,10 @@ int main(int argc, char *argv[]) ksft_test_result_skip("%s\n", tests[i].name); break; default: - ret = EXIT_FAILURE; ksft_test_result_fail("%s\n", tests[i].name); break; } } - return ret; + ksft_finished(); } diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 96693d8772be..12f59925500b 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -14,7 +14,7 @@ #include <sys/sysinfo.h> #include <pthread.h> -#include "../kselftest.h" +#include "kselftest.h" #include "cgroup_util.h" @@ -26,6 +26,7 @@ */ #define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs()) +#define KMEM_DEAD_WAIT_RETRIES 80 static int alloc_dcache(const char *cgroup, void *arg) { @@ -306,8 +307,7 @@ static int test_kmem_dead_cgroups(const char *root) { int ret = KSFT_FAIL; char *parent; - long dead; - int i; + long dead = -1; parent = cg_name(root, "kmem_dead_cgroups_test"); if (!parent) @@ -322,19 +322,19 @@ static int test_kmem_dead_cgroups(const char *root) if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30)) goto cleanup; - for (i = 0; i < 5; i++) { - dead = cg_read_key_long(parent, "cgroup.stat", - "nr_dying_descendants "); - if (dead == 0) { - ret = KSFT_PASS; - break; - } - /* - * Reclaiming cgroups might take some time, - * let's wait a bit and repeat. - */ - sleep(1); - } + /* + * Allow up to ~8s for reclaim of dying descendants to complete. 
+ * This is a generous upper bound derived from stress testing, not + * from a specific kernel constant, and can be adjusted if reclaim + * behavior changes in the future. + */ + dead = cg_read_key_long_poll(parent, "cgroup.stat", + "nr_dying_descendants ", 0, KMEM_DEAD_WAIT_RETRIES, + DEFAULT_WAIT_INTERVAL_US); + if (dead) + goto cleanup; + + ret = KSFT_PASS; cleanup: cg_destroy(parent); @@ -368,11 +368,15 @@ static int test_percpu_basic(const char *root) for (i = 0; i < 1000; i++) { child = cg_name_indexed(parent, "child", i); - if (!child) - return -1; + if (!child) { + ret = -1; + goto cleanup_children; + } - if (cg_create(child)) + if (cg_create(child)) { + free(child); goto cleanup_children; + } free(child); } @@ -418,8 +422,10 @@ struct kmem_test { int main(int argc, char **argv) { char root[PATH_MAX]; - int i, ret = EXIT_SUCCESS; + int i; + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); @@ -443,11 +449,10 @@ int main(int argc, char **argv) ksft_test_result_skip("%s\n", tests[i].name); break; default: - ret = EXIT_FAILURE; ksft_test_result_fail("%s\n", tests[i].name); break; } } - return ret; + ksft_finished(); } diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 16f5d74ae762..b43da9bc20c4 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -10,6 +10,7 @@ #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> +#include <sys/inotify.h> #include <sys/socket.h> #include <sys/wait.h> #include <arpa/inet.h> @@ -18,12 +19,92 @@ #include <errno.h> #include <sys/mman.h> -#include "../kselftest.h" +#include "kselftest.h" #include "cgroup_util.h" +#define MEMCG_SOCKSTAT_WAIT_RETRIES 30 + static bool has_localevents; static bool has_recursiveprot; +int get_temp_fd(void) +{ + return open(".", O_TMPFILE | O_RDWR | O_EXCL); 
+}
+
+/*
+ * Grow the file behind @fd by @size bytes and issue one PAGE_SIZE read
+ * per PAGE_SIZE step of the new file size. The reads start at the
+ * descriptor's current offset and their results are not checked.
+ *
+ * Returns 0 on success, -1 if fstat() or ftruncate() fails.
+ */
+int alloc_pagecache(int fd, size_t size)
+{
+	char buf[PAGE_SIZE];
+	struct stat st;
+	size_t i; /* same type as the bound, so sizes above INT_MAX cannot overflow it */
+
+	if (fstat(fd, &st))
+		goto cleanup;
+
+	size += st.st_size;
+
+	if (ftruncate(fd, size))
+		goto cleanup;
+
+	for (i = 0; i < size; i += sizeof(buf))
+		read(fd, buf, sizeof(buf));
+
+	return 0;
+
+cleanup:
+	return -1;
+}
+
+/*
+ * Allocate @arg bytes (the size is passed through the pointer value),
+ * write one byte in every PAGE_SIZE step of the buffer, then free it.
+ * @cgroup is unused.
+ *
+ * Returns 0 on success, -1 if a non-empty allocation fails.
+ */
+int alloc_anon(const char *cgroup, void *arg)
+{
+	size_t size = (unsigned long)arg;
+	char *buf, *ptr;
+
+	buf = malloc(size);
+	/* malloc(0) may legitimately return NULL; only a sized failure is an error */
+	if (!buf && size)
+		return -1;
+
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+	free(buf);
+	return 0;
+}
+
+/*
+ * Count the newline-separated lines of /proc/swaps.
+ *
+ * Returns 1 if there is more than one line, 0 if there is at most one,
+ * and -1 if nothing could be read.
+ */
+int is_swap_enabled(void)
+{
+	char buf[PAGE_SIZE];
+	const char delim[] = "\n";
+	int cnt = 0;
+	char *line;
+
+	if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+		return -1;
+
+	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+		cnt++;
+
+	return cnt > 1;
+}
+
+/*
+ * Write @score to /proc/@pid/oom_score_adj.
+ *
+ * Returns 0 on success, or the negative result of the failing open()
+ * or dprintf() call.
+ */
+int set_oom_adj_score(int pid, int score)
+{
+	char path[PATH_MAX];
+	int fd, len;
+
+	sprintf(path, "/proc/%d/oom_score_adj", pid);
+
+	fd = open(path, O_WRONLY | O_APPEND);
+	if (fd < 0)
+		return fd;
+
+	len = dprintf(fd, "%d", score);
+	if (len < 0) {
+		close(fd);
+		return len;
+	}
+
+	close(fd);
+	return 0;
+}
+
 /* * This test creates two nested cgroups with and without enabling * the memory controller. @@ -380,10 +461,11 @@ static bool reclaim_until(const char *memcg, long goal); * * Then it checks actual memory usages and expects that: * A/B memory.current ~= 50M - * A/B/C memory.current ~= 29M - * A/B/D memory.current ~= 21M - * A/B/E memory.current ~= 0 - * A/B/F memory.current = 0 + * A/B/C memory.current ~= 29M [memory.events:low > 0] + * A/B/D memory.current ~= 21M [memory.events:low > 0] + * A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot, + * undefined otherwise] + * A/B/F memory.current = 0 [memory.events:low == 0] * (for origin of the numbers, see model in memcg_protection.m.)
* * After that it tries to allocate more than there is @@ -495,10 +577,10 @@ static int test_memcg_protection(const char *root, bool min) for (i = 0; i < ARRAY_SIZE(children); i++) c[i] = cg_read_long(children[i], "memory.current"); - if (!values_close(c[0], MB(29), 10)) + if (!values_close(c[0], MB(29), 15)) goto cleanup; - if (!values_close(c[1], MB(21), 10)) + if (!values_close(c[1], MB(21), 20)) goto cleanup; if (c[3] != 0) @@ -525,7 +607,14 @@ static int test_memcg_protection(const char *root, bool min) goto cleanup; } + /* + * Child 2 has memory.low=0, but some low protection may still be + * distributed down from its parent with memory.low=50M if cgroup2 + * memory_recursiveprot mount option is enabled. Ignore the low + * event count in this case. + */ for (i = 0; i < ARRAY_SIZE(children); i++) { + int ignore_low_events_index = has_recursiveprot ? 2 : -1; int no_low_events_index = 1; long low, oom; @@ -534,6 +623,8 @@ static int test_memcg_protection(const char *root, bool min) if (oom) goto cleanup; + if (i == ignore_low_events_index) + continue; if (i <= no_low_events_index && low <= 0) goto cleanup; if (i > no_low_events_index && low) @@ -1190,8 +1281,11 @@ static int tcp_server(const char *cgroup, void *arg) saddr.sin6_port = htons(srv_args->port); sk = socket(AF_INET6, SOCK_STREAM, 0); - if (sk < 0) + if (sk < 0) { + /* Pass back errno to the ctl_fd */ + write(ctl_fd, &errno, sizeof(errno)); return ret; + } if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) goto cleanup; @@ -1296,6 +1390,7 @@ static int test_memcg_sock(const char *root) int bind_retries = 5, ret = KSFT_FAIL, pid, err; unsigned short port; char *memcg; + long sock_post = -1; memcg = cg_name(root, "memcg_test"); if (!memcg) @@ -1321,6 +1416,12 @@ static int test_memcg_sock(const char *root) goto cleanup; close(args.ctl[0]); + /* Skip if address family not supported by protocol */ + if (err == EAFNOSUPPORT) { + ret = KSFT_SKIP; + goto cleanup; + } + if (!err) break; if 
(err != EADDRINUSE) @@ -1344,7 +1445,22 @@ static int test_memcg_sock(const char *root) if (cg_read_long(memcg, "memory.current") < 0) goto cleanup; - if (cg_read_key_long(memcg, "memory.stat", "sock ")) + /* + * memory.stat is updated asynchronously via the memcg rstat + * flushing worker, which runs periodically (every 2 seconds, + * see FLUSH_TIME). On a busy system, the "sock " counter may + * stay non-zero for a short period of time after the TCP + * connection is closed and all socket memory has been + * uncharged. + * + * Poll memory.stat for up to 3 seconds (~FLUSH_TIME plus some + * scheduling slack) and require that the "sock " counter + * eventually drops to zero. + */ + sock_post = cg_read_key_long_poll(memcg, "memory.stat", "sock ", 0, + MEMCG_SOCKSTAT_WAIT_RETRIES, + DEFAULT_WAIT_INTERVAL_US); + if (sock_post) goto cleanup; ret = KSFT_PASS; @@ -1537,6 +1653,115 @@ cleanup: return ret; }
+
+/*
+ * Read one inotify event from @inotify_fd and compare it with the
+ * expected mask and watch descriptor.
+ *
+ * Returns 0 on a match, -1 on a short read or a mismatch (the mismatch
+ * is also printed to stderr).
+ */
+static int read_event(int inotify_fd, int expected_event, int expected_wd)
+{
+	struct inotify_event event;
+	ssize_t len = 0;
+
+	len = read(inotify_fd, &event, sizeof(event));
+	if (len < (ssize_t)sizeof(event))
+		return -1;
+
+	if (event.mask != expected_event || event.wd != expected_wd) {
+		fprintf(stderr,
+			"event does not match expected values: mask %d (expected %d) wd %d (expected %d)\n",
+			event.mask, expected_event, event.wd, expected_wd);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Watch a cgroup's memory.events file for IN_DELETE_SELF, destroy the
+ * cgroup, and expect IN_DELETE_SELF followed by IN_IGNORED on the watch.
+ */
+static int test_memcg_inotify_delete_file(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg = NULL;
+	/* fd starts at -1 so cleanup never closes an unopened descriptor */
+	int fd = -1, wd;
+
+	memcg = cg_name(root, "memcg_test_0");
+
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	fd = inotify_init1(0);
+	if (fd == -1)
+		goto cleanup;
+
+	/*
+	 * NOTE(review): the string returned by cg_control() is passed
+	 * straight through and never released here - confirm whether
+	 * the caller owns it and has to free it.
+	 */
+	wd = inotify_add_watch(fd, cg_control(memcg, "memory.events"), IN_DELETE_SELF);
+	if (wd == -1)
+		goto cleanup;
+
+	if (cg_destroy(memcg))
+		goto cleanup;
+	/* Already destroyed: clear the name so cleanup does not destroy it again. */
+	free(memcg);
+	memcg = NULL;
+
+	if (read_event(fd, IN_DELETE_SELF, wd))
+		goto cleanup;
+
+	if (read_event(fd, IN_IGNORED, wd))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (fd >= 0)
+		close(fd);
+	if (memcg)
+		cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+/*
+ * Watch a cgroup directory itself for IN_DELETE_SELF, destroy the
+ * cgroup, and expect IN_DELETE_SELF followed by IN_IGNORED on the watch.
+ */
+static int test_memcg_inotify_delete_dir(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg = NULL;
+	/* fd starts at -1 so cleanup never closes an unopened descriptor */
+	int fd = -1, wd;
+
+	memcg = cg_name(root, "memcg_test_0");
+
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	fd = inotify_init1(0);
+	if (fd == -1)
+		goto cleanup;
+
+	wd = inotify_add_watch(fd, memcg, IN_DELETE_SELF);
+	if (wd == -1)
+		goto cleanup;
+
+	if (cg_destroy(memcg))
+		goto cleanup;
+	/* Already destroyed: clear the name so cleanup does not destroy it again. */
+	free(memcg);
+	memcg = NULL;
+
+	if (read_event(fd, IN_DELETE_SELF, wd))
+		goto cleanup;
+
+	if (read_event(fd, IN_IGNORED, wd))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (fd >= 0)
+		close(fd);
+	if (memcg)
+		cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
 #define T(x) { x, #x } struct memcg_test { int (*fn)(const char *root); @@ -1556,14 +1781,18 @@ struct memcg_test { T(test_memcg_oom_group_leaf_events), T(test_memcg_oom_group_parent_events), T(test_memcg_oom_group_score_events), + T(test_memcg_inotify_delete_file), + T(test_memcg_inotify_delete_dir), }; #undef T int main(int argc, char **argv) { char root[PATH_MAX]; - int i, proc_status, ret = EXIT_SUCCESS; + int i, proc_status; + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); @@ -1597,11 +1826,10 @@ int main(int argc, char **argv) ksft_test_result_skip("%s\n", tests[i].name); break; default: - ret = EXIT_FAILURE; ksft_test_result_fail("%s\n", tests[i].name); break; } } - return ret; + ksft_finished(); } diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c index 9ecb83c6cc5c..9a387c815d2c 100644 --- a/tools/testing/selftests/cgroup/test_pids.c +++ b/tools/testing/selftests/cgroup/test_pids.c @@ -9,7 +9,7 @@ #include <sys/types.h> #include <unistd.h> -#include "../kselftest.h"
+#include "kselftest.h" #include "cgroup_util.h" static int run_success(const char *cgroup, void *arg) @@ -77,6 +77,9 @@ static int test_pids_events(const char *root) char *cg_parent = NULL, *cg_child = NULL; int pid; + if (cgroup_feature("pids_localevents") <= 0) + return KSFT_SKIP; + cg_parent = cg_name(root, "pids_parent"); cg_child = cg_name(cg_parent, "pids_child"); if (!cg_parent || !cg_child) diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index 40de679248b8..a7bdcdd09d62 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -5,12 +5,14 @@ #include <unistd.h> #include <stdio.h> #include <signal.h> +#include <errno.h> +#include <fcntl.h> #include <sys/sysinfo.h> #include <string.h> #include <sys/wait.h> #include <sys/mman.h> -#include "../kselftest.h" +#include "kselftest.h" #include "cgroup_util.h" static int read_int(const char *path, size_t *value) @@ -338,7 +340,7 @@ static int test_zswap_writeback_one(const char *cgroup, bool wb) return -1; if (wb != !!zswpwb_after) { - ksft_print_msg("zswpwb_after is %ld while wb is %s", + ksft_print_msg("zswpwb_after is %ld while wb is %s\n", zswpwb_after, wb ? 
"enabled" : "disabled"); return -1; } @@ -574,6 +576,139 @@ out: return ret; } +struct incomp_child_args { + size_t size; + int pipefd[2]; + int madvise_ret; + int madvise_errno; +}; + +static int allocate_random_and_wait(const char *cgroup, void *arg) +{ + struct incomp_child_args *values = arg; + size_t size = values->size; + char *mem; + int fd; + ssize_t n; + + close(values->pipefd[0]); + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) + return -1; + + /* Fill with random data from /dev/urandom - incompressible */ + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) { + munmap(mem, size); + return -1; + } + + for (size_t i = 0; i < size; ) { + n = read(fd, mem + i, size - i); + if (n <= 0) + break; + i += n; + } + close(fd); + + /* Touch all pages to ensure they're faulted in */ + for (size_t i = 0; i < size; i += PAGE_SIZE) + mem[i] = mem[i]; + + /* Use MADV_PAGEOUT to push pages into zswap */ + values->madvise_ret = madvise(mem, size, MADV_PAGEOUT); + values->madvise_errno = errno; + + /* Notify parent that allocation and pageout are done */ + write(values->pipefd[1], "x", 1); + close(values->pipefd[1]); + + /* Keep memory alive for parent to check stats */ + pause(); + munmap(mem, size); + return 0; +} + +static long get_zswap_incomp(const char *cgroup) +{ + return cg_read_key_long(cgroup, "memory.stat", "zswap_incomp "); +} + +/* + * Test that incompressible pages (random data) are tracked by zswap_incomp. + * + * The child process allocates random data within memory.max, then uses + * MADV_PAGEOUT to push pages into zswap. The parent waits on a pipe for + * the child to finish, then checks the zswap_incomp stat before the child + * exits (zswap_incomp is a gauge that decreases on free). 
+ */
+static int test_zswap_incompressible(const char *root)
+{
+	int ret = KSFT_FAIL;
+	struct incomp_child_args *values;
+	char *test_group;
+	long zswap_incomp;
+	pid_t child_pid;
+	int child_status;
+	int pipe_rd, pipe_wr;
+	char buf;
+
+	/* Shared mapping: the child reports its madvise() result through it. */
+	values = mmap(0, sizeof(struct incomp_child_args), PROT_READ |
+			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (values == MAP_FAILED)
+		return KSFT_FAIL;
+
+	if (pipe(values->pipefd)) {
+		munmap(values, sizeof(struct incomp_child_args));
+		return KSFT_FAIL;
+	}
+
+	/*
+	 * Track the parent's pipe ends in locals so every exit path can
+	 * close whatever is still open. values->pipefd is left untouched
+	 * because the child reads it from the shared mapping.
+	 */
+	pipe_rd = values->pipefd[0];
+	pipe_wr = values->pipefd[1];
+
+	test_group = cg_name(root, "zswap_incompressible_test");
+	if (!test_group)
+		goto out;
+	if (cg_create(test_group))
+		goto out;
+	if (cg_write(test_group, "memory.max", "32M"))
+		goto out;
+
+	values->size = MB(4);
+	child_pid = cg_run_nowait(test_group, allocate_random_and_wait, values);
+	if (child_pid < 0)
+		goto out;
+
+	close(pipe_wr);
+	pipe_wr = -1;
+
+	/*
+	 * Wait for child to finish allocating and pageout. The result
+	 * of read() is not checked: if the child exits without writing,
+	 * the stat check below reports the failure.
+	 */
+	read(pipe_rd, &buf, 1);
+	close(pipe_rd);
+	pipe_rd = -1;
+
+	zswap_incomp = get_zswap_incomp(test_group);
+	if (zswap_incomp <= 0) {
+		long zswpout = get_zswpout(test_group);
+		long zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
+		long zswap_b = cg_read_key_long(test_group, "memory.stat", "zswap ");
+
+		ksft_print_msg("zswap_incomp not increased: %ld\n", zswap_incomp);
+		ksft_print_msg("debug: zswpout=%ld zswapped=%ld zswap_b=%ld\n",
+				zswpout, zswapped, zswap_b);
+		ksft_print_msg("debug: madvise ret=%d errno=%d\n",
+				values->madvise_ret, values->madvise_errno);
+		goto out_kill;
+	}
+
+	ret = KSFT_PASS;
+
+out_kill:
+	kill(child_pid, SIGTERM);
+	waitpid(child_pid, &child_status, 0);
+out:
+	/* Close the pipe ends that an early exit left open. */
+	if (pipe_wr >= 0)
+		close(pipe_wr);
+	if (pipe_rd >= 0)
+		close(pipe_rd);
+	if (test_group)
+		cg_destroy(test_group);
+	free(test_group);
+	munmap(values, sizeof(struct incomp_child_args));
+	return ret;
+}
+
 #define T(x) { x, #x } struct zswap_test { int (*fn)(const char *root); @@ -586,6 +721,7 @@ struct zswap_test { T(test_zswap_writeback_disabled), T(test_no_kmem_bypass), T(test_no_invasive_cgroup_shrink), + T(test_zswap_incompressible), }; #undef T @@
-597,8 +733,10 @@ static bool zswap_configured(void) int main(int argc, char **argv) { char root[PATH_MAX]; - int i, ret = EXIT_SUCCESS; + int i; + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); @@ -625,11 +763,10 @@ int main(int argc, char **argv) ksft_test_result_skip("%s\n", tests[i].name); break; default: - ret = EXIT_FAILURE; ksft_test_result_fail("%s\n", tests[i].name); break; } } - return ret; + ksft_finished(); } |
