diff options
Diffstat (limited to 'tools/testing/selftests/bpf/prog_tests')
207 files changed, 25125 insertions, 2697 deletions
diff --git a/tools/testing/selftests/bpf/prog_tests/access_variable_array.c b/tools/testing/selftests/bpf/prog_tests/access_variable_array.c deleted file mode 100644 index 08131782437c..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/access_variable_array.c +++ /dev/null @@ -1,16 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2022 Bytedance */ - -#include <test_progs.h> -#include "test_access_variable_array.skel.h" - -void test_access_variable_array(void) -{ - struct test_access_variable_array *skel; - - skel = test_access_variable_array__open_and_load(); - if (!ASSERT_OK_PTR(skel, "test_access_variable_array__open_and_load")) - return; - - test_access_variable_array__destroy(skel); -} diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c deleted file mode 100644 index 4ebd0da898f5..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ /dev/null @@ -1,703 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <test_progs.h> - -#define MAX_INSNS 512 -#define MAX_MATCHES 24 - -struct bpf_reg_match { - unsigned int line; - const char *reg; - const char *match; -}; - -struct bpf_align_test { - const char *descr; - struct bpf_insn insns[MAX_INSNS]; - enum { - UNDEF, - ACCEPT, - REJECT - } result; - enum bpf_prog_type prog_type; - /* Matches must be in order of increasing line */ - struct bpf_reg_match matches[MAX_MATCHES]; -}; - -static struct bpf_align_test tests[] = { - /* Four tests of known constants. These aren't staggeringly - * interesting since we track exact values now. - */ - { - .descr = "mov", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_3, 4), - BPF_MOV64_IMM(BPF_REG_3, 8), - BPF_MOV64_IMM(BPF_REG_3, 16), - BPF_MOV64_IMM(BPF_REG_3, 32), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - {0, "R1", "ctx()"}, - {0, "R10", "fp0"}, - {0, "R3_w", "2"}, - {1, "R3_w", "4"}, - {2, "R3_w", "8"}, - {3, "R3_w", "16"}, - {4, "R3_w", "32"}, - }, - }, - { - .descr = "shift", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4), - BPF_MOV64_IMM(BPF_REG_4, 32), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - {0, "R1", "ctx()"}, - {0, "R10", "fp0"}, - {0, "R3_w", "1"}, - {1, "R3_w", "2"}, - {2, "R3_w", "4"}, - {3, "R3_w", "8"}, - {4, "R3_w", "16"}, - {5, "R3_w", "1"}, - {6, "R4_w", "32"}, - {7, "R4_w", "16"}, - {8, "R4_w", "8"}, - {9, "R4_w", "4"}, - {10, "R4_w", "2"}, - }, - }, - { - .descr = "addsub", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, 4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - {0, "R1", "ctx()"}, - {0, "R10", "fp0"}, - {0, "R3_w", "4"}, - {1, "R3_w", "8"}, - {2, "R3_w", "10"}, - {3, "R4_w", "8"}, - {4, "R4_w", "12"}, - {5, "R4_w", "14"}, - }, - }, - { - .descr = "mul", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, 7), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - {0, "R1", "ctx()"}, - {0, "R10", "fp0"}, - {0, "R3_w", "7"}, - {1, "R3_w", "7"}, - {2, "R3_w", "14"}, - {3, "R3_w", "56"}, - }, - }, - - /* Tests using unknown values */ -#define PREP_PKT_POINTERS \ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \ - offsetof(struct __sk_buff, data)), \ - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \ - offsetof(struct __sk_buff, data_end)) - -#define LOAD_UNKNOWN(DST_REG) \ - PREP_PKT_POINTERS, \ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \ - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \ - BPF_EXIT_INSN(), \ - BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0) - - { - .descr = "unknown shift", - .insns = { - LOAD_UNKNOWN(BPF_REG_3), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), - LOAD_UNKNOWN(BPF_REG_4), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - {6, "R0_w", "pkt(off=8,r=8)"}, - {6, "R3_w", "var_off=(0x0; 0xff)"}, - {7, "R3_w", "var_off=(0x0; 0x1fe)"}, - {8, "R3_w", "var_off=(0x0; 0x3fc)"}, - {9, "R3_w", "var_off=(0x0; 0x7f8)"}, - {10, "R3_w", "var_off=(0x0; 0xff0)"}, - {12, "R3_w", "pkt_end()"}, - {17, "R4_w", "var_off=(0x0; 0xff)"}, - {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, - {19, "R4_w", "var_off=(0x0; 0xff0)"}, - {20, "R4_w", "var_off=(0x0; 0x7f8)"}, - {21, "R4_w", "var_off=(0x0; 0x3fc)"}, - {22, "R4_w", "var_off=(0x0; 0x1fe)"}, - }, - }, - { - .descr = "unknown mul", - .insns = { - LOAD_UNKNOWN(BPF_REG_3), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - {6, "R3_w", "var_off=(0x0; 0xff)"}, - {7, "R4_w", "var_off=(0x0; 0xff)"}, - {8, "R4_w", "var_off=(0x0; 0xff)"}, - {9, "R4_w", "var_off=(0x0; 0xff)"}, - {10, "R4_w", "var_off=(0x0; 0x1fe)"}, - {11, "R4_w", "var_off=(0x0; 0xff)"}, - {12, "R4_w", "var_off=(0x0; 0x3fc)"}, - {13, "R4_w", "var_off=(0x0; 0xff)"}, - {14, "R4_w", "var_off=(0x0; 0x7f8)"}, - {15, "R4_w", "var_off=(0x0; 0xff0)"}, - }, - }, - { - .descr = "packet const offset", - .insns = { - PREP_PKT_POINTERS, - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - - BPF_MOV64_IMM(BPF_REG_0, 0), - - /* Skip over ethernet header. */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), - BPF_EXIT_INSN(), - - BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0), - BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2), - BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3), - BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0), - BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), - - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - {2, "R5_w", "pkt(r=0)"}, - {4, "R5_w", "pkt(off=14,r=0)"}, - {5, "R4_w", "pkt(off=14,r=0)"}, - {9, "R2", "pkt(r=18)"}, - {10, "R5", "pkt(off=14,r=18)"}, - {10, "R4_w", "var_off=(0x0; 0xff)"}, - {13, "R4_w", "var_off=(0x0; 0xffff)"}, - {14, "R4_w", "var_off=(0x0; 0xffff)"}, - }, - }, - { - .descr = "packet variable offset", - .insns = { - LOAD_UNKNOWN(BPF_REG_6), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), - - /* First, add a constant to the R5 packet pointer, - * then a variable with a known alignment. - */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), - BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), - - /* Now, test in the other direction. Adding first - * the variable offset to R5, then the constant. - */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), - - /* Test multiple accumulations of unknown values - * into a packet pointer. - */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), - BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), - - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - /* Calculated offset in R6 has unknown value, but known - * alignment of 4. - */ - {6, "R2_w", "pkt(r=8)"}, - {7, "R6_w", "var_off=(0x0; 0x3fc)"}, - /* Offset is added to packet pointer R5, resulting in - * known fixed offset, and variable offset from R6. - */ - {11, "R5_w", "pkt(id=1,off=14,"}, - /* At the time the word size load is performed from R5, - * it's total offset is NET_IP_ALIGN + reg->off (0) + - * reg->aux_off (14) which is 16. Then the variable - * offset is considered using reg->aux_off_align which - * is 4 and meets the load's requirements. - */ - {15, "R4", "var_off=(0x0; 0x3fc)"}, - {15, "R5", "var_off=(0x0; 0x3fc)"}, - /* Variable offset is added to R5 packet pointer, - * resulting in auxiliary alignment of 4. To avoid BPF - * verifier's precision backtracking logging - * interfering we also have a no-op R4 = R5 - * instruction to validate R5 state. We also check - * that R4 is what it should be in such case. - */ - {18, "R4_w", "var_off=(0x0; 0x3fc)"}, - {18, "R5_w", "var_off=(0x0; 0x3fc)"}, - /* Constant offset is added to R5, resulting in - * reg->off of 14. - */ - {19, "R5_w", "pkt(id=2,off=14,"}, - /* At the time the word size load is performed from R5, - * its total fixed offset is NET_IP_ALIGN + reg->off - * (14) which is 16. Then the variable offset is 4-byte - * aligned, so the total offset is 4-byte aligned and - * meets the load's requirements. - */ - {24, "R4", "var_off=(0x0; 0x3fc)"}, - {24, "R5", "var_off=(0x0; 0x3fc)"}, - /* Constant offset is added to R5 packet pointer, - * resulting in reg->off value of 14. - */ - {26, "R5_w", "pkt(off=14,r=8)"}, - /* Variable offset is added to R5, resulting in a - * variable offset of (4n). See comment for insn #18 - * for R4 = R5 trick. - */ - {28, "R4_w", "var_off=(0x0; 0x3fc)"}, - {28, "R5_w", "var_off=(0x0; 0x3fc)"}, - /* Constant is added to R5 again, setting reg->off to 18. */ - {29, "R5_w", "pkt(id=3,off=18,"}, - /* And once more we add a variable; resulting var_off - * is still (4n), fixed offset is not changed. - * Also, we create a new reg->id. - */ - {31, "R4_w", "var_off=(0x0; 0x7fc)"}, - {31, "R5_w", "var_off=(0x0; 0x7fc)"}, - /* At the time the word size load is performed from R5, - * its total fixed offset is NET_IP_ALIGN + reg->off (18) - * which is 20. Then the variable offset is (4n), so - * the total offset is 4-byte aligned and meets the - * load's requirements. - */ - {35, "R4", "var_off=(0x0; 0x7fc)"}, - {35, "R5", "var_off=(0x0; 0x7fc)"}, - }, - }, - { - .descr = "packet variable offset 2", - .insns = { - /* Create an unknown offset, (4n+2)-aligned */ - LOAD_UNKNOWN(BPF_REG_6), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14), - /* Add it to the packet pointer */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), - /* Check bounds and perform a read */ - BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0), - /* Make a (4n) offset from the value we just read */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), - /* Add it to the packet pointer */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), - /* Check bounds and perform a read */ - BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - /* Calculated offset in R6 has unknown value, but known - * alignment of 4. - */ - {6, "R2_w", "pkt(r=8)"}, - {7, "R6_w", "var_off=(0x0; 0x3fc)"}, - /* Adding 14 makes R6 be (4n+2) */ - {8, "R6_w", "var_off=(0x2; 0x7fc)"}, - /* Packet pointer has (4n+2) offset */ - {11, "R5_w", "var_off=(0x2; 0x7fc)"}, - {12, "R4", "var_off=(0x2; 0x7fc)"}, - /* At the time the word size load is performed from R5, - * its total fixed offset is NET_IP_ALIGN + reg->off (0) - * which is 2. Then the variable offset is (4n+2), so - * the total offset is 4-byte aligned and meets the - * load's requirements. - */ - {15, "R5", "var_off=(0x2; 0x7fc)"}, - /* Newly read value in R6 was shifted left by 2, so has - * known alignment of 4. - */ - {17, "R6_w", "var_off=(0x0; 0x3fc)"}, - /* Added (4n) to packet pointer's (4n+2) var_off, giving - * another (4n+2). - */ - {19, "R5_w", "var_off=(0x2; 0xffc)"}, - {20, "R4", "var_off=(0x2; 0xffc)"}, - /* At the time the word size load is performed from R5, - * its total fixed offset is NET_IP_ALIGN + reg->off (0) - * which is 2. Then the variable offset is (4n+2), so - * the total offset is 4-byte aligned and meets the - * load's requirements. - */ - {23, "R5", "var_off=(0x2; 0xffc)"}, - }, - }, - { - .descr = "dubious pointer arithmetic", - .insns = { - PREP_PKT_POINTERS, - BPF_MOV64_IMM(BPF_REG_0, 0), - /* (ptr - ptr) << 2 */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), - BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2), - /* We have a (4n) value. Let's make a packet offset - * out of it. First add 14, to make it a (4n+2) - */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), - /* Then make sure it's nonnegative */ - BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1), - BPF_EXIT_INSN(), - /* Add it to packet pointer */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), - /* Check bounds and perform a read */ - BPF_MOV64_REG(BPF_REG_4, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .matches = { - {3, "R5_w", "pkt_end()"}, - /* (ptr - ptr) << 2 == unknown, (4n) */ - {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, - /* (4n) + 14 == (4n+2). We blow our bounds, because - * the add could overflow. - */ - {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"}, - /* Checked s>=0 */ - {9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"}, - /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, - {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, - /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. - * We checked the bounds, but it might have been able - * to overflow if the packet pointer started in the - * upper half of the address space. - * So we did not get a 'range' on R6, and the access - * attempt will fail. - */ - {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, - } - }, - { - .descr = "variable subtraction", - .insns = { - /* Create an unknown offset, (4n+2)-aligned */ - LOAD_UNKNOWN(BPF_REG_6), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14), - /* Create another unknown, (4n)-aligned, and subtract - * it from the first one - */ - BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2), - BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7), - /* Bounds-check the result */ - BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1), - BPF_EXIT_INSN(), - /* Add it to the packet pointer */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), - /* Check bounds and perform a read */ - BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - /* Calculated offset in R6 has unknown value, but known - * alignment of 4. - */ - {6, "R2_w", "pkt(r=8)"}, - {8, "R6_w", "var_off=(0x0; 0x3fc)"}, - /* Adding 14 makes R6 be (4n+2) */ - {9, "R6_w", "var_off=(0x2; 0x7fc)"}, - /* New unknown value in R7 is (4n) */ - {10, "R7_w", "var_off=(0x0; 0x3fc)"}, - /* Subtracting it from R6 blows our unsigned bounds */ - {11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"}, - /* Checked s>= 0 */ - {14, "R6", "var_off=(0x2; 0x7fc)"}, - /* At the time the word size load is performed from R5, - * its total fixed offset is NET_IP_ALIGN + reg->off (0) - * which is 2. Then the variable offset is (4n+2), so - * the total offset is 4-byte aligned and meets the - * load's requirements. - */ - {20, "R5", "var_off=(0x2; 0x7fc)"}, - }, - }, - { - .descr = "pointer variable subtraction", - .insns = { - /* Create an unknown offset, (4n+2)-aligned and bounded - * to [14,74] - */ - LOAD_UNKNOWN(BPF_REG_6), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), - BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14), - /* Subtract it from the packet pointer */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6), - /* Create another unknown, (4n)-aligned and >= 74. - * That in fact means >= 76, since 74 % 4 == 2 - */ - BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76), - /* Add it to the packet pointer */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7), - /* Check bounds and perform a read */ - BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .matches = { - /* Calculated offset in R6 has unknown value, but known - * alignment of 4. - */ - {6, "R2_w", "pkt(r=8)"}, - {9, "R6_w", "var_off=(0x0; 0x3c)"}, - /* Adding 14 makes R6 be (4n+2) */ - {10, "R6_w", "var_off=(0x2; 0x7c)"}, - /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"}, - /* New unknown value in R7 is (4n), >= 76 */ - {14, "R7_w", "var_off=(0x0; 0x7fc)"}, - /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w", "var_off=(0x2; 0x7fc)"}, - /* At the time the word size load is performed from R5, - * its total fixed offset is NET_IP_ALIGN + reg->off (0) - * which is 2. Then the variable offset is (4n+2), so - * the total offset is 4-byte aligned and meets the - * load's requirements. - */ - {20, "R5", "var_off=(0x2; 0x7fc)"}, - }, - }, -}; - -static int probe_filter_length(const struct bpf_insn *fp) -{ - int len; - - for (len = MAX_INSNS - 1; len > 0; --len) - if (fp[len].code != 0 || fp[len].imm != 0) - break; - return len + 1; -} - -static char bpf_vlog[32768]; - -static int do_test_single(struct bpf_align_test *test) -{ - struct bpf_insn *prog = test->insns; - int prog_type = test->prog_type; - char bpf_vlog_copy[32768]; - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .prog_flags = BPF_F_STRICT_ALIGNMENT, - .log_buf = bpf_vlog, - .log_size = sizeof(bpf_vlog), - .log_level = 2, - ); - const char *line_ptr; - int cur_line = -1; - int prog_len, i; - int fd_prog; - int ret; - - prog_len = probe_filter_length(prog); - fd_prog = bpf_prog_load(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - prog, prog_len, &opts); - if (fd_prog < 0 && test->result != REJECT) { - printf("Failed to load program.\n"); - printf("%s", bpf_vlog); - ret = 1; - } else if (fd_prog >= 0 && test->result == REJECT) { - printf("Unexpected success to load!\n"); - printf("%s", bpf_vlog); - ret = 1; - close(fd_prog); - } else { - ret = 0; - /* We make a local copy so that we can strtok() it */ - strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy)); - line_ptr = strtok(bpf_vlog_copy, "\n"); - for (i = 0; i < MAX_MATCHES; i++) { - struct bpf_reg_match m = test->matches[i]; - const char *p; - int tmp; - - if (!m.match) - break; - while (line_ptr) { - cur_line = -1; - sscanf(line_ptr, "%u: ", &cur_line); - if (cur_line == -1) - sscanf(line_ptr, "from %u to %u: ", &tmp, &cur_line); - if (cur_line == m.line) - break; - line_ptr = strtok(NULL, "\n"); - } - if (!line_ptr) { - printf("Failed to find line %u for match: %s=%s\n", - m.line, m.reg, m.match); - ret = 1; - printf("%s", bpf_vlog); - break; - } - /* Check the next line as well in case the previous line - * did not have a corresponding bpf insn. Example: - * func#0 @0 - * 0: R1=ctx() R10=fp0 - * 0: (b7) r3 = 2 ; R3_w=2 - * - * Sometimes it's actually two lines below, e.g. when - * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))": - * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 - * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 - * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) - */ - while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { - cur_line = -1; - line_ptr = strtok(NULL, "\n"); - sscanf(line_ptr ?: "", "%u: ", &cur_line); - if (!line_ptr || cur_line != m.line) - break; - } - if (cur_line != m.line || !line_ptr || !(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { - printf("Failed to find match %u: %s=%s\n", m.line, m.reg, m.match); - ret = 1; - printf("%s", bpf_vlog); - break; - } - } - if (fd_prog >= 0) - close(fd_prog); - } - return ret; -} - -void test_align(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(tests); i++) { - struct bpf_align_test *test = &tests[i]; - - if (!test__start_subtest(test->descr)) - continue; - - ASSERT_OK(do_test_single(test), test->descr); - } -} diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c index 26e7c06c6cb4..d98577a6babc 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c @@ -162,6 +162,66 @@ static void test_uaf(struct arena_atomics *skel) ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails"); } +static void test_load_acquire(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + if (skel->data->skip_lacq_srel_tests) { + printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support load-acquire\n", + __func__); + test__skip(); + return; + } + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.load_acquire); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->load_acquire8_result, 0x12, + "load_acquire8_result"); + ASSERT_EQ(skel->arena->load_acquire16_result, 0x1234, + "load_acquire16_result"); + ASSERT_EQ(skel->arena->load_acquire32_result, 0x12345678, + "load_acquire32_result"); + ASSERT_EQ(skel->arena->load_acquire64_result, 0x1234567890abcdef, + "load_acquire64_result"); +} + +static void test_store_release(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + if (skel->data->skip_lacq_srel_tests) { + printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support store-release\n", + __func__); + test__skip(); + return; + } + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.store_release); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->store_release8_result, 0x12, + "store_release8_result"); + ASSERT_EQ(skel->arena->store_release16_result, 0x1234, + "store_release16_result"); + ASSERT_EQ(skel->arena->store_release32_result, 0x12345678, + "store_release32_result"); + ASSERT_EQ(skel->arena->store_release64_result, 0x1234567890abcdef, + "store_release64_result"); +} + void test_arena_atomics(void) { struct arena_atomics *skel; @@ -171,7 +231,7 @@ void test_arena_atomics(void) if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open")) return; - if (skel->data->skip_tests) { + if (skel->data->skip_all_tests) { printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang", __func__); test__skip(); @@ -198,6 +258,10 @@ void test_arena_atomics(void) test_xchg(skel); if (test__start_subtest("uaf")) test_uaf(skel); + if (test__start_subtest("load_acquire")) + test_load_acquire(skel); + if (test__start_subtest("store_release")) + test_store_release(skel); cleanup: arena_atomics__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c index d15867cddde0..4f2866a615ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_list.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c @@ -27,17 +27,23 @@ static int list_sum(struct arena_list_head *head) return sum; } -static void test_arena_list_add_del(int cnt) +static void test_arena_list_add_del(int cnt, bool nonsleepable) { LIBBPF_OPTS(bpf_test_run_opts, opts); struct arena_list *skel; int expected_sum = (u64)cnt * (cnt - 1) / 2; int ret, sum; - skel = arena_list__open_and_load(); - if (!ASSERT_OK_PTR(skel, "arena_list__open_and_load")) + skel = arena_list__open(); + if (!ASSERT_OK_PTR(skel, "arena_list__open")) return; + skel->rodata->nonsleepable = nonsleepable; + + ret = arena_list__load(skel); + if (!ASSERT_OK(ret, "arena_list__load")) + goto out; + skel->bss->cnt = cnt; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_add), &opts); ASSERT_OK(ret, "ret_add"); @@ -65,7 +71,11 @@ out: void test_arena_list(void) { if (test__start_subtest("arena_list_1")) - test_arena_list_add_del(1); + test_arena_list_add_del(1, false); if (test__start_subtest("arena_list_1000")) - test_arena_list_add_del(1000); + test_arena_list_add_del(1000, false); + if (test__start_subtest("arena_list_1_nonsleepable")) + test_arena_list_add_del(1, true); + if (test__start_subtest("arena_list_1000_nonsleepable")) + test_arena_list_add_del(1000, true); } diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c new file mode 100644 index 000000000000..693fd86fbde6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <network_helpers.h> +#include <sys/sysinfo.h> + +struct __qspinlock { int val; }; +typedef struct __qspinlock arena_spinlock_t; + +struct arena_qnode { + unsigned long next; + int count; + int locked; +}; + +#include "arena_spin_lock.skel.h" + +static long cpu; +static int repeat; + +pthread_barrier_t barrier; + +static void *spin_lock_thread(void *arg) +{ + int err, prog_fd = *(u32 *)arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = repeat, + ); + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset), "cpu affinity"); + + err = pthread_barrier_wait(&barrier); + if (err != PTHREAD_BARRIER_SERIAL_THREAD && err != 0) + ASSERT_FALSE(true, "pthread_barrier"); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run err"); + + if (topts.retval == -EOPNOTSUPP) + goto end; + + ASSERT_EQ((int)topts.retval, 0, "test_run retval"); + +end: + pthread_exit(arg); +} + +static void test_arena_spin_lock_size(int size) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct arena_spin_lock *skel; + pthread_t thread_id[16]; + int prog_fd, i, err; + int nthreads; + void *ret; + + nthreads = MIN(get_nprocs(), ARRAY_SIZE(thread_id)); + if (nthreads < 2) { + test__skip(); + return; + } + + skel = arena_spin_lock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load")) + return; + + if (skel->data->test_skip == 2) { + test__skip(); + goto end; + } + skel->bss->cs_count = size; + skel->bss->limit = repeat * nthreads; + + ASSERT_OK(pthread_barrier_init(&barrier, NULL, nthreads), "barrier init"); + + prog_fd = bpf_program__fd(skel->progs.prog); + for (i = 0; i < nthreads; i++) { + err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + goto end_barrier; + } + + for (i = 0; i < nthreads; i++) { + if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) + goto end_barrier; + if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) + goto end_barrier; + } + + if (skel->data->test_skip == 3) { + printf("%s:SKIP: CONFIG_NR_CPUS exceed the maximum supported by arena spinlock\n", + __func__); + test__skip(); + goto end_barrier; + } + + ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value"); + +end_barrier: + pthread_barrier_destroy(&barrier); +end: + arena_spin_lock__destroy(skel); + return; +} + +void test_arena_spin_lock(void) +{ + repeat = 1000; + if (test__start_subtest("arena_spin_lock_1")) + test_arena_spin_lock_size(1); + cpu = 0; + if (test__start_subtest("arena_spin_lock_1000")) + test_arena_spin_lock_size(1000); + cpu = 0; + repeat = 100; + if (test__start_subtest("arena_spin_lock_50000")) + test_arena_spin_lock_size(50000); +} diff --git a/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c new file mode 100644 index 000000000000..f81a0c066505 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "arena_strsearch.skel.h" + +static void test_arena_str(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct arena_strsearch *skel; + int ret; + + skel = arena_strsearch__open_and_load(); + if (!ASSERT_OK_PTR(skel, "arena_strsearch__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_strsearch), &opts); + ASSERT_OK(ret, "ret_add"); + ASSERT_OK(opts.retval, "retval"); + if (skel->bss->skip) { + printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__); + test__skip(); + } + arena_strsearch__destroy(skel); +} + +void test_arena_strsearch(void) +{ + if (test__start_subtest("arena_strsearch")) + test_arena_str(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c index bb143de68875..e27d66b75fb1 100644 --- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c +++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c @@ -144,11 +144,17 @@ static void test_parse_test_list_file(void) if (!ASSERT_OK(ferror(fp), "prepare tmp")) goto out_fclose; + if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp")) + goto out_fclose; + init_test_filter_set(&set); - ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"); + if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file")) + goto out_fclose; + + if (!ASSERT_EQ(set.cnt, 4, "test count")) + goto out_free_set; - ASSERT_EQ(set.cnt, 4, "test count"); ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name"); ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count"); ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name"); @@ -158,8 +164,8 @@ static void test_parse_test_list_file(void) ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name"); ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name"); +out_free_set: free_test_filter_set(&set); - out_fclose: fclose(fp); out_remove: diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index 13e101f370a1..92b5f378bfb8 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -165,11 +165,17 @@ static void test_xchg(struct atomics_lskel *skel) void test_atomics(void) { struct atomics_lskel *skel; + int err; - skel = atomics_lskel__open_and_load(); - if (!ASSERT_OK_PTR(skel, "atomics skeleton load")) + skel = atomics_lskel__open(); + if (!ASSERT_OK_PTR(skel, "atomics skeleton open")) return; + skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = atomics_lskel__load(skel); + if (!ASSERT_OK(err, "atomics skeleton load")) + goto cleanup; + if (skel->data->skip_tests) { printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)", __func__); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 329c7862b52d..e8c1a619e330 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -3,6 +3,7 @@ #include "test_attach_kprobe_sleepable.skel.h" #include "test_attach_probe_manual.skel.h" #include "test_attach_probe.skel.h" +#include "kprobe_write_ctx.skel.h" /* this is how USDT semaphore is actually defined, except volatile modifier */ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); @@ -122,6 +123,306 @@ cleanup: test_attach_probe_manual__destroy(skel); } +/* manual attach address-based kprobe/kretprobe testings */ +static void test_attach_kprobe_by_addr(enum probe_attach_mode attach_mode) +{ + LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + struct test_attach_probe_manual *skel; + unsigned long func_addr; + + if (!ASSERT_OK(load_kallsyms(), "load_kallsyms")) + return; + + func_addr = ksym_get_addr(SYS_NANOSLEEP_KPROBE_NAME); + if (!ASSERT_NEQ(func_addr, 0UL, "func_addr")) + return; + + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) + return; + + kprobe_opts.attach_mode = attach_mode; + kprobe_opts.retprobe = false; + kprobe_opts.offset = func_addr; + skel->links.handle_kprobe = + bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe, + NULL, &kprobe_opts); + if (!ASSERT_OK_PTR(skel->links.handle_kprobe, "attach_kprobe_by_addr")) + goto cleanup; + + kprobe_opts.retprobe = true; + skel->links.handle_kretprobe = + bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe, + NULL, &kprobe_opts); + if (!ASSERT_OK_PTR(skel->links.handle_kretprobe, "attach_kretprobe_by_addr")) + goto cleanup; + + /* trigger & validate kprobe && kretprobe */ + usleep(1); + + ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); + ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); + +cleanup: + test_attach_probe_manual__destroy(skel); +} + +/* reject legacy address-based kprobe attach */ +static void test_attach_kprobe_legacy_by_addr_reject(void) +{ + LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + struct test_attach_probe_manual *skel; + unsigned long func_addr; + + if (!ASSERT_OK(load_kallsyms(), "load_kallsyms")) + return; + + func_addr = ksym_get_addr(SYS_NANOSLEEP_KPROBE_NAME); + if (!ASSERT_NEQ(func_addr, 0UL, "func_addr")) + return; + + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) + return; + + kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY; + kprobe_opts.offset = func_addr; + skel->links.handle_kprobe = + bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe, + NULL, &kprobe_opts); + ASSERT_ERR_PTR(skel->links.handle_kprobe, "attach_kprobe_legacy_by_addr"); + ASSERT_EQ(libbpf_get_error(skel->links.handle_kprobe), + -EOPNOTSUPP, "attach_kprobe_legacy_by_addr_err"); + + test_attach_probe_manual__destroy(skel); +} + +/* + * bpf_fentry_shadow_test exists in both vmlinux (net/bpf/test_run.c) and + * bpf_testmod (bpf_testmod.c). When bpf_testmod is loaded the symbol is + * duplicated. Test that kprobe attachment handles this correctly: + * - Unqualified name ("bpf_fentry_shadow_test") attaches to vmlinux. + * - MOD:SYM name ("bpf_testmod:bpf_fentry_shadow_test") attaches to module. + * + * Note: bpf_fentry_shadow_test is not invoked via test_run, so we only + * verify that attach and detach succeed without triggering the probe. + */ +static void test_attach_probe_dup_sym(enum probe_attach_mode attach_mode) +{ + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + struct bpf_link *kprobe_link, *kretprobe_link; + struct test_attach_probe_manual *skel; + + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_dup_sym_open_and_load")) + return; + + kprobe_opts.attach_mode = attach_mode; + + /* Unqualified: should attach to vmlinux symbol */ + kprobe_opts.retprobe = false; + kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe, + "bpf_fentry_shadow_test", + &kprobe_opts); + if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_vmlinux")) + goto cleanup; + bpf_link__destroy(kprobe_link); + + kprobe_opts.retprobe = true; + kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe, + "bpf_fentry_shadow_test", + &kprobe_opts); + if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_vmlinux")) + goto cleanup; + bpf_link__destroy(kretprobe_link); + + /* MOD:SYM qualified: should attach to module symbol */ + kprobe_opts.retprobe = false; + kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe, + "bpf_testmod:bpf_fentry_shadow_test", + &kprobe_opts); + if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_module")) + goto cleanup; + bpf_link__destroy(kprobe_link); + + kprobe_opts.retprobe = true; + kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe, + "bpf_testmod:bpf_fentry_shadow_test", + &kprobe_opts); + if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_module")) + goto cleanup; + bpf_link__destroy(kretprobe_link); + +cleanup: + test_attach_probe_manual__destroy(skel); +} + +/* attach uprobe/uretprobe long event name testings */ +static void test_attach_uprobe_long_event_name(void) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct bpf_link *uprobe_link, *uretprobe_link; + struct test_attach_probe_manual *skel; + ssize_t uprobe_offset; + char path[PATH_MAX] = {0}; + + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) + return; + + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) + goto cleanup; + + if (!ASSERT_GT(readlink("/proc/self/exe", path, PATH_MAX - 1), 0, "readlink")) + goto cleanup; + + /* manual-attach uprobe/uretprobe */ + uprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY; + uprobe_opts.ref_ctr_offset = 0; + uprobe_opts.retprobe = false; + uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, + 0 /* self pid */, + path, + uprobe_offset, + &uprobe_opts); + if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_long_event_name")) + goto cleanup; + skel->links.handle_uprobe = uprobe_link; + + uprobe_opts.retprobe = true; + uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, + -1 /* any pid */, + path, + uprobe_offset, &uprobe_opts); + if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_long_event_name")) + goto cleanup; + skel->links.handle_uretprobe = uretprobe_link; + +cleanup: + test_attach_probe_manual__destroy(skel); +} + +/* attach kprobe/kretprobe long event name testings */ +static void test_attach_kprobe_long_event_name(void) +{ + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + struct bpf_link *kprobe_link, *kretprobe_link; + struct test_attach_probe_manual *skel; + + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) + return; + + /* manual-attach kprobe/kretprobe */ + kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY; + kprobe_opts.retprobe = false; + kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe, + "bpf_testmod_looooooooooooooooooooooooooooooong_name", + &kprobe_opts); + if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_long_event_name")) + goto cleanup; + skel->links.handle_kprobe = kprobe_link; + + kprobe_opts.retprobe = true; + kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe, + "bpf_testmod_looooooooooooooooooooooooooooooong_name", + &kprobe_opts); + if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_long_event_name")) + goto cleanup; + skel->links.handle_kretprobe = kretprobe_link; + +cleanup: + test_attach_probe_manual__destroy(skel); +} + +#ifdef __x86_64__ +/* attach kprobe/kretprobe long event name testings */ +static void test_attach_kprobe_write_ctx(void) +{ + struct kprobe_write_ctx *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_write_ctx__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load")) + return; + + link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_write_ctx, + "bpf_fentry_test1", NULL); + if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts")) + bpf_link__destroy(link); + + kprobe_write_ctx__destroy(skel); +} + +static void test_freplace_kprobe_write_ctx(void) +{ + struct bpf_program *prog_kprobe, *prog_ext, *prog_fentry; + struct kprobe_write_ctx *skel_kprobe, *skel_ext = NULL; + struct bpf_link *link_kprobe = NULL, *link_ext = NULL; + int err, prog_fd; + LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel_kprobe = kprobe_write_ctx__open(); + if (!ASSERT_OK_PTR(skel_kprobe, "kprobe_write_ctx__open kprobe")) + return; + + prog_kprobe = skel_kprobe->progs.kprobe_dummy; + bpf_program__set_autoload(prog_kprobe, true); + + prog_fentry = skel_kprobe->progs.fentry; + bpf_program__set_autoload(prog_fentry, true); + + err = kprobe_write_ctx__load(skel_kprobe); + if (!ASSERT_OK(err, "kprobe_write_ctx__load kprobe")) + goto out; + + skel_ext = kprobe_write_ctx__open(); + if (!ASSERT_OK_PTR(skel_ext, "kprobe_write_ctx__open ext")) + goto out; + + prog_ext = skel_ext->progs.freplace_kprobe; + bpf_program__set_autoload(prog_ext, true); + + prog_fd = bpf_program__fd(skel_kprobe->progs.kprobe_write_ctx); + bpf_program__set_attach_target(prog_ext, prog_fd, "kprobe_write_ctx"); + + err = kprobe_write_ctx__load(skel_ext); + if (!ASSERT_OK(err, "kprobe_write_ctx__load ext")) + goto out; + + prog_fd = bpf_program__fd(prog_kprobe); + link_ext = bpf_program__attach_freplace(prog_ext, prog_fd, "kprobe_dummy"); + ASSERT_ERR_PTR(link_ext, "bpf_program__attach_freplace link"); + ASSERT_EQ(libbpf_get_error(link_ext), -EINVAL, "bpf_program__attach_freplace error"); + + link_kprobe = bpf_program__attach_kprobe_opts(prog_kprobe, "bpf_fentry_test1", + &kprobe_opts); + if (!ASSERT_OK_PTR(link_kprobe, "bpf_program__attach_kprobe_opts")) + goto out; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog_fentry), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + +out: + bpf_link__destroy(link_ext); + bpf_link__destroy(link_kprobe); + kprobe_write_ctx__destroy(skel_ext); + kprobe_write_ctx__destroy(skel_kprobe); +} +#else +static void test_attach_kprobe_write_ctx(void) +{ + test__skip(); +} + +static void test_freplace_kprobe_write_ctx(void) +{ + test__skip(); +} +#endif + static void test_attach_probe_auto(struct test_attach_probe *skel) { struct bpf_link *uprobe_err_link; @@ -311,6 +612,21 @@ void test_attach_probe(void) test_attach_probe_manual(PROBE_ATTACH_MODE_PERF); if (test__start_subtest("manual-link")) test_attach_probe_manual(PROBE_ATTACH_MODE_LINK); + if (test__start_subtest("kprobe-perf-by-addr")) + test_attach_kprobe_by_addr(PROBE_ATTACH_MODE_PERF); + if (test__start_subtest("kprobe-link-by-addr")) + test_attach_kprobe_by_addr(PROBE_ATTACH_MODE_LINK); + if (test__start_subtest("kprobe-legacy-by-addr-reject")) + test_attach_kprobe_legacy_by_addr_reject(); + + if (test__start_subtest("dup-sym-default")) + test_attach_probe_dup_sym(PROBE_ATTACH_MODE_DEFAULT); + if (test__start_subtest("dup-sym-legacy")) + test_attach_probe_dup_sym(PROBE_ATTACH_MODE_LEGACY); + if (test__start_subtest("dup-sym-perf")) + test_attach_probe_dup_sym(PROBE_ATTACH_MODE_PERF); + if (test__start_subtest("dup-sym-link")) + test_attach_probe_dup_sym(PROBE_ATTACH_MODE_LINK); if (test__start_subtest("auto")) test_attach_probe_auto(skel); @@ -323,6 +639,15 @@ void test_attach_probe(void) if (test__start_subtest("uprobe-ref_ctr")) test_uprobe_ref_ctr(skel); + if (test__start_subtest("uprobe-long_name")) + test_attach_uprobe_long_event_name(); + if (test__start_subtest("kprobe-long_name")) + test_attach_kprobe_long_event_name(); + if (test__start_subtest("kprobe-write-ctx")) + test_attach_kprobe_write_ctx(); + if (test__start_subtest("freplace-kprobe-write-ctx")) + test_freplace_kprobe_write_ctx(); + cleanup: test_attach_probe__destroy(skel); ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup"); diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index cc184e4420f6..42b49870e520 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -6,10 +6,14 @@ #include <test_progs.h> #include "bloom_filter_map.skel.h" +#ifndef NUMA_NO_NODE +#define NUMA_NO_NODE (-1) +#endif + static void test_fail_cases(void) { LIBBPF_OPTS(bpf_map_create_opts, opts); - __u32 value; + __u32 value = 0; int fd, err; /* Invalid key size */ @@ -69,6 +73,7 @@ static void test_success_cases(void) /* Create a map */ opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE; + opts.numa_node = NUMA_NO_NODE; fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts); if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 6befa870434b..35adc3f6d443 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -6,6 +6,7 @@ #include <sys/syscall.h> #include <sys/mman.h> #include <unistd.h> +#include <linux/compiler.h> #include <test_progs.h> #include <network_helpers.h> #include <bpf/btf.h> @@ -105,6 +106,11 @@ static void kprobe_multi_link_api_subtest(void) unsigned long long addrs[8]; __u64 cookies[8]; + if (!env.has_testmod) { + test__skip(); + return; + } + if (!ASSERT_OK(load_kallsyms(), "load_kallsyms")) goto cleanup; @@ -192,6 +198,11 @@ static void kprobe_multi_attach_api_subtest(void) }; __u64 cookies[8]; + if (!env.has_testmod) { + test__skip(); + return; + } + skel = kprobe_multi__open_and_load(); if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load")) goto cleanup; @@ -421,11 +432,12 @@ cleanup: bpf_link__destroy(link3); } -static void burn_cpu(void) +static void burn_cpu(long loops) { - volatile int j = 0; + long j = 0; cpu_set_t cpu_set; - int i, err; + long i; + int err; /* generate some branches on cpu 0 */ CPU_ZERO(&cpu_set); @@ -433,9 +445,10 @@ static void burn_cpu(void) err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); ASSERT_OK(err, "set_thread_affinity"); - /* spin the loop for a while (random high number) */ - for (i = 0; i < 1000000; ++i) + for (i = 0; i < loops; ++i) { ++j; + barrier(); + } } static void pe_subtest(struct test_bpf_cookie *skel) @@ -450,9 +463,8 @@ static void pe_subtest(struct test_bpf_cookie *skel) attr.size = sizeof(attr); attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CPU_CLOCK; - attr.freq = 1; - attr.sample_freq = 10000; - pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + attr.sample_period = 100000; + pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); if (!ASSERT_GE(pfd, 0, "perf_fd")) goto cleanup; @@ -461,7 +473,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) if (!ASSERT_OK_PTR(link, "link1")) goto cleanup; - burn_cpu(); /* trigger BPF prog */ + burn_cpu(100000000L); /* trigger BPF prog */ ASSERT_EQ(skel->bss->pe_res, 0x100000, "pe_res1"); @@ -480,7 +492,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) if (!ASSERT_OK_PTR(link, "link2")) goto cleanup; - burn_cpu(); /* trigger BPF prog */ + burn_cpu(100000000L); /* trigger BPF prog */ ASSERT_EQ(skel->bss->pe_res, 0x200000, "pe_res2"); @@ -489,10 +501,28 @@ cleanup: bpf_link__destroy(link); } +static int verify_tracing_link_info(int fd, u64 cookie) +{ + struct bpf_link_info info; + int err; + u32 len = sizeof(info); + + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_TRACING, "link_type")) + return -1; + + ASSERT_EQ(info.tracing.cookie, cookie, "tracing_cookie"); + + return 0; +} + static void tracing_subtest(struct test_bpf_cookie *skel) { __u64 cookie; - int prog_fd; + int prog_fd, err; int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1; LIBBPF_OPTS(bpf_test_run_opts, opts); LIBBPF_OPTS(bpf_link_create_opts, link_opts); @@ -507,6 +537,10 @@ static void tracing_subtest(struct test_bpf_cookie *skel) if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create")) goto cleanup; + err = verify_tracing_link_info(fentry_fd, cookie); + if (!ASSERT_OK(err, "verify_tracing_link_info")) + goto cleanup; + cookie = 0x20000000000000L; prog_fd = bpf_program__fd(skel->progs.fexit_test1); link_opts.tracing.cookie = cookie; @@ -635,10 +669,29 @@ cleanup: bpf_link__destroy(link); } +static int verify_raw_tp_link_info(int fd, u64 cookie) +{ + struct bpf_link_info info; + int err; + u32 len = sizeof(info); + + memset(&info, 0, sizeof(info)); + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type")) + return -1; + + ASSERT_EQ(info.raw_tracepoint.cookie, cookie, "raw_tp_cookie"); + + return 0; +} + static void raw_tp_subtest(struct test_bpf_cookie *skel) { __u64 cookie; - int prog_fd, link_fd = -1; + int err, prog_fd, link_fd = -1; struct bpf_link *link = NULL; LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts); LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts); @@ -656,6 +709,11 @@ static void raw_tp_subtest(struct test_bpf_cookie *skel) goto cleanup; usleep(1); /* trigger */ + + err = verify_raw_tp_link_info(link_fd, cookie); + if (!ASSERT_OK(err, "verify_raw_tp_link_info")) + goto cleanup; + close(link_fd); /* detach */ link_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c new file mode 100644 index 000000000000..73dc63882b7d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in6.h> +#include <linux/udp.h> +#include <linux/tcp.h> + +#include <sys/syscall.h> +#include <bpf/bpf.h> + +#include "bpf_gotox.skel.h" + +static void __test_run(struct bpf_program *prog, void *ctx_in, size_t ctx_size_in) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts, + .ctx_in = ctx_in, + .ctx_size_in = ctx_size_in, + ); + int err, prog_fd; + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run_opts err"); +} + +static void __subtest(struct bpf_gotox *skel, void (*check)(struct bpf_gotox *)) +{ + if (skel->data->skip) + test__skip(); + else + check(skel); +} + +static void check_simple(struct bpf_gotox *skel, + struct bpf_program *prog, + __u64 ctx_in, + __u64 expected) +{ + skel->bss->ret_user = 0; + + __test_run(prog, &ctx_in, sizeof(ctx_in)); + + if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user")) + return; +} + +static void check_simple_fentry(struct bpf_gotox *skel, + struct bpf_program *prog, + __u64 ctx_in, + __u64 expected) +{ + skel->bss->in_user = ctx_in; + skel->bss->ret_user = 0; + + /* trigger */ + usleep(1); + + if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user")) + return; +} + +/* validate that for two loads of the same jump table libbpf generates only one map */ +static void check_one_map_two_jumps(struct bpf_gotox *skel) +{ + struct bpf_prog_info prog_info; + struct bpf_map_info map_info; + __u32 len; + __u32 map_ids[16]; + int prog_fd, map_fd; + int ret; + int i; + bool seen = false; + + memset(&prog_info, 0, sizeof(prog_info)); + prog_info.map_ids = (long)map_ids; + prog_info.nr_map_ids = ARRAY_SIZE(map_ids); + prog_fd = bpf_program__fd(skel->progs.one_map_two_jumps); + if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd(one_map_two_jumps)")) + return; + + len = sizeof(prog_info); + ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &len); + if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(prog_fd)")) + return; + + for (i = 0; i < prog_info.nr_map_ids; i++) { + map_fd = bpf_map_get_fd_by_id(map_ids[i]); + if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id")) + return; + + len = sizeof(map_info); + memset(&map_info, 0, len); + ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &len); + if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(map_fd)")) { + close(map_fd); + return; + } + + if (map_info.type == BPF_MAP_TYPE_INSN_ARRAY) { + if (!ASSERT_EQ(seen, false, "more than one INSN_ARRAY map")) { + close(map_fd); + return; + } + seen = true; + } + close(map_fd); + } + + ASSERT_EQ(seen, true, "no INSN_ARRAY map"); +} + +static void check_one_switch(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.one_switch, in[i], out[i]); +} + +static void check_one_switch_non_zero_sec_off(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.one_switch_non_zero_sec_off, in[i], out[i]); +} + +static void check_two_switches(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {103, 104, 107, 205, 115, 1019, 1019}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.two_switches, in[i], out[i]); +} + +static void check_big_jump_table(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 11, 27, 31, 22, 45, 99}; + __u64 out[] = {2, 3, 4, 5, 19, 19, 19}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.big_jump_table, in[i], out[i]); +} + +static void check_one_jump_two_maps(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {12, 15, 7 , 15, 12, 15, 15}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.one_jump_two_maps, in[i], out[i]); +} + +static void check_static_global(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_static_global1, in[i], out[i]); + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_static_global2, in[i], out[i]); +} + +static void check_nonstatic_global(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_nonstatic_global1, in[i], out[i]); + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_nonstatic_global2, in[i], out[i]); +} + +static void check_other_sec(struct bpf_gotox *skel) +{ + struct bpf_link *link; + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + link = bpf_program__attach(skel->progs.simple_test_other_sec); + if (!ASSERT_OK_PTR(link, "link")) + return; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple_fentry(skel, skel->progs.simple_test_other_sec, in[i], out[i]); + + bpf_link__destroy(link); +} + +static void check_static_global_other_sec(struct bpf_gotox *skel) +{ + struct bpf_link *link; + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + link = bpf_program__attach(skel->progs.use_static_global_other_sec); + if (!ASSERT_OK_PTR(link, "link")) + return; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple_fentry(skel, skel->progs.use_static_global_other_sec, in[i], out[i]); + + bpf_link__destroy(link); +} + +static void check_nonstatic_global_other_sec(struct bpf_gotox *skel) +{ + struct bpf_link *link; + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + link = bpf_program__attach(skel->progs.use_nonstatic_global_other_sec); + if (!ASSERT_OK_PTR(link, "link")) + return; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple_fentry(skel, skel->progs.use_nonstatic_global_other_sec, in[i], out[i]); + + bpf_link__destroy(link); +} + +/* + * The following subtests do not use skeleton rather than to check + * if the test should be skipped. + */ + +static int create_jt_map(__u32 max_entries) +{ + const char *map_name = "jt"; + __u32 key_size = 4; + __u32 value_size = sizeof(struct bpf_insn_array_value); + + return bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, map_name, + key_size, value_size, max_entries, NULL); +} + +static int prog_load(struct bpf_insn *insns, __u32 insn_cnt) +{ + return bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); +} + +static int __check_ldimm64_off_prog_load(__u32 max_entries, __u32 off) +{ + struct bpf_insn insns[] = { + BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int map_fd, ret; + + map_fd = create_jt_map(max_entries); + if (!ASSERT_GE(map_fd, 0, "create_jt_map")) + return -1; + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) { + close(map_fd); + return -1; + } + + insns[0].imm = map_fd; + insns[1].imm = off; + + ret = prog_load(insns, ARRAY_SIZE(insns)); + close(map_fd); + return ret; +} + +/* + * Check that loads from an instruction array map are only allowed with offsets + * which are multiples of 8 and do not point to outside of the map. + */ +static void check_ldimm64_off_load(struct bpf_gotox *skel __always_unused) +{ + const __u32 max_entries = 10; + int prog_fd; + __u32 off; + + for (off = 0; off < max_entries; off++) { + prog_fd = __check_ldimm64_off_prog_load(max_entries, off * 8); + if (!ASSERT_GE(prog_fd, 0, "__check_ldimm64_off_prog_load")) + return; + close(prog_fd); + } + + prog_fd = __check_ldimm64_off_prog_load(max_entries, 7 /* not a multiple of 8 */); + if (!ASSERT_EQ(prog_fd, -EACCES, "__check_ldimm64_off_prog_load: should be -EACCES")) { + close(prog_fd); + return; + } + + prog_fd = __check_ldimm64_off_prog_load(max_entries, max_entries * 8 /* too large */); + if (!ASSERT_EQ(prog_fd, -EACCES, "__check_ldimm64_off_prog_load: should be -EACCES")) { + close(prog_fd); + return; + } +} + +static int __check_ldimm64_gotox_prog_load(struct bpf_insn *insns, + __u32 insn_cnt, + int off1, int off2, int off3) +{ + const __u32 values[] = {5, 7, 9, 11, 13, 15}; + const __u32 max_entries = ARRAY_SIZE(values); + struct bpf_insn_array_value val = {}; + int map_fd, ret, i; + + map_fd = create_jt_map(max_entries); + if (!ASSERT_GE(map_fd, 0, "create_jt_map")) + return -1; + + for (i = 0; i < max_entries; i++) { + val.orig_off = values[i]; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, + "bpf_map_update_elem")) { + close(map_fd); + return -1; + } + } + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) { + close(map_fd); + return -1; + } + + /* r1 = &map + offset1 */ + insns[0].imm = map_fd; + insns[1].imm = off1; + + /* r1 += off2 */ + insns[2].imm = off2; + + /* r1 = *(r1 + off3) */ + insns[3].off = off3; + + ret = prog_load(insns, insn_cnt); + close(map_fd); + return ret; +} + +static void +allow_offsets(struct bpf_insn *insns, __u32 insn_cnt, int off1, int off2, int off3) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int prog_fd, err; + char s[128] = ""; + + prog_fd = __check_ldimm64_gotox_prog_load(insns, insn_cnt, off1, off2, off3); + snprintf(s, sizeof(s), "__check_ldimm64_gotox_prog_load(%d,%d,%d)", off1, off2, off3); + if (!ASSERT_GE(prog_fd, 0, s)) + return; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) { + close(prog_fd); + return; + } + + if (!ASSERT_EQ(topts.retval, (off1 + off2 + off3) / 8, "test_run_opts retval")) { + close(prog_fd); + return; + } + + close(prog_fd); +} + +static void +reject_offsets(struct bpf_insn *insns, __u32 insn_cnt, int off1, int off2, int off3) +{ + int prog_fd; + + prog_fd = __check_ldimm64_gotox_prog_load(insns, insn_cnt, off1, off2, off3); + if (!ASSERT_EQ(prog_fd, -EACCES, "__check_ldimm64_gotox_prog_load")) + close(prog_fd); +} + +/* + * Verify a bit more complex programs which include indirect jumps + * and with jump tables loaded with a non-zero offset + */ +static void check_ldimm64_off_gotox(struct bpf_gotox *skel __always_unused) +{ + struct bpf_insn insns[] = { + /* + * The following instructions perform an indirect jump to + * labels below. Thus valid offsets in the map are {0,...,5}. + * The program rewrites the offsets in the instructions below: + * r1 = &map + offset1 + * r1 += offset2 + * r1 = *(r1 + offset3) + * gotox r1 + */ + BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_1, 0, 0, 0), + + /* case 0: */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* case 1: */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* case 2: */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* case 3: */ + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* case 4: */ + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_EXIT_INSN(), + /* default: */ + BPF_MOV64_IMM(BPF_REG_0, 5), + BPF_EXIT_INSN(), + }; + int off1, off2, off3; + + /* allow all combinations off1 + off2 + off3 < 6 */ + for (off1 = 0; off1 < 6; off1++) + for (off2 = 0; off1 + off2 < 6; off2++) + for (off3 = 0; off1 + off2 + off3 < 6; off3++) + allow_offsets(insns, ARRAY_SIZE(insns), + off1 * 8, off2 * 8, off3 * 8); + + /* allow for some offsets to be negative */ + allow_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 0, -(8 * 3)); + allow_offsets(insns, ARRAY_SIZE(insns), 8 * 3, -(8 * 3), 0); + allow_offsets(insns, ARRAY_SIZE(insns), 0, 8 * 3, -(8 * 3)); + allow_offsets(insns, ARRAY_SIZE(insns), 8 * 4, 0, -(8 * 2)); + allow_offsets(insns, ARRAY_SIZE(insns), 8 * 4, -(8 * 2), 0); + allow_offsets(insns, ARRAY_SIZE(insns), 0, 8 * 4, -(8 * 2)); + + /* disallow negative sums of offsets */ + reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 0, -(8 * 4)); + reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, -(8 * 4), 0); + reject_offsets(insns, ARRAY_SIZE(insns), 0, 8 * 3, -(8 * 4)); + + /* disallow the off1 to be negative in any case */ + reject_offsets(insns, ARRAY_SIZE(insns), -8 * 1, 0, 0); + reject_offsets(insns, ARRAY_SIZE(insns), -8 * 1, 8 * 1, 0); + reject_offsets(insns, ARRAY_SIZE(insns), -8 * 1, 8 * 1, 8 * 1); + + /* reject off1 + off2 + off3 >= 6 */ + reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 8 * 3, 8 * 0); + reject_offsets(insns, ARRAY_SIZE(insns), 8 * 7, 8 * 0, 8 * 0); + reject_offsets(insns, ARRAY_SIZE(insns), 8 * 0, 8 * 7, 8 * 0); + reject_offsets(insns, ARRAY_SIZE(insns), 8 * 3, 8 * 0, 8 * 3); + reject_offsets(insns, ARRAY_SIZE(insns), 8 * 0, 8 * 3, 8 * 3); + + /* reject (off1 + off2) % 8 != 0, off3 % 8 != 0 */ + reject_offsets(insns, ARRAY_SIZE(insns), 3, 3, 0); + reject_offsets(insns, ARRAY_SIZE(insns), 7, 0, 0); + reject_offsets(insns, ARRAY_SIZE(insns), 0, 7, 0); + reject_offsets(insns, ARRAY_SIZE(insns), 0, 0, 7); +} + +static void check_ldimm64_off_gotox_llvm(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4}; + __u64 out[] = {1, 1, 5, 1, 1}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.load_with_nonzero_offset, in[i], out[i]); +} + +void test_bpf_gotox(void) +{ + struct bpf_gotox *skel; + int ret; + + skel = bpf_gotox__open(); + if (!ASSERT_NEQ(skel, NULL, "bpf_gotox__open")) + return; + + ret = bpf_gotox__load(skel); + if (!ASSERT_OK(ret, "bpf_gotox__load")) + return; + + skel->bss->pid = getpid(); + + if (test__start_subtest("one-switch")) + __subtest(skel, check_one_switch); + + if (test__start_subtest("one-switch-non-zero-sec-offset")) + __subtest(skel, check_one_switch_non_zero_sec_off); + + if (test__start_subtest("two-switches")) + __subtest(skel, check_two_switches); + + if (test__start_subtest("big-jump-table")) + __subtest(skel, check_big_jump_table); + + if (test__start_subtest("static-global")) + __subtest(skel, check_static_global); + + if (test__start_subtest("nonstatic-global")) + __subtest(skel, check_nonstatic_global); + + if (test__start_subtest("other-sec")) + __subtest(skel, check_other_sec); + + if (test__start_subtest("static-global-other-sec")) + __subtest(skel, check_static_global_other_sec); + + if (test__start_subtest("nonstatic-global-other-sec")) + __subtest(skel, check_nonstatic_global_other_sec); + + if (test__start_subtest("one-jump-two-maps")) + __subtest(skel, check_one_jump_two_maps); + + if (test__start_subtest("one-map-two-jumps")) + __subtest(skel, check_one_map_two_jumps); + + if (test__start_subtest("check-ldimm64-off")) + __subtest(skel, check_ldimm64_off_load); + + if (test__start_subtest("check-ldimm64-off-gotox")) + __subtest(skel, check_ldimm64_off_gotox); + + if (test__start_subtest("check-ldimm64-off-gotox-llvm")) + __subtest(skel, check_ldimm64_off_gotox_llvm); + + bpf_gotox__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c new file mode 100644 index 000000000000..0222a9a5d076 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c @@ -0,0 +1,504 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <bpf/bpf.h> +#include <test_progs.h> + +#if defined(__x86_64__) || defined(__powerpc__) || defined(__aarch64__) +static int map_create(__u32 map_type, __u32 max_entries) +{ + const char *map_name = "insn_array"; + __u32 key_size = 4; + __u32 value_size = sizeof(struct bpf_insn_array_value); + + return bpf_map_create(map_type, map_name, key_size, value_size, max_entries, NULL); +} + +static int prog_load(struct bpf_insn *insns, __u32 insn_cnt, int *fd_array, __u32 fd_array_cnt) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + opts.fd_array = fd_array; + opts.fd_array_cnt = fd_array_cnt; + + return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, &opts); +} + +static void __check_success(struct bpf_insn *insns, __u32 insn_cnt, __u32 *map_in, __u32 *map_out) +{ + struct bpf_insn_array_value val = {}; + int prog_fd = -1, map_fd, i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, insn_cnt); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < insn_cnt; i++) { + val.orig_off = map_in[i]; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, insn_cnt, &map_fd, 1); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < insn_cnt; i++) { + char buf[64]; + + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + snprintf(buf, sizeof(buf), "val.xlated_off should be equal map_out[%d]", i); + ASSERT_EQ(val.xlated_off, map_out[i], buf); + } + +cleanup: + close(prog_fd); + close(map_fd); +} + +/* + * Load a program, which will not be anyhow mangled by the verifier. Add an + * insn_array map pointing to every instruction. Check that it hasn't changed + * after the program load. + */ +static void check_one_to_one_mapping(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = {0, 1, 2, 3, 4, 5}; + __u32 map_out[] = {0, 1, 2, 3, 4, 5}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Load a program with two patches (get jiffies, for simplicity). Add an + * insn_array map pointing to every instruction. Check how it was changed + * after the program load. + */ +static void check_simple(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = {0, 1, 2, 3, 4, 5}; + __u32 map_out[] = {0, 1, 4, 5, 8, 9}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Verifier can delete code in two cases: nops & dead code. From insn + * array's point of view, the two cases are the same, so test using + * the simplest method: by loading some nops + */ +static void check_deletions(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = {0, 1, 2, 3, 4, 5}; + __u32 map_out[] = {0, -1, 1, -1, 2, 3}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Same test as check_deletions, but also add code which adds instructions + */ +static void check_deletions_with_functions(void) +{ + struct bpf_insn insns[] = { + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = { 0, 1, 2, 3, 4, 5, /* func */ 6, 7, 8, 9, 10}; + __u32 map_out[] = {-1, 0, -1, 3, 4, 5, /* func */ -1, 6, -1, 9, 10}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Try to load a program with a map which points to outside of the program + */ +static void check_out_of_bounds_index(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, map_fd; + struct bpf_insn_array_value val = {}; + int key; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + key = 0; + val.orig_off = ARRAY_SIZE(insns); /* too big */ + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) { + close(prog_fd); + goto cleanup; + } + +cleanup: + close(map_fd); +} + +/* + * Try to load a program with a map which points to the middle of 16-bit insn + */ +static void check_mid_insn_index(void) +{ + struct bpf_insn insns[] = { + BPF_LD_IMM64(BPF_REG_0, 0), /* 2 x 8 */ + BPF_EXIT_INSN(), + }; + int prog_fd, map_fd; + struct bpf_insn_array_value val = {}; + int key; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + key = 0; + val.orig_off = 1; /* middle of 16-byte instruction */ + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) { + close(prog_fd); + goto cleanup; + } + +cleanup: + close(map_fd); +} + +static void check_incorrect_index(void) +{ + check_out_of_bounds_index(); + check_mid_insn_index(); +} + +static int set_bpf_jit_harden(char *level) +{ + char old_level; + int err = -1; + int fd = -1; + + fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR | O_NONBLOCK); + if (fd < 0) { + ASSERT_FAIL("open .../bpf_jit_harden returned %d (errno=%d)", fd, errno); + return -1; + } + + err = read(fd, &old_level, 1); + if (err != 1) { + ASSERT_FAIL("read from .../bpf_jit_harden returned %d (errno=%d)", err, errno); + err = -1; + goto end; + } + + lseek(fd, 0, SEEK_SET); + + err = write(fd, level, 1); + if (err != 1) { + ASSERT_FAIL("write to .../bpf_jit_harden returned %d (errno=%d)", err, errno); + err = -1; + goto end; + } + + err = 0; + *level = old_level; +end: + if (fd >= 0) + close(fd); + return err; +} + +static void check_blindness(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd; + struct bpf_insn_array_value val = {}; + char bpf_jit_harden = '@'; /* non-exizsting value */ + int i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns)); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + val.orig_off = i; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + bpf_jit_harden = '2'; + if (set_bpf_jit_harden(&bpf_jit_harden)) { + bpf_jit_harden = '@'; /* open, read or write failed => no write was done */ + goto cleanup; + } + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + char fmt[32]; + + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + snprintf(fmt, sizeof(fmt), "val should be equal 3*%d", i); + ASSERT_EQ(val.xlated_off, i * 3, fmt); + } + +cleanup: + /* restore the old one */ + if (bpf_jit_harden != '@') + set_bpf_jit_harden(&bpf_jit_harden); + + close(prog_fd); + close(map_fd); +} + +/* Once map was initialized, it should be frozen */ +static void check_load_unfrozen_map(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd; + struct bpf_insn_array_value val = {}; + int i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns)); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + val.orig_off = i; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) + goto cleanup; + + /* correctness: now freeze the map, the program should load fine */ + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + ASSERT_EQ(val.xlated_off, i, "val should be equal i"); + } + +cleanup: + close(prog_fd); + close(map_fd); +} + +/* Map can be used only by one BPF program */ +static void check_no_map_reuse(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd, extra_fd = -1; + struct bpf_insn_array_value val = {}; + int i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns)); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + val.orig_off = i; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + ASSERT_EQ(val.xlated_off, i, "val should be equal i"); + } + + extra_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_EQ(extra_fd, -EBUSY, "program should have been rejected (extra_fd != -EBUSY)")) + goto cleanup; + + /* correctness: check that prog is still loadable without fd_array */ + extra_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0); + if (!ASSERT_GE(extra_fd, 0, "bpf(BPF_PROG_LOAD): expected no error")) + goto cleanup; + +cleanup: + close(extra_fd); + close(prog_fd); + close(map_fd); +} + +static void check_bpf_no_lookup(void) +{ + struct bpf_insn insns[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + insns[0].imm = map_fd; + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0); + if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) + goto cleanup; + + /* correctness: check that prog is still loadable with normal map */ + close(map_fd); + map_fd = map_create(BPF_MAP_TYPE_ARRAY, 1); + insns[0].imm = map_fd; + prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + +cleanup: + close(prog_fd); + close(map_fd); +} + +static void check_bpf_side(void) +{ + check_bpf_no_lookup(); +} + +static void __test_bpf_insn_array(void) +{ + /* Test if offsets are adjusted properly */ + + if (test__start_subtest("one2one")) + check_one_to_one_mapping(); + + if (test__start_subtest("simple")) + check_simple(); + + if (test__start_subtest("deletions")) + check_deletions(); + + if (test__start_subtest("deletions-with-functions")) + check_deletions_with_functions(); + + if (test__start_subtest("blindness")) + check_blindness(); + + /* Check all kinds of operations and related restrictions */ + + if (test__start_subtest("incorrect-index")) + check_incorrect_index(); + + if (test__start_subtest("load-unfrozen-map")) + check_load_unfrozen_map(); + + if (test__start_subtest("no-map-reuse")) + check_no_map_reuse(); + + if (test__start_subtest("bpf-side-ops")) + check_bpf_side(); +} +#else +static void __test_bpf_insn_array(void) +{ + test__skip(); +} +#endif + +void test_bpf_insn_array(void) +{ + __test_bpf_insn_array(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 6f1bfacd7375..c69080ca14f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -323,19 +323,86 @@ static void test_task_pidfd(void) static void test_task_sleepable(void) { struct bpf_iter_tasks *skel; + int pid, status, err, data_pipe[2], finish_pipe[2], c = 0; + char *test_data = NULL; + char *test_data_long = NULL; + char *data[2]; + + if (!ASSERT_OK(pipe(data_pipe), "data_pipe") || + !ASSERT_OK(pipe(finish_pipe), "finish_pipe")) + return; skel = bpf_iter_tasks__open_and_load(); if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load")) return; + pid = fork(); + if (!ASSERT_GE(pid, 0, "fork")) + return; + + if (pid == 0) { + /* child */ + close(data_pipe[0]); + close(finish_pipe[1]); + + test_data = malloc(sizeof(char) * 10); + strscpy(test_data, "test_data", 10); + + test_data_long = malloc(sizeof(char) * 5000); + for (int i = 0; i < 5000; ++i) { + if (i % 2 == 0) + test_data_long[i] = 'b'; + else + test_data_long[i] = 'a'; + } + test_data_long[4999] = '\0'; + + data[0] = test_data; + data[1] = test_data_long; + + write(data_pipe[1], &data, sizeof(data)); + + /* keep child alive until after the test */ + err = read(finish_pipe[0], &c, 1); + if (err != 1) + exit(-1); + + close(data_pipe[1]); + close(finish_pipe[0]); + _exit(0); + } + + /* parent */ + close(data_pipe[1]); + close(finish_pipe[0]); + + err = read(data_pipe[0], &data, sizeof(data)); + ASSERT_EQ(err, sizeof(data), "read_check"); + + skel->bss->user_ptr = data[0]; + skel->bss->user_ptr_long = data[1]; + skel->bss->pid = pid; + do_dummy_read(skel->progs.dump_task_sleepable); ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0, "num_expected_failure_copy_from_user_task"); ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0, "num_success_copy_from_user_task"); + ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task_str, 0, + "num_expected_failure_copy_from_user_task_str"); + ASSERT_GT(skel->bss->num_success_copy_from_user_task_str, 0, + "num_success_copy_from_user_task_str"); bpf_iter_tasks__destroy(skel); + + write(finish_pipe[1], &c, 1); + err = waitpid(pid, &status, 0); + ASSERT_EQ(err, pid, "waitpid"); + ASSERT_EQ(status, 0, "zero_child_exit"); + + close(data_pipe[0]); + close(finish_pipe[1]); } static void test_task_stack(void) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c index fe2c502e5089..ecc3d47919ad 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -78,7 +78,7 @@ static int test_setup_uffd(void *fault_addr) } uffd_register.range.start = (unsigned long)fault_addr; - uffd_register.range.len = 4096; + uffd_register.range.len = getpagesize(); uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) { close(uffd); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index a4a1f93878d4..215878ea04de 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -19,6 +19,10 @@ struct { { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" }, { "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" }, { "write_not_allowlisted_field", "no write support to nf_conn at off" }, + { "lookup_null_bpf_tuple", "Possibly NULL pointer passed to trusted arg1" }, + { "lookup_null_bpf_opts", "Possibly NULL pointer passed to trusted arg3" }, + { "xdp_lookup_null_bpf_tuple", "Possibly NULL pointer passed to trusted arg1" }, + { "xdp_lookup_null_bpf_opts", "Possibly NULL pointer passed to trusted arg3" }, }; enum { @@ -63,6 +67,12 @@ static void test_bpf_nf_ct(int mode) .repeat = 1, ); + if (SYS_NOFAIL("iptables-legacy --version")) { + fprintf(stdout, "Missing required iptables-legacy tool\n"); + test__skip(); + return; + } + skel = test_bpf_nf__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) return; @@ -72,11 +82,14 @@ static void test_bpf_nf_ct(int mode) if (!ASSERT_OK(system(cmd), cmd)) goto end; - srv_port = (mode == TEST_XDP) ? 5005 : 5006; - srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS); + srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS); if (!ASSERT_GE(srv_fd, 0, "start_server")) goto end; + srv_port = get_socket_local_port(srv_fd); + if (!ASSERT_GE(srv_port, 0, "get_sock_local_port")) + goto end; + client_fd = connect_to_server(srv_fd); if (!ASSERT_GE(client_fd, 0, "connect_to_server")) goto end; @@ -91,7 +104,7 @@ static void test_bpf_nf_ct(int mode) skel->bss->saddr = peer_addr.sin_addr.s_addr; skel->bss->sport = peer_addr.sin_port; skel->bss->daddr = peer_addr.sin_addr.s_addr; - skel->bss->dport = htons(srv_port); + skel->bss->dport = srv_port; if (mode == TEST_XDP) prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test); @@ -102,7 +115,6 @@ static void test_bpf_nf_ct(int mode) if (!ASSERT_OK(err, "bpf_prog_test_run")) goto end; - ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple"); ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0"); ASSERT_EQ(skel->bss->test_einval_reserved_new, -EINVAL, "Test EINVAL for reserved in new struct not set to 0"); ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1"); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c new file mode 100644 index 000000000000..730357cd0c9a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/pkt_sched.h> +#include <linux/rtnetlink.h> +#include <test_progs.h> + +#include "network_helpers.h" +#include "bpf_qdisc_fifo.skel.h" +#include "bpf_qdisc_fq.skel.h" +#include "bpf_qdisc_fail__incompl_ops.skel.h" + +#define LO_IFINDEX 1 + +static const unsigned int total_bytes = 10 * 1024 * 1024; + +static void do_test(char *qdisc) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_ROOT, + .handle = 0x8000000, + .qdisc = qdisc); + int srv_fd = -1, cli_fd = -1; + int err; + + err = bpf_tc_hook_create(&hook); + if (!ASSERT_OK(err, "attach qdisc")) + return; + + srv_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_OK_FD(srv_fd, "start server")) + goto done; + + cli_fd = connect_to_fd(srv_fd, 0); + if (!ASSERT_OK_FD(cli_fd, "connect to client")) + goto done; + + err = send_recv_data(srv_fd, cli_fd, total_bytes); + ASSERT_OK(err, "send_recv_data"); + +done: + if (srv_fd != -1) + close(srv_fd); + if (cli_fd != -1) + close(cli_fd); + + bpf_tc_hook_destroy(&hook); +} + +static void test_fifo(void) +{ + struct bpf_qdisc_fifo *fifo_skel; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + do_test("bpf_fifo"); +out: + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_fq(void) +{ + struct bpf_qdisc_fq *fq_skel; + + fq_skel = bpf_qdisc_fq__open_and_load(); + if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fq__attach(fq_skel), "bpf_qdisc_fq__attach")) + goto out; + + do_test("bpf_fq"); +out: + bpf_qdisc_fq__destroy(fq_skel); +} + +static void test_qdisc_attach_to_mq(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_MAKE(1 << 16, 1), + .handle = 0x11 << 16, + .qdisc = "bpf_fifo"); + struct bpf_qdisc_fifo *fifo_skel; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + SYS(out, "ip link add veth0 type veth peer veth1"); + hook.ifindex = if_nametoindex("veth0"); + SYS(out, "tc qdisc add dev veth0 root handle 1: mq"); + + err = bpf_tc_hook_create(&hook); + ASSERT_OK(err, "attach qdisc"); + + bpf_tc_hook_destroy(&hook); + + SYS(out, "tc qdisc delete dev veth0 root mq"); +out: + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_qdisc_attach_to_non_root(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_MAKE(1 << 16, 1), + .handle = 0x11 << 16, + .qdisc = "bpf_fifo"); + struct bpf_qdisc_fifo *fifo_skel; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + SYS(out, "tc qdisc add dev lo root handle 1: htb"); + SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit"); + + err = bpf_tc_hook_create(&hook); + if (!ASSERT_ERR(err, "attach qdisc")) + bpf_tc_hook_destroy(&hook); + +out_del_htb: + SYS(out, "tc qdisc delete dev lo root htb"); +out: + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_incompl_ops(void) +{ + struct bpf_qdisc_fail__incompl_ops *skel; + struct bpf_link *link; + + skel = bpf_qdisc_fail__incompl_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_qdisc_fifo__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.test); + if (!ASSERT_ERR_PTR(link, "bpf_map__attach_struct_ops")) + bpf_link__destroy(link); + + bpf_qdisc_fail__incompl_ops__destroy(skel); +} + +static int get_default_qdisc(char *qdisc_name) +{ + FILE *f; + int num; + + f = fopen("/proc/sys/net/core/default_qdisc", "r"); + if (!f) + return -errno; + + num = fscanf(f, "%s", qdisc_name); + fclose(f); + + return num == 1 ? 0 : -EFAULT; +} + +static void test_default_qdisc_attach_to_mq(void) +{ + char default_qdisc[IFNAMSIZ] = {}; + struct bpf_qdisc_fifo *fifo_skel; + struct netns_obj *netns = NULL; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + err = get_default_qdisc(default_qdisc); + if (!ASSERT_OK(err, "read sysctl net.core.default_qdisc")) + goto out; + + err = write_sysctl("/proc/sys/net/core/default_qdisc", "bpf_fifo"); + if (!ASSERT_OK(err, "write sysctl net.core.default_qdisc")) + goto out; + + netns = netns_new("bpf_qdisc_ns", true); + if (!ASSERT_OK_PTR(netns, "netns_new")) + goto out; + + SYS(out, "ip link add veth0 type veth peer veth1"); + SYS(out, "tc qdisc add dev veth0 root handle 1: mq"); + + ASSERT_EQ(fifo_skel->bss->init_called, true, "init_called"); + + SYS(out, "tc qdisc delete dev veth0 root mq"); +out: + netns_free(netns); + if (default_qdisc[0]) + write_sysctl("/proc/sys/net/core/default_qdisc", default_qdisc); + + bpf_qdisc_fifo__destroy(fifo_skel); +} + +void test_ns_bpf_qdisc(void) +{ + if (test__start_subtest("fifo")) + test_fifo(); + if (test__start_subtest("fq")) + test_fq(); + if (test__start_subtest("attach to mq")) + test_qdisc_attach_to_mq(); + if (test__start_subtest("attach to non root")) + test_qdisc_attach_to_non_root(); + if (test__start_subtest("incompl_ops")) + test_incompl_ops(); +} + +void serial_test_bpf_qdisc_default(void) +{ + test_default_qdisc_attach_to_mq(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index b7d1b52309d0..fe30181e6336 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -112,6 +112,10 @@ static void test_cubic(void) ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); + ASSERT_TRUE(cubic_skel->bss->nodelay_init_reject, "init reject nodelay option"); + ASSERT_TRUE(cubic_skel->bss->nodelay_cwnd_event_tx_start_reject, + "cwnd_event_tx_start reject nodelay option"); + bpf_link__destroy(link); bpf_cubic__destroy(cubic_skel); } @@ -281,7 +285,7 @@ static void test_dctcp_fallback(void) dctcp_skel = bpf_dctcp__open(); if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel")) return; - strcpy(dctcp_skel->rodata->fallback_cc, "cubic"); + strscpy(dctcp_skel->rodata->fallback_cc, "cubic"); if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load")) goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c b/tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c new file mode 100644 index 000000000000..e0eb869cb1b4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpftool_maps_access.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdint.h> +#include <sys/stat.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/libbpf.h> +#include <bpftool_helpers.h> +#include <test_progs.h> +#include <bpf/bpf.h> +#include "security_bpf_map.skel.h" + +#define PROTECTED_MAP_NAME "prot_map" +#define UNPROTECTED_MAP_NAME "not_prot_map" +#define BPF_ITER_FILE "bpf_iter_map_elem.bpf.o" +#define BPFFS_PIN_DIR "/sys/fs/bpf/test_bpftool_map" +#define INNER_MAP_NAME "inner_map_tt" +#define OUTER_MAP_NAME "outer_map_tt" + +#define MAP_NAME_MAX_LEN 64 +#define PATH_MAX_LEN 128 + +enum map_protection { + PROTECTED, + UNPROTECTED +}; + +struct test_desc { + char *name; + enum map_protection protection; + struct bpf_map *map; + char *map_name; + bool pinned; + char pin_path[PATH_MAX_LEN]; + bool write_must_fail; +}; + +static struct security_bpf_map *general_setup(void) +{ + struct security_bpf_map *skel; + uint32_t key, value; + int ret, i; + + skel = security_bpf_map__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + goto end; + + struct bpf_map *maps[] = {skel->maps.prot_map, skel->maps.not_prot_map}; + + ret = security_bpf_map__attach(skel); + if (!ASSERT_OK(ret, "attach maps security programs")) + goto end_destroy; + + for (i = 0; i < sizeof(maps)/sizeof(struct bpf_map *); i++) { + for (key = 0; key < 2; key++) { + int ret = bpf_map__update_elem(maps[i], &key, + sizeof(key), &key, sizeof(key), + 0); + if (!ASSERT_OK(ret, "set initial map value")) + goto end_destroy; + } + } + + key = 0; + value = 1; + ret = bpf_map__update_elem(skel->maps.prot_status_map, &key, + sizeof(key), &value, sizeof(value), 0); + if (!ASSERT_OK(ret, "configure map protection")) + goto end_destroy; + + if (!ASSERT_OK(mkdir(BPFFS_PIN_DIR, S_IFDIR), "create bpffs pin dir")) + goto end_destroy; + + return skel; +end_destroy: + security_bpf_map__destroy(skel); +end: + return NULL; +} + +static void general_cleanup(struct security_bpf_map *skel) +{ + rmdir(BPFFS_PIN_DIR); + security_bpf_map__destroy(skel); +} + +static void update_test_desc(struct security_bpf_map *skel, + struct test_desc *test) +{ + /* Now that the skeleton is loaded, update all missing fields to + * have the subtest properly configured + */ + if (test->protection == PROTECTED) { + test->map = skel->maps.prot_map; + test->map_name = PROTECTED_MAP_NAME; + } else { + test->map = skel->maps.not_prot_map; + test->map_name = UNPROTECTED_MAP_NAME; + } +} + +static int test_setup(struct security_bpf_map *skel, struct test_desc *desc) +{ + int ret; + + update_test_desc(skel, desc); + + if (desc->pinned) { + ret = snprintf(desc->pin_path, PATH_MAX_LEN, "%s/%s", BPFFS_PIN_DIR, + desc->name); + if (!ASSERT_GT(ret, 0, "format pin path")) + return 1; + ret = bpf_map__pin(desc->map, desc->pin_path); + if (!ASSERT_OK(ret, "pin map")) + return 1; + } + + return 0; +} + +static void test_cleanup(struct test_desc *desc) +{ + if (desc->pinned) + bpf_map__unpin(desc->map, NULL); +} + +static int lookup_map_value(char *map_handle) +{ + char cmd[MAX_BPFTOOL_CMD_LEN]; + int ret = 0; + + ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "map lookup %s key 0 0 0 0", + map_handle); + if (!ASSERT_GT(ret, 0, "format map lookup cmd")) + return 1; + return run_bpftool_command(cmd); +} + +static int read_map_btf_data(char *map_handle) +{ + char cmd[MAX_BPFTOOL_CMD_LEN]; + int ret = 0; + + ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "btf dump map %s", + map_handle); + if (!ASSERT_GT(ret, 0, "format map btf dump cmd")) + return 1; + return run_bpftool_command(cmd); +} + +static int write_map_value(char *map_handle) +{ + char cmd[MAX_BPFTOOL_CMD_LEN]; + int ret = 0; + + ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, + "map update %s key 0 0 0 0 value 1 1 1 1", map_handle); + if (!ASSERT_GT(ret, 0, "format value write cmd")) + return 1; + return run_bpftool_command(cmd); +} + +static int delete_map_value(char *map_handle) +{ + char cmd[MAX_BPFTOOL_CMD_LEN]; + int ret = 0; + + ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, + "map delete %s key 0 0 0 0", map_handle); + if (!ASSERT_GT(ret, 0, "format value deletion cmd")) + return 1; + return run_bpftool_command(cmd); +} + +static int iterate_on_map_values(char *map_handle, char *iter_pin_path) +{ + char cmd[MAX_BPFTOOL_CMD_LEN]; + int ret = 0; + + + ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "iter pin %s %s map %s", + BPF_ITER_FILE, iter_pin_path, map_handle); + if (!ASSERT_GT(ret, 0, "format iterator creation cmd")) + return 1; + ret = run_bpftool_command(cmd); + if (ret) + return ret; + ret = snprintf(cmd, MAP_NAME_MAX_LEN, "cat %s", iter_pin_path); + if (ret < 0) + goto cleanup; + ret = system(cmd); + +cleanup: + unlink(iter_pin_path); + return ret; +} + +static int create_inner_map(void) +{ + char cmd[MAX_BPFTOOL_CMD_LEN]; + int ret = 0; + + ret = snprintf( + cmd, MAX_BPFTOOL_CMD_LEN, + "map create %s/%s type array key 4 value 4 entries 4 name %s", + BPFFS_PIN_DIR, INNER_MAP_NAME, INNER_MAP_NAME); + if (!ASSERT_GT(ret, 0, "format inner map create cmd")) + return 1; + return run_bpftool_command(cmd); +} + +static int create_outer_map(void) +{ + char cmd[MAX_BPFTOOL_CMD_LEN]; + int ret = 0; + + ret = snprintf( + cmd, MAX_BPFTOOL_CMD_LEN, + "map create %s/%s type hash_of_maps key 4 value 4 entries 2 name %s inner_map name %s", + BPFFS_PIN_DIR, OUTER_MAP_NAME, OUTER_MAP_NAME, INNER_MAP_NAME); + if (!ASSERT_GT(ret, 0, "format outer map create cmd")) + return 1; + return run_bpftool_command(cmd); +} + +static void delete_pinned_map(char *map_name) +{ + char pin_path[PATH_MAX_LEN]; + int ret; + + ret = snprintf(pin_path, PATH_MAX_LEN, "%s/%s", BPFFS_PIN_DIR, + map_name); + if (ret >= 0) + unlink(pin_path); +} + +static int add_outer_map_entry(int key) +{ + char cmd[MAX_BPFTOOL_CMD_LEN]; + int ret = 0; + + ret = snprintf( + cmd, MAX_BPFTOOL_CMD_LEN, + "map update pinned %s/%s key %d 0 0 0 value name %s", + BPFFS_PIN_DIR, OUTER_MAP_NAME, key, INNER_MAP_NAME); + if (!ASSERT_GT(ret, 0, "format outer map value addition cmd")) + return 1; + return run_bpftool_command(cmd); +} + +static void test_basic_access(struct test_desc *desc) +{ + char map_handle[MAP_NAME_MAX_LEN]; + char iter_pin_path[PATH_MAX_LEN]; + int ret; + + if (desc->pinned) + ret = snprintf(map_handle, MAP_NAME_MAX_LEN, "pinned %s", + desc->pin_path); + else + ret = snprintf(map_handle, MAP_NAME_MAX_LEN, "name %s", + desc->map_name); + if (!ASSERT_GT(ret, 0, "format map handle")) + return; + + ret = lookup_map_value(map_handle); + ASSERT_OK(ret, "read map value"); + + ret = read_map_btf_data(map_handle); + ASSERT_OK(ret, "read map btf data"); + + ret = write_map_value(map_handle); + ASSERT_OK(desc->write_must_fail ? !ret : ret, "write map value"); + + ret = delete_map_value(map_handle); + ASSERT_OK(desc->write_must_fail ? !ret : ret, "delete map value"); + /* Restore deleted value */ + if (!ret) + write_map_value(map_handle); + + ret = snprintf(iter_pin_path, PATH_MAX_LEN, "%s/iter", BPFFS_PIN_DIR); + if (ASSERT_GT(ret, 0, "format iter pin path")) { + ret = iterate_on_map_values(map_handle, iter_pin_path); + ASSERT_OK(ret, "iterate on map values"); + } +} + +static void test_create_nested_maps(void) +{ + if (!ASSERT_OK(create_inner_map(), "create inner map")) + return; + if (!ASSERT_OK(create_outer_map(), "create outer map")) + goto end_cleanup_inner; + ASSERT_OK(add_outer_map_entry(0), "add a first entry in outer map"); + ASSERT_OK(add_outer_map_entry(1), "add a second entry in outer map"); + ASSERT_NEQ(add_outer_map_entry(2), 0, "add a third entry in outer map"); + + delete_pinned_map(OUTER_MAP_NAME); +end_cleanup_inner: + delete_pinned_map(INNER_MAP_NAME); +} + +static void test_btf_list(void) +{ + ASSERT_OK(run_bpftool_command("btf list"), "list btf data"); +} + +static struct test_desc tests[] = { + { + .name = "unprotected_unpinned", + .protection = UNPROTECTED, + .map_name = UNPROTECTED_MAP_NAME, + .pinned = false, + .write_must_fail = false, + }, + { + .name = "unprotected_pinned", + .protection = UNPROTECTED, + .map_name = UNPROTECTED_MAP_NAME, + .pinned = true, + .write_must_fail = false, + }, + { + .name = "protected_unpinned", + .protection = PROTECTED, + .map_name = UNPROTECTED_MAP_NAME, + .pinned = false, + .write_must_fail = true, + }, + { + .name = "protected_pinned", + .protection = PROTECTED, + .map_name = UNPROTECTED_MAP_NAME, + .pinned = true, + .write_must_fail = true, + } +}; + +static const size_t tests_count = ARRAY_SIZE(tests); + +void test_bpftool_maps_access(void) +{ + struct security_bpf_map *skel; + struct test_desc *current; + int i; + + skel = general_setup(); + if (!ASSERT_OK_PTR(skel, "prepare programs")) + goto cleanup; + + for (i = 0; i < tests_count; i++) { + current = &tests[i]; + if (!test__start_subtest(current->name)) + continue; + if (ASSERT_OK(test_setup(skel, current), "subtest setup")) { + test_basic_access(current); + test_cleanup(current); + } + } + if (test__start_subtest("nested_maps")) + test_create_nested_maps(); + if (test__start_subtest("btf_list")) + test_btf_list(); + +cleanup: + general_cleanup(skel); +} + diff --git a/tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c b/tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c new file mode 100644 index 000000000000..408ace90dc7e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpftool_metadata.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <bpftool_helpers.h> +#include <test_progs.h> +#include <linux/bpf.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <stdbool.h> + +#define BPFFS_DIR "/sys/fs/bpf/test_metadata" +#define BPFFS_USED BPFFS_DIR "/used" +#define BPFFS_UNUSED BPFFS_DIR "/unused" + +#define BPF_FILE_USED "metadata_used.bpf.o" +#define BPF_FILE_UNUSED "metadata_unused.bpf.o" +#define METADATA_MAP_NAME "metadata.rodata" + +#define MAX_BPFTOOL_OUTPUT_LEN (64*1024) + +#define MAX_TOKENS_TO_CHECK 3 +static char output[MAX_BPFTOOL_OUTPUT_LEN]; + +struct test_desc { + char *name; + char *bpf_prog; + char *bpffs_path; + char *expected_output[MAX_TOKENS_TO_CHECK]; + char *expected_output_json[MAX_TOKENS_TO_CHECK]; + char *metadata_map_name; +}; + +static int setup(struct test_desc *test) +{ + return mkdir(BPFFS_DIR, 0700); +} + +static void cleanup(struct test_desc *test) +{ + unlink(test->bpffs_path); + rmdir(BPFFS_DIR); +} + +static int check_metadata(char *buf, char * const *tokens, int count) +{ + int i; + + for (i = 0; i < count && tokens[i]; i++) + if (!strstr(buf, tokens[i])) + return 1; + + return 0; +} + +static void run_test(struct test_desc *test) +{ + int ret; + char cmd[MAX_BPFTOOL_CMD_LEN]; + + ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "prog load %s %s", + test->bpf_prog, test->bpffs_path); + if (!ASSERT_GT(ret, 0, "format prog insert command")) + return; + ret = run_bpftool_command(cmd); + if (!ASSERT_OK(ret, "load program")) + return; + + /* Check output with default format */ + ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "prog show pinned %s", + test->bpffs_path); + if (!ASSERT_GT(ret, 0, "format pinned prog check command")) + return; + ret = get_bpftool_command_output(cmd, output, + MAX_BPFTOOL_OUTPUT_LEN); + if (ASSERT_OK(ret, "get program info")) { + ret = check_metadata(output, test->expected_output, + ARRAY_SIZE(test->expected_output)); + ASSERT_OK(ret, "find metadata"); + } + + /* Check output with json format */ + ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "prog -j show pinned %s", + test->bpffs_path); + if (!ASSERT_GT(ret, 0, "format pinned prog check command in json")) + return; + ret = get_bpftool_command_output(cmd, output, + MAX_BPFTOOL_OUTPUT_LEN); + if (ASSERT_OK(ret, "get program info in json")) { + ret = check_metadata(output, test->expected_output_json, + ARRAY_SIZE(test->expected_output_json)); + ASSERT_OK(ret, "find metadata in json"); + } + + /* Check that the corresponding map can be found and accessed */ + ret = snprintf(cmd, MAX_BPFTOOL_CMD_LEN, "map show name %s", + test->metadata_map_name); + if (!ASSERT_GT(ret, 0, "format map check command")) + return; + ASSERT_OK(run_bpftool_command(cmd), "access metadata map"); +} + +static struct test_desc tests[] = { + { + .name = "metadata_unused", + .bpf_prog = BPF_FILE_UNUSED, + .bpffs_path = BPFFS_UNUSED, + .expected_output = { + "a = \"foo\"", + "b = 1" + }, + .expected_output_json = { + "\"metadata\":{\"a\":\"foo\",\"b\":1}" + }, + .metadata_map_name = METADATA_MAP_NAME + }, + { + .name = "metadata_used", + .bpf_prog = BPF_FILE_USED, + .bpffs_path = BPFFS_USED, + .expected_output = { + "a = \"bar\"", + "b = 2" + }, + .expected_output_json = { + "\"metadata\":{\"a\":\"bar\",\"b\":2}" + }, + .metadata_map_name = METADATA_MAP_NAME + } +}; +static const int tests_count = ARRAY_SIZE(tests); + +void test_bpftool_metadata(void) +{ + int i; + + for (i = 0; i < tests_count; i++) { + if (!test__start_subtest(tests[i].name)) + continue; + if (ASSERT_OK(setup(&tests[i]), "setup bpffs pin dir")) { + run_test(&tests[i]); + cleanup(&tests[i]); + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index e63d74ce046f..054ecb6b1e9f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3866,11 +3866,11 @@ static struct btf_raw_test raw_tests[] = { .err_str = "vlen != 0", }, { - .descr = "decl_tag test #8, invalid kflag", + .descr = "decl_tag test #8, tag with kflag", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1), + BTF_DECL_ATTR_ENC(NAME_TBD, 2, -1), BTF_END_RAW, }, BTF_STR_SEC("\0local\0tag1"), @@ -3881,8 +3881,6 @@ static struct btf_raw_test raw_tests[] = { .key_type_id = 1, .value_type_id = 1, .max_entries = 1, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", }, { .descr = "decl_tag test #9, var, invalid component_idx", @@ -4207,6 +4205,23 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Type tags don't precede modifiers", }, { + .descr = "type_tag test #7, tag with kflag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ATTR_ENC(NAME_TBD, 1), /* [2] */ + BTF_PTR_ENC(2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, +{ .descr = "enum64 test #1, unsigned, size 8", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -7481,6 +7496,71 @@ static struct btf_dedup_test dedup_tests[] = { }, }, { + .descr = "dedup: recursive typedef", + /* + * This test simulates a recursive typedef, which in GO is defined as such: + * + * type Foo func() Foo + * + * In BTF terms, this is represented as a TYPEDEF referencing + * a FUNC_PROTO that returns the same TYPEDEF. + */ + .input = { + .raw_types = { + /* + * [1] typedef Foo -> func() Foo + * [2] func_proto() -> Foo + * [3] typedef Foo -> func() Foo + * [4] func_proto() -> Foo + */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 0), /* [2] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 4), /* [3] */ + BTF_FUNC_PROTO_ENC(3, 0), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0Foo"), + }, + .expect = { + .raw_types = { + BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 0), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0Foo"), + }, +}, +{ + .descr = "dedup: typedef", + /* + * // CU 1: + * typedef int foo; + * + * // CU 2: + * typedef int foo; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ + /* CU 2 */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [3] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 3), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo"), + }, +}, +{ .descr = "dedup: typedef tags", .input = { .raw_types = { diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c index d9024c7a892a..5bc15bb6b7ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -440,6 +440,105 @@ cleanup: btf__free(btf1); } +/* Ensure module split BTF dedup worked correctly; when dedup fails badly + * core kernel types are in split BTF also, so ensure that references to + * such types point at base - not split - BTF. + * + * bpf_testmod_test_write() has multiple core kernel type parameters; + * + * ssize_t + * bpf_testmod_test_write(struct file *file, struct kobject *kobj, + * struct bin_attribute *bin_attr, + * char *buf, loff_t off, size_t len); + * + * Ensure each of the FUNC_PROTO params is a core kernel type. + * + * Do the same for + * + * __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk); + * + * ...and + * + * __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb); + * + */ +const char *mod_funcs[] = { + "bpf_testmod_test_write", + "bpf_kfunc_call_test3", + "bpf_kfunc_call_test_pass_ctx" +}; + +static void test_split_module(void) +{ + struct btf *vmlinux_btf, *btf1 = NULL; + int i, nr_base_types; + + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux_btf")) + return; + nr_base_types = btf__type_cnt(vmlinux_btf); + if (!ASSERT_GT(nr_base_types, 0, "nr_base_types")) + goto cleanup; + + btf1 = btf__parse_split("/sys/kernel/btf/bpf_testmod", vmlinux_btf); + if (!ASSERT_OK_PTR(btf1, "split_btf")) + return; + + for (i = 0; i < ARRAY_SIZE(mod_funcs); i++) { + const struct btf_param *p; + const struct btf_type *t; + __u16 vlen; + __u32 id; + int j; + + id = btf__find_by_name_kind(btf1, mod_funcs[i], BTF_KIND_FUNC); + if (!ASSERT_GE(id, nr_base_types, "func_id")) + goto cleanup; + t = btf__type_by_id(btf1, id); + if (!ASSERT_OK_PTR(t, "func_id_type")) + goto cleanup; + t = btf__type_by_id(btf1, t->type); + if (!ASSERT_OK_PTR(t, "func_proto_id_type")) + goto cleanup; + if (!ASSERT_EQ(btf_is_func_proto(t), true, "is_func_proto")) + goto cleanup; + vlen = btf_vlen(t); + + for (j = 0, p = btf_params(t); j < vlen; j++, p++) { + /* bpf_testmod uses resilient split BTF, so any + * reference types will be added to split BTF and their + * associated targets will be base BTF types; for example + * for a "struct sock *" the PTR will be in split BTF + * while the "struct sock" will be in base. + * + * In some cases like loff_t we have to resolve + * multiple typedefs hence the while() loop below. + * + * Note that resilient split BTF generation depends + * on pahole version, so we do not assert that + * reference types are in split BTF, as if pahole + * does not support resilient split BTF they will + * also be base BTF types. + */ + id = p->type; + do { + t = btf__type_by_id(btf1, id); + if (!ASSERT_OK_PTR(t, "param_ref_type")) + goto cleanup; + if (!btf_is_mod(t) && !btf_is_ptr(t) && !btf_is_typedef(t)) + break; + id = t->type; + } while (true); + + if (!ASSERT_LT(id, nr_base_types, "verify_base_type")) + goto cleanup; + } + } +cleanup: + btf__free(btf1); + btf__free(vmlinux_btf); +} + void test_btf_dedup_split() { if (test__start_subtest("split_simple")) @@ -450,4 +549,6 @@ void test_btf_dedup_split() test_split_fwd_resolve(); if (test__start_subtest("split_dup_struct_in_cu")) test_split_dup_struct_in_cu(); + if (test__start_subtest("split_module")) + test_split_module(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index b293b8501fd6..f1642794f70e 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -63,7 +63,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t) /* tests with t->known_ptr_sz have no "long" or "unsigned long" type, * so it's impossible to determine correct pointer size; but if they - * do, it should be 8 regardless of host architecture, becaues BPF + * do, it should be 8 regardless of host architecture, because BPF * target is always 64-bit */ if (!t->known_ptr_sz) { @@ -126,26 +126,69 @@ done: return err; } -static char *dump_buf; -static size_t dump_buf_sz; -static FILE *dump_buf_file; +struct test_ctx { + struct btf *btf; + struct btf_dump *d; + char *dump_buf; + size_t dump_buf_sz; + FILE *dump_buf_file; +}; -static void test_btf_dump_incremental(void) +static void test_ctx__free(struct test_ctx *t) { - struct btf *btf = NULL; - struct btf_dump *d = NULL; - int id, err, i; + fclose(t->dump_buf_file); + free(t->dump_buf); + btf_dump__free(t->d); + btf__free(t->btf); +} - dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); - if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) - return; - btf = btf__new_empty(); - if (!ASSERT_OK_PTR(btf, "new_empty")) +static int test_ctx__init(struct test_ctx *t) +{ + t->dump_buf_file = open_memstream(&t->dump_buf, &t->dump_buf_sz); + if (!ASSERT_OK_PTR(t->dump_buf_file, "dump_memstream")) + return -1; + t->btf = btf__new_empty(); + if (!ASSERT_OK_PTR(t->btf, "new_empty")) goto err_out; - d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL); - if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new")) + t->d = btf_dump__new(t->btf, btf_dump_printf, t->dump_buf_file, NULL); + if (!ASSERT_OK(libbpf_get_error(t->d), "btf_dump__new")) goto err_out; + return 0; + +err_out: + test_ctx__free(t); + return -1; +} + +static void test_ctx__dump_and_compare(struct test_ctx *t, + const char *expected_output, + const char *message) +{ + int i, err; + + for (i = 1; i < btf__type_cnt(t->btf); i++) { + err = btf_dump__dump_type(t->d, i); + ASSERT_OK(err, "dump_type_ok"); + } + + fflush(t->dump_buf_file); + t->dump_buf[t->dump_buf_sz] = 0; /* some libc implementations don't do this */ + + ASSERT_STREQ(t->dump_buf, expected_output, message); +} + +static void test_btf_dump_incremental(void) +{ + struct test_ctx t = {}; + struct btf *btf; + int id, err; + + if (test_ctx__init(&t)) + return; + + btf = t.btf; + /* First, generate BTF corresponding to the following C code: * * enum x; @@ -182,15 +225,7 @@ static void test_btf_dump_incremental(void) err = btf__add_field(btf, "x", 4, 0, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i < btf__type_cnt(btf); i++) { - err = btf_dump__dump_type(d, i); - ASSERT_OK(err, "dump_type_ok"); - } - - fflush(dump_buf_file); - dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ - - ASSERT_STREQ(dump_buf, + test_ctx__dump_and_compare(&t, "enum x;\n" "\n" "enum x {\n" @@ -221,7 +256,7 @@ static void test_btf_dump_incremental(void) * enum values don't conflict; * */ - fseek(dump_buf_file, 0, SEEK_SET); + fseek(t.dump_buf_file, 0, SEEK_SET); id = btf__add_struct(btf, "s", 4); ASSERT_EQ(id, 7, "struct_id"); @@ -232,14 +267,7 @@ static void test_btf_dump_incremental(void) err = btf__add_field(btf, "s", 6, 64, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i < btf__type_cnt(btf); i++) { - err = btf_dump__dump_type(d, i); - ASSERT_OK(err, "dump_type_ok"); - } - - fflush(dump_buf_file); - dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ - ASSERT_STREQ(dump_buf, + test_ctx__dump_and_compare(&t, "struct s___2 {\n" " enum x x;\n" " enum {\n" @@ -248,11 +276,53 @@ static void test_btf_dump_incremental(void) " struct s s;\n" "};\n\n" , "c_dump1"); -err_out: - fclose(dump_buf_file); - free(dump_buf); - btf_dump__free(d); - btf__free(btf); + test_ctx__free(&t); +} + +static void test_btf_dump_type_tags(void) +{ + struct test_ctx t = {}; + struct btf *btf; + int id, err; + + if (test_ctx__init(&t)) + return; + + btf = t.btf; + + /* Generate BTF corresponding to the following C code: + * + * struct s { + * void __attribute__((btf_type_tag(\"void_tag\"))) *p1; + * void __attribute__((void_attr)) *p2; + * }; + * + */ + + id = btf__add_type_tag(btf, "void_tag", 0); + ASSERT_EQ(id, 1, "type_tag_id"); + id = btf__add_ptr(btf, id); + ASSERT_EQ(id, 2, "void_ptr_id1"); + + id = btf__add_type_attr(btf, "void_attr", 0); + ASSERT_EQ(id, 3, "type_attr_id"); + id = btf__add_ptr(btf, id); + ASSERT_EQ(id, 4, "void_ptr_id2"); + + id = btf__add_struct(btf, "s", 8); + ASSERT_EQ(id, 5, "struct_id"); + err = btf__add_field(btf, "p1", 2, 0, 0); + ASSERT_OK(err, "field_ok1"); + err = btf__add_field(btf, "p2", 4, 0, 0); + ASSERT_OK(err, "field_ok2"); + + test_ctx__dump_and_compare(&t, +"struct s {\n" +" void __attribute__((btf_type_tag(\"void_tag\"))) *p1;\n" +" void __attribute__((void_attr)) *p2;\n" +"};\n\n", "dump_and_compare"); + + test_ctx__free(&t); } #define STRSIZE 4096 @@ -805,8 +875,124 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, "int cpu_number = (int)100", 100); #endif - TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_cgrp_storage_busy", int, BTF_F_COMPACT, - "static int bpf_cgrp_storage_busy = (int)2", 2); + TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_bprintf_nest_level", int, BTF_F_COMPACT, + "static int bpf_bprintf_nest_level = (int)2", 2); +} + +struct btf_dump_string_ctx { + struct btf *btf; + struct btf_dump *d; + char *str; + struct btf_dump_type_data_opts *opts; + int array_id; +}; + +static int btf_dump_one_string(struct btf_dump_string_ctx *ctx, + char *ptr, size_t ptr_sz, + const char *expected_val) +{ + size_t type_sz; + int ret; + + ctx->str[0] = '\0'; + type_sz = btf__resolve_size(ctx->btf, ctx->array_id); + ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts); + if (type_sz <= ptr_sz) { + if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz")) + return -EINVAL; + } + if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match")) + return -EFAULT; + return 0; +} + +static void btf_dump_strings(struct btf_dump_string_ctx *ctx) +{ + struct btf_dump_type_data_opts *opts = ctx->opts; + + opts->emit_strings = true; + + opts->compact = true; + opts->emit_zeroes = false; + + opts->skip_names = false; + btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\""); + + opts->skip_names = true; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->emit_zeroes = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->compact = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* Non-printable characters come out as hex. */ + btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\""); + btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\""); + + /* + * Strings that are too long for the specified type ("char[4]") + * should fall back to the current behavior. + */ + opts->compact = true; + btf_dump_one_string(ctx, "abcde", 6, "['a','b','c','d',]"); + + /* + * Strings that are too short for the specified type ("char[4]") + * should work normally. + */ + btf_dump_one_string(ctx, "ab", 3, "\"ab\""); + + /* Non-NUL-terminated arrays don't get printed as strings. */ + char food[4] = { 'f', 'o', 'o', 'd' }; + char bye[3] = { 'b', 'y', 'e' }; + + btf_dump_one_string(ctx, food, 4, "['f','o','o','d',]"); + btf_dump_one_string(ctx, bye, 3, "['b','y','e',]"); + + /* The embedded NUL should terminate the string. */ + char embed[4] = { 'f', 'o', '\0', 'd' }; + + btf_dump_one_string(ctx, embed, 4, "\"fo\""); +} + +static void test_btf_dump_string_data(void) +{ + struct test_ctx t = {}; + char str[STRSIZE]; + struct btf_dump *d; + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + struct btf_dump_string_ctx ctx; + int char_id, int_id, array_id; + + if (test_ctx__init(&t)) + return; + + d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + + /* Generate BTF for a four-element char array. */ + char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR); + ASSERT_EQ(char_id, 1, "char_id"); + int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(int_id, 2, "int_id"); + array_id = btf__add_array(t.btf, int_id, char_id, 4); + ASSERT_EQ(array_id, 3, "array_id"); + + ctx.btf = t.btf; + ctx.d = d; + ctx.str = str; + ctx.opts = &opts; + ctx.array_id = array_id; + + btf_dump_strings(&ctx); + + btf_dump__free(d); + test_ctx__free(&t); } static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, @@ -874,6 +1060,9 @@ void test_btf_dump() { if (test__start_subtest("btf_dump: incremental")) test_btf_dump_incremental(); + if (test__start_subtest("btf_dump: type_tags")) + test_btf_dump_type_tags(); + btf = libbpf_find_kernel_btf(); if (!ASSERT_OK_PTR(btf, "no kernel BTF found")) return; @@ -897,6 +1086,8 @@ void test_btf_dump() { test_btf_dump_struct_data(btf, d, str); if (test__start_subtest("btf_dump: var_data")) test_btf_dump_var_data(btf, d, str); + if (test__start_subtest("btf_dump: string_data")) + test_btf_dump_string_data(); btf_dump__free(d); btf__free(btf); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_kind.c b/tools/testing/selftests/bpf/prog_tests/btf_kind.c new file mode 100644 index 000000000000..f61afe6a79a5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_kind.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026, Oracle and/or its affiliates. */ + +#include <test_progs.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> + +/* Verify kind encoding exists for each kind */ +static void test_btf_kind_encoding(void) +{ + LIBBPF_OPTS(btf_new_opts, opts); + const struct btf_header *hdr; + const void *raw_btf; + struct btf *btf; + __u32 raw_size; + + opts.add_layout = true; + btf = btf__new_empty_opts(&opts); + if (!ASSERT_OK_PTR(btf, "btf_new")) + return; + + raw_btf = btf__raw_data(btf, &raw_size); + if (!ASSERT_OK_PTR(raw_btf, "btf__raw_data")) + return; + + hdr = raw_btf; + + ASSERT_EQ(hdr->layout_off % 4, 0, "layout_aligned"); + ASSERT_EQ(hdr->layout_len, sizeof(struct btf_layout) * NR_BTF_KINDS, + "layout_len"); + ASSERT_EQ(hdr->str_off, hdr->layout_off + hdr->layout_len, "str_after_layout"); + btf__free(btf); + + opts.add_layout = false; + btf = btf__new_empty_opts(&opts); + if (!ASSERT_OK_PTR(btf, "btf_new")) + return; + + raw_btf = btf__raw_data(btf, &raw_size); + if (!ASSERT_OK_PTR(raw_btf, "btf__raw_data")) + return; + + hdr = raw_btf; + + ASSERT_EQ(hdr->layout_off, 0, "no_layout_off"); + ASSERT_EQ(hdr->layout_len, 0, "no_layout_len"); + ASSERT_EQ(hdr->str_off, hdr->type_off + hdr->type_len, "strs_after_types"); + btf__free(btf); +} + +static int write_raw_btf(void *raw_btf, size_t raw_size, char *file) +{ + int fd = mkstemp(file); + ssize_t n; + + if (!ASSERT_OK_FD(fd, "open_raw_btf")) + return -1; + n = write(fd, raw_btf, raw_size); + close(fd); + if (!ASSERT_EQ(n, (ssize_t)raw_size, "write_raw_btf")) + return -1; + return 0; +} + +/* + * Fabricate an unrecognized kind at BTF_KIND_MAX + 1, and after adding + * the appropriate struct/typedefs to the BTF such that it recognizes + * this kind, ensure that parsing of BTF containing the unrecognized kind + * can succeed. + */ +void test_btf_kind_decoding(void) +{ + char btf_kind_file1[] = "/tmp/test_btf_kind.XXXXXX"; + char btf_kind_file2[] = "/tmp/test_btf_kind.XXXXXX"; + char btf_kind_file3[] = "/tmp/test_btf_kind.XXXXXX"; + struct btf *btf = NULL, *new_btf = NULL; + __s32 int_id, unrec_id, id, id2; + LIBBPF_OPTS(btf_new_opts, opts); + struct btf_layout *l; + struct btf_header *hdr; + const void *raw_btf; + struct btf_type *t; + void *new_raw_btf; + void *str_data; + __u32 raw_size; + + opts.add_layout = true; + btf = btf__new_empty_opts(&opts); + if (!ASSERT_OK_PTR(btf, "btf_new")) + return; + + int_id = btf__add_int(btf, "test_char", 1, BTF_INT_CHAR); + if (!ASSERT_GT(int_id, 0, "add_int_id")) + return; + + /* + * Create our type with unrecognized kind by adding a typedef kind + * we will overwrite it with our unrecognized kind value. + */ + unrec_id = btf__add_typedef(btf, "unrec_kind", int_id); + if (!ASSERT_GT(unrec_id, 0, "add_unrec_id")) + return; + + /* + * Add an id after it that we will look up to verify we can parse + * beyond unrecognized kinds. + */ + id = btf__add_typedef(btf, "test_lookup", int_id); + if (!ASSERT_GT(id, 0, "add_test_lookup_id")) + return; + id2 = btf__add_typedef(btf, "test_lookup2", int_id); + if (!ASSERT_GT(id2, 0, "add_test_lookup_id2")) + return; + + raw_btf = (void *)btf__raw_data(btf, &raw_size); + if (!ASSERT_OK_PTR(raw_btf, "btf__raw_data")) + return; + + new_raw_btf = calloc(1, raw_size + sizeof(*l)); + if (!ASSERT_OK_PTR(new_raw_btf, "calloc_raw_btf")) + return; + memcpy(new_raw_btf, raw_btf, raw_size); + + hdr = new_raw_btf; + + /* Move strings to make space for one new layout description */ + raw_size += sizeof(*l); + str_data = new_raw_btf + hdr->hdr_len + hdr->str_off; + memmove(str_data + sizeof(*l), str_data, hdr->str_len); + hdr->str_off += sizeof(*l); + + /* Add new layout description */ + hdr->layout_len += sizeof(*l); + l = new_raw_btf + hdr->hdr_len + hdr->layout_off; + l[NR_BTF_KINDS].info_sz = 0; + l[NR_BTF_KINDS].elem_sz = 0; + l[NR_BTF_KINDS].flags = 0; + + /* Now modify typedef added above to be an unrecognized kind. */ + t = (void *)hdr + hdr->hdr_len + hdr->type_off + sizeof(struct btf_type) + + sizeof(__u32); + t->info = (NR_BTF_KINDS << 24); + + /* Write BTF to a raw file, ready for parsing. */ + if (write_raw_btf(new_raw_btf, raw_size, btf_kind_file1)) + goto out; + + /* + * Verify parsing succeeds, and that we can read type info past + * the unrecognized kind. + */ + new_btf = btf__parse_raw(btf_kind_file1); + if (ASSERT_OK_PTR(new_btf, "btf__parse_raw")) { + ASSERT_EQ(btf__find_by_name(new_btf, "unrec_kind"), unrec_id, + "unrec_kind_found"); + ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup", + BTF_KIND_TYPEDEF), id, + "verify_id_lookup"); + ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup2", + BTF_KIND_TYPEDEF), id2, + "verify_id2_lookup"); + } + btf__free(new_btf); + new_btf = NULL; + + /* + * Next, change info_sz to equal sizeof(struct btf_type); this means the + * "test_lookup" kind will be reinterpreted as a singular info element + * following the unrecognized kind. + */ + l[NR_BTF_KINDS].info_sz = sizeof(struct btf_type); + if (write_raw_btf(new_raw_btf, raw_size, btf_kind_file2)) + goto out; + + new_btf = btf__parse_raw(btf_kind_file2); + if (ASSERT_OK_PTR(new_btf, "btf__parse_raw")) { + ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup", + BTF_KIND_TYPEDEF), -ENOENT, + "verify_id_not_found"); + /* id of "test_lookup2" will be id2 -1 as we have removed one type */ + ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup2", + BTF_KIND_TYPEDEF), id2 - 1, + "verify_id_lookup2"); + + } + btf__free(new_btf); + new_btf = NULL; + + /* + * Change elem_sz to equal sizeof(struct btf_type) and set vlen + * associated with unrecognized type to 1; this allows us to verify + * vlen-specified BTF can still be parsed. + */ + l[NR_BTF_KINDS].info_sz = 0; + l[NR_BTF_KINDS].elem_sz = sizeof(struct btf_type); + t->info |= 1; + if (write_raw_btf(new_raw_btf, raw_size, btf_kind_file3)) + goto out; + + new_btf = btf__parse_raw(btf_kind_file3); + if (ASSERT_OK_PTR(new_btf, "btf__parse_raw")) { + ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup", + BTF_KIND_TYPEDEF), -ENOENT, + "verify_id_not_found"); + /* id of "test_lookup2" will be id2 -1 as we have removed one type */ + ASSERT_EQ(btf__find_by_name_kind(new_btf, "test_lookup2", + BTF_KIND_TYPEDEF), id2 - 1, + "verify_id_lookup2"); + + } +out: + btf__free(new_btf); + free(new_raw_btf); + unlink(btf_kind_file1); + unlink(btf_kind_file2); + unlink(btf_kind_file3); + btf__free(btf); +} + +void test_btf_kind(void) +{ + if (test__start_subtest("btf_kind_encoding")) + test_btf_kind_encoding(); + if (test__start_subtest("btf_kind_decoding")) + test_btf_kind_decoding(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_permute.c b/tools/testing/selftests/bpf/prog_tests/btf_permute.c new file mode 100644 index 000000000000..04ade5ad77ac --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_permute.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Xiaomi */ + +#include <test_progs.h> +#include <bpf/btf.h> +#include "btf_helpers.h" + +static void permute_base_check(struct btf *btf) +{ + VALIDATE_RAW_BTF( + btf, + "[1] STRUCT 's2' size=4 vlen=1\n" + "\t'm' type_id=4 bits_offset=0", + "[2] FUNC 'f' type_id=6 linkage=static", + "[3] PTR '(anon)' type_id=4", + "[4] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[5] STRUCT 's1' size=4 vlen=1\n" + "\t'm' type_id=4 bits_offset=0", + "[6] FUNC_PROTO '(anon)' ret_type_id=4 vlen=1\n" + "\t'p' type_id=3"); +} + +/* Ensure btf__permute works as expected in the base-BTF scenario */ +static void test_permute_base(void) +{ + struct btf *btf; + __u32 permute_ids[7]; + int err; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_main_btf")) + return; + + btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf, 1); /* [2] ptr to int */ + btf__add_struct(btf, "s1", 4); /* [3] struct s1 { */ + btf__add_field(btf, "m", 1, 0, 0); /* int m; */ + /* } */ + btf__add_struct(btf, "s2", 4); /* [4] struct s2 { */ + btf__add_field(btf, "m", 1, 0, 0); /* int m; */ + /* } */ + btf__add_func_proto(btf, 1); /* [5] int (*)(int *p); */ + btf__add_func_param(btf, "p", 2); + btf__add_func(btf, "f", BTF_FUNC_STATIC, 5); /* [6] int f(int *p); */ + + VALIDATE_RAW_BTF( + btf, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=4 vlen=1\n" + "\t'm' type_id=1 bits_offset=0", + "[4] STRUCT 's2' size=4 vlen=1\n" + "\t'm' type_id=1 bits_offset=0", + "[5] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p' type_id=2", + "[6] FUNC 'f' type_id=5 linkage=static"); + + permute_ids[0] = 0; /* [0] -> [0] */ + permute_ids[1] = 4; /* [1] -> [4] */ + permute_ids[2] = 3; /* [2] -> [3] */ + permute_ids[3] = 5; /* [3] -> [5] */ + permute_ids[4] = 1; /* [4] -> [1] */ + permute_ids[5] = 6; /* [5] -> [6] */ + permute_ids[6] = 2; /* [6] -> [2] */ + err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL); + if (!ASSERT_OK(err, "btf__permute_base")) + goto done; + permute_base_check(btf); + + /* ids[0] must be 0 for base BTF */ + permute_ids[0] = 4; /* [0] -> [0] */ + permute_ids[1] = 0; /* [1] -> [4] */ + permute_ids[2] = 3; /* [2] -> [3] */ + permute_ids[3] = 5; /* [3] -> [5] */ + permute_ids[4] = 1; /* [4] -> [1] */ + permute_ids[5] = 6; /* [5] -> [6] */ + permute_ids[6] = 2; /* [6] -> [2] */ + err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL); + if (!ASSERT_ERR(err, "btf__permute_base")) + goto done; + /* BTF is not modified */ + permute_base_check(btf); + + /* id_map_cnt is invalid */ + permute_ids[0] = 0; /* [0] -> [0] */ + permute_ids[1] = 4; /* [1] -> [4] */ + permute_ids[2] = 3; /* [2] -> [3] */ + permute_ids[3] = 5; /* [3] -> [5] */ + permute_ids[4] = 1; /* [4] -> [1] */ + permute_ids[5] = 6; /* [5] -> [6] */ + permute_ids[6] = 2; /* [6] -> [2] */ + err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids) - 1, NULL); + if (!ASSERT_ERR(err, "btf__permute_base")) + goto done; + /* BTF is not modified */ + permute_base_check(btf); + + /* Multiple types can not be mapped to the same ID */ + permute_ids[0] = 0; + permute_ids[1] = 4; + permute_ids[2] = 4; + permute_ids[3] = 5; + permute_ids[4] = 1; + permute_ids[5] = 6; + permute_ids[6] = 2; + err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL); + if (!ASSERT_ERR(err, "btf__permute_base")) + goto done; + /* BTF is not modified */ + permute_base_check(btf); + + /* Type ID must be valid */ + permute_ids[0] = 0; + permute_ids[1] = 4; + permute_ids[2] = 3; + permute_ids[3] = 5; + permute_ids[4] = 1; + permute_ids[5] = 7; + permute_ids[6] = 2; + err = btf__permute(btf, permute_ids, ARRAY_SIZE(permute_ids), NULL); + if (!ASSERT_ERR(err, "btf__permute_base")) + goto done; + /* BTF is not modified */ + permute_base_check(btf); + +done: + btf__free(btf); +} + +static void permute_split_check(struct btf *btf) +{ + VALIDATE_RAW_BTF( + btf, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's2' size=4 vlen=1\n" + "\t'm' type_id=1 bits_offset=0", + "[4] FUNC 'f' type_id=5 linkage=static", + "[5] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p' type_id=2", + "[6] STRUCT 's1' size=4 vlen=1\n" + "\t'm' type_id=1 bits_offset=0"); +} + +/* Ensure btf__permute works as expected in the split-BTF scenario */ +static void test_permute_split(void) +{ + struct btf *split_btf = NULL, *base_btf = NULL; + __u32 permute_ids[4]; + int err, start_id; + + base_btf = btf__new_empty(); + if (!ASSERT_OK_PTR(base_btf, "empty_main_btf")) + return; + + btf__add_int(base_btf, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(base_btf, 1); /* [2] ptr to int */ + VALIDATE_RAW_BTF( + base_btf, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1"); + split_btf = btf__new_empty_split(base_btf); + if (!ASSERT_OK_PTR(split_btf, "empty_split_btf")) + goto cleanup; + btf__add_struct(split_btf, "s1", 4); /* [3] struct s1 { */ + btf__add_field(split_btf, "m", 1, 0, 0); /* int m; */ + /* } */ + btf__add_struct(split_btf, "s2", 4); /* [4] struct s2 { */ + btf__add_field(split_btf, "m", 1, 0, 0); /* int m; */ + /* } */ + btf__add_func_proto(split_btf, 1); /* [5] int (*)(int p); */ + btf__add_func_param(split_btf, "p", 2); + btf__add_func(split_btf, "f", BTF_FUNC_STATIC, 5); /* [6] int f(int *p); */ + + VALIDATE_RAW_BTF( + split_btf, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=4 vlen=1\n" + "\t'm' type_id=1 bits_offset=0", + "[4] STRUCT 's2' size=4 vlen=1\n" + "\t'm' type_id=1 bits_offset=0", + "[5] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p' type_id=2", + "[6] FUNC 'f' type_id=5 linkage=static"); + + start_id = btf__type_cnt(base_btf); + permute_ids[3 - start_id] = 6; /* [3] -> [6] */ + permute_ids[4 - start_id] = 3; /* [4] -> [3] */ + permute_ids[5 - start_id] = 5; /* [5] -> [5] */ + permute_ids[6 - start_id] = 4; /* [6] -> [4] */ + err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids), NULL); + if (!ASSERT_OK(err, "btf__permute_split")) + goto cleanup; + permute_split_check(split_btf); + + /* + * For split BTF, id_map_cnt must equal to the number of types + * added on top of base BTF + */ + permute_ids[3 - start_id] = 4; + permute_ids[4 - start_id] = 3; + permute_ids[5 - start_id] = 5; + permute_ids[6 - start_id] = 6; + err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids) - 1, NULL); + if (!ASSERT_ERR(err, "btf__permute_split")) + goto cleanup; + /* BTF is not modified */ + permute_split_check(split_btf); + + /* Multiple types can not be mapped to the same ID */ + permute_ids[3 - start_id] = 4; + permute_ids[4 - start_id] = 3; + permute_ids[5 - start_id] = 3; + permute_ids[6 - start_id] = 6; + err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids), NULL); + if (!ASSERT_ERR(err, "btf__permute_split")) + goto cleanup; + /* BTF is not modified */ + permute_split_check(split_btf); + + /* Can not map to base ID */ + permute_ids[3 - start_id] = 4; + permute_ids[4 - start_id] = 2; + permute_ids[5 - start_id] = 5; + permute_ids[6 - start_id] = 6; + err = btf__permute(split_btf, permute_ids, ARRAY_SIZE(permute_ids), NULL); + if (!ASSERT_ERR(err, "btf__permute_split")) + goto cleanup; + /* BTF is not modified */ + permute_split_check(split_btf); + +cleanup: + btf__free(split_btf); + btf__free(base_btf); +} + +void test_btf_permute(void) +{ + if (test__start_subtest("permute_base")) + test_permute_base(); + if (test__start_subtest("permute_split")) + test_permute_split(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sanitize.c b/tools/testing/selftests/bpf/prog_tests/btf_sanitize.c new file mode 100644 index 000000000000..652b51efafc2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_sanitize.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026, Oracle and/or its affiliates. */ +#include <test_progs.h> +#include <linux/btf.h> +#include "bpf/libbpf_internal.h" +#include "../test_btf.h" +#include "kfree_skb.skel.h" + +#define TYPE_LEN (sizeof(struct btf_type) + sizeof(__u32)) +#define MAX_NR_LAYOUT 2 +#define LAYOUT_LEN (sizeof(struct btf_layout) * MAX_NR_LAYOUT) +#define STR_LEN sizeof("\0int") + +struct layout_btf { + struct btf_header hdr; + __u32 types[TYPE_LEN/sizeof(__u32)]; + struct btf_layout layout[MAX_NR_LAYOUT]; + char strs[STR_LEN]; +}; + +static const struct layout_btf layout_btf = { + .hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_off = 0, + .type_len = TYPE_LEN, + .str_off = TYPE_LEN + LAYOUT_LEN, + .str_len = STR_LEN, + .layout_off = TYPE_LEN, + .layout_len = LAYOUT_LEN, + }, + .types = { + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }, + .layout = { + { .info_sz = 0, .elem_sz = 0, .flags = 0 }, + { .info_sz = sizeof(__u32), .elem_sz = 0, .flags = 0 }, + }, + .strs = "\0int", +}; + +void test_btf_sanitize_layout(void) +{ + struct btf *orig = NULL, *sanitized = NULL; + struct kern_feature_cache *cache = NULL; + struct kfree_skb *skel = NULL; + const struct btf_header *hdr; + const void *raw; + __u32 raw_sz; + + skel = kfree_skb__open(); + if (!ASSERT_OK_PTR(skel, "kfree_skb_skel")) + return; + orig = btf__new(&layout_btf, sizeof(layout_btf)); + if (!ASSERT_OK_PTR(orig, "btf_new_layout")) + goto out; + raw = btf__raw_data(orig, &raw_sz); + if (!ASSERT_OK_PTR(raw, "btf__raw_data_orig")) + goto out; + hdr = (struct btf_header *)raw; + ASSERT_EQ(hdr->layout_off, TYPE_LEN, "layout_off_nonzero"); + ASSERT_EQ(hdr->layout_len, LAYOUT_LEN, "layout_len_nonzero"); + + cache = calloc(1, sizeof(*cache)); + if (!ASSERT_OK_PTR(cache, "alloc_feat_cache")) + goto out; + for (int i = 0; i < __FEAT_CNT; i++) + cache->res[i] = FEAT_SUPPORTED; + cache->res[FEAT_BTF_LAYOUT] = FEAT_MISSING; + + bpf_object_set_feat_cache(skel->obj, cache); + + if (!ASSERT_FALSE(kernel_supports(skel->obj, FEAT_BTF_LAYOUT), "layout_feature_missing")) + goto out; + if (!ASSERT_TRUE(kernel_supports(skel->obj, FEAT_BTF_FUNC), "other_feature_allowed")) + goto out; + + sanitized = bpf_object__sanitize_btf(skel->obj, orig); + if (!ASSERT_OK_PTR(sanitized, "bpf_object__sanitize_btf")) + goto out; + + raw = btf__raw_data(sanitized, &raw_sz); + if (!ASSERT_OK_PTR(raw, "btf__raw_data_sanitized")) + goto out; + hdr = (struct btf_header *)raw; + ASSERT_EQ(hdr->layout_off, 0, "layout_off_zero"); + ASSERT_EQ(hdr->layout_len, 0, "layout_len_zero"); + ASSERT_EQ(hdr->str_off, TYPE_LEN, "strs_after_types"); + ASSERT_EQ(hdr->str_len, STR_LEN, "strs_len_unchanged"); + ASSERT_EQ(raw_sz, hdr->hdr_len + hdr->type_len + hdr->str_len, "btf_raw_sz_reduced"); +out: + /* This will free the cache we allocated above */ + kfree_skb__destroy(skel); + btf__free(sanitized); + btf__free(orig); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c index eef1158676ed..2d47cad50a51 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -12,10 +12,45 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args) vfprintf(ctx, fmt, args); } -void test_btf_split() { +/* Write raw BTF to file, return number of bytes written or negative errno */ +static ssize_t btf_raw_write(struct btf *btf, char *file) +{ + ssize_t written = 0; + const void *data; + __u32 size = 0; + int fd, ret; + + fd = mkstemp(file); + if (!ASSERT_GE(fd, 0, "create_file")) + return -errno; + + data = btf__raw_data(btf, &size); + if (!ASSERT_OK_PTR(data, "btf__raw_data")) { + close(fd); + return -EINVAL; + } + while (written < size) { + ret = write(fd, data + written, size - written); + if (!ASSERT_GE(ret, 0, "write succeeded")) { + close(fd); + return -errno; + } + written += ret; + } + close(fd); + return written; +} + +static void __test_btf_split(bool multi) +{ + char multisplit_btf_file[] = "/tmp/test_btf_multisplit.XXXXXX"; + char split_btf_file[] = "/tmp/test_btf_split.XXXXXX"; + char base_btf_file[] = "/tmp/test_btf_base.XXXXXX"; + ssize_t multisplit_btf_sz = 0, split_btf_sz = 0, base_btf_sz = 0; struct btf_dump *d = NULL; - const struct btf_type *t; - struct btf *btf1, *btf2; + const struct btf_type *t, *ot; + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL; + struct btf *btf4 = NULL, *btf5 = NULL, *btf6 = NULL; int str_off, i, err; btf1 = btf__new_empty(); @@ -63,14 +98,46 @@ void test_btf_split() { ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen"); ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name"); + if (multi) { + btf3 = btf__new_empty_split(btf2); + if (!ASSERT_OK_PTR(btf3, "multi_split_btf")) + goto cleanup; + } else { + btf3 = btf2; + } + + btf__add_union(btf3, "u1", 16); /* [5] union u1 { */ + btf__add_field(btf3, "f1", 4, 0, 0); /* struct s2 f1; */ + btf__add_field(btf3, "uf2", 1, 0, 0); /* int f2; */ + /* } */ + + if (multi) { + t = btf__type_by_id(btf2, 5); + ASSERT_NULL(t, "multisplit_type_in_first_split"); + } + + t = btf__type_by_id(btf3, 5); + if (!ASSERT_OK_PTR(t, "split_union_type")) + goto cleanup; + ASSERT_EQ(btf_is_union(t), true, "split_union_kind"); + ASSERT_EQ(btf_vlen(t), 2, "split_union_vlen"); + ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "u1", "split_union_name"); + ASSERT_EQ(btf__type_cnt(btf3), 6, "split_type_cnt"); + + t = btf__type_by_id(btf3, 1); + if (!ASSERT_OK_PTR(t, "split_base_type")) + goto cleanup; + ASSERT_EQ(btf_is_int(t), true, "split_base_int"); + ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "int", "split_base_type_name"); + /* BTF-to-C dump of split BTF */ dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) return; - d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL); + d = btf_dump__new(btf3, btf_dump_printf, dump_buf_file, NULL); if (!ASSERT_OK_PTR(d, "btf_dump__new")) goto cleanup; - for (i = 1; i < btf__type_cnt(btf2); i++) { + for (i = 1; i < btf__type_cnt(btf3); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } @@ -79,14 +146,56 @@ void test_btf_split() { ASSERT_STREQ(dump_buf, "struct s1 {\n" " int f1;\n" -"};\n" -"\n" +"};\n\n" "struct s2 {\n" " struct s1 f1;\n" " int f2;\n" " int *f3;\n" +"};\n\n" +"union u1 {\n" +" struct s2 f1;\n" +" int uf2;\n" "};\n\n", "c_dump"); + /* write base, split BTFs to files and ensure parsing succeeds */ + base_btf_sz = btf_raw_write(btf1, base_btf_file); + if (base_btf_sz < 0) + goto cleanup; + split_btf_sz = btf_raw_write(btf2, split_btf_file); + if (split_btf_sz < 0) + goto cleanup; + btf4 = btf__parse(base_btf_file, NULL); + if (!ASSERT_OK_PTR(btf4, "parse_base")) + goto cleanup; + btf5 = btf__parse_split(split_btf_file, btf4); + if (!ASSERT_OK_PTR(btf5, "parse_split")) + goto cleanup; + if (multi) { + multisplit_btf_sz = btf_raw_write(btf3, multisplit_btf_file); + if (multisplit_btf_sz < 0) + goto cleanup; + btf6 = btf__parse_split(multisplit_btf_file, btf5); + if (!ASSERT_OK_PTR(btf6, "parse_multisplit")) + goto cleanup; + } else { + btf6 = btf5; + } + + if (!ASSERT_EQ(btf__type_cnt(btf3), btf__type_cnt(btf6), "cmp_type_cnt")) + goto cleanup; + + /* compare parsed to original BTF */ + for (i = 1; i < btf__type_cnt(btf6); i++) { + t = btf__type_by_id(btf6, i); + if (!ASSERT_OK_PTR(t, "type_in_parsed_btf")) + goto cleanup; + ot = btf__type_by_id(btf3, i); + if (!ASSERT_OK_PTR(ot, "type_in_orig_btf")) + goto cleanup; + if (!ASSERT_EQ(memcmp(t, ot, sizeof(*ot)), 0, "cmp_parsed_orig_btf")) + goto cleanup; + } + cleanup: if (dump_buf_file) fclose(dump_buf_file); @@ -94,4 +203,24 @@ cleanup: btf_dump__free(d); btf__free(btf1); btf__free(btf2); + if (btf2 != btf3) + btf__free(btf3); + btf__free(btf4); + btf__free(btf5); + if (btf5 != btf6) + btf__free(btf6); + if (base_btf_sz > 0) + unlink(base_btf_file); + if (split_btf_sz > 0) + unlink(split_btf_file); + if (multisplit_btf_sz > 0) + unlink(multisplit_btf_file); +} + +void test_btf_split(void) +{ + if (test__start_subtest("single_split")) + __test_btf_split(false); + if (test__start_subtest("multi_split")) + __test_btf_split(true); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c new file mode 100644 index 000000000000..3923e64c4c1d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2025 Isovalent */ + +#include <test_progs.h> +#include <bpf/btf.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> + +static void test_btf_mmap_sysfs(const char *path, struct btf *base) +{ + struct stat st; + __u64 btf_size, end; + void *raw_data = NULL; + int fd = -1; + long page_size; + struct btf *btf = NULL; + + page_size = sysconf(_SC_PAGESIZE); + if (!ASSERT_GE(page_size, 0, "get_page_size")) + goto cleanup; + + if (!ASSERT_OK(stat(path, &st), "stat_btf")) + goto cleanup; + + btf_size = st.st_size; + end = (btf_size + page_size - 1) / page_size * page_size; + + fd = open(path, O_RDONLY); + if (!ASSERT_GE(fd, 0, "open_btf")) + goto cleanup; + + raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable")) + goto cleanup; + + raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared")) + goto cleanup; + + raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size")) + goto cleanup; + + raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0); + if (!ASSERT_OK_PTR(raw_data, "mmap_btf")) + goto cleanup; + + if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1, + "mprotect_writable")) + goto cleanup; + + if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1, + "mprotect_executable")) + goto cleanup; + + /* Check padding is zeroed */ + for (int i = btf_size; i < end; i++) { + if (((__u8 *)raw_data)[i] != 0) { + PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i); + goto cleanup; + } + } + + btf = btf__new_split(raw_data, btf_size, base); + if (!ASSERT_OK_PTR(btf, "parse_btf")) + goto cleanup; + +cleanup: + btf__free(btf); + if (raw_data && raw_data != MAP_FAILED) + munmap(raw_data, btf_size); + if (fd >= 0) + close(fd); +} + +void test_btf_sysfs(void) +{ + test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index 6e36de1302fc..5c84723cf254 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -497,10 +497,121 @@ cleanup: btf__free(btf2); } +static void test_btf_add_btf_split() +{ + struct btf *base = NULL, *split1 = NULL, *split2 = NULL; + struct btf *combined = NULL; + int id, err; + + /* Create a base BTF with an INT and a PTR to it */ + base = btf__new_empty(); + if (!ASSERT_OK_PTR(base, "base")) + return; + + id = btf__add_int(base, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(id, 1, "base_int_id"); + id = btf__add_ptr(base, 1); + ASSERT_EQ(id, 2, "base_ptr_id"); + + /* base has 2 types, type IDs 1..2 */ + ASSERT_EQ(btf__type_cnt(base), 3, "base_type_cnt"); + + /* Create split1 on base: a STRUCT referencing base's int (ID 1) */ + split1 = btf__new_empty_split(base); + if (!ASSERT_OK_PTR(split1, "split1")) + goto cleanup; + + id = btf__add_struct(split1, "s1", 4); + /* split types start at base_type_cnt = 3 */ + ASSERT_EQ(id, 3, "split1_struct_id"); + btf__add_field(split1, "x", 1, 0, 0); /* refers to base int */ + + id = btf__add_ptr(split1, 3); + ASSERT_EQ(id, 4, "split1_ptr_id"); /* ptr to the struct (split self-ref) */ + + /* Add a typedef "int_alias" -> base int in split1, which will be + * duplicated in split2 to test that btf__dedup() merges them. + */ + id = btf__add_typedef(split1, "int_alias", 1); + ASSERT_EQ(id, 5, "split1_typedef_id"); + + /* Create split2 on base: a TYPEDEF referencing base's ptr (ID 2) */ + split2 = btf__new_empty_split(base); + if (!ASSERT_OK_PTR(split2, "split2")) + goto cleanup; + + id = btf__add_typedef(split2, "int_ptr", 2); /* refers to base ptr */ + ASSERT_EQ(id, 3, "split2_typedef_id"); + + id = btf__add_struct(split2, "s2", 8); + ASSERT_EQ(id, 4, "split2_struct_id"); + btf__add_field(split2, "p", 3, 0, 0); /* refers to split2's own typedef */ + + /* Same "int_alias" typedef as split1 - should be deduped away */ + id = btf__add_typedef(split2, "int_alias", 1); + ASSERT_EQ(id, 5, "split2_dup_typedef_id"); + + /* Create combined split BTF on same base and merge both */ + combined = btf__new_empty_split(base); + if (!ASSERT_OK_PTR(combined, "combined")) + goto cleanup; + + /* Merge split1: its types (3,4,5) should land at IDs 3,4,5 */ + id = btf__add_btf(combined, split1); + if (!ASSERT_GE(id, 0, "add_split1")) + goto cleanup; + ASSERT_EQ(id, 3, "split1_first_id"); + + /* Merge split2: its types (3,4,5) should be remapped to 6,7,8 */ + id = btf__add_btf(combined, split2); + if (!ASSERT_GE(id, 0, "add_split2")) + goto cleanup; + ASSERT_EQ(id, 6, "split2_first_id"); + + /* Before dedup: base (2) + split1 (3) + split2 (3) = 8 types + void */ + ASSERT_EQ(btf__type_cnt(combined), 9, "pre_dedup_type_cnt"); + + VALIDATE_RAW_BTF( + combined, + /* base types (IDs 1-2) */ + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + + /* split1 types (IDs 3-5): base refs unchanged */ + "[3] STRUCT 's1' size=4 vlen=1\n" + "\t'x' type_id=1 bits_offset=0", /* refers to base int=1 */ + "[4] PTR '(anon)' type_id=3", /* refers to split1's struct=3 */ + "[5] TYPEDEF 'int_alias' type_id=1", /* refers to base int=1 */ + + /* split2 types (IDs 6-8): remapped from 3,4,5 to 6,7,8 */ + "[6] TYPEDEF 'int_ptr' type_id=2", /* base ptr=2, unchanged */ + "[7] STRUCT 's2' size=8 vlen=1\n" + "\t'p' type_id=6 bits_offset=0", /* split2 typedef: 3->6 */ + "[8] TYPEDEF 'int_alias' type_id=1"); /* dup of [5] */ + + /* Dedup to mirror the bpftool merge flow; should remove the + * duplicate "int_alias" typedef. + */ + err = btf__dedup(combined, NULL); + if (!ASSERT_OK(err, "dedup")) + goto cleanup; + + /* After dedup: one int_alias removed, so 7 types + void */ + ASSERT_EQ(btf__type_cnt(combined), 8, "dedup_type_cnt"); + +cleanup: + btf__free(combined); + btf__free(split2); + btf__free(split1); + btf__free(base); +} + void test_btf_write() { if (test__start_subtest("btf_add")) test_btf_add(); if (test__start_subtest("btf_add_btf")) test_btf_add_btf(); + if (test__start_subtest("btf_add_btf_split")) + test_btf_add_btf_split(); } diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c index 574d9a0cdc8e..0f88a9d00a22 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c @@ -190,6 +190,16 @@ static void test_walk_self_only(struct cgroup_iter *skel) BPF_CGROUP_ITER_SELF_ONLY, "self_only"); } +static void test_walk_children(struct cgroup_iter *skel) +{ + snprintf(expected_output, sizeof(expected_output), + PROLOGUE "%8llu\n%8llu\n" EPILOGUE, cg_id[CHILD1], + cg_id[CHILD2]); + + read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT], + BPF_CGROUP_ITER_CHILDREN, "children"); +} + static void test_walk_dead_self_only(struct cgroup_iter *skel) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); @@ -325,6 +335,8 @@ void test_cgroup_iter(void) test_walk_dead_self_only(skel); if (test__start_subtest("cgroup_iter__self_only_css_task")) test_walk_self_only_css_task(); + if (test__start_subtest("cgroup_iter__children")) + test_walk_children(skel); out: cgroup_iter__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c new file mode 100644 index 000000000000..b7c18d590b99 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <unistd.h> +#include "cgroup_helpers.h" +#include "cgroup_iter_memcg.h" +#include "cgroup_iter_memcg.skel.h" + +static int read_stats(struct bpf_link *link) +{ + int fd, ret = 0; + ssize_t bytes; + + fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(fd, "bpf_iter_create")) + return 1; + + /* + * Invoke iter program by reading from its fd. We're not expecting any + * data to be written by the bpf program so the result should be zero. + * Results will be read directly through the custom data section + * accessible through skel->data_query.memcg_query. + */ + bytes = read(fd, NULL, 0); + if (!ASSERT_EQ(bytes, 0, "read fd")) + ret = 1; + + close(fd); + return ret; +} + +static void test_anon(struct bpf_link *link, struct memcg_query *memcg_query) +{ + void *map; + size_t len; + + len = sysconf(_SC_PAGESIZE) * 1024; + + /* + * Increase memcg anon usage by mapping and writing + * to a new anon region. + */ + map = mmap(NULL, len, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (!ASSERT_NEQ(map, MAP_FAILED, "mmap anon")) + return; + + memset(map, 1, len); + + if (!ASSERT_OK(read_stats(link), "read stats")) + goto cleanup; + + ASSERT_GT(memcg_query->nr_anon_mapped, 0, "final anon mapped val"); + +cleanup: + munmap(map, len); +} + +static void test_file(struct bpf_link *link, struct memcg_query *memcg_query) +{ + void *map; + size_t len; + char *path; + int fd; + + len = sysconf(_SC_PAGESIZE) * 1024; + path = "/tmp/test_cgroup_iter_memcg"; + + /* + * Increase memcg file usage by creating and writing + * to a mapped file. + */ + fd = open(path, O_CREAT | O_RDWR, 0644); + if (!ASSERT_OK_FD(fd, "open fd")) + return; + if (!ASSERT_OK(ftruncate(fd, len), "ftruncate")) + goto cleanup_fd; + + map = mmap(NULL, len, PROT_WRITE, MAP_SHARED, fd, 0); + if (!ASSERT_NEQ(map, MAP_FAILED, "mmap file")) + goto cleanup_fd; + + memset(map, 1, len); + + if (!ASSERT_OK(read_stats(link), "read stats")) + goto cleanup_map; + + ASSERT_GT(memcg_query->nr_file_pages, 0, "final file value"); + ASSERT_GT(memcg_query->nr_file_mapped, 0, "final file mapped value"); + +cleanup_map: + munmap(map, len); +cleanup_fd: + close(fd); + unlink(path); +} + +static void test_shmem(struct bpf_link *link, struct memcg_query *memcg_query) +{ + size_t len; + int fd; + + len = sysconf(_SC_PAGESIZE) * 1024; + + /* + * Increase memcg shmem usage by creating and writing + * to a memfd backed by shmem/tmpfs. + */ + fd = memfd_create("tmp_shmem", 0); + if (!ASSERT_OK_FD(fd, "memfd_create")) + return; + + if (!ASSERT_OK(fallocate(fd, 0, 0, len), "fallocate")) + goto cleanup; + + if (!ASSERT_OK(read_stats(link), "read stats")) + goto cleanup; + + ASSERT_GT(memcg_query->nr_shmem, 0, "final shmem value"); + +cleanup: + close(fd); +} + +static void test_pgfault(struct bpf_link *link, struct memcg_query *memcg_query) +{ + void *map; + size_t len; + + len = sysconf(_SC_PAGESIZE) * 1024; + + /* Create region to use for triggering a page fault. */ + map = mmap(NULL, len, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (!ASSERT_NEQ(map, MAP_FAILED, "mmap anon")) + return; + + /* Trigger page fault. */ + memset(map, 1, len); + + if (!ASSERT_OK(read_stats(link), "read stats")) + goto cleanup; + + ASSERT_GT(memcg_query->pgfault, 0, "final pgfault val"); + +cleanup: + munmap(map, len); +} + +void test_cgroup_iter_memcg(void) +{ + char *cgroup_rel_path = "/cgroup_iter_memcg_test"; + struct cgroup_iter_memcg *skel; + struct bpf_link *link; + int cgroup_fd; + + cgroup_fd = cgroup_setup_and_join(cgroup_rel_path); + if (!ASSERT_OK_FD(cgroup_fd, "cgroup_setup_and_join")) + return; + + skel = cgroup_iter_memcg__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_iter_memcg__open_and_load")) + goto cleanup_cgroup_fd; + + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo = { + .cgroup.cgroup_fd = cgroup_fd, + .cgroup.order = BPF_CGROUP_ITER_SELF_ONLY, + }; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(skel->progs.cgroup_memcg_query, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto cleanup_skel; + + if (test__start_subtest("cgroup_iter_memcg__anon")) + test_anon(link, &skel->data_query->memcg_query); + if (test__start_subtest("cgroup_iter_memcg__shmem")) + test_shmem(link, &skel->data_query->memcg_query); + if (test__start_subtest("cgroup_iter_memcg__file")) + test_file(link, &skel->data_query->memcg_query); + if (test__start_subtest("cgroup_iter_memcg__pgfault")) + test_pgfault(link, &skel->data_query->memcg_query); + + bpf_link__destroy(link); +cleanup_skel: + cgroup_iter_memcg__destroy(skel); +cleanup_cgroup_fd: + close(cgroup_fd); + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c new file mode 100644 index 000000000000..bb60704a3ef9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "cgroup_mprog.skel.h" + +static void assert_mprog_count(int cg, int atype, int expected) +{ + __u32 count = 0, attach_flags = 0; + int err; + + err = bpf_prog_query(cg, atype, 0, &attach_flags, + NULL, &count); + ASSERT_EQ(count, expected, "count"); + ASSERT_EQ(err, 0, "prog_query"); +} + +static void test_prog_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct cgroup_mprog *skel; + __u32 prog_ids[10]; + int cg, err; + + cg = test__join_cgroup("/prog_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /prog_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE, + .expected_revision = 2, + ); + + /* ordering: [fd2, fd1] */ + err = bpf_prog_attach_opts(fd2, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + /* ordering: [fd2, fd3, fd1] */ + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + err = bpf_prog_attach_opts(fd4, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + /* retrieve optq.prog_cnt */ + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + /* optq.prog_cnt will be used in below query */ + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + +cleanup4: + optd.expected_revision = 5; + err = bpf_prog_detach_opts(fd4, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 3); + +cleanup3: + LIBBPF_OPTS_RESET(optd); + err = bpf_prog_detach_opts(fd3, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 2); + + /* Check revision after two detach operations */ + err = bpf_prog_query_opts(cg, atype, &optq); + ASSERT_OK(err, "prog_query"); + ASSERT_EQ(optq.revision, 7, "revision"); + +cleanup2: + err = bpf_prog_detach_opts(fd2, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_link_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_cgroup_opts, opta); + LIBBPF_OPTS(bpf_cgroup_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + struct bpf_link *link1, *link2, *link3, *link4; + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct cgroup_mprog *skel; + __u32 prog_ids[10]; + int cg, err; + + cg = test__join_cgroup("/link_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /link_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta); + if (!ASSERT_OK_PTR(link1, "link_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE | BPF_F_LINK, + .relative_id = id_from_link_fd(bpf_link__fd(link1)), + .expected_revision = 2, + ); + + /* ordering: [fd2, fd1] */ + link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta); + if (!ASSERT_OK_PTR(link2, "link_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_LINK, + .relative_fd = bpf_link__fd(link2), + .expected_revision = 3, + ); + + /* ordering: [fd2, fd3, fd1] */ + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_OK_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta); + if (!ASSERT_OK_PTR(link4, "link_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + /* retrieve optq.prog_cnt */ + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + /* optq.prog_cnt will be used in below query */ + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + +cleanup4: + bpf_link__destroy(link4); + assert_mprog_count(cg, atype, 3); + +cleanup3: + bpf_link__destroy(link3); + assert_mprog_count(cg, atype, 2); + + /* Check revision after two detach operations */ + err = bpf_prog_query_opts(cg, atype, &optq); + ASSERT_OK(err, "prog_query"); + ASSERT_EQ(optq.revision, 7, "revision"); + +cleanup2: + bpf_link__destroy(link2); + assert_mprog_count(cg, atype, 1); + +cleanup1: + bpf_link__destroy(link1); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_preorder_prog_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + __u32 fd1, fd2, fd3, fd4; + struct cgroup_mprog *skel; + int cg, err; + + cg = test__join_cgroup("/preorder_prog_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_prog_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER, + .expected_revision = 2, + ); + + /* ordering: [fd1, fd2] */ + err = bpf_prog_attach_opts(fd2, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, -EINVAL, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_PREORDER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + /* ordering: [fd1, fd2, fd3] */ + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + err = bpf_prog_attach_opts(fd4, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + err = bpf_prog_detach_opts(fd4, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_preorder_link_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_cgroup_opts, opta); + struct bpf_link *link1, *link2, *link3, *link4; + struct cgroup_mprog *skel; + __u32 fd2; + int cg; + + cg = test__join_cgroup("/preorder_link_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_link_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta); + if (!ASSERT_OK_PTR(link1, "link_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_PREORDER, + .expected_revision = 2, + ); + + /* ordering: [fd1, fd2] */ + link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta); + if (!ASSERT_OK_PTR(link2, "link_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_ERR_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_PREORDER | BPF_F_LINK, + .relative_fd = bpf_link__fd(link2), + .expected_revision = 3, + ); + + /* ordering: [fd1, fd2, fd3] */ + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_OK_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta); + if (!ASSERT_OK_PTR(link4, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + bpf_link__destroy(link4); + assert_mprog_count(cg, atype, 3); + +cleanup3: + bpf_link__destroy(link3); + assert_mprog_count(cg, atype, 2); + +cleanup2: + bpf_link__destroy(link2); + assert_mprog_count(cg, atype, 1); + +cleanup1: + bpf_link__destroy(link1); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_invalid_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + __u32 fd1, fd2, id2; + struct cgroup_mprog *skel; + int cg, err; + + cg = test__join_cgroup("/invalid_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /invalid_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + + id2 = id_from_prog_fd(fd2); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_LINK, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE | BPF_F_AFTER, + .replace_prog_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 1); +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +void test_cgroup_mprog_opts(void) +{ + if (test__start_subtest("prog_attach_detach")) + test_prog_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("link_attach_detach")) + test_link_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("preorder_prog_attach_detach")) + test_preorder_prog_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("preorder_link_attach_detach")) + test_preorder_link_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("invalid_attach_detach")) + test_invalid_attach_detach(BPF_CGROUP_GETSOCKOPT); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c new file mode 100644 index 000000000000..a36d2e968bc5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "cgroup_preorder.skel.h" + +static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opts); + enum bpf_attach_type prog_p_atype, prog_p2_atype; + int prog_p_fd, prog_p2_fd; + struct cgroup_preorder *skel = NULL; + struct bpf_program *prog; + __u8 *result, buf; + socklen_t optlen = 1; + int err = 0; + + skel = cgroup_preorder__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load")) + return 0; + + LIBBPF_OPTS_RESET(opts); + opts.flags = BPF_F_ALLOW_MULTI; + prog = skel->progs.parent; + prog_p_fd = bpf_program__fd(prog); + prog_p_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE; + if (has_relative_fd) + opts.relative_fd = prog_p_fd; + prog = skel->progs.parent_2; + prog_p2_fd = bpf_program__fd(prog); + prog_p2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2")) + goto detach_parent; + + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_parent_2; + + result = skel->bss->result; + ASSERT_TRUE(result[0] == 4 && result[1] == 3, "result values"); + +detach_parent_2: + ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype), + "bpf_prog_detach2-parent_2"); +detach_parent: + ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype), + "bpf_prog_detach2-parent"); +close_skel: + cgroup_preorder__destroy(skel); + return err; +} + +void test_cgroup_mprog_ordering(void) +{ + int cg_parent = -1, sock_fd = -1; + + cg_parent = test__join_cgroup("/parent"); + if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) + goto out; + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(sock_fd, 0, "socket")) + goto out; + + ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, false), "getsockopt_test_1"); + ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, true), "getsockopt_test_2"); + +out: + close(sock_fd); + close(cg_parent); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c new file mode 100644 index 000000000000..d4d583872fa2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "cgroup_preorder.skel.h" + +static int run_getsockopt_test(int cg_parent, int cg_child, int sock_fd, bool all_preorder) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opts); + enum bpf_attach_type prog_c_atype, prog_c2_atype, prog_p_atype, prog_p2_atype; + int prog_c_fd, prog_c2_fd, prog_p_fd, prog_p2_fd; + struct cgroup_preorder *skel = NULL; + struct bpf_program *prog; + __u8 *result, buf; + socklen_t optlen; + int err = 0; + + skel = cgroup_preorder__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load")) + return 0; + + buf = 0x00; + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (!ASSERT_OK(err, "setsockopt")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI; + if (all_preorder) + opts.flags |= BPF_F_PREORDER; + prog = skel->progs.child; + prog_c_fd = bpf_program__fd(prog); + prog_c_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_c_fd, cg_child, prog_c_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-child")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER; + prog = skel->progs.child_2; + prog_c2_fd = bpf_program__fd(prog); + prog_c2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_c2_fd, cg_child, prog_c2_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-child_2")) + goto detach_child; + + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_child_2; + + result = skel->bss->result; + if (all_preorder) + ASSERT_TRUE(result[0] == 1 && result[1] == 2, "child only"); + else + ASSERT_TRUE(result[0] == 2 && result[1] == 1, "child only"); + + skel->bss->idx = 0; + memset(result, 0, 4); + + opts.flags = BPF_F_ALLOW_MULTI; + if (all_preorder) + opts.flags |= BPF_F_PREORDER; + prog = skel->progs.parent; + prog_p_fd = bpf_program__fd(prog); + prog_p_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent")) + goto detach_child_2; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER; + prog = skel->progs.parent_2; + prog_p2_fd = bpf_program__fd(prog); + prog_p2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2")) + goto detach_parent; + + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_parent_2; + + if (all_preorder) + ASSERT_TRUE(result[0] == 3 && result[1] == 4 && result[2] == 1 && result[3] == 2, + "parent and child"); + else + ASSERT_TRUE(result[0] == 4 && result[1] == 2 && result[2] == 1 && result[3] == 3, + "parent and child"); + +detach_parent_2: + ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype), + "bpf_prog_detach2-parent_2"); +detach_parent: + ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype), + "bpf_prog_detach2-parent"); +detach_child_2: + ASSERT_OK(bpf_prog_detach2(prog_c2_fd, cg_child, prog_c2_atype), + "bpf_prog_detach2-child_2"); +detach_child: + ASSERT_OK(bpf_prog_detach2(prog_c_fd, cg_child, prog_c_atype), + "bpf_prog_detach2-child"); +close_skel: + cgroup_preorder__destroy(skel); + return err; +} + +void test_cgroup_preorder(void) +{ + int cg_parent = -1, cg_child = -1, sock_fd = -1; + + cg_parent = test__join_cgroup("/parent"); + if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) + goto out; + + cg_child = test__join_cgroup("/parent/child"); + if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child")) + goto out; + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(sock_fd, 0, "socket")) + goto out; + + ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1"); + ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2"); + +out: + close(sock_fd); + close(cg_child); + close(cg_parent); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c b/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c index cf395715ced4..8dab655db342 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include <unistd.h> +#include <sys/socket.h> #include <test_progs.h> #include "cgroup_helpers.h" #include "network_helpers.h" @@ -86,6 +88,11 @@ void test_cgroup_storage(void) err = SYS_NOFAIL(PING_CMD); ASSERT_OK(err, "sixth ping"); + err = bpf_map__get_next_key(skel->maps.cgroup_storage, &key, &key, + sizeof(key)); + ASSERT_ERR(err, "bpf_map__get_next_key should fail"); + ASSERT_EQ(errno, ENOENT, "no second key"); + cleanup_progs: cgroup_storage__destroy(skel); cleanup_network: @@ -94,3 +101,43 @@ cleanup_cgroup: close(cgroup_fd); cleanup_cgroup_environment(); } + +void test_cgroup_storage_oob(void) +{ + struct cgroup_storage *skel; + int cgroup_fd, sock_fd; + + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); + if (!ASSERT_OK_FD(cgroup_fd, "create cgroup")) + return; + + /* Load and attach BPF program */ + skel = cgroup_storage__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_storage__open_and_load")) + goto cleanup_cgroup; + + skel->links.trigger_oob = bpf_program__attach_cgroup(skel->progs.trigger_oob, + cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.trigger_oob, "attach_cgroup")) + goto cleanup_skel; + + /* Create a socket to trigger cgroup/sock_create hook. + * This will execute our BPF program and trigger the OOB read + * if the bug is present (before the fix). + */ + sock_fd = socket(AF_INET, SOCK_DGRAM, 0); + if (!ASSERT_OK_FD(sock_fd, "create socket")) + goto cleanup_skel; + + close(sock_fd); + + /* If we reach here without a kernel panic or KASAN report, + * the test passes (the fix is working). + */ + +cleanup_skel: + cgroup_storage__destroy(skel); +cleanup_cgroup: + close(cgroup_fd); + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index 64abba72ac10..37c1cc52ed98 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -10,12 +10,18 @@ static int run_test(int cgroup_fd, int server_fd, bool classid) { struct connect4_dropper *skel; - int fd, err = 0; + int fd, err = 0, port; skel = connect4_dropper__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) return -1; + port = get_socket_local_port(server_fd); + if (!ASSERT_GE(port, 0, "get_socket_local_port")) + return -1; + + skel->bss->port = ntohs(port); + skel->links.connect_v4_dropper = bpf_program__attach_cgroup(skel->progs.connect_v4_dropper, cgroup_fd); @@ -48,10 +54,9 @@ void test_cgroup_v1v2(void) { struct network_helper_opts opts = {}; int server_fd, client_fd, cgroup_fd; - static const int port = 60120; /* Step 1: Check base connectivity works without any BPF. */ - server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "server_fd")) return; client_fd = connect_to_fd_opts(server_fd, &opts); @@ -66,7 +71,7 @@ void test_cgroup_v1v2(void) cgroup_fd = test__join_cgroup("/connect_dropper"); if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd")) return; - server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "server_fd")) { close(cgroup_fd); return; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c new file mode 100644 index 000000000000..5ad904e9d15d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <string.h> +#include <unistd.h> +#include <sys/socket.h> +#include <test_progs.h> +#include "cgroup_helpers.h" + +#include "read_cgroupfs_xattr.skel.h" +#include "cgroup_read_xattr.skel.h" + +#define CGROUP_FS_PARENT "foo/" +#define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/" +#define TMP_FILE "/tmp/selftests_cgroup_xattr" + +static const char xattr_value_a[] = "bpf_selftest_value_a"; +static const char xattr_value_b[] = "bpf_selftest_value_b"; +static const char xattr_name[] = "user.bpf_test"; + +static void test_read_cgroup_xattr(void) +{ + int tmp_fd, parent_cgroup_fd = -1, child_cgroup_fd = -1; + struct read_cgroupfs_xattr *skel = NULL; + + parent_cgroup_fd = test__join_cgroup(CGROUP_FS_PARENT); + if (!ASSERT_OK_FD(parent_cgroup_fd, "create parent cgroup")) + return; + if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a), + "set parent xattr")) + goto out; + + child_cgroup_fd = test__join_cgroup(CGROUP_FS_CHILD); + if (!ASSERT_OK_FD(child_cgroup_fd, "create child cgroup")) + goto out; + if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b), + "set child xattr")) + goto out; + + skel = read_cgroupfs_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load")) + goto out; + + skel->bss->target_pid = sys_gettid(); + + if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach")) + goto out; + + tmp_fd = open(TMP_FILE, O_RDONLY | O_CREAT); + ASSERT_OK_FD(tmp_fd, "open tmp file"); + close(tmp_fd); + + ASSERT_TRUE(skel->bss->found_value_a, "found_value_a"); + ASSERT_TRUE(skel->bss->found_value_b, "found_value_b"); + +out: + close(child_cgroup_fd); + close(parent_cgroup_fd); + read_cgroupfs_xattr__destroy(skel); + unlink(TMP_FILE); +} + +void test_cgroup_xattr(void) +{ + RUN_TESTS(cgroup_read_xattr); + + if (test__start_subtest("read_cgroupfs_xattr")) + test_read_cgroup_xattr(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c index adda85f97058..4b42fbc96efc 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -4,6 +4,8 @@ #define _GNU_SOURCE #include <cgroup_helpers.h> #include <test_progs.h> +#include <sched.h> +#include <sys/wait.h> #include "cgrp_kfunc_failure.skel.h" #include "cgrp_kfunc_success.skel.h" @@ -87,6 +89,72 @@ static const char * const success_tests[] = { "test_cgrp_from_id", }; +static void test_cgrp_from_id_ns(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct cgrp_kfunc_success *skel; + struct bpf_program *prog; + int pid, pipe_fd[2]; + + skel = open_load_cgrp_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err")) + goto cleanup; + + prog = skel->progs.test_cgrp_from_id_ns; + + if (!ASSERT_OK(pipe(pipe_fd), "pipe")) + goto cleanup; + + pid = fork(); + if (!ASSERT_GE(pid, 0, "fork result")) { + close(pipe_fd[0]); + close(pipe_fd[1]); + goto cleanup; + } + + if (pid == 0) { + int ret = 0; + + close(pipe_fd[0]); + + if (!ASSERT_GE(cgroup_setup_and_join("cgrp_from_id_ns"), 0, "join cgroup")) + exit(1); + + if (!ASSERT_OK(unshare(CLONE_NEWCGROUP), "unshare cgns")) + exit(1); + + ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (!ASSERT_OK(ret, "test run ret")) + exit(1); + + if (!ASSERT_OK(opts.retval, "test run retval")) + exit(1); + + if (!ASSERT_EQ(write(pipe_fd[1], &ret, sizeof(ret)), sizeof(ret), "write pipe")) + exit(1); + + exit(0); + } else { + int res; + + close(pipe_fd[1]); + + ASSERT_EQ(read(pipe_fd[0], &res, sizeof(res)), sizeof(res), "read res"); + ASSERT_EQ(waitpid(pid, NULL, 0), pid, "wait on child"); + + remove_cgroup_pid("cgrp_from_id_ns", pid); + + ASSERT_OK(res, "result from run"); + } + + close(pipe_fd[0]); +cleanup: + cgrp_kfunc_success__destroy(skel); +} + void test_cgrp_kfunc(void) { int i, err; @@ -102,6 +170,9 @@ void test_cgrp_kfunc(void) run_success_test(success_tests[i]); } + if (test__start_subtest("test_cgrp_from_id_ns")) + test_cgrp_from_id_ns(); + RUN_TESTS(cgrp_kfunc_failure); cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 9015e2c2ab12..478a77cb67e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -202,7 +202,7 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) iter_fd = bpf_iter_create(bpf_link__fd(link)); if (!ASSERT_GE(iter_fd, 0, "iter_create")) - goto out; + goto out_link; /* trigger the program run */ (void)read(iter_fd, buf, sizeof(buf)); @@ -210,6 +210,8 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); close(iter_fd); +out_link: + bpf_link__destroy(link); out: cgrp_ls_sleepable__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c deleted file mode 100644 index 7526de379081..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "bpf/libbpf.h" -#include "changes_pkt_data_freplace.skel.h" -#include "changes_pkt_data.skel.h" -#include <test_progs.h> - -static void print_verifier_log(const char *log) -{ - if (env.verbosity >= VERBOSE_VERY) - fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); -} - -static void test_aux(const char *main_prog_name, - const char *to_be_replaced, - const char *replacement, - bool expect_load) -{ - struct changes_pkt_data_freplace *freplace = NULL; - struct bpf_program *freplace_prog = NULL; - struct bpf_program *main_prog = NULL; - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct changes_pkt_data *main = NULL; - char log[16*1024]; - int err; - - opts.kernel_log_buf = log; - opts.kernel_log_size = sizeof(log); - if (env.verbosity >= VERBOSE_SUPER) - opts.kernel_log_level = 1 | 2 | 4; - main = changes_pkt_data__open_opts(&opts); - if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) - goto out; - main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); - if (!ASSERT_OK_PTR(main_prog, "main_prog")) - goto out; - bpf_program__set_autoload(main_prog, true); - err = changes_pkt_data__load(main); - print_verifier_log(log); - if (!ASSERT_OK(err, "changes_pkt_data__load")) - goto out; - freplace = changes_pkt_data_freplace__open_opts(&opts); - if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) - goto out; - freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); - if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) - goto out; - bpf_program__set_autoload(freplace_prog, true); - bpf_program__set_autoattach(freplace_prog, true); - bpf_program__set_attach_target(freplace_prog, - bpf_program__fd(main_prog), - to_be_replaced); - err = changes_pkt_data_freplace__load(freplace); - print_verifier_log(log); - if (expect_load) { - ASSERT_OK(err, "changes_pkt_data_freplace__load"); - } else { - ASSERT_ERR(err, "changes_pkt_data_freplace__load"); - ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); - } - -out: - changes_pkt_data_freplace__destroy(freplace); - changes_pkt_data__destroy(main); -} - -/* There are two global subprograms in both changes_pkt_data.skel.h: - * - one changes packet data; - * - another does not. - * It is ok to freplace subprograms that change packet data with those - * that either do or do not. It is only ok to freplace subprograms - * that do not change packet data with those that do not as well. - * The below tests check outcomes for each combination of such freplace. - * Also test a case when main subprogram itself is replaced and is a single - * subprogram in a program. - */ -void test_changes_pkt_data_freplace(void) -{ - struct { - const char *main; - const char *to_be_replaced; - bool changes; - } mains[] = { - { "main_with_subprogs", "changes_pkt_data", true }, - { "main_with_subprogs", "does_not_change_pkt_data", false }, - { "main_changes", "main_changes", true }, - { "main_does_not_change", "main_does_not_change", false }, - }; - struct { - const char *func; - bool changes; - } replacements[] = { - { "changes_pkt_data", true }, - { "does_not_change_pkt_data", false } - }; - char buf[64]; - - for (int i = 0; i < ARRAY_SIZE(mains); ++i) { - for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { - snprintf(buf, sizeof(buf), "%s_with_%s", - mains[i].to_be_replaced, replacements[j].func); - if (!test__start_subtest(buf)) - continue; - test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, - mains[i].changes || !replacements[j].changes); - } - } -} diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index 2a9a30650350..65b4512967e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -153,6 +153,26 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel, ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user"); } +static void test_chk_segs_flag(struct test_check_mtu *skel, __u32 mtu) +{ + int err, prog_fd = bpf_program__fd(skel->progs.tc_chk_segs_flag); + struct __sk_buff skb = { + .gso_size = 10, + }; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + ); + + /* Lower the mtu to test the BPF_MTU_CHK_SEGS */ + SYS_NOFAIL("ip link set dev lo mtu 10"); + err = bpf_prog_test_run_opts(prog_fd, &topts); + SYS_NOFAIL("ip link set dev lo mtu %u", mtu); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, BPF_OK, "retval"); +} static void test_check_mtu_tc(__u32 mtu, __u32 ifindex) { @@ -177,11 +197,12 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex) test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu); test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu); test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu); + test_chk_segs_flag(skel, mtu); cleanup: test_check_mtu__destroy(skel); } -void serial_test_check_mtu(void) +void test_ns_check_mtu(void) { int mtu_lo; diff --git a/tools/testing/selftests/bpf/prog_tests/clone_attach_btf_id.c b/tools/testing/selftests/bpf/prog_tests/clone_attach_btf_id.c new file mode 100644 index 000000000000..1c3e28e74606 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/clone_attach_btf_id.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta */ +#include <test_progs.h> +#include "clone_attach_btf_id.skel.h" + +/* + * Test that bpf_program__clone() respects caller-provided attach_btf_id + * override via bpf_prog_load_opts. + * + * The BPF program has SEC("fentry/bpf_fentry_test1"). Clone it twice + * from the same prepared object: first with no opts (callback resolves + * attach_btf_id from sec_name), then with attach_btf_id overridden to + * bpf_fentry_test2. Verify each loaded program's attach_btf_id via + * bpf_prog_get_info_by_fd(). + */ + +static int get_prog_attach_btf_id(int prog_fd) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int err; + + err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); + if (err) + return err; + return info.attach_btf_id; +} + +void test_clone_attach_btf_id(void) +{ + struct clone_attach_btf_id *skel; + int fd1 = -1, fd2 = -1, err; + int btf_id_test1, btf_id_test2; + + btf_id_test1 = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", BPF_TRACE_FENTRY); + if (!ASSERT_GT(btf_id_test1, 0, "find_btf_id_test1")) + return; + + btf_id_test2 = libbpf_find_vmlinux_btf_id("bpf_fentry_test2", BPF_TRACE_FENTRY); + if (!ASSERT_GT(btf_id_test2, 0, "find_btf_id_test2")) + return; + + skel = clone_attach_btf_id__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + err = bpf_object__prepare(skel->obj); + if (!ASSERT_OK(err, "obj_prepare")) + goto out; + + /* Clone with no opts — callback resolves BTF from sec_name */ + fd1 = bpf_program__clone(skel->progs.fentry_handler, NULL); + if (!ASSERT_GE(fd1, 0, "clone_default")) + goto out; + ASSERT_EQ(get_prog_attach_btf_id(fd1), btf_id_test1, + "attach_btf_id_default"); + + /* + * Clone with attach_btf_id override pointing to a different + * function. The BPF program never accesses arguments, so the + * load succeeds regardless of signature mismatch. + */ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .attach_btf_id = btf_id_test2, + ); + fd2 = bpf_program__clone(skel->progs.fentry_handler, &opts); + if (!ASSERT_GE(fd2, 0, "clone_override")) + goto out; + ASSERT_EQ(get_prog_attach_btf_id(fd2), btf_id_test2, + "attach_btf_id_override"); + +out: + if (fd1 >= 0) + close(fd1); + if (fd2 >= 0) + close(fd2); + clone_attach_btf_id__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index 34b59f6baca1..7488a7606e6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -22,79 +22,37 @@ static int duration = 0; -struct addr_port { - in_port_t port; - union { - struct in_addr in_addr; - struct in6_addr in6_addr; - }; -}; - -struct tuple { - int family; - struct addr_port src; - struct addr_port dst; -}; - -static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap) -{ - const struct sockaddr_in6 *in6; - const struct sockaddr_in *in; - - switch (sa->sa_family) { - case AF_INET: - in = (const struct sockaddr_in *)sa; - ap->in_addr = in->sin_addr; - ap->port = in->sin_port; - return true; - - case AF_INET6: - in6 = (const struct sockaddr_in6 *)sa; - ap->in6_addr = in6->sin6_addr; - ap->port = in6->sin6_port; - return true; - - default: - return false; - } -} -static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type, - int *server, int *conn, struct tuple *tuple) +static bool set_up_conn(const struct sockaddr_storage *addr, socklen_t len, int type, + int *server, int *conn, + struct sockaddr_storage *src, + struct sockaddr_storage *dst) { struct sockaddr_storage ss; socklen_t slen = sizeof(ss); - struct sockaddr *sa = (struct sockaddr *)&ss; - *server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL); + *server = start_server_addr(type, addr, len, NULL); if (*server < 0) return false; - if (CHECK_FAIL(getsockname(*server, sa, &slen))) + if (CHECK_FAIL(getsockname(*server, (struct sockaddr *)&ss, &slen))) goto close_server; - *conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL); + *conn = connect_to_addr(type, &ss, slen, NULL); if (*conn < 0) goto close_server; /* We want to simulate packets arriving at conn, so we have to * swap src and dst. */ - slen = sizeof(ss); - if (CHECK_FAIL(getsockname(*conn, sa, &slen))) - goto close_conn; - - if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst))) + slen = sizeof(*dst); + if (CHECK_FAIL(getsockname(*conn, (struct sockaddr *)dst, &slen))) goto close_conn; - slen = sizeof(ss); - if (CHECK_FAIL(getpeername(*conn, sa, &slen))) + slen = sizeof(*src); + if (CHECK_FAIL(getpeername(*conn, (struct sockaddr *)src, &slen))) goto close_conn; - if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src))) - goto close_conn; - - tuple->family = ss.ss_family; return true; close_conn: @@ -110,17 +68,16 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family) { struct sockaddr_in *addr4; struct sockaddr_in6 *addr6; + memset(addr, 0, sizeof(*addr)); switch (family) { case AF_INET: addr4 = (struct sockaddr_in *)addr; - memset(addr4, 0, sizeof(*addr4)); addr4->sin_family = family; addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); return sizeof(*addr4); case AF_INET6: addr6 = (struct sockaddr_in6 *)addr; - memset(addr6, 0, sizeof(*addr6)); addr6->sin6_family = family; addr6->sin6_addr = in6addr_loopback; return sizeof(*addr6); @@ -242,9 +199,15 @@ static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto) } static size_t build_input(const struct test_cfg *test, void *const buf, - const struct tuple *tuple) + const struct sockaddr_storage *src, + const struct sockaddr_storage *dst) { - in_port_t sport = tuple->src.port; + struct sockaddr_in6 *src_in6 = (struct sockaddr_in6 *)src; + struct sockaddr_in6 *dst_in6 = (struct sockaddr_in6 *)dst; + struct sockaddr_in *src_in = (struct sockaddr_in *)src; + struct sockaddr_in *dst_in = (struct sockaddr_in *)dst; + sa_family_t family = src->ss_family; + in_port_t sport, dport; encap_headers_t encap; struct iphdr ip; struct ipv6hdr ipv6; @@ -254,8 +217,11 @@ static size_t build_input(const struct test_cfg *test, void *const buf, uint8_t *p = buf; int proto; + sport = (family == AF_INET) ? src_in->sin_port : src_in6->sin6_port; + dport = (family == AF_INET) ? dst_in->sin_port : dst_in6->sin6_port; + proto = IPPROTO_IPIP; - if (tuple->family == AF_INET6) + if (family == AF_INET6) proto = IPPROTO_IPV6; encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto); @@ -270,15 +236,15 @@ static size_t build_input(const struct test_cfg *test, void *const buf, if (test->type == UDP) proto = IPPROTO_UDP; - switch (tuple->family) { + switch (family) { case AF_INET: ip = (struct iphdr){ .ihl = 5, .version = 4, .ttl = IPDEFTTL, .protocol = proto, - .saddr = tuple->src.in_addr.s_addr, - .daddr = tuple->dst.in_addr.s_addr, + .saddr = src_in->sin_addr.s_addr, + .daddr = dst_in->sin_addr.s_addr, }; p = mempcpy(p, &ip, sizeof(ip)); break; @@ -287,8 +253,8 @@ static size_t build_input(const struct test_cfg *test, void *const buf, .version = 6, .hop_limit = IPDEFTTL, .nexthdr = proto, - .saddr = tuple->src.in6_addr, - .daddr = tuple->dst.in6_addr, + .saddr = src_in6->sin6_addr, + .daddr = dst_in6->sin6_addr, }; p = mempcpy(p, &ipv6, sizeof(ipv6)); break; @@ -303,18 +269,16 @@ static size_t build_input(const struct test_cfg *test, void *const buf, case TCP: tcp = (struct tcphdr){ .source = sport, - .dest = tuple->dst.port, + .dest = dport, + .syn = (test->flags == SYN), + .ack = (test->flags == ACK), }; - if (test->flags == SYN) - tcp.syn = true; - if (test->flags == ACK) - tcp.ack = true; p = mempcpy(p, &tcp, sizeof(tcp)); break; case UDP: udp = (struct udphdr){ .source = sport, - .dest = tuple->dst.port, + .dest = dport, }; p = mempcpy(p, &udp, sizeof(udp)); break; @@ -339,27 +303,26 @@ static void test_cls_redirect_common(struct bpf_program *prog) LIBBPF_OPTS(bpf_test_run_opts, tattr); int families[] = { AF_INET, AF_INET6 }; struct sockaddr_storage ss; - struct sockaddr *addr; socklen_t slen; int i, j, err, prog_fd; int servers[__NR_KIND][ARRAY_SIZE(families)] = {}; int conns[__NR_KIND][ARRAY_SIZE(families)] = {}; - struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)]; + struct sockaddr_storage srcs[__NR_KIND][ARRAY_SIZE(families)]; + struct sockaddr_storage dsts[__NR_KIND][ARRAY_SIZE(families)]; - addr = (struct sockaddr *)&ss; for (i = 0; i < ARRAY_SIZE(families); i++) { slen = prepare_addr(&ss, families[i]); if (CHECK_FAIL(!slen)) goto cleanup; - if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM, + if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_DGRAM, &servers[UDP][i], &conns[UDP][i], - &tuples[UDP][i]))) + &srcs[UDP][i], &dsts[UDP][i]))) goto cleanup; - if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM, + if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_STREAM, &servers[TCP][i], &conns[TCP][i], - &tuples[TCP][i]))) + &srcs[TCP][i], &dsts[TCP][i]))) goto cleanup; } @@ -368,11 +331,12 @@ static void test_cls_redirect_common(struct bpf_program *prog) struct test_cfg *test = &tests[i]; for (j = 0; j < ARRAY_SIZE(families); j++) { - struct tuple *tuple = &tuples[test->type][j]; + struct sockaddr_storage *src = &srcs[test->type][j]; + struct sockaddr_storage *dst = &dsts[test->type][j]; char input[256]; char tmp[256]; - test_str(tmp, sizeof(tmp), test, tuple->family); + test_str(tmp, sizeof(tmp), test, families[j]); if (!test__start_subtest(tmp)) continue; @@ -380,7 +344,7 @@ static void test_cls_redirect_common(struct bpf_program *prog) tattr.data_size_out = sizeof(tmp); tattr.data_in = input; - tattr.data_size_in = build_input(test, input, tuple); + tattr.data_size_in = build_input(test, input, src, dst); if (CHECK_FAIL(!tattr.data_size_in)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c new file mode 100644 index 000000000000..285f20241fe1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "compute_live_registers.skel.h" +#include "test_progs.h" + +void test_compute_live_registers(void) +{ + RUN_TESTS(compute_live_registers); +} diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c index 24d553109f8d..dfb7f6cf3ee1 100644 --- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c +++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c @@ -53,6 +53,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) __u16 expected_peer_port = 60000; struct bpf_program *prog; struct bpf_object *obj; + struct bpf_map *map; + __u16 *port_ptr; + size_t port_size; const char *obj_file = v4 ? "connect_force_port4.bpf.o" : "connect_force_port6.bpf.o"; int fd, err; __u32 duration = 0; @@ -61,6 +64,21 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) if (!ASSERT_OK_PTR(obj, "bpf_obj_open")) return -1; + map = bpf_object__find_map_by_name(obj, ".bss"); + if (!ASSERT_OK_PTR(map, "find bss map")) { + err = -EIO; + goto close_bpf_object; + } + + port_ptr = bpf_map__initial_value(map, &port_size); + if (!ASSERT_OK_PTR(port_ptr, "get bss initial value")) { + err = -EIO; + goto close_bpf_object; + } + + /* Auto assigns the port according to availability */ + *port_ptr = ntohs(get_socket_local_port(server_fd)); + err = bpf_object__load(obj); if (!ASSERT_OK(err, "bpf_obj_load")) { err = -EIO; @@ -138,25 +156,25 @@ void test_connect_force_port(void) if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM)); close(server_fd); - server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0); + server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM)); close(server_fd); - server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0); + server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM)); close(server_fd); - server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0); + server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM)); diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index e10ea92c3fe2..08963c82f30b 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -85,11 +85,11 @@ static int duration = 0; #define NESTING_ERR_CASE(name) { \ NESTING_CASE_COMMON(name), \ .fails = true, \ - .run_btfgen_fails = true, \ + .run_btfgen_fails = true, \ } #define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ - .a = { [2] = 1 }, \ + .a = { [2] = 1, [3] = 11 }, \ .b = { [1] = { [2] = { [3] = 2 } } }, \ .c = { [1] = { .c = 3 } }, \ .d = { [0] = { [0] = { .d = 4 } } }, \ @@ -108,6 +108,7 @@ static int duration = 0; .input_len = sizeof(struct core_reloc_##name), \ .output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \ .a2 = 1, \ + .a3 = 12, \ .b123 = 2, \ .c1c = 3, \ .d00d = 4, \ @@ -602,6 +603,7 @@ static const struct core_reloc_test_case test_cases[] = { ARRAYS_ERR_CASE(arrays___err_non_array), ARRAYS_ERR_CASE(arrays___err_wrong_val_type), ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr), + ARRAYS_ERR_CASE(arrays___err_bad_signed_arr_elem_sz), /* enum/ptr/int handling scenarios */ PRIMITIVES_CASE(primitives), diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index e58a04654238..6c45330a5ca3 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -25,6 +25,10 @@ static const char * const cpumask_success_testcases[] = { "test_global_mask_nested_deep_rcu", "test_global_mask_nested_deep_array_rcu", "test_cpumask_weight", + "test_refcount_null_tracking", + "test_populate_reject_small_mask", + "test_populate_reject_unaligned", + "test_populate", }; static void verify_success(const char *prog_name) @@ -78,6 +82,5 @@ void test_cpumask(void) verify_success(cpumask_success_testcases[i]); } - RUN_TESTS(cpumask_success); RUN_TESTS(cpumask_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index dd75ccb03770..469e92869523 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -308,8 +308,10 @@ static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches return -1; } - strncpy(type_str, type, type_sz); - strncpy(field_str, field, field_sz); + memcpy(type_str, type, type_sz); + type_str[type_sz] = '\0'; + memcpy(field_str, field, field_sz); + field_str[field_sz] = '\0'; btf_id = btf__find_by_name(btf, type_str); if (btf_id < 0) { PRINT_FAIL("No BTF info for type %s\n", type_str); diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index ccc768592e66..1a2a2f1abf03 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -38,6 +38,14 @@ static int set_pathname(int fd, pid_t pid) return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN); } +static inline long syscall_close(int fd) +{ + return syscall(__NR_close_range, + (unsigned int)fd, + (unsigned int)fd, + 0u); +} + static int trigger_fstat_events(pid_t pid) { int sockfd = -1, procfd = -1, devfd = -1; @@ -104,18 +112,34 @@ out_close: /* sys_close no longer triggers filp_close, but we can * call sys_close_range instead which still does */ -#define close(fd) syscall(__NR_close_range, fd, fd, 0) + syscall_close(pipefd[0]); + syscall_close(pipefd[1]); + syscall_close(sockfd); + syscall_close(procfd); + syscall_close(devfd); + syscall_close(localfd); + syscall_close(indicatorfd); + return ret; +} - close(pipefd[0]); - close(pipefd[1]); - close(sockfd); - close(procfd); - close(devfd); - close(localfd); - close(indicatorfd); +static void attach_and_load(struct test_d_path **skel) +{ + int err; -#undef close - return ret; + *skel = test_d_path__open_and_load(); + if (CHECK(!*skel, "setup", "d_path skeleton failed\n")) + goto cleanup; + + err = test_d_path__attach(*skel); + if (CHECK(err, "setup", "attach failed: %d\n", err)) + goto cleanup; + + (*skel)->bss->my_pid = getpid(); + return; + +cleanup: + test_d_path__destroy(*skel); + *skel = NULL; } static void test_d_path_basic(void) @@ -124,16 +148,11 @@ static void test_d_path_basic(void) struct test_d_path *skel; int err; - skel = test_d_path__open_and_load(); - if (CHECK(!skel, "setup", "d_path skeleton failed\n")) - goto cleanup; - - err = test_d_path__attach(skel); - if (CHECK(err, "setup", "attach failed: %d\n", err)) + attach_and_load(&skel); + if (!skel) goto cleanup; bss = skel->bss; - bss->my_pid = getpid(); err = trigger_fstat_events(bss->my_pid); if (err < 0) @@ -195,6 +214,39 @@ static void test_d_path_check_types(void) test_d_path_check_types__destroy(skel); } +/* Check if the verifier correctly generates code for + * accessing the memory modified by d_path helper. + */ +static void test_d_path_mem_access(void) +{ + int localfd = -1; + char path_template[] = "/dev/shm/d_path_loadgen.XXXXXX"; + struct test_d_path__bss *bss; + struct test_d_path *skel; + + attach_and_load(&skel); + if (!skel) + goto cleanup; + + bss = skel->bss; + + localfd = mkstemp(path_template); + if (CHECK(localfd < 0, "trigger", "mkstemp failed\n")) + goto cleanup; + + if (CHECK(fallocate(localfd, 0, 0, 1024) < 0, "trigger", "fallocate failed\n")) + goto cleanup; + remove(path_template); + + if (CHECK(!bss->path_match_fallocate, "check", + "failed to read fallocate path")) + goto cleanup; + +cleanup: + syscall_close(localfd); + test_d_path__destroy(skel); +} + void test_d_path(void) { if (test__start_subtest("basic")) @@ -205,4 +257,7 @@ void test_d_path(void) if (test__start_subtest("check_alloc_mem")) test_d_path_check_types(); + + if (test__start_subtest("check_mem_access")) + test_d_path_mem_access(); } diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c new file mode 100644 index 000000000000..fb2cea710db3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "dmabuf_iter.skel.h" + +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <linux/dma-buf.h> +#include <linux/dma-heap.h> +#include <linux/udmabuf.h> + +static int udmabuf = -1; +static const char udmabuf_test_buffer_name[DMA_BUF_NAME_LEN] = "udmabuf_test_buffer_for_iter"; +static size_t udmabuf_test_buffer_size; +static int sysheap_dmabuf = -1; +static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter"; +static size_t sysheap_test_buffer_size; + +static int create_udmabuf(void) +{ + struct udmabuf_create create; + int dev_udmabuf, memfd, local_udmabuf; + + udmabuf_test_buffer_size = 10 * getpagesize(); + + if (!ASSERT_LE(sizeof(udmabuf_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG")) + return -1; + + memfd = memfd_create("memfd_test", MFD_ALLOW_SEALING); + if (!ASSERT_OK_FD(memfd, "memfd_create")) + return -1; + + if (!ASSERT_OK(ftruncate(memfd, udmabuf_test_buffer_size), "ftruncate")) + goto close_memfd; + + if (!ASSERT_OK(fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK), "seal")) + goto close_memfd; + + dev_udmabuf = open("/dev/udmabuf", O_RDONLY); + if (!ASSERT_OK_FD(dev_udmabuf, "open udmabuf")) + goto close_memfd; + + memset(&create, 0, sizeof(create)); + create.memfd = memfd; + create.flags = UDMABUF_FLAGS_CLOEXEC; + create.offset = 0; + create.size = udmabuf_test_buffer_size; + + local_udmabuf = ioctl(dev_udmabuf, UDMABUF_CREATE, &create); + close(dev_udmabuf); + if (!ASSERT_OK_FD(local_udmabuf, "udmabuf_create")) + goto close_memfd; + + if (!ASSERT_OK(ioctl(local_udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name")) + goto close_udmabuf; + + return local_udmabuf; + +close_udmabuf: + close(local_udmabuf); +close_memfd: + close(memfd); + return -1; +} + +static int create_sys_heap_dmabuf(size_t bytes) +{ + struct dma_heap_allocation_data data = { + .len = bytes, + .fd = 0, + .fd_flags = O_RDWR | O_CLOEXEC, + .heap_flags = 0, + }; + int heap_fd, ret; + + if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG")) + return -1; + + heap_fd = open("/dev/dma_heap/system", O_RDONLY); + if (!ASSERT_OK_FD(heap_fd, "open dma heap")) + return -1; + + ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data); + close(heap_fd); + if (!ASSERT_OK(ret, "syheap alloc")) + return -1; + + if (!ASSERT_OK(ioctl(data.fd, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name")) + goto close_sysheap_dmabuf; + + return data.fd; + +close_sysheap_dmabuf: + close(data.fd); + return -1; +} + +static int create_test_buffers(void) +{ + udmabuf = create_udmabuf(); + + sysheap_test_buffer_size = 20 * getpagesize(); + sysheap_dmabuf = create_sys_heap_dmabuf(sysheap_test_buffer_size); + + if (udmabuf < 0 || sysheap_dmabuf < 0) + return -1; + + return 0; +} + +static void destroy_test_buffers(void) +{ + close(udmabuf); + udmabuf = -1; + + close(sysheap_dmabuf); + sysheap_dmabuf = -1; +} + +enum Fields { INODE, SIZE, NAME, EXPORTER, FIELD_COUNT }; +struct DmabufInfo { + unsigned long inode; + unsigned long size; + char name[DMA_BUF_NAME_LEN]; + char exporter[32]; +}; + +static bool check_dmabuf_info(const struct DmabufInfo *bufinfo, + unsigned long size, + const char *name, const char *exporter) +{ + return size == bufinfo->size && + !strcmp(name, bufinfo->name) && + !strcmp(exporter, bufinfo->exporter); +} + +static void subtest_dmabuf_iter_check_no_infinite_reads(struct dmabuf_iter *skel) +{ + int iter_fd; + char buf[256]; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + if (!ASSERT_OK_FD(iter_fd, "iter_create")) + return; + + while (read(iter_fd, buf, sizeof(buf)) > 0) + ; /* Read out all contents */ + + /* Next reads should return 0 */ + ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read"); + + close(iter_fd); +} + +static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel) +{ + bool found_test_sysheap_dmabuf = false; + bool found_test_udmabuf = false; + struct DmabufInfo bufinfo; + size_t linesize = 0; + char *line = NULL; + FILE *iter_file; + int iter_fd, f = INODE; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + if (!ASSERT_OK_FD(iter_fd, "iter_create")) + return; + + iter_file = fdopen(iter_fd, "r"); + if (!ASSERT_OK_PTR(iter_file, "fdopen")) + goto close_iter_fd; + + while (getline(&line, &linesize, iter_file) != -1) { + if (f % FIELD_COUNT == INODE) { + ASSERT_EQ(sscanf(line, "%ld", &bufinfo.inode), 1, + "read inode"); + } else if (f % FIELD_COUNT == SIZE) { + ASSERT_EQ(sscanf(line, "%ld", &bufinfo.size), 1, + "read size"); + } else if (f % FIELD_COUNT == NAME) { + ASSERT_EQ(sscanf(line, "%s", bufinfo.name), 1, + "read name"); + } else if (f % FIELD_COUNT == EXPORTER) { + ASSERT_EQ(sscanf(line, "%31s", bufinfo.exporter), 1, + "read exporter"); + + if (check_dmabuf_info(&bufinfo, + sysheap_test_buffer_size, + sysheap_test_buffer_name, + "system")) + found_test_sysheap_dmabuf = true; + else if (check_dmabuf_info(&bufinfo, + udmabuf_test_buffer_size, + udmabuf_test_buffer_name, + "udmabuf")) + found_test_udmabuf = true; + } + ++f; + } + + ASSERT_EQ(f % FIELD_COUNT, INODE, "number of fields"); + + ASSERT_TRUE(found_test_sysheap_dmabuf, "found_test_sysheap_dmabuf"); + ASSERT_TRUE(found_test_udmabuf, "found_test_udmabuf"); + + free(line); + fclose(iter_file); +close_iter_fd: + close(iter_fd); +} + +static void subtest_dmabuf_iter_check_lots_of_buffers(struct dmabuf_iter *skel) +{ + int iter_fd; + char buf[1024]; + size_t total_bytes_read = 0; + ssize_t bytes_read; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + if (!ASSERT_OK_FD(iter_fd, "iter_create")) + return; + + while ((bytes_read = read(iter_fd, buf, sizeof(buf))) > 0) + total_bytes_read += bytes_read; + + ASSERT_GT(total_bytes_read, 4096, "total_bytes_read"); + + close(iter_fd); +} + + +static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + char key[DMA_BUF_NAME_LEN]; + int err, fd; + bool found; + + /* No need to attach it, just run it directly */ + fd = bpf_program__fd(skel->progs.iter_dmabuf_for_each); + + err = bpf_prog_test_run_opts(fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, key), "get next key")) + return; + + do { + ASSERT_OK(bpf_map_lookup_elem(map_fd, key, &found), "lookup"); + ASSERT_TRUE(found, "found test buffer"); + } while (bpf_map_get_next_key(map_fd, key, key)); +} + +void test_dmabuf_iter(void) +{ + struct dmabuf_iter *skel = NULL; + int map_fd; + const bool f = false; + + skel = dmabuf_iter__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load")) + return; + + map_fd = bpf_map__fd(skel->maps.testbuf_hash); + if (!ASSERT_OK_FD(map_fd, "map_fd")) + goto destroy_skel; + + if (!ASSERT_OK(bpf_map_update_elem(map_fd, udmabuf_test_buffer_name, &f, BPF_ANY), + "insert udmabuf")) + goto destroy_skel; + if (!ASSERT_OK(bpf_map_update_elem(map_fd, sysheap_test_buffer_name, &f, BPF_ANY), + "insert sysheap buffer")) + goto destroy_skel; + + if (!ASSERT_OK(create_test_buffers(), "create_test_buffers")) + goto destroy; + + if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach")) + goto destroy; + + if (test__start_subtest("no_infinite_reads")) + subtest_dmabuf_iter_check_no_infinite_reads(skel); + if (test__start_subtest("default_iter")) + subtest_dmabuf_iter_check_default_iter(skel); + if (test__start_subtest("lots_of_buffers")) { + size_t NUM_BUFS = 100; + int buffers[NUM_BUFS]; + int i; + + for (i = 0; i < NUM_BUFS; ++i) { + buffers[i] = create_sys_heap_dmabuf(getpagesize()); + if (!ASSERT_OK_FD(buffers[i], "dmabuf_fd")) + goto cleanup_bufs; + } + + subtest_dmabuf_iter_check_lots_of_buffers(skel); + +cleanup_bufs: + for (--i; i >= 0; --i) + close(buffers[i]); + } + if (test__start_subtest("open_coded")) + subtest_dmabuf_iter_check_open_coded(skel, map_fd); + +destroy: + destroy_test_buffers(); +destroy_skel: + dmabuf_iter__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index b614a5272dfd..5fda11590708 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -10,6 +10,7 @@ enum test_setup_type { SETUP_SYSCALL_SLEEP, SETUP_SKB_PROG, SETUP_SKB_PROG_TP, + SETUP_XDP_PROG, }; static struct { @@ -18,9 +19,21 @@ static struct { } success_tests[] = { {"test_read_write", SETUP_SYSCALL_SLEEP}, {"test_dynptr_data", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_copy", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_copy_xdp", SETUP_XDP_PROG}, + {"test_dynptr_memset_zero", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_notzero", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_zero_offset", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_zero_adjusted", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_overflow", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_overflow_offset", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_readonly", SETUP_SKB_PROG}, + {"test_dynptr_memset_xdp_chunks", SETUP_XDP_PROG}, {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG}, {"test_adjust", SETUP_SYSCALL_SLEEP}, {"test_adjust_err", SETUP_SYSCALL_SLEEP}, {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP}, @@ -30,10 +43,21 @@ static struct { {"test_dynptr_skb_no_buff", SETUP_SKB_PROG}, {"test_dynptr_skb_strcmp", SETUP_SKB_PROG}, {"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP}, + {"test_probe_read_user_dynptr", SETUP_XDP_PROG}, + {"test_probe_read_kernel_dynptr", SETUP_XDP_PROG}, + {"test_probe_read_user_str_dynptr", SETUP_XDP_PROG}, + {"test_probe_read_kernel_str_dynptr", SETUP_XDP_PROG}, + {"test_copy_from_user_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_copy_from_user_str_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_copy_from_user_task_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP}, }; +#define PAGE_SIZE_64K 65536 + static void verify_success(const char *prog_name, enum test_setup_type setup_type) { + char user_data[384] = {[0 ... 382] = 'a', '\0'}; struct dynptr_success *skel; struct bpf_program *prog; struct bpf_link *link; @@ -55,6 +79,10 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ if (!ASSERT_OK(err, "dynptr_success__load")) goto cleanup; + skel->bss->user_ptr = user_data; + skel->data->test_len[0] = sizeof(user_data); + memcpy(skel->bss->expected_str, user_data, sizeof(user_data)); + switch (setup_type) { case SETUP_SYSCALL_SLEEP: link = bpf_program__attach(prog); @@ -109,11 +137,36 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ ); link = bpf_program__attach(prog); - if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) { + bpf_object__close(obj); goto cleanup; + } err = bpf_prog_test_run_opts(aux_prog_fd, &topts); bpf_link__destroy(link); + bpf_object__close(obj); + + if (!ASSERT_OK(err, "test_run")) + goto cleanup; + + break; + } + case SETUP_XDP_PROG: + { + char data[90000]; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .repeat = 1, + ); + + if (getpagesize() == PAGE_SIZE_64K) + opts.data_size_in = sizeof(data); + else + opts.data_size_in = 5000; + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run_opts(prog_fd, &opts); if (!ASSERT_OK(err, "test_run")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 438583e1f2d1..c9fcb70cbafb 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -10,8 +10,8 @@ void test_empty_skb(void) struct empty_skb *bpf_obj = NULL; struct nstoken *tok = NULL; struct bpf_program *prog; + struct ethhdr eth_hlen; char eth_hlen_pp[15]; - char eth_hlen[14]; int veth_ifindex; int ipip_ifindex; int err; @@ -25,7 +25,9 @@ void test_empty_skb(void) int err; int ret; int lwt_egress_ret; /* expected retval at lwt/egress */ + __be16 h_proto; bool success_on_tc; + bool adjust_room; } tests[] = { /* Empty packets are always rejected. */ @@ -46,6 +48,28 @@ void test_empty_skb(void) .err = -EINVAL, }, + /* ETH_HLEN-sized packets with IPv4/IPv6 EtherType but + * no L3 header are rejected. + */ + { + .msg = "veth short IPv4 ingress packet", + .data_in = ð_hlen, + .data_size_in = sizeof(eth_hlen), + .ifindex = &veth_ifindex, + .err = -EINVAL, + .h_proto = htons(ETH_P_IP), + .adjust_room = true, + }, + { + .msg = "veth short IPv6 ingress packet", + .data_in = ð_hlen, + .data_size_in = sizeof(eth_hlen), + .ifindex = &veth_ifindex, + .err = -EINVAL, + .h_proto = htons(ETH_P_IPV6), + .adjust_room = true, + }, + /* ETH_HLEN-sized packets: * - can not be redirected at LWT_XMIT * - can be redirected at TC to non-tunneling dest @@ -54,7 +78,7 @@ void test_empty_skb(void) { /* __bpf_redirect_common */ .msg = "veth ETH_HLEN packet ingress", - .data_in = eth_hlen, + .data_in = ð_hlen, .data_size_in = sizeof(eth_hlen), .ifindex = &veth_ifindex, .ret = -ERANGE, @@ -68,7 +92,7 @@ void test_empty_skb(void) * tc: skb->len=14 <= skb_network_offset=14 */ .msg = "ipip ETH_HLEN packet ingress", - .data_in = eth_hlen, + .data_in = ð_hlen, .data_size_in = sizeof(eth_hlen), .ifindex = &ipip_ifindex, .ret = -ERANGE, @@ -108,17 +132,27 @@ void test_empty_skb(void) SYS(out, "ip addr add 192.168.1.1/16 dev ipip0"); ipip_ifindex = if_nametoindex("ipip0"); + memset(eth_hlen_pp, 0, sizeof(eth_hlen_pp)); + memset(ð_hlen, 0, sizeof(eth_hlen)); + bpf_obj = empty_skb__open_and_load(); if (!ASSERT_OK_PTR(bpf_obj, "open skeleton")) goto out; for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (tests[i].data_in == ð_hlen) + eth_hlen.h_proto = tests[i].h_proto; + bpf_object__for_each_program(prog, bpf_obj->obj) { bool at_egress = strstr(bpf_program__name(prog), "egress") != NULL; bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2); + bool is_adjust_room = !strcmp(bpf_program__name(prog), "tc_adjust_room"); int expected_ret; char buf[128]; + if (tests[i].adjust_room != is_adjust_room) + continue; + expected_ret = at_egress && !at_tc ? tests[i].lwt_egress_ret : tests[i].ret; tattr.data_in = tests[i].data_in; diff --git a/tools/testing/selftests/bpf/prog_tests/exceptions.c b/tools/testing/selftests/bpf/prog_tests/exceptions.c index 516f4a13013c..e8cbaf2a3e82 100644 --- a/tools/testing/selftests/bpf/prog_tests/exceptions.c +++ b/tools/testing/selftests/bpf/prog_tests/exceptions.c @@ -83,6 +83,7 @@ static void test_exceptions_success(void) RUN_SUCCESS(exception_assert_range_with, 1); RUN_SUCCESS(exception_bad_assert_range, 0); RUN_SUCCESS(exception_bad_assert_range_with, 10); + RUN_SUCCESS(exception_throw_from_void_global, 11); #define RUN_EXT(load_ret, attach_err, expr, msg, after_link) \ { \ @@ -127,7 +128,7 @@ static void test_exceptions_success(void) bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime), "exception_cb_mod"), "set_attach_target")) goto done; - }), "FENTRY/FEXIT programs cannot attach to exception callback", 0); + }), "Tracing programs cannot attach to exception callback", 0); if (test__start_subtest("throwing fentry -> exception_cb")) RUN_EXT(-EINVAL, true, ({ @@ -137,7 +138,7 @@ static void test_exceptions_success(void) bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime), "exception_cb_mod"), "set_attach_target")) goto done; - }), "FENTRY/FEXIT programs cannot attach to exception callback", 0); + }), "Tracing programs cannot attach to exception callback", 0); if (test__start_subtest("non-throwing fexit -> exception_cb")) RUN_EXT(-EINVAL, true, ({ @@ -147,7 +148,7 @@ static void test_exceptions_success(void) bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime), "exception_cb_mod"), "set_attach_target")) goto done; - }), "FENTRY/FEXIT programs cannot attach to exception callback", 0); + }), "Tracing programs cannot attach to exception callback", 0); if (test__start_subtest("throwing fexit -> exception_cb")) RUN_EXT(-EINVAL, true, ({ @@ -157,7 +158,7 @@ static void test_exceptions_success(void) bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime), "exception_cb_mod"), "set_attach_target")) goto done; - }), "FENTRY/FEXIT programs cannot attach to exception callback", 0); + }), "Tracing programs cannot attach to exception callback", 0); if (test__start_subtest("throwing extension (with custom cb) -> exception_cb")) RUN_EXT(-EINVAL, true, ({ diff --git a/tools/testing/selftests/bpf/prog_tests/exe_ctx.c b/tools/testing/selftests/bpf/prog_tests/exe_ctx.c new file mode 100644 index 000000000000..aed6a6ef0876 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/exe_ctx.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2026 Valve Corporation. + * Author: Changwoo Min <changwoo@igalia.com> + */ + +#include <test_progs.h> +#include <sys/syscall.h> +#include "test_ctx.skel.h" + +void test_exe_ctx(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + cpu_set_t old_cpuset, target_cpuset; + struct test_ctx *skel; + int err, prog_fd; + + /* 1. Pin the current process to CPU 0. */ + if (sched_getaffinity(0, sizeof(old_cpuset), &old_cpuset) == 0) { + CPU_ZERO(&target_cpuset); + CPU_SET(0, &target_cpuset); + ASSERT_OK(sched_setaffinity(0, sizeof(target_cpuset), + &target_cpuset), "setaffinity"); + } + + skel = test_ctx__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto restore_affinity; + + err = test_ctx__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* 2. When we run this, the kernel will execute the BPF prog on CPU 0. */ + prog_fd = bpf_program__fd(skel->progs.trigger_all_contexts); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(err, "test_run_trigger"); + + /* 3. Wait for the local CPU's softirq/tasklet to finish. */ + for (int i = 0; i < 1000; i++) { + if (skel->bss->count_task > 0 && + skel->bss->count_hardirq > 0 && + skel->bss->count_softirq > 0) + break; + usleep(1000); /* Wait 1ms per iteration, up to 1 sec total */ + } + + /* On CPU 0, these should now all be non-zero. */ + ASSERT_GT(skel->bss->count_task, 0, "task_ok"); + ASSERT_GT(skel->bss->count_hardirq, 0, "hardirq_ok"); + ASSERT_GT(skel->bss->count_softirq, 0, "softirq_ok"); + +cleanup: + test_ctx__destroy(skel); + +restore_affinity: + ASSERT_OK(sched_setaffinity(0, sizeof(old_cpuset), &old_cpuset), + "restore_affinity"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index a1d52e73fb16..3078d8264deb 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -83,8 +83,8 @@ static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *ma int err; memset(&info, 0, len); - info.nr_map_ids = *nr_map_ids, - info.map_ids = ptr_to_u64(map_ids), + info.nr_map_ids = *nr_map_ids; + info.map_ids = ptr_to_u64(map_ids); err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) @@ -293,7 +293,7 @@ static int get_btf_id_by_fd(int btf_fd, __u32 *id) * 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1 * 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2 * 3) Close the btf_fd, now refcnt=1 - * Wait and check that BTF stil exists. + * Wait and check that BTF still exists. */ static void check_fd_array_cnt__referenced_btfs(void) { @@ -312,7 +312,7 @@ static void check_fd_array_cnt__referenced_btfs(void) /* btf should still exist when original file descriptor is closed */ err = get_btf_id_by_fd(extra_fds[0], &btf_id); - if (!ASSERT_GE(err, 0, "get_btf_id_by_fd")) + if (!ASSERT_EQ(err, 0, "get_btf_id_by_fd")) goto cleanup; Close(extra_fds[0]); @@ -412,8 +412,8 @@ static void check_fd_array_cnt__fd_array_too_big(void) ASSERT_EQ(prog_fd, -E2BIG, "prog should have been rejected with -E2BIG"); cleanup_fds: - while (i > 0) - Close(extra_fds[--i]); + while (i-- > 0) + Close(extra_fds[i]); } void test_fd_array_cnt(void) diff --git a/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c new file mode 100644 index 000000000000..ca46fdd6e1ae --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <stdbool.h> +#include <test_progs.h> +#include "fd_htab_lookup.skel.h" + +struct htab_op_ctx { + int fd; + int loop; + unsigned int entries; + bool stop; +}; + +#define ERR_TO_RETVAL(where, err) ((void *)(long)(((where) << 12) | (-err))) + +static void *htab_lookup_fn(void *arg) +{ + struct htab_op_ctx *ctx = arg; + int i = 0; + + while (i++ < ctx->loop && !ctx->stop) { + unsigned int j; + + for (j = 0; j < ctx->entries; j++) { + unsigned int key = j, zero = 0, value; + int inner_fd, err; + + err = bpf_map_lookup_elem(ctx->fd, &key, &value); + if (err) { + ctx->stop = true; + return ERR_TO_RETVAL(1, err); + } + + inner_fd = bpf_map_get_fd_by_id(value); + if (inner_fd < 0) { + /* The old map has been freed */ + if (inner_fd == -ENOENT) + continue; + ctx->stop = true; + return ERR_TO_RETVAL(2, inner_fd); + } + + err = bpf_map_lookup_elem(inner_fd, &zero, &value); + if (err) { + close(inner_fd); + ctx->stop = true; + return ERR_TO_RETVAL(3, err); + } + close(inner_fd); + + if (value != key) { + ctx->stop = true; + return ERR_TO_RETVAL(4, -EINVAL); + } + } + } + + return NULL; +} + +static void *htab_update_fn(void *arg) +{ + struct htab_op_ctx *ctx = arg; + int i = 0; + + while (i++ < ctx->loop && !ctx->stop) { + unsigned int j; + + for (j = 0; j < ctx->entries; j++) { + unsigned int key = j, zero = 0; + int inner_fd, err; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (inner_fd < 0) { + ctx->stop = true; + return ERR_TO_RETVAL(1, inner_fd); + } + + err = bpf_map_update_elem(inner_fd, &zero, &key, 0); + if (err) { + close(inner_fd); + ctx->stop = true; + return ERR_TO_RETVAL(2, err); + } + + err = bpf_map_update_elem(ctx->fd, &key, &inner_fd, BPF_EXIST); + if (err) { + close(inner_fd); + ctx->stop = true; + return ERR_TO_RETVAL(3, err); + } + close(inner_fd); + } + } + + return NULL; +} + +static int setup_htab(int fd, unsigned int entries) +{ + unsigned int i; + + for (i = 0; i < entries; i++) { + unsigned int key = i, zero = 0; + int inner_fd, err; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "new array")) + return -1; + + err = bpf_map_update_elem(inner_fd, &zero, &key, 0); + if (!ASSERT_OK(err, "init array")) { + close(inner_fd); + return -1; + } + + err = bpf_map_update_elem(fd, &key, &inner_fd, 0); + if (!ASSERT_OK(err, "init outer")) { + close(inner_fd); + return -1; + } + close(inner_fd); + } + + return 0; +} + +static int get_int_from_env(const char *name, int dft) +{ + const char *value; + + value = getenv(name); + if (!value) + return dft; + + return atoi(value); +} + +void test_fd_htab_lookup(void) +{ + unsigned int i, wr_nr = 8, rd_nr = 16; + pthread_t tids[wr_nr + rd_nr]; + struct fd_htab_lookup *skel; + struct htab_op_ctx ctx; + int err; + + skel = fd_htab_lookup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fd_htab_lookup__open_and_load")) + return; + + ctx.fd = bpf_map__fd(skel->maps.outer_map); + ctx.loop = get_int_from_env("FD_HTAB_LOOP_NR", 5); + ctx.stop = false; + ctx.entries = 8; + + err = setup_htab(ctx.fd, ctx.entries); + if (err) + goto destroy; + + memset(tids, 0, sizeof(tids)); + for (i = 0; i < wr_nr; i++) { + err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) { + ctx.stop = true; + goto reap; + } + } + for (i = 0; i < rd_nr; i++) { + err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) { + ctx.stop = true; + goto reap; + } + } + +reap: + for (i = 0; i < wr_nr + rd_nr; i++) { + void *ret = NULL; + char desc[32]; + + if (!tids[i]) + continue; + + snprintf(desc, sizeof(desc), "thread %u", i + 1); + err = pthread_join(tids[i], &ret); + ASSERT_OK(err, desc); + ASSERT_EQ(ret, NULL, desc); + } +destroy: + fd_htab_lookup__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 130f5b82d2e6..5ef1804e44df 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -12,13 +12,24 @@ void test_fentry_fexit(void) int err, prog_fd, i; LIBBPF_OPTS(bpf_test_run_opts, topts); - fentry_skel = fentry_test_lskel__open_and_load(); + fentry_skel = fentry_test_lskel__open(); if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto close_prog; - fexit_skel = fexit_test_lskel__open_and_load(); + + fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fentry_test_lskel__load(fentry_skel); + if (!ASSERT_OK(err, "fentry_skel_load")) + goto close_prog; + + fexit_skel = fexit_test_lskel__open(); if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto close_prog; + fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fexit_test_lskel__load(fexit_skel); + if (!ASSERT_OK(err, "fexit_skel_load")) + goto close_prog; + err = fentry_test_lskel__attach(fentry_skel); if (!ASSERT_OK(err, "fentry_attach")) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index aee1bc77a17f..ec882328eb59 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -43,8 +43,13 @@ static void fentry_test(void) struct fentry_test_lskel *fentry_skel = NULL; int err; - fentry_skel = fentry_test_lskel__open_and_load(); - if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) + fentry_skel = fentry_test_lskel__open(); + if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_open")) + goto cleanup; + + fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fentry_test_lskel__load(fentry_skel); + if (!ASSERT_OK(err, "fentry_skel_load")) goto cleanup; err = fentry_test_common(fentry_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index f29fc789c14b..92c20803ea76 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -111,7 +111,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, struct bpf_link_info link_info; struct bpf_program *pos; const char *pos_sec_name; - char *tgt_name; + const char *tgt_name; __s32 btf_id; tgt_name = strstr(prog_name[i], "/"); @@ -347,6 +347,17 @@ static void test_func_sockmap_update(void) prog_name, false, NULL); } +static void test_func_replace_void(void) +{ + const char *prog_name[] = { + "freplace/foo", + }; + test_fexit_bpf2bpf_common("./freplace_void.bpf.o", + "./test_global_func7.bpf.o", + ARRAY_SIZE(prog_name), + prog_name, false, NULL); +} + static void test_obj_load_failure_common(const char *obj_file, const char *target_obj_file, const char *exp_msg) @@ -432,6 +443,15 @@ static void test_func_replace_global_func(void) prog_name, false, NULL); } +static void test_func_replace_int_with_void(void) +{ + /* Make sure we can't freplace with the wrong type */ + test_obj_load_failure_common("freplace_int_with_void.bpf.o", + "./test_global_func2.bpf.o", + "Return type UNKNOWN of test_freplace_int_with_void()" + " doesn't match type INT of global_func2()"); +} + static int find_prog_btf_id(const char *name, __u32 attach_prog_fd) { struct bpf_prog_info info = {}; @@ -597,4 +617,8 @@ void serial_test_fexit_bpf2bpf(void) test_fentry_to_cgroup_bpf(); if (test__start_subtest("func_replace_progmap")) test_func_replace_progmap(); + if (test__start_subtest("freplace_int_with_void")) + test_func_replace_int_with_void(); + if (test__start_subtest("freplace_void")) + test_func_replace_void(); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index 1c13007e37dd..94eed753560c 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -43,8 +43,13 @@ static void fexit_test(void) struct fexit_test_lskel *fexit_skel = NULL; int err; - fexit_skel = fexit_test_lskel__open_and_load(); - if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) + fexit_skel = fexit_test_lskel__open(); + if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_open")) + goto cleanup; + + fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fexit_test_lskel__load(fexit_skel); + if (!ASSERT_OK(err, "fexit_skel_load")) goto cleanup; err = fexit_test_common(fexit_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c new file mode 100644 index 000000000000..5cde32b35da4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include <network_helpers.h> +#include "file_reader.skel.h" +#include "file_reader_fail.skel.h" +#include <dlfcn.h> +#include <sys/mman.h> + +const char *user_ptr = "hello world"; +char file_contents[256000]; + +void *get_executable_base_addr(void) +{ + Dl_info info; + + if (!dladdr((void *)&get_executable_base_addr, &info)) { + fprintf(stderr, "dladdr failed\n"); + return NULL; + } + + return info.dli_fbase; +} + +static int initialize_file_contents(void) +{ + int fd, page_sz = sysconf(_SC_PAGESIZE); + ssize_t n = 0, cur, off; + void *addr; + + fd = open("/proc/self/exe", O_RDONLY); + if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n")) + return 1; + + do { + cur = read(fd, file_contents + n, sizeof(file_contents) - n); + if (!ASSERT_GT(cur, 0, "read success")) + break; + n += cur; + } while (n < sizeof(file_contents)); + + close(fd); + + if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe\n")) + return 1; + + addr = get_executable_base_addr(); + if (!ASSERT_NEQ(addr, NULL, "get executable address")) + return 1; + + /* page-align base file address */ + addr = (void *)((unsigned long)addr & ~(page_sz - 1)); + + /* + * Page out range 0..512K, use 0..256K for positive tests and + * 256K..512K for negative tests expecting page faults + */ + for (off = 0; off < sizeof(file_contents) * 2; off += page_sz) { + if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT), + "madvise pageout")) + return errno; + } + + return 0; +} + +static void run_test(const char *prog_name) +{ + struct file_reader *skel; + struct bpf_program *prog; + int err, fd; + + err = initialize_file_contents(); + if (!ASSERT_OK(err, "initialize file contents")) + return; + + skel = file_reader__open(); + if (!ASSERT_OK_PTR(skel, "file_reader__open")) + return; + + bpf_object__for_each_program(prog, skel->obj) { + bpf_program__set_autoload(prog, strcmp(bpf_program__name(prog), prog_name) == 0); + } + + memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents)); + skel->bss->pid = getpid(); + + err = file_reader__load(skel); + if (!ASSERT_OK(err, "file_reader__load")) + goto cleanup; + + err = file_reader__attach(skel); + if (!ASSERT_OK(err, "file_reader__attach")) + goto cleanup; + + fd = open("/proc/self/exe", O_RDONLY); + if (fd >= 0) + close(fd); + + ASSERT_EQ(skel->bss->err, 0, "err"); + ASSERT_EQ(skel->bss->run_success, 1, "run_success"); +cleanup: + file_reader__destroy(skel); +} + +void test_file_reader(void) +{ + if (test__start_subtest("on_open_expect_fault")) + run_test("on_open_expect_fault"); + + if (test__start_subtest("on_open_validate_file_read")) + run_test("on_open_validate_file_read"); + + if (test__start_subtest("negative")) + RUN_TESTS(file_reader_fail); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index e59af2aa6601..e40114620751 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -37,6 +37,7 @@ static noinline void uprobe_func(void) static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr, ssize_t offset, ssize_t entry_offset) { + ssize_t ref_ctr_offset = entry_offset /* ref_ctr_offset for uprobes */; struct bpf_link_info info; __u32 len = sizeof(info); char buf[PATH_MAX]; @@ -97,6 +98,7 @@ again: case BPF_PERF_EVENT_UPROBE: case BPF_PERF_EVENT_URETPROBE: ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset"); + ASSERT_EQ(info.perf_event.uprobe.ref_ctr_offset, ref_ctr_offset, "uprobe_ref_ctr_offset"); ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1, "name_len"); @@ -241,20 +243,32 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, .retprobe = type == BPF_PERF_EVENT_URETPROBE, .bpf_cookie = PERF_EVENT_COOKIE, ); + const char *sema[1] = { + "uprobe_link_info_sema_1", + }; + __u64 *ref_ctr_offset; struct bpf_link *link; int link_fd, err; + err = elf_resolve_syms_offsets("/proc/self/exe", 1, sema, + (unsigned long **) &ref_ctr_offset, STT_OBJECT); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object")) + return; + + opts.ref_ctr_offset = *ref_ctr_offset; link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run, 0, /* self pid */ UPROBE_FILE, uprobe_offset, &opts); if (!ASSERT_OK_PTR(link, "attach_uprobe")) - return; + goto out; link_fd = bpf_link__fd(link); - err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0); + err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, *ref_ctr_offset); ASSERT_OK(err, "verify_perf_link_info"); bpf_link__destroy(link); +out: + free(ref_ctr_offset); } static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 08bae13248c4..fb4892681464 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -570,7 +570,7 @@ static int create_tap(const char *ifname) }; int fd, ret; - strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + strscpy(ifr.ifr_name, ifname); fd = open("/dev/net/tun", O_RDWR); if (fd < 0) @@ -599,7 +599,7 @@ static int ifup(const char *ifname) struct ifreq ifr = {}; int sk, ret; - strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + strscpy(ifr.ifr_name, ifname); sk = socket(PF_INET, SOCK_DGRAM, 0); if (sk < 0) diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 09f6487f58b9..5fea3209566e 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -6,6 +6,7 @@ #include "for_each_array_map_elem.skel.h" #include "for_each_map_elem_write_key.skel.h" #include "for_each_multi_maps.skel.h" +#include "for_each_hash_modify.skel.h" static unsigned int duration; @@ -203,6 +204,40 @@ out: for_each_multi_maps__destroy(skel); } +static void test_hash_modify(void) +{ + struct for_each_hash_modify *skel; + int max_entries, i, err; + __u64 key, val; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1 + ); + + skel = for_each_hash_modify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load")) + return; + + max_entries = bpf_map__max_entries(skel->maps.hashmap); + for (i = 0; i < max_entries; i++) { + key = i; + val = i; + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); + if (!ASSERT_OK(err, "map_update")) + goto out; + } + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_OK(topts.retval, "retval"); + +out: + for_each_hash_modify__destroy(skel); +} + void test_for_each(void) { if (test__start_subtest("hash_map")) @@ -213,4 +248,6 @@ void test_for_each(void) test_write_map_key(); if (test__start_subtest("multi_maps")) test_multi_maps(); + if (test__start_subtest("hash_modify")) + test_hash_modify(); } diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c index b7b77a6b2979..0de8facca4c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/free_timer.c +++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c @@ -124,6 +124,10 @@ void test_free_timer(void) int err; skel = free_timer__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "open_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c index 5a0b51157451..43a26ec69a8e 100644 --- a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c +++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c @@ -8,11 +8,12 @@ #include <unistd.h> #include <test_progs.h> #include "test_get_xattr.skel.h" +#include "test_set_remove_xattr.skel.h" #include "test_fsverity.skel.h" static const char testfile[] = "/tmp/test_progs_fs_kfuncs"; -static void test_xattr(void) +static void test_get_xattr(const char *name, const char *value, bool allow_access) { struct test_get_xattr *skel = NULL; int fd = -1, err; @@ -25,7 +26,7 @@ static void test_xattr(void) close(fd); fd = -1; - err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0); + err = setxattr(testfile, name, value, strlen(value) + 1, 0); if (err && errno == EOPNOTSUPP) { printf("%s:SKIP:local fs doesn't support xattr (%d)\n" "To run this test, make sure /tmp filesystem supports xattr.\n", @@ -48,16 +49,23 @@ static void test_xattr(void) goto out; fd = open(testfile, O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "open_file")) goto out; - ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file"); - /* Trigger security_inode_getxattr */ - err = getxattr(testfile, "user.kfuncs", v, sizeof(v)); - ASSERT_EQ(err, -1, "getxattr_return"); - ASSERT_EQ(errno, EINVAL, "getxattr_errno"); - ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry"); + err = getxattr(testfile, name, v, sizeof(v)); + + if (allow_access) { + ASSERT_EQ(err, -1, "getxattr_return"); + ASSERT_EQ(errno, EINVAL, "getxattr_errno"); + ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file"); + ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry"); + } else { + ASSERT_EQ(err, strlen(value) + 1, "getxattr_return"); + ASSERT_EQ(skel->bss->found_xattr_from_file, 0, "found_xattr_from_file"); + ASSERT_EQ(skel->bss->found_xattr_from_dentry, 0, "found_xattr_from_dentry"); + } out: close(fd); @@ -65,6 +73,127 @@ out: remove(testfile); } +/* xattr value we will set to security.bpf.foo */ +static const char value_foo[] = "hello"; + +static void read_and_validate_foo(struct test_set_remove_xattr *skel) +{ + char value_out[32]; + int err; + + err = getxattr(testfile, skel->rodata->xattr_foo, value_out, sizeof(value_out)); + ASSERT_EQ(err, sizeof(value_foo), "getxattr size foo"); + ASSERT_EQ(strncmp(value_out, value_foo, sizeof(value_foo)), 0, "strncmp value_foo"); +} + +static void set_foo(struct test_set_remove_xattr *skel) +{ + ASSERT_OK(setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0), + "setxattr foo"); +} + +static void validate_bar_match(struct test_set_remove_xattr *skel) +{ + char value_out[32]; + int err; + + err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out)); + ASSERT_EQ(err, sizeof(skel->data->value_bar), "getxattr size bar"); + ASSERT_EQ(strncmp(value_out, skel->data->value_bar, sizeof(skel->data->value_bar)), 0, + "strncmp value_bar"); +} + +static void validate_bar_removed(struct test_set_remove_xattr *skel) +{ + char value_out[32]; + int err; + + err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out)); + ASSERT_LT(err, 0, "getxattr size bar should fail"); +} + +static void test_set_remove_xattr(void) +{ + struct test_set_remove_xattr *skel = NULL; + int fd = -1, err; + + fd = open(testfile, O_CREAT | O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "create_file")) + return; + + close(fd); + fd = -1; + + skel = test_set_remove_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_set_remove_xattr__open_and_load")) + return; + + /* Set security.bpf.foo to "hello" */ + err = setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0); + if (err && errno == EOPNOTSUPP) { + printf("%s:SKIP:local fs doesn't support xattr (%d)\n" + "To run this test, make sure /tmp filesystem supports xattr.\n", + __func__, errno); + test__skip(); + goto out; + } + + if (!ASSERT_OK(err, "setxattr")) + goto out; + + skel->bss->monitored_pid = getpid(); + err = test_set_remove_xattr__attach(skel); + if (!ASSERT_OK(err, "test_set_remove_xattr__attach")) + goto out; + + /* First, test not _locked version of the kfuncs with getxattr. */ + + /* Read security.bpf.foo and trigger test_inode_getxattr. This + * bpf program will set security.bpf.bar to "world". + */ + read_and_validate_foo(skel); + validate_bar_match(skel); + + /* Read security.bpf.foo and trigger test_inode_getxattr again. + * This will remove xattr security.bpf.bar. + */ + read_and_validate_foo(skel); + validate_bar_removed(skel); + + ASSERT_TRUE(skel->bss->set_security_bpf_bar_success, "set_security_bpf_bar_success"); + ASSERT_TRUE(skel->bss->remove_security_bpf_bar_success, "remove_security_bpf_bar_success"); + ASSERT_TRUE(skel->bss->set_security_selinux_fail, "set_security_selinux_fail"); + ASSERT_TRUE(skel->bss->remove_security_selinux_fail, "remove_security_selinux_fail"); + + /* Second, test _locked version of the kfuncs, with setxattr */ + + /* Set security.bpf.foo and trigger test_inode_setxattr. This + * bpf program will set security.bpf.bar to "world". + */ + set_foo(skel); + validate_bar_match(skel); + + /* Set security.bpf.foo and trigger test_inode_setxattr again. + * This will remove xattr security.bpf.bar. + */ + set_foo(skel); + validate_bar_removed(skel); + + ASSERT_TRUE(skel->bss->locked_set_security_bpf_bar_success, + "locked_set_security_bpf_bar_success"); + ASSERT_TRUE(skel->bss->locked_remove_security_bpf_bar_success, + "locked_remove_security_bpf_bar_success"); + ASSERT_TRUE(skel->bss->locked_set_security_selinux_fail, + "locked_set_security_selinux_fail"); + ASSERT_TRUE(skel->bss->locked_remove_security_selinux_fail, + "locked_remove_security_selinux_fail"); + +out: + close(fd); + test_set_remove_xattr__destroy(skel); + remove(testfile); +} + #ifndef SHA256_DIGEST_SIZE #define SHA256_DIGEST_SIZE 32 #endif @@ -141,8 +270,21 @@ out: void test_fs_kfuncs(void) { - if (test__start_subtest("xattr")) - test_xattr(); + /* Matches xattr_names in progs/test_get_xattr.c */ + if (test__start_subtest("user_xattr")) + test_get_xattr("user.kfuncs", "hello", true); + + if (test__start_subtest("security_bpf_xattr")) + test_get_xattr("security.bpf.xxx", "hello", true); + + if (test__start_subtest("security_bpf_xattr_error")) + test_get_xattr("security.bpf", "hello", false); + + if (test__start_subtest("security_selinux_xattr_error")) + test_get_xattr("security.selinux", "hello", false); + + if (test__start_subtest("set_remove_xattr")) + test_set_remove_xattr(); if (test__start_subtest("fsverity")) test_fsverity(); diff --git a/tools/testing/selftests/bpf/prog_tests/fsession_test.c b/tools/testing/selftests/bpf/prog_tests/fsession_test.c new file mode 100644 index 000000000000..a299aeb8cc2e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fsession_test.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 ChinaTelecom */ +#include <test_progs.h> +#include "fsession_test.skel.h" + +static int check_result(struct fsession_test *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* Trigger test function calls */ + prog_fd = bpf_program__fd(skel->progs.test1); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return err; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return topts.retval; + + for (int i = 0; i < sizeof(*skel->bss) / sizeof(__u64); i++) { + if (!ASSERT_EQ(((__u64 *)skel->bss)[i], 1, "test_result")) + return -EINVAL; + } + + return 0; +} + +static void test_fsession_basic(void) +{ + struct fsession_test *skel = NULL; + int err; + + skel = fsession_test__open(); + if (!ASSERT_OK_PTR(skel, "fsession_test__open")) + return; + + err = fsession_test__load(skel); + if (err == -EOPNOTSUPP) { + test__skip(); + goto cleanup; + } + if (!ASSERT_OK(err, "fsession_test__load")) + goto cleanup; + + err = fsession_test__attach(skel); + if (!ASSERT_OK(err, "fsession_attach")) + goto cleanup; + + check_result(skel); +cleanup: + fsession_test__destroy(skel); +} + +static void test_fsession_reattach(void) +{ + struct fsession_test *skel = NULL; + int err; + + skel = fsession_test__open(); + if (!ASSERT_OK_PTR(skel, "fsession_test__open")) + return; + + err = fsession_test__load(skel); + if (err == -EOPNOTSUPP) { + test__skip(); + goto cleanup; + } + if (!ASSERT_OK(err, "fsession_test__load")) + goto cleanup; + + /* first attach */ + err = fsession_test__attach(skel); + if (!ASSERT_OK(err, "fsession_first_attach")) + goto cleanup; + + if (check_result(skel)) + goto cleanup; + + /* detach */ + fsession_test__detach(skel); + + /* reset counters */ + memset(skel->bss, 0, sizeof(*skel->bss)); + + /* second attach */ + err = fsession_test__attach(skel); + if (!ASSERT_OK(err, "fsession_second_attach")) + goto cleanup; + + if (check_result(skel)) + goto cleanup; + +cleanup: + fsession_test__destroy(skel); +} + +static void test_fsession_cookie(void) +{ + struct fsession_test *skel = NULL; + int err; + + skel = fsession_test__open(); + if (!ASSERT_OK_PTR(skel, "fsession_test__open")) + goto cleanup; + + /* + * The test_fsession_basic() will test the session cookie with + * bpf_get_func_ip() case, so we need only check + * the cookie without bpf_get_func_ip() case here + */ + bpf_program__set_autoload(skel->progs.test6, false); + + err = fsession_test__load(skel); + if (err == -EOPNOTSUPP) { + test__skip(); + goto cleanup; + } + if (!ASSERT_OK(err, "fsession_test__load")) + goto cleanup; + + err = fsession_test__attach(skel); + if (!ASSERT_OK(err, "fsession_attach")) + goto cleanup; + + skel->bss->test6_entry_result = 1; + skel->bss->test6_exit_result = 1; + + check_result(skel); +cleanup: + fsession_test__destroy(skel); +} + +void test_fsession_test(void) +{ + if (test__start_subtest("fsession_test")) + test_fsession_basic(); + if (test__start_subtest("fsession_reattach")) + test_fsession_reattach(); + if (test__start_subtest("fsession_cookie")) + test_fsession_cookie(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c index 64a9c95d4acf..7bf8adc41e99 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> #include "get_func_args_test.skel.h" +#include "get_func_args_fsession_test.skel.h" void test_get_func_args_test(void) { @@ -33,12 +34,38 @@ void test_get_func_args_test(void) ASSERT_EQ(topts.retval >> 16, 1, "test_run"); ASSERT_EQ(topts.retval & 0xffff, 1234 + 29, "test_run"); + ASSERT_OK(trigger_module_test_read(1), "trigger_read"); ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); ASSERT_EQ(skel->bss->test2_result, 1, "test2_result"); ASSERT_EQ(skel->bss->test3_result, 1, "test3_result"); ASSERT_EQ(skel->bss->test4_result, 1, "test4_result"); + ASSERT_EQ(skel->bss->test5_result, 1, "test5_result"); + ASSERT_EQ(skel->bss->test6_result, 1, "test6_result"); cleanup: get_func_args_test__destroy(skel); } + +void test_get_func_args_fsession_test(void) +{ + struct get_func_args_fsession_test *skel = NULL; + int err; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = get_func_args_fsession_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "get_func_args_fsession_test__open_and_load")) + return; + + err = get_func_args_fsession_test__attach(skel); + if (!ASSERT_OK(err, "get_func_args_fsession_test__attach")) + goto cleanup; + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test1), &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); +cleanup: + get_func_args_fsession_test__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c index c40242dfa8fb..357fdedfea93 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c @@ -2,6 +2,7 @@ #include <test_progs.h> #include "get_func_ip_test.skel.h" #include "get_func_ip_uprobe_test.skel.h" +#include "get_func_ip_fsession_test.skel.h" static noinline void uprobe_trigger(void) { @@ -137,3 +138,28 @@ void test_get_func_ip_test(void) test_function_entry(); test_function_body(); } + +void test_get_func_ip_fsession_test(void) +{ + struct get_func_ip_fsession_test *skel = NULL; + int err; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = get_func_ip_fsession_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "get_func_ip_fsession_test__open_and_load")) + return; + + err = get_func_ip_fsession_test__attach(skel); + if (!ASSERT_OK(err, "get_func_ip_fsession_test__attach")) + goto cleanup; + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test1), &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + ASSERT_EQ(skel->bss->test1_entry_result, 1, "test1_entry_result"); + ASSERT_EQ(skel->bss->test1_exit_result, 1, "test1_exit_result"); + +cleanup: + get_func_ip_fsession_test__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/htab_reuse.c b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c index a742dd994d60..d7c3df165adc 100644 --- a/tools/testing/selftests/bpf/prog_tests/htab_reuse.c +++ b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c @@ -59,7 +59,7 @@ static void *htab_update_fn(void *arg) return NULL; } -void test_htab_reuse(void) +static void test_htab_reuse_basic(void) { unsigned int i, wr_nr = 1, rd_nr = 4; pthread_t tids[wr_nr + rd_nr]; @@ -99,3 +99,170 @@ reap: } htab_reuse__destroy(skel); } + +/* + * Writes consistency test for BPF_F_LOCK update + * + * The race: + * 1. Thread A: BPF_F_LOCK|BPF_EXIST update + * 2. Thread B: delete element then update it with BPF_ANY + */ + +struct htab_val_large { + struct bpf_spin_lock lock; + __u32 seq; + __u64 data[256]; +}; + +struct consistency_ctx { + int fd; + int start_fd; + int loop; + volatile bool torn_write; +}; + +static void wait_for_start(int fd) +{ + char buf; + + read(fd, &buf, 1); +} + +static void *locked_update_fn(void *arg) +{ + struct consistency_ctx *ctx = arg; + struct htab_val_large value; + unsigned int key = 1; + int i; + + memset(&value, 0xAA, sizeof(value)); + wait_for_start(ctx->start_fd); + + for (i = 0; i < ctx->loop; i++) { + value.seq = i; + bpf_map_update_elem(ctx->fd, &key, &value, + BPF_F_LOCK | BPF_EXIST); + } + + return NULL; +} + +/* Delete + update: removes the element then re-creates it with BPF_ANY. */ +static void *delete_update_fn(void *arg) +{ + struct consistency_ctx *ctx = arg; + struct htab_val_large value; + unsigned int key = 1; + int i; + + memset(&value, 0xBB, sizeof(value)); + + wait_for_start(ctx->start_fd); + + for (i = 0; i < ctx->loop; i++) { + value.seq = i; + bpf_map_delete_elem(ctx->fd, &key); + bpf_map_update_elem(ctx->fd, &key, &value, BPF_ANY | BPF_F_LOCK); + } + + return NULL; +} + +static void *locked_lookup_fn(void *arg) +{ + struct consistency_ctx *ctx = arg; + struct htab_val_large value; + unsigned int key = 1; + int i, j; + + wait_for_start(ctx->start_fd); + + for (i = 0; i < ctx->loop && !ctx->torn_write; i++) { + if (bpf_map_lookup_elem_flags(ctx->fd, &key, &value, BPF_F_LOCK)) + continue; + + for (j = 0; j < 256; j++) { + if (value.data[j] != value.data[0]) { + ctx->torn_write = true; + return NULL; + } + } + } + + return NULL; +} + +static void test_htab_reuse_consistency(void) +{ + int threads_total = 6, threads = 2; + pthread_t tids[threads_total]; + struct consistency_ctx ctx; + struct htab_val_large seed; + struct htab_reuse *skel; + unsigned int key = 1, i; + int pipefd[2]; + int err; + + skel = htab_reuse__open_and_load(); + if (!ASSERT_OK_PTR(skel, "htab_reuse__open_and_load")) + return; + + if (!ASSERT_OK(pipe(pipefd), "pipe")) + goto out; + + ctx.fd = bpf_map__fd(skel->maps.htab_lock_consistency); + ctx.start_fd = pipefd[0]; + ctx.loop = 100000; + ctx.torn_write = false; + + /* Seed the element so locked updaters have something to find */ + memset(&seed, 0xBB, sizeof(seed)); + err = bpf_map_update_elem(ctx.fd, &key, &seed, BPF_ANY); + if (!ASSERT_OK(err, "seed_element")) + goto close_pipe; + + memset(tids, 0, sizeof(tids)); + for (i = 0; i < threads; i++) { + err = pthread_create(&tids[i], NULL, locked_update_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) + goto stop; + } + for (i = 0; i < threads; i++) { + err = pthread_create(&tids[threads + i], NULL, delete_update_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) + goto stop; + } + for (i = 0; i < threads; i++) { + err = pthread_create(&tids[threads * 2 + i], NULL, locked_lookup_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) + goto stop; + } + + /* Release all threads simultaneously */ + close(pipefd[1]); + pipefd[1] = -1; + +stop: + for (i = 0; i < threads_total; i++) { + if (!tids[i]) + continue; + pthread_join(tids[i], NULL); + } + + ASSERT_FALSE(ctx.torn_write, "no torn writes detected"); + +close_pipe: + if (pipefd[1] >= 0) + close(pipefd[1]); + close(pipefd[0]); +out: + htab_reuse__destroy(skel); +} + +void test_htab_reuse(void) +{ + if (test__start_subtest("basic")) + test_htab_reuse_basic(); + if (test__start_subtest("consistency")) + test_htab_reuse_consistency(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c index 2bc85f4814f4..ea1a6766fbe9 100644 --- a/tools/testing/selftests/bpf/prog_tests/htab_update.c +++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c @@ -15,17 +15,17 @@ struct htab_update_ctx { static void test_reenter_update(void) { struct htab_update *skel; - unsigned int key, value; + void *value = NULL; + unsigned int key, value_size; int err; skel = htab_update__open(); if (!ASSERT_OK_PTR(skel, "htab_update__open")) return; - /* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */ - bpf_program__set_autoload(skel->progs.lookup_elem_raw, true); + bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true); err = htab_update__load(skel); - if (!ASSERT_TRUE(!err || err == -ESRCH, "htab_update__load") || err) + if (!ASSERT_TRUE(!err, "htab_update__load") || err) goto out; skel->bss->pid = getpid(); @@ -33,15 +33,35 @@ static void test_reenter_update(void) if (!ASSERT_OK(err, "htab_update__attach")) goto out; - /* Will trigger the reentrancy of bpf_map_update_elem() */ + value_size = bpf_map__value_size(skel->maps.htab); + + value = calloc(1, value_size); + if (!ASSERT_OK_PTR(value, "calloc value")) + goto out; + /* + * First update: plain insert. This should NOT trigger the re-entrancy + * path, because there is no old element to free yet. + */ key = 0; - value = 0; - err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0); - if (!ASSERT_OK(err, "add element")) + err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY); + if (!ASSERT_OK(err, "first update (insert)")) + goto out; + + /* + * Second update: replace existing element with same key and trigger + * the reentrancy of bpf_map_update_elem(). + * check_and_free_fields() calls bpf_obj_free_fields() on the old + * value, which is where fentry program runs and performs a nested + * bpf_map_update_elem(), triggering -EDEADLK. + */ + memset(value, 0, value_size); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY); + if (!ASSERT_OK(err, "second update (replace)")) goto out; - ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy"); + ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy"); out: + free(value); htab_update__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/iter_buf_null_fail.c b/tools/testing/selftests/bpf/prog_tests/iter_buf_null_fail.c new file mode 100644 index 000000000000..ea97787b870d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/iter_buf_null_fail.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "iter_buf_null_fail.skel.h" + +void test_iter_buf_null_fail(void) +{ + RUN_TESTS(iter_buf_null_fail); +} diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 3cea71f9c500..a539980a2fbe 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -253,6 +253,11 @@ static void subtest_css_iters(void) { "/cg1/cg2" }, { "/cg1/cg2/cg3" }, { "/cg1/cg2/cg3/cg4" }, + { "/cg1/cg5" }, + { "/cg1/cg5/cg6" }, + { "/cg1/cg7" }, + { "/cg1/cg7/cg8" }, + { "/cg1/cg7/cg8/cg9" }, }; int err, cg_nr = ARRAY_SIZE(cgs); int i; @@ -284,7 +289,8 @@ static void subtest_css_iters(void) ASSERT_EQ(skel->bss->post_order_cnt, cg_nr, "post_order_cnt"); ASSERT_EQ(skel->bss->last_cg_id, get_cgroup_id(cgs[0].path), "last_cg_id"); - ASSERT_EQ(skel->bss->tree_high, cg_nr - 1, "tree_high"); + ASSERT_EQ(skel->bss->children_cnt, 3, "children_cnt"); + ASSERT_EQ(skel->bss->tree_high, 3, "tree_high"); iters_css__detach(skel); cleanup: cleanup_cgroup_environment(); diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c new file mode 100644 index 000000000000..97b00c7efe94 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Microsoft */ +#include <test_progs.h> +#include "kfunc_call_test.skel.h" +#include "kfunc_call_test.lskel.h" +#include "test_kernel_flag.skel.h" + +void test_kernel_flag(void) +{ + struct test_kernel_flag *lsm_skel; + struct kfunc_call_test *skel = NULL; + struct kfunc_call_test_lskel *lskel = NULL; + int ret; + + lsm_skel = test_kernel_flag__open_and_load(); + if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel")) + return; + + lsm_skel->bss->monitored_tid = sys_gettid(); + + ret = test_kernel_flag__attach(lsm_skel); + if (!ASSERT_OK(ret, "test_kernel_flag__attach")) + goto close_prog; + + /* Test with skel. This should pass the gatekeeper */ + skel = kfunc_call_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + goto close_prog; + + /* Test with lskel. This should fail due to blocking kernel-based bpf() invocations */ + lskel = kfunc_call_test_lskel__open_and_load(); + if (!ASSERT_ERR_PTR(lskel, "lskel")) + goto close_prog; + +close_prog: + if (skel) + kfunc_call_test__destroy(skel); + if (lskel) + kfunc_call_test_lskel__destroy(lskel); + + lsm_skel->bss->monitored_tid = 0; + test_kernel_flag__destroy(lsm_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index f79c8e53cb3e..62f3fb79f5d1 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -74,6 +74,8 @@ static struct kfunc_test_params kfunc_tests[] = { TC_TEST(kfunc_call_test1, 12), TC_TEST(kfunc_call_test2, 3), TC_TEST(kfunc_call_test4, -1234), + TC_TEST(kfunc_call_test5, 0), + TC_TEST(kfunc_call_test5_asm, 0), TC_TEST(kfunc_call_test_ref_btf_id, 0), TC_TEST(kfunc_call_test_get_mem, 42), SYSCALL_TEST(kfunc_syscall_test, 0), diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c index 8cd298b78e44..04aaf4c9cf5e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c @@ -14,7 +14,7 @@ static struct { const char *prog_name; int expected_runtime_err; } kfunc_dynptr_tests[] = { - {"dynptr_data_null", -EBADMSG}, + {"dynptr_data_null", -EINVAL}, }; static bool kfunc_not_supported; diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c b/tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c new file mode 100644 index 000000000000..5e4793c9c29a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_implicit_args.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "kfunc_implicit_args.skel.h" + +void test_kfunc_implicit_args(void) +{ + RUN_TESTS(kfunc_implicit_args); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c index 8e13a3416a21..399fe9103f83 100644 --- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c @@ -57,7 +57,8 @@ static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel) if (!ASSERT_OK(ret, "kmem_cache_lookup")) break; - ASSERT_STREQ(r.name, name, "kmem_cache_name"); + ASSERT_STRNEQ(r.name, name, sizeof(r.name) - 1, + "kmem_cache_name"); ASSERT_EQ(r.obj_size, objsize, "kmem_cache_objsize"); seen++; @@ -103,11 +104,8 @@ void test_kmem_cache_iter(void) if (!ASSERT_GE(iter_fd, 0, "iter_create")) goto destroy; - memset(buf, 0, sizeof(buf)); - while (read(iter_fd, buf, sizeof(buf) > 0)) { - /* Read out all contents */ - printf("%s", buf); - } + while (read(iter_fd, buf, sizeof(buf)) > 0) + ; /* Read out all contents */ /* Next reads should return 0 */ ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read"); diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index e19ef509ebf8..2e0ddef77ba5 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <sys/prctl.h> #include <test_progs.h> #include "kprobe_multi.skel.h" #include "trace_helpers.h" @@ -7,6 +9,8 @@ #include "kprobe_multi_session.skel.h" #include "kprobe_multi_session_cookie.skel.h" #include "kprobe_multi_verifier.skel.h" +#include "kprobe_write_ctx.skel.h" +#include "kprobe_multi_sleepable.skel.h" #include "bpf/libbpf_internal.h" #include "bpf/hashmap.h" @@ -217,7 +221,9 @@ static void test_attach_api_syms(void) static void test_attach_api_fails(void) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + LIBBPF_OPTS(bpf_test_run_opts, topts); struct kprobe_multi *skel = NULL; + struct kprobe_multi_sleepable *sl_skel = NULL; struct bpf_link *link = NULL; unsigned long long addrs[2]; const char *syms[2] = { @@ -225,7 +231,7 @@ static void test_attach_api_fails(void) "bpf_fentry_test2", }; __u64 cookies[2]; - int saved_error; + int saved_error, err; addrs[0] = ksym_get_addr("bpf_fentry_test1"); addrs[1] = ksym_get_addr("bpf_fentry_test2"); @@ -324,9 +330,63 @@ static void test_attach_api_fails(void) if (!ASSERT_EQ(saved_error, -E2BIG, "fail_6_error")) goto cleanup; + /* fail_7 - non-existent wildcard pattern (slow path) */ + LIBBPF_OPTS_RESET(opts); + + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, + "__nonexistent_func_xyz_*", + &opts); + saved_error = -errno; + if (!ASSERT_ERR_PTR(link, "fail_7")) + goto cleanup; + + if (!ASSERT_EQ(saved_error, -ENOENT, "fail_7_error")) + goto cleanup; + + /* fail_8 - non-existent exact name (fast path), same error as wildcard */ + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, + "__nonexistent_func_xyz_123", + &opts); + saved_error = -errno; + if (!ASSERT_ERR_PTR(link, "fail_8")) + goto cleanup; + + if (!ASSERT_EQ(saved_error, -ENOENT, "fail_8_error")) + goto cleanup; + + /* fail_9 - sleepable kprobe multi should not attach */ + sl_skel = kprobe_multi_sleepable__open(); + if (!ASSERT_OK_PTR(sl_skel, "sleep_skel_open")) + goto cleanup; + + sl_skel->bss->user_ptr = sl_skel; + + err = bpf_program__set_flags(sl_skel->progs.handle_kprobe_multi_sleepable, + BPF_F_SLEEPABLE); + if (!ASSERT_OK(err, "sleep_skel_set_flags")) + goto cleanup; + + err = kprobe_multi_sleepable__load(sl_skel); + if (!ASSERT_OK(err, "sleep_skel_load")) + goto cleanup; + + link = bpf_program__attach_kprobe_multi_opts(sl_skel->progs.handle_kprobe_multi_sleepable, + "bpf_fentry_test1", NULL); + saved_error = -errno; + + if (!ASSERT_ERR_PTR(link, "fail_9")) + goto cleanup; + + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_9_error")) + goto cleanup; + + err = bpf_prog_test_run_opts(bpf_program__fd(sl_skel->progs.fentry), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + cleanup: bpf_link__destroy(link); kprobe_multi__destroy(skel); + kprobe_multi_sleepable__destroy(sl_skel); } static void test_session_skel_api(void) @@ -352,8 +412,13 @@ static void test_session_skel_api(void) ASSERT_OK(err, "test_run"); ASSERT_EQ(topts.retval, 0, "test_run"); - /* bpf_fentry_test1-4 trigger return probe, result is 2 */ - for (i = 0; i < 4; i++) + /* + * bpf_fentry_test1 is hit by both the wildcard probe and the exact + * name probe (test_kprobe_syms), so entry + return fires twice: 4. + * bpf_fentry_test2-4 are hit only by the wildcard probe: 2. + */ + ASSERT_EQ(skel->bss->kprobe_session_result[0], 4, "kprobe_session_result"); + for (i = 1; i < 4; i++) ASSERT_EQ(skel->bss->kprobe_session_result[i], 2, "kprobe_session_result"); /* bpf_fentry_test5-8 trigger only entry probe, result is 1 */ @@ -422,220 +487,6 @@ static void test_unique_match(void) kprobe_multi__destroy(skel); } -static size_t symbol_hash(long key, void *ctx __maybe_unused) -{ - return str_hash((const char *) key); -} - -static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) -{ - return strcmp((const char *) key1, (const char *) key2) == 0; -} - -static bool is_invalid_entry(char *buf, bool kernel) -{ - if (kernel && strchr(buf, '[')) - return true; - if (!kernel && !strchr(buf, '[')) - return true; - return false; -} - -static bool skip_entry(char *name) -{ - /* - * We attach to almost all kernel functions and some of them - * will cause 'suspicious RCU usage' when fprobe is attached - * to them. Filter out the current culprits - arch_cpu_idle - * default_idle and rcu_* functions. - */ - if (!strcmp(name, "arch_cpu_idle")) - return true; - if (!strcmp(name, "default_idle")) - return true; - if (!strncmp(name, "rcu_", 4)) - return true; - if (!strcmp(name, "bpf_dispatcher_xdp_func")) - return true; - if (!strncmp(name, "__ftrace_invalid_address__", - sizeof("__ftrace_invalid_address__") - 1)) - return true; - return false; -} - -/* Do comparision by ignoring '.llvm.<hash>' suffixes. */ -static int compare_name(const char *name1, const char *name2) -{ - const char *res1, *res2; - int len1, len2; - - res1 = strstr(name1, ".llvm."); - res2 = strstr(name2, ".llvm."); - len1 = res1 ? res1 - name1 : strlen(name1); - len2 = res2 ? res2 - name2 : strlen(name2); - - if (len1 == len2) - return strncmp(name1, name2, len1); - if (len1 < len2) - return strncmp(name1, name2, len1) <= 0 ? -1 : 1; - return strncmp(name1, name2, len2) >= 0 ? 1 : -1; -} - -static int load_kallsyms_compare(const void *p1, const void *p2) -{ - return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name); -} - -static int search_kallsyms_compare(const void *p1, const struct ksym *p2) -{ - return compare_name(p1, p2->name); -} - -static int get_syms(char ***symsp, size_t *cntp, bool kernel) -{ - size_t cap = 0, cnt = 0; - char *name = NULL, *ksym_name, **syms = NULL; - struct hashmap *map; - struct ksyms *ksyms; - struct ksym *ks; - char buf[256]; - FILE *f; - int err = 0; - - ksyms = load_kallsyms_custom_local(load_kallsyms_compare); - if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local")) - return -EINVAL; - - /* - * The available_filter_functions contains many duplicates, - * but other than that all symbols are usable in kprobe multi - * interface. - * Filtering out duplicates by using hashmap__add, which won't - * add existing entry. - */ - - if (access("/sys/kernel/tracing/trace", F_OK) == 0) - f = fopen("/sys/kernel/tracing/available_filter_functions", "r"); - else - f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); - - if (!f) - return -EINVAL; - - map = hashmap__new(symbol_hash, symbol_equal, NULL); - if (IS_ERR(map)) { - err = libbpf_get_error(map); - goto error; - } - - while (fgets(buf, sizeof(buf), f)) { - if (is_invalid_entry(buf, kernel)) - continue; - - free(name); - if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) - continue; - if (skip_entry(name)) - continue; - - ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare); - if (!ks) { - err = -EINVAL; - goto error; - } - - ksym_name = ks->name; - err = hashmap__add(map, ksym_name, 0); - if (err == -EEXIST) { - err = 0; - continue; - } - if (err) - goto error; - - err = libbpf_ensure_mem((void **) &syms, &cap, - sizeof(*syms), cnt + 1); - if (err) - goto error; - - syms[cnt++] = ksym_name; - } - - *symsp = syms; - *cntp = cnt; - -error: - free(name); - fclose(f); - hashmap__free(map); - if (err) - free(syms); - return err; -} - -static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel) -{ - unsigned long *addr, *addrs, *tmp_addrs; - int err = 0, max_cnt, inc_cnt; - char *name = NULL; - size_t cnt = 0; - char buf[256]; - FILE *f; - - if (access("/sys/kernel/tracing/trace", F_OK) == 0) - f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r"); - else - f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r"); - - if (!f) - return -ENOENT; - - /* In my local setup, the number of entries is 50k+ so Let us initially - * allocate space to hold 64k entries. If 64k is not enough, incrementally - * increase 1k each time. - */ - max_cnt = 65536; - inc_cnt = 1024; - addrs = malloc(max_cnt * sizeof(long)); - if (addrs == NULL) { - err = -ENOMEM; - goto error; - } - - while (fgets(buf, sizeof(buf), f)) { - if (is_invalid_entry(buf, kernel)) - continue; - - free(name); - if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2) - continue; - if (skip_entry(name)) - continue; - - if (cnt == max_cnt) { - max_cnt += inc_cnt; - tmp_addrs = realloc(addrs, max_cnt); - if (!tmp_addrs) { - err = -ENOMEM; - goto error; - } - addrs = tmp_addrs; - } - - addrs[cnt++] = (unsigned long)addr; - } - - *addrsp = addrs; - *cntp = cnt; - -error: - free(name); - fclose(f); - if (err) - free(addrs); - return err; -} - static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts) { long attach_start_ns, attach_end_ns; @@ -667,25 +518,23 @@ static void test_kprobe_multi_bench_attach(bool kernel) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct kprobe_multi_empty *skel = NULL; - char **syms = NULL; - size_t cnt = 0; + struct ksyms *ksyms = NULL; - if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms")) + if (!ASSERT_OK(bpf_get_ksyms(&ksyms, kernel), "bpf_get_ksyms")) return; skel = kprobe_multi_empty__open_and_load(); if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) goto cleanup; - opts.syms = (const char **) syms; - opts.cnt = cnt; + opts.syms = (const char **)ksyms->filtered_syms; + opts.cnt = ksyms->filtered_cnt; do_bench_test(skel, &opts); cleanup: kprobe_multi_empty__destroy(skel); - if (syms) - free(syms); + free_kallsyms_local(ksyms); } static void test_kprobe_multi_bench_attach_addr(bool kernel) @@ -696,13 +545,13 @@ static void test_kprobe_multi_bench_attach_addr(bool kernel) size_t cnt = 0; int err; - err = get_addrs(&addrs, &cnt, kernel); + err = bpf_get_addrs(&addrs, &cnt, kernel); if (err == -ENOENT) { test__skip(); return; } - if (!ASSERT_OK(err, "get_addrs")) + if (!ASSERT_OK(err, "bpf_get_addrs")) return; skel = kprobe_multi_empty__open_and_load(); @@ -753,6 +602,108 @@ cleanup: kprobe_multi_override__destroy(skel); } +static void test_override(void) +{ + struct kprobe_multi_override *skel = NULL; + int err; + + skel = kprobe_multi_override__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) + goto cleanup; + + skel->bss->pid = getpid(); + + /* no override */ + err = prctl(0xffff, 0); + ASSERT_EQ(err, -1, "err"); + + /* kprobe.multi override */ + skel->links.test_override = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override, + SYS_PREFIX "sys_prctl", NULL); + if (!ASSERT_OK_PTR(skel->links.test_override, "bpf_program__attach_kprobe_multi_opts")) + goto cleanup; + + err = prctl(0xffff, 0); + ASSERT_EQ(err, 123, "err"); + + bpf_link__destroy(skel->links.test_override); + skel->links.test_override = NULL; + + /* kprobe override */ + skel->links.test_kprobe_override = bpf_program__attach_kprobe(skel->progs.test_kprobe_override, + false, SYS_PREFIX "sys_prctl"); + if (!ASSERT_OK_PTR(skel->links.test_kprobe_override, "bpf_program__attach_kprobe")) + goto cleanup; + + err = prctl(0xffff, 0); + ASSERT_EQ(err, 123, "err"); + +cleanup: + kprobe_multi_override__destroy(skel); +} + +#ifdef __x86_64__ +static void test_attach_write_ctx(void) +{ + struct kprobe_write_ctx *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_write_ctx__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load")) + return; + + link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_multi_write_ctx, + "bpf_fentry_test1", NULL); + if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts")) + bpf_link__destroy(link); + + kprobe_write_ctx__destroy(skel); +} +#else +static void test_attach_write_ctx(void) +{ + test__skip(); +} +#endif + +/* + * Test kprobe_multi handles shadow symbols (vmlinux + module duplicate). + * bpf_fentry_shadow_test exists in both vmlinux and bpf_testmod. + * kprobe_multi resolves via ftrace_lookup_symbols() which finds the + * vmlinux symbol first and stops, so this should always succeed. + */ +static void test_attach_probe_dup_sym(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + const char *syms[1] = { "bpf_fentry_shadow_test" }; + struct kprobe_multi *skel = NULL; + struct bpf_link *link1 = NULL, *link2 = NULL; + + skel = kprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi__open_and_load")) + goto cleanup; + + skel->bss->pid = getpid(); + opts.syms = syms; + opts.cnt = ARRAY_SIZE(syms); + + link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, + NULL, &opts); + if (!ASSERT_OK_PTR(link1, "attach_kprobe_multi_dup_sym")) + goto cleanup; + + opts.retprobe = true; + link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe_manual, + NULL, &opts); + if (!ASSERT_OK_PTR(link2, "attach_kretprobe_multi_dup_sym")) + goto cleanup; + +cleanup: + bpf_link__destroy(link2); + bpf_link__destroy(link1); + kprobe_multi__destroy(skel); +} + void serial_test_kprobe_multi_bench_attach(void) { if (test__start_subtest("kernel")) @@ -786,11 +737,17 @@ void test_kprobe_multi_test(void) test_attach_api_fails(); if (test__start_subtest("attach_override")) test_attach_override(); + if (test__start_subtest("override")) + test_override(); if (test__start_subtest("session")) test_session_skel_api(); if (test__start_subtest("session_cookie")) test_session_cookie_skel_api(); if (test__start_subtest("unique_match")) test_unique_match(); + if (test__start_subtest("attach_write_ctx")) + test_attach_write_ctx(); + if (test__start_subtest("dup_sym")) + test_attach_probe_dup_sym(); RUN_TESTS(kprobe_multi_verifier); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 77d07e0a4a55..6f25b5f39a79 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -7,6 +7,7 @@ #include "linked_list.skel.h" #include "linked_list_fail.skel.h" +#include "linked_list_peek.skel.h" static char log_buf[1024 * 1024]; @@ -71,7 +72,7 @@ static struct { { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" }, { "obj_new_acq", "Unreleased reference id=" }, { "use_after_drop", "invalid mem access 'scalar'" }, - { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" }, + { "ptr_walk_scalar", "type=rdonly_untrusted_mem expected=percpu_ptr_" }, { "direct_read_lock", "direct access to bpf_spin_lock is disallowed" }, { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" }, { "direct_read_head", "direct access to bpf_list_head is disallowed" }, @@ -86,12 +87,12 @@ static struct { { "incorrect_value_type", "operation on bpf_list_head expects arg#1 bpf_list_node at offset=48 in struct foo, " "but arg is at offset=0 in struct bar" }, - { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, + { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0x1ffffffff) disallowed" }, { "incorrect_node_off1", "bpf_list_node not found at offset=49" }, { "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=48 in struct foo" }, { "no_head_type", "bpf_list_head not found at offset=0" }, { "incorrect_head_var_off1", "R1 doesn't have constant offset" }, - { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, + { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0x1ffffffff) disallowed" }, { "incorrect_head_off1", "bpf_list_head not found at offset=25" }, { "incorrect_head_off2", "bpf_list_head not found at offset=1" }, { "pop_front_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" }, @@ -805,3 +806,8 @@ void test_linked_list(void) test_linked_list_success(LIST_IN_LIST, true); test_linked_list_success(TEST_ALL, false); } + +void test_linked_list_peek(void) +{ + RUN_TESTS(linked_list_peek); +} diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c new file mode 100644 index 000000000000..0a12af924a99 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "testing_helpers.h" +#include "livepatch_trampoline.skel.h" + +#define LIVEPATCH_ENABLED_PATH "/sys/kernel/livepatch/livepatch_sample/enabled" + +static int load_livepatch(void) +{ + char path[4096]; + + /* CI will set KBUILD_OUTPUT */ + snprintf(path, sizeof(path), "%s/samples/livepatch/livepatch-sample.ko", + getenv("KBUILD_OUTPUT") ? : "../../../.."); + + return load_module(path, env_verbosity > VERBOSE_NONE); +} + +static void unload_livepatch(void) +{ + /* Disable the livepatch before unloading the module */ + if (!access(LIVEPATCH_ENABLED_PATH, F_OK)) + system("echo 0 > " LIVEPATCH_ENABLED_PATH); + + unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE); +} + +static void read_proc_cmdline(void) +{ + char buf[4096]; + int fd, ret; + + fd = open("/proc/cmdline", O_RDONLY); + if (!ASSERT_OK_FD(fd, "open /proc/cmdline")) + return; + + ret = read(fd, buf, sizeof(buf)); + if (!ASSERT_GT(ret, 0, "read /proc/cmdline")) + goto out; + + ASSERT_OK(strncmp(buf, "this has been live patched", 26), "strncmp"); + +out: + close(fd); +} + +static void __test_livepatch_trampoline(bool fexit_first) +{ + struct livepatch_trampoline *skel = NULL; + int err; + + skel = livepatch_trampoline__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + skel->bss->my_pid = getpid(); + + if (!fexit_first) { + /* fentry program is loaded first by default */ + err = livepatch_trampoline__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + } else { + /* Manually load fexit program first. */ + skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline); + if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit")) + goto out; + + skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline); + if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry")) + goto out; + } + + read_proc_cmdline(); + + ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit"); + ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit"); +out: + livepatch_trampoline__destroy(skel); +} + +void test_livepatch_trampoline(void) +{ + int retry_cnt = 0; + int err; + + /* Skip if kernel was built without CONFIG_LIVEPATCH */ + if (access("/sys/kernel/livepatch", F_OK)) { + test__skip(); + return; + } + +retry: + err = load_livepatch(); + if (err) { + if (err == -ENOENT) { + test__skip(); + return; + } + + if (retry_cnt) { + ASSERT_OK(1, "load_livepatch"); + goto out; + } + /* + * Something else (previous run of the same test?) loaded + * the KLP module. Unload the KLP module and retry. + */ + unload_livepatch(); + retry_cnt++; + goto retry; + } + + if (test__start_subtest("fentry_first")) + __test_livepatch_trampoline(false); + + if (test__start_subtest("fexit_first")) + __test_livepatch_trampoline(true); +out: + unload_livepatch(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index 169ce689b97c..d6f14a232002 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -7,6 +7,10 @@ #include "test_log_buf.skel.h" #include "bpf_util.h" +#if !defined(__clang__) +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + static size_t libbpf_log_pos; static char libbpf_log_buf[1024 * 1024]; static bool libbpf_log_error; diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_bdev.c b/tools/testing/selftests/bpf/prog_tests/lsm_bdev.c new file mode 100644 index 000000000000..a970798e1173 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lsm_bdev.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */ + +/* + * Test BPF LSM block device integrity hooks with dm-verity. + * + * Creates a dm-verity device over loopback, which triggers + * security_bdev_setintegrity() during verity_preresume(). + * Verifies that the BPF program correctly tracks the integrity + * metadata in its hashmap. + */ + +#define _GNU_SOURCE +#include <test_progs.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include "lsm_bdev.skel.h" + +/* Must match the definition in progs/lsm_bdev.c. */ +struct verity_info { + __u8 has_roothash; + __u8 sig_valid; + __u32 setintegrity_cnt; +}; + +#define DATA_SIZE_MB 8 +#define HASH_SIZE_MB 1 +#define DM_NAME "bpf_test_verity" +#define DM_DEV_PATH "/dev/mapper/" DM_NAME + +/* Run a command and optionally capture the first line of stdout. */ +static int run_cmd(const char *cmd, char *out, size_t out_sz) +{ + FILE *fp; + int ret; + + fp = popen(cmd, "r"); + if (!fp) + return -1; + + if (out && out_sz > 0) { + if (!fgets(out, out_sz, fp)) + out[0] = '\0'; + /* strip trailing newline */ + out[strcspn(out, "\n")] = '\0'; + } + + ret = pclose(fp); + return WIFEXITED(ret) ? WEXITSTATUS(ret) : -1; +} + +static bool has_prerequisites(void) +{ + if (getuid() != 0) { + printf("SKIP: must be root\n"); + return false; + } + + if (run_cmd("modprobe loop 2>/dev/null", NULL, 0) && + run_cmd("ls /dev/loop-control 2>/dev/null", NULL, 0)) { + printf("SKIP: no loop device support\n"); + return false; + } + + if (run_cmd("modprobe dm-verity 2>/dev/null", NULL, 0) && + run_cmd("dmsetup targets 2>/dev/null | grep -q verity", NULL, 0)) { + printf("SKIP: dm-verity module not available\n"); + return false; + } + + if (run_cmd("which veritysetup >/dev/null 2>&1", NULL, 0)) { + printf("SKIP: veritysetup not found\n"); + return false; + } + + return true; +} + +void test_lsm_bdev(void) +{ + char data_img[] = "/tmp/bpf_verity_data_XXXXXX"; + char hash_img[] = "/tmp/bpf_verity_hash_XXXXXX"; + char data_loop[64] = {}; + char hash_loop[64] = {}; + char roothash[256] = {}; + char cmd[512]; + int data_fd = -1, hash_fd = -1; + struct lsm_bdev *skel = NULL; + struct verity_info val; + struct stat st; + __u32 dev_key; + int err; + + if (!has_prerequisites()) { + test__skip(); + return; + } + + /* Clean up any stale device from a previous crashed run. */ + snprintf(cmd, sizeof(cmd), "dmsetup remove %s 2>/dev/null", DM_NAME); + run_cmd(cmd, NULL, 0); + + /* Create temporary image files. */ + data_fd = mkstemp(data_img); + if (!ASSERT_OK_FD(data_fd, "mkstemp data")) + return; + + hash_fd = mkstemp(hash_img); + if (!ASSERT_OK_FD(hash_fd, "mkstemp hash")) + goto cleanup; + + if (!ASSERT_OK(ftruncate(data_fd, DATA_SIZE_MB * 1024 * 1024), + "truncate data")) + goto cleanup; + + if (!ASSERT_OK(ftruncate(hash_fd, HASH_SIZE_MB * 1024 * 1024), + "truncate hash")) + goto cleanup; + + close(data_fd); + data_fd = -1; + close(hash_fd); + hash_fd = -1; + + /* Set up loop devices. */ + snprintf(cmd, sizeof(cmd), + "losetup --find --show %s 2>/dev/null", data_img); + if (!ASSERT_OK(run_cmd(cmd, data_loop, sizeof(data_loop)), + "losetup data")) + goto teardown; + + snprintf(cmd, sizeof(cmd), + "losetup --find --show %s 2>/dev/null", hash_img); + if (!ASSERT_OK(run_cmd(cmd, hash_loop, sizeof(hash_loop)), + "losetup hash")) + goto teardown; + + /* Format the dm-verity device and capture the root hash. */ + snprintf(cmd, sizeof(cmd), + "veritysetup format %s %s 2>/dev/null | " + "grep -i 'root hash' | awk '{print $NF}'", + data_loop, hash_loop); + if (!ASSERT_OK(run_cmd(cmd, roothash, sizeof(roothash)), + "veritysetup format")) + goto teardown; + + if (!ASSERT_GT((int)strlen(roothash), 0, "roothash not empty")) + goto teardown; + + /* Load and attach BPF program before activating dm-verity. */ + skel = lsm_bdev__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + goto teardown; + + err = lsm_bdev__attach(skel); + if (!ASSERT_OK(err, "skel attach")) + goto teardown; + + /* Activate dm-verity — triggers verity_preresume() hooks. */ + snprintf(cmd, sizeof(cmd), + "veritysetup open %s %s %s %s 2>/dev/null", + data_loop, DM_NAME, hash_loop, roothash); + if (!ASSERT_OK(run_cmd(cmd, NULL, 0), "veritysetup open")) + goto teardown; + + /* Get the dm device's dev_t. */ + if (!ASSERT_OK(stat(DM_DEV_PATH, &st), "stat dm dev")) + goto remove_dm; + + dev_key = (__u32)st.st_rdev; + + /* Look up the device in the BPF map and verify. */ + err = bpf_map__lookup_elem(skel->maps.verity_devices, + &dev_key, sizeof(dev_key), + &val, sizeof(val), 0); + if (!ASSERT_OK(err, "map lookup")) + goto remove_dm; + + ASSERT_EQ(val.has_roothash, 1, "has_roothash"); + ASSERT_EQ(val.sig_valid, 0, "sig_valid (unsigned)"); + /* + * verity_preresume() always calls security_bdev_setintegrity() + * for the roothash. The signature-validity call only happens + * when CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG is enabled. + */ + ASSERT_GE(val.setintegrity_cnt, 1, "setintegrity_cnt min"); + ASSERT_LE(val.setintegrity_cnt, 2, "setintegrity_cnt max"); + + /* Verify that the alloc hook fired at least once. */ + ASSERT_GT(skel->bss->alloc_count, 0, "alloc_count"); + +remove_dm: + snprintf(cmd, sizeof(cmd), "dmsetup remove %s 2>/dev/null", DM_NAME); + run_cmd(cmd, NULL, 0); + +teardown: + if (data_loop[0]) { + snprintf(cmd, sizeof(cmd), "losetup -d %s 2>/dev/null", + data_loop); + run_cmd(cmd, NULL, 0); + } + if (hash_loop[0]) { + snprintf(cmd, sizeof(cmd), "losetup -d %s 2>/dev/null", + hash_loop); + run_cmd(cmd, NULL, 0); + } + +cleanup: + lsm_bdev__destroy(skel); + if (data_fd >= 0) + close(data_fd); + if (hash_fd >= 0) + close(hash_fd); + unlink(data_img); + unlink(hash_img); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h index fb1eb8c67361..ccec0fcdabc1 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -5,7 +5,6 @@ #include <time.h> #include <net/if.h> -#include <linux/if_tun.h> #include <linux/icmp.h> #include "test_progs.h" @@ -37,34 +36,6 @@ static inline int netns_delete(void) return system("ip netns del " NETNS ">/dev/null 2>&1"); } -static int open_tuntap(const char *dev_name, bool need_mac) -{ - int err = 0; - struct ifreq ifr; - int fd = open("/dev/net/tun", O_RDWR); - - if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)")) - return -1; - - ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); - strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); - ifr.ifr_name[IFNAMSIZ - 1] = '\0'; - - err = ioctl(fd, TUNSETIFF, &ifr); - if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { - close(fd); - return -1; - } - - err = fcntl(fd, F_SETFL, O_NONBLOCK); - if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { - close(fd); - return -1; - } - - return fd; -} - #define ICMP_PAYLOAD_SIZE 100 /* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */ diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c new file mode 100644 index 000000000000..b6391af5f6f9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c @@ -0,0 +1,540 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <netinet/in.h> + +#include "network_helpers.h" +#include "test_progs.h" + +#define BPF_FILE "test_lwt_ip_encap.bpf.o" + +#define NETNS_NAME_SIZE 32 +#define NETNS_BASE "ns-lwt-ip-encap" + +#define IP4_ADDR_1 "172.16.1.100" +#define IP4_ADDR_2 "172.16.2.100" +#define IP4_ADDR_3 "172.16.3.100" +#define IP4_ADDR_4 "172.16.4.100" +#define IP4_ADDR_5 "172.16.5.100" +#define IP4_ADDR_6 "172.16.6.100" +#define IP4_ADDR_7 "172.16.7.100" +#define IP4_ADDR_8 "172.16.8.100" +#define IP4_ADDR_GRE "172.16.16.100" + +#define IP4_ADDR_SRC IP4_ADDR_1 +#define IP4_ADDR_DST IP4_ADDR_4 + +#define IP6_ADDR_1 "fb01::1" +#define IP6_ADDR_2 "fb02::1" +#define IP6_ADDR_3 "fb03::1" +#define IP6_ADDR_4 "fb04::1" +#define IP6_ADDR_5 "fb05::1" +#define IP6_ADDR_6 "fb06::1" +#define IP6_ADDR_7 "fb07::1" +#define IP6_ADDR_8 "fb08::1" +#define IP6_ADDR_GRE "fb10::1" + +#define IP6_ADDR_SRC IP6_ADDR_1 +#define IP6_ADDR_DST IP6_ADDR_4 + +/* Setup/topology: + * + * NS1 NS2 NS3 + * veth1 <---> veth2 veth3 <---> veth4 (the top route) + * veth5 <---> veth6 veth7 <---> veth8 (the bottom route) + * + * Each vethN gets IP[4|6]_ADDR_N address. + * + * IP*_ADDR_SRC = IP*_ADDR_1 + * IP*_ADDR_DST = IP*_ADDR_4 + * + * All tests test pings from IP*_ADDR__SRC to IP*_ADDR_DST. + * + * By default, routes are configured to allow packets to go + * IP*_ADDR_1 <=> IP*_ADDR_2 <=> IP*_ADDR_3 <=> IP*_ADDR_4 (the top route). + * + * A GRE device is installed in NS3 with IP*_ADDR_GRE, and + * NS1/NS2 are configured to route packets to IP*_ADDR_GRE via IP*_ADDR_8 + * (the bottom route). + * + * Tests: + * + * 1. Routes NS2->IP*_ADDR_DST are brought down, so the only way a ping + * from IP*_ADDR_SRC to IP*_ADDR_DST can work is via IP*_ADDR_GRE. + * + * 2a. In an egress test, a bpf LWT_XMIT program is installed on veth1 + * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE. + * + * ping: SRC->[encap at veth1:egress]->GRE:decap->DST + * ping replies go DST->SRC directly + * + * 2b. In an ingress test, a bpf LWT_IN program is installed on veth2 + * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE. + * + * ping: SRC->[encap at veth2:ingress]->GRE:decap->DST + * ping replies go DST->SRC directly + */ + +static int create_ns(char *name, size_t name_sz) +{ + if (!name) + goto fail; + + if (!ASSERT_OK(append_tid(name, name_sz), "append TID")) + goto fail; + + SYS(fail, "ip netns add %s", name); + + /* rp_filter gets confused by what these tests are doing, so disable it */ + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.all.rp_filter=0", name); + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.default.rp_filter=0", name); + /* Disable IPv6 DAD because it sometimes takes too long and fails tests */ + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.accept_dad=0", name); + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.default.accept_dad=0", name); + + return 0; +fail: + return -1; +} + +static int set_top_addr(const char *ns1, const char *ns2, const char *ns3) +{ + SYS(fail, "ip -n %s a add %s/24 dev veth1", ns1, IP4_ADDR_1); + SYS(fail, "ip -n %s a add %s/24 dev veth2", ns2, IP4_ADDR_2); + SYS(fail, "ip -n %s a add %s/24 dev veth3", ns2, IP4_ADDR_3); + SYS(fail, "ip -n %s a add %s/24 dev veth4", ns3, IP4_ADDR_4); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth1", ns1, IP6_ADDR_1); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth2", ns2, IP6_ADDR_2); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth3", ns2, IP6_ADDR_3); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth4", ns3, IP6_ADDR_4); + + SYS(fail, "ip -n %s link set dev veth1 up", ns1); + SYS(fail, "ip -n %s link set dev veth2 up", ns2); + SYS(fail, "ip -n %s link set dev veth3 up", ns2); + SYS(fail, "ip -n %s link set dev veth4 up", ns3); + + return 0; +fail: + return 1; +} + +static int set_bottom_addr(const char *ns1, const char *ns2, const char *ns3) +{ + SYS(fail, "ip -n %s a add %s/24 dev veth5", ns1, IP4_ADDR_5); + SYS(fail, "ip -n %s a add %s/24 dev veth6", ns2, IP4_ADDR_6); + SYS(fail, "ip -n %s a add %s/24 dev veth7", ns2, IP4_ADDR_7); + SYS(fail, "ip -n %s a add %s/24 dev veth8", ns3, IP4_ADDR_8); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth5", ns1, IP6_ADDR_5); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth6", ns2, IP6_ADDR_6); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth7", ns2, IP6_ADDR_7); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth8", ns3, IP6_ADDR_8); + + SYS(fail, "ip -n %s link set dev veth5 up", ns1); + SYS(fail, "ip -n %s link set dev veth6 up", ns2); + SYS(fail, "ip -n %s link set dev veth7 up", ns2); + SYS(fail, "ip -n %s link set dev veth8 up", ns3); + + return 0; +fail: + return 1; +} + +static int configure_vrf(const char *ns1, const char *ns2) +{ + if (!ns1 || !ns2) + goto fail; + + SYS(fail, "ip -n %s link add red type vrf table 1001", ns1); + SYS(fail, "ip -n %s link set red up", ns1); + SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns1); + SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns1); + SYS(fail, "ip -n %s link set veth1 vrf red", ns1); + SYS(fail, "ip -n %s link set veth5 vrf red", ns1); + + SYS(fail, "ip -n %s link add red type vrf table 1001", ns2); + SYS(fail, "ip -n %s link set red up", ns2); + SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns2); + SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns2); + SYS(fail, "ip -n %s link set veth2 vrf red", ns2); + SYS(fail, "ip -n %s link set veth3 vrf red", ns2); + SYS(fail, "ip -n %s link set veth6 vrf red", ns2); + SYS(fail, "ip -n %s link set veth7 vrf red", ns2); + + return 0; +fail: + return -1; +} + +static int configure_ns1(const char *ns1, const char *vrf) +{ + struct nstoken *nstoken = NULL; + + if (!ns1 || !vrf) + goto fail; + + nstoken = open_netns(ns1); + if (!ASSERT_OK_PTR(nstoken, "open ns1")) + goto fail; + + /* Top route */ + SYS(fail, "ip route add %s/32 dev veth1 %s", IP4_ADDR_2, vrf); + SYS(fail, "ip route add default dev veth1 via %s %s", IP4_ADDR_2, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth1 %s", IP6_ADDR_2, vrf); + SYS(fail, "ip -6 route add default dev veth1 via %s %s", IP6_ADDR_2, vrf); + /* Bottom route */ + SYS(fail, "ip route add %s/32 dev veth5 %s", IP4_ADDR_6, vrf); + SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_7, IP4_ADDR_6, vrf); + SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_8, IP4_ADDR_6, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth5 %s", IP6_ADDR_6, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_7, IP6_ADDR_6, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_8, IP6_ADDR_6, vrf); + + close_netns(nstoken); + return 0; +fail: + close_netns(nstoken); + return -1; +} + +static int configure_ns2(const char *ns2, const char *vrf) +{ + struct nstoken *nstoken = NULL; + + if (!ns2 || !vrf) + goto fail; + + nstoken = open_netns(ns2); + if (!ASSERT_OK_PTR(nstoken, "open ns2")) + goto fail; + + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.ip_forward=1", ns2); + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.forwarding=1", ns2); + + /* Top route */ + SYS(fail, "ip route add %s/32 dev veth2 %s", IP4_ADDR_1, vrf); + SYS(fail, "ip route add %s/32 dev veth3 %s", IP4_ADDR_4, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth2 %s", IP6_ADDR_1, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth3 %s", IP6_ADDR_4, vrf); + /* Bottom route */ + SYS(fail, "ip route add %s/32 dev veth6 %s", IP4_ADDR_5, vrf); + SYS(fail, "ip route add %s/32 dev veth7 %s", IP4_ADDR_8, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth6 %s", IP6_ADDR_5, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth7 %s", IP6_ADDR_8, vrf); + + close_netns(nstoken); + return 0; +fail: + close_netns(nstoken); + return -1; +} + +static int configure_ns3(const char *ns3) +{ + struct nstoken *nstoken = NULL; + + if (!ns3) + goto fail; + + nstoken = open_netns(ns3); + if (!ASSERT_OK_PTR(nstoken, "open ns3")) + goto fail; + + /* Top route */ + SYS(fail, "ip route add %s/32 dev veth4", IP4_ADDR_3); + SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_1, IP4_ADDR_3); + SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_2, IP4_ADDR_3); + SYS(fail, "ip -6 route add %s/128 dev veth4", IP6_ADDR_3); + SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_1, IP6_ADDR_3); + SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_2, IP6_ADDR_3); + /* Bottom route */ + SYS(fail, "ip route add %s/32 dev veth8", IP4_ADDR_7); + SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_5, IP4_ADDR_7); + SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_6, IP4_ADDR_7); + SYS(fail, "ip -6 route add %s/128 dev veth8", IP6_ADDR_7); + SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_5, IP6_ADDR_7); + SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_6, IP6_ADDR_7); + + /* Configure IPv4 GRE device */ + SYS(fail, "ip tunnel add gre_dev mode gre remote %s local %s ttl 255", + IP4_ADDR_1, IP4_ADDR_GRE); + SYS(fail, "ip link set gre_dev up"); + SYS(fail, "ip a add %s dev gre_dev", IP4_ADDR_GRE); + + /* Configure IPv6 GRE device */ + SYS(fail, "ip tunnel add gre6_dev mode ip6gre remote %s local %s ttl 255", + IP6_ADDR_1, IP6_ADDR_GRE); + SYS(fail, "ip link set gre6_dev up"); + SYS(fail, "ip a add %s dev gre6_dev", IP6_ADDR_GRE); + + close_netns(nstoken); + return 0; +fail: + close_netns(nstoken); + return -1; +} + +static int setup_network(char *ns1, char *ns2, char *ns3, const char *vrf) +{ + if (!ns1 || !ns2 || !ns3 || !vrf) + goto fail; + + SYS(fail, "ip -n %s link add veth1 type veth peer name veth2 netns %s", ns1, ns2); + SYS(fail, "ip -n %s link add veth3 type veth peer name veth4 netns %s", ns2, ns3); + SYS(fail, "ip -n %s link add veth5 type veth peer name veth6 netns %s", ns1, ns2); + SYS(fail, "ip -n %s link add veth7 type veth peer name veth8 netns %s", ns2, ns3); + + if (vrf[0]) { + if (!ASSERT_OK(configure_vrf(ns1, ns2), "configure vrf")) + goto fail; + } + if (!ASSERT_OK(set_top_addr(ns1, ns2, ns3), "set top addresses")) + goto fail; + + if (!ASSERT_OK(set_bottom_addr(ns1, ns2, ns3), "set bottom addresses")) + goto fail; + + if (!ASSERT_OK(configure_ns1(ns1, vrf), "configure ns1 routes")) + goto fail; + + if (!ASSERT_OK(configure_ns2(ns2, vrf), "configure ns2 routes")) + goto fail; + + if (!ASSERT_OK(configure_ns3(ns3), "configure ns3 routes")) + goto fail; + + /* Link bottom route to the GRE tunnels */ + SYS(fail, "ip -n %s route add %s/32 dev veth5 via %s %s", + ns1, IP4_ADDR_GRE, IP4_ADDR_6, vrf); + SYS(fail, "ip -n %s route add %s/32 dev veth7 via %s %s", + ns2, IP4_ADDR_GRE, IP4_ADDR_8, vrf); + SYS(fail, "ip -n %s -6 route add %s/128 dev veth5 via %s %s", + ns1, IP6_ADDR_GRE, IP6_ADDR_6, vrf); + SYS(fail, "ip -n %s -6 route add %s/128 dev veth7 via %s %s", + ns2, IP6_ADDR_GRE, IP6_ADDR_8, vrf); + + return 0; +fail: + return -1; +} + +static int remove_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf) +{ + SYS(fail, "ip -n %s route del %s dev veth5 %s", ns1, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s route del %s dev veth7 %s", ns2, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route del %s/128 dev veth5 %s", ns1, IP6_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route del %s/128 dev veth7 %s", ns2, IP6_ADDR_GRE, vrf); + + return 0; +fail: + return -1; +} + +static int add_unreachable_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf) +{ + SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns1, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns2, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns1, IP6_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns2, IP6_ADDR_GRE, vrf); + + return 0; +fail: + return -1; +} + +#define GSO_SIZE 5000 +#define GSO_TCP_PORT 9000 +/* This tests the fix from commit ea0371f78799 ("net: fix GSO in bpf_lwt_push_ip_encap") */ +static int test_gso_fix(const char *ns1, const char *ns3, int family) +{ + const char *ip_addr = family == AF_INET ? IP4_ADDR_DST : IP6_ADDR_DST; + char gso_packet[GSO_SIZE] = {}; + struct nstoken *nstoken = NULL; + int sfd, cfd, afd; + ssize_t bytes; + int ret = -1; + + if (!ns1 || !ns3) + return ret; + + nstoken = open_netns(ns3); + if (!ASSERT_OK_PTR(nstoken, "open ns3")) + return ret; + + sfd = start_server_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL); + if (!ASSERT_OK_FD(sfd, "start server")) + goto close_netns; + + close_netns(nstoken); + + nstoken = open_netns(ns1); + if (!ASSERT_OK_PTR(nstoken, "open ns1")) + goto close_server; + + cfd = connect_to_addr_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL); + if (!ASSERT_OK_FD(cfd, "connect to server")) + goto close_server; + + close_netns(nstoken); + nstoken = NULL; + + afd = accept(sfd, NULL, NULL); + if (!ASSERT_OK_FD(afd, "accept")) + goto close_client; + + /* Send a packet larger than MTU */ + bytes = send(cfd, gso_packet, GSO_SIZE, 0); + if (!ASSERT_EQ(bytes, GSO_SIZE, "send packet")) + goto close_accept; + + /* Verify we received all expected bytes */ + bytes = read(afd, gso_packet, GSO_SIZE); + if (!ASSERT_EQ(bytes, GSO_SIZE, "receive packet")) + goto close_accept; + + ret = 0; + +close_accept: + close(afd); +close_client: + close(cfd); +close_server: + close(sfd); +close_netns: + close_netns(nstoken); + + return ret; +} + +static int check_ping_ok(const char *ns1) +{ + SYS(fail, "ip netns exec %s ping -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP4_ADDR_DST); + SYS(fail, "ip netns exec %s ping6 -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP6_ADDR_DST); + return 0; +fail: + return -1; +} + +static int check_ping_fails(const char *ns1) +{ + int ret; + + ret = SYS_NOFAIL("ip netns exec %s ping -c 1 -W1 -I veth1 %s", ns1, IP4_ADDR_DST); + if (!ret) + return -1; + + ret = SYS_NOFAIL("ip netns exec %s ping6 -c 1 -W1 -I veth1 %s", ns1, IP6_ADDR_DST); + if (!ret) + return -1; + + return 0; +} + +#define EGRESS true +#define INGRESS false +#define IPV4_ENCAP true +#define IPV6_ENCAP false +static void lwt_ip_encap(bool ipv4_encap, bool egress, const char *vrf) +{ + char ns1[NETNS_NAME_SIZE] = NETNS_BASE "-1-"; + char ns2[NETNS_NAME_SIZE] = NETNS_BASE "-2-"; + char ns3[NETNS_NAME_SIZE] = NETNS_BASE "-3-"; + char *sec = ipv4_encap ? "encap_gre" : "encap_gre6"; + + if (!vrf) + return; + + if (!ASSERT_OK(create_ns(ns1, NETNS_NAME_SIZE), "create ns1")) + goto out; + if (!ASSERT_OK(create_ns(ns2, NETNS_NAME_SIZE), "create ns2")) + goto out; + if (!ASSERT_OK(create_ns(ns3, NETNS_NAME_SIZE), "create ns3")) + goto out; + + if (!ASSERT_OK(setup_network(ns1, ns2, ns3, vrf), "setup network")) + goto out; + + /* By default, pings work */ + if (!ASSERT_OK(check_ping_ok(ns1), "ping OK")) + goto out; + + /* Remove NS2->DST routes, ping fails */ + SYS(out, "ip -n %s route del %s/32 dev veth3 %s", ns2, IP4_ADDR_DST, vrf); + SYS(out, "ip -n %s -6 route del %s/128 dev veth3 %s", ns2, IP6_ADDR_DST, vrf); + if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail")) + goto out; + + /* Install replacement routes (LWT/eBPF), pings succeed */ + if (egress) { + SYS(out, "ip -n %s route add %s encap bpf xmit obj %s sec %s dev veth1 %s", + ns1, IP4_ADDR_DST, BPF_FILE, sec, vrf); + SYS(out, "ip -n %s -6 route add %s encap bpf xmit obj %s sec %s dev veth1 %s", + ns1, IP6_ADDR_DST, BPF_FILE, sec, vrf); + } else { + SYS(out, "ip -n %s route add %s encap bpf in obj %s sec %s dev veth2 %s", + ns2, IP4_ADDR_DST, BPF_FILE, sec, vrf); + SYS(out, "ip -n %s -6 route add %s encap bpf in obj %s sec %s dev veth2 %s", + ns2, IP6_ADDR_DST, BPF_FILE, sec, vrf); + } + + if (!ASSERT_OK(check_ping_ok(ns1), "ping OK")) + goto out; + + /* Skip GSO tests with VRF: VRF routing needs properly assigned + * source IP/device, which is easy to do with ping but hard with TCP. + */ + if (egress && !vrf[0]) { + if (!ASSERT_OK(test_gso_fix(ns1, ns3, AF_INET), "test GSO")) + goto out; + } + + /* Negative test: remove routes to GRE devices: ping fails */ + if (!ASSERT_OK(remove_routes_to_gredev(ns1, ns2, vrf), "remove routes to gredev")) + goto out; + if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail")) + goto out; + + /* Another negative test */ + if (!ASSERT_OK(add_unreachable_routes_to_gredev(ns1, ns2, vrf), + "add unreachable routes")) + goto out; + ASSERT_OK(check_ping_fails(ns1), "ping expected fail"); + +out: + SYS_NOFAIL("ip netns del %s", ns1); + SYS_NOFAIL("ip netns del %s", ns2); + SYS_NOFAIL("ip netns del %s", ns3); +} + +void test_lwt_ip_encap_vrf_ipv6(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV6_ENCAP, EGRESS, "vrf red"); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV6_ENCAP, INGRESS, "vrf red"); +} + +void test_lwt_ip_encap_vrf_ipv4(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV4_ENCAP, EGRESS, "vrf red"); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV4_ENCAP, INGRESS, "vrf red"); +} + +void test_lwt_ip_encap_ipv6(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV6_ENCAP, EGRESS, ""); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV6_ENCAP, INGRESS, ""); +} + +void test_lwt_ip_encap_ipv4(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV4_ENCAP, EGRESS, ""); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV4_ENCAP, INGRESS, ""); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_misc.c b/tools/testing/selftests/bpf/prog_tests/lwt_misc.c new file mode 100644 index 000000000000..6940fca38512 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_misc.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "lwt_misc.skel.h" + +void test_lwt_misc(void) +{ + RUN_TESTS(lwt_misc); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c new file mode 100644 index 000000000000..1b25d5c5f8fb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Connects 6 network namespaces through veths. + * Each NS may have different IPv6 global scope addresses : + * + * NS1 NS2 NS3 NS4 NS5 NS6 + * lo veth1 <-> veth2 veth3 <-> veth4 veth5 <-> veth6 lo veth7 <-> veth8 veth9 <-> veth10 lo + * fb00 ::1 ::12 ::21 ::34 ::43 ::56 ::65 ::78 ::87 ::910 ::109 ::6 + * fd00 ::4 + * fc42 ::1 + * + * All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a + * IPv6 header with a Segment Routing Header, with segments : + * fd00::1 -> fd00::2 -> fd00::3 -> fd00::4 + * + * 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions : + * - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1 + * - fd00::2 : remove the TLV, change the flags, add a tag + * - fd00::3 : apply an End.T action to fd00::4, through routing table 117 + * + * fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet. + * Each End.BPF action will validate the operations applied on the SRH by the + * previous BPF program in the chain, otherwise the packet is dropped. + * + * An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this + * datagram can be read on NS6 when binding to fb00::6. + */ + +#include "network_helpers.h" +#include "test_progs.h" + +#define NETNS_BASE "lwt-seg6local-" +#define BPF_FILE "test_lwt_seg6local.bpf.o" + +static void cleanup(void) +{ + int ns; + + for (ns = 1; ns < 7; ns++) + SYS_NOFAIL("ip netns del %s%d", NETNS_BASE, ns); +} + +static int setup(void) +{ + int ns; + + for (ns = 1; ns < 7; ns++) + SYS(fail, "ip netns add %s%d", NETNS_BASE, ns); + + SYS(fail, "ip -n %s6 link set dev lo up", NETNS_BASE); + + for (ns = 1; ns < 6; ns++) { + int local_id = ns * 2 - 1; + int peer_id = ns * 2; + int next_ns = ns + 1; + + SYS(fail, "ip -n %s%d link add veth%d type veth peer name veth%d netns %s%d", + NETNS_BASE, ns, local_id, peer_id, NETNS_BASE, next_ns); + + SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, ns, local_id); + SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, next_ns, peer_id); + + /* All link scope addresses to veths */ + SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link", + NETNS_BASE, ns, local_id, peer_id, local_id); + SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link", + NETNS_BASE, next_ns, peer_id, local_id, peer_id); + } + + + SYS(fail, "ip -n %s5 -6 route add fb00::109 table 117 dev veth9 scope link", NETNS_BASE); + + SYS(fail, "ip -n %s1 -6 addr add fb00::1/16 dev lo", NETNS_BASE); + SYS(fail, "ip -n %s1 -6 route add fb00::6 dev veth1 via fb00::21", NETNS_BASE); + + SYS(fail, "ip -n %s2 -6 route add fb00::6 encap bpf in obj %s sec encap_srh dev veth2", + NETNS_BASE, BPF_FILE); + SYS(fail, "ip -n %s2 -6 route add fd00::1 dev veth3 via fb00::43 scope link", NETNS_BASE); + + SYS(fail, "ip -n %s3 -6 route add fc42::1 dev veth5 via fb00::65", NETNS_BASE); + SYS(fail, + "ip -n %s3 -6 route add fd00::1 encap seg6local action End.BPF endpoint obj %s sec add_egr_x dev veth4", + NETNS_BASE, BPF_FILE); + + SYS(fail, + "ip -n %s4 -6 route add fd00::2 encap seg6local action End.BPF endpoint obj %s sec pop_egr dev veth6", + NETNS_BASE, BPF_FILE); + SYS(fail, "ip -n %s4 -6 addr add fc42::1 dev lo", NETNS_BASE); + SYS(fail, "ip -n %s4 -6 route add fd00::3 dev veth7 via fb00::87", NETNS_BASE); + + SYS(fail, "ip -n %s5 -6 route add fd00::4 table 117 dev veth9 via fb00::109", NETNS_BASE); + SYS(fail, + "ip -n %s5 -6 route add fd00::3 encap seg6local action End.BPF endpoint obj %s sec inspect_t dev veth8", + NETNS_BASE, BPF_FILE); + + SYS(fail, "ip -n %s6 -6 addr add fb00::6/16 dev lo", NETNS_BASE); + SYS(fail, "ip -n %s6 -6 addr add fd00::4/16 dev lo", NETNS_BASE); + + for (ns = 1; ns < 6; ns++) + SYS(fail, "ip netns exec %s%d sysctl -wq net.ipv6.conf.all.forwarding=1", + NETNS_BASE, ns); + + SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.all.seg6_enabled=1", NETNS_BASE); + SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.lo.seg6_enabled=1", NETNS_BASE); + SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.veth10.seg6_enabled=1", NETNS_BASE); + + return 0; +fail: + return -1; +} + +#define SERVER_PORT 7330 +#define CLIENT_PORT 2121 +void test_lwt_seg6local(void) +{ + struct sockaddr_in6 server_addr = {}; + const char *ns1 = NETNS_BASE "1"; + const char *ns6 = NETNS_BASE "6"; + struct nstoken *nstoken = NULL; + const char foobar[] = "foobar"; + ssize_t bytes; + int sfd, cfd; + char buf[7]; + + if (!ASSERT_OK(setup(), "setup")) + goto out; + + nstoken = open_netns(ns6); + if (!ASSERT_OK_PTR(nstoken, "open ns6")) + goto out; + + sfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::6", SERVER_PORT, NULL); + if (!ASSERT_OK_FD(sfd, "start server")) + goto close_netns; + + close_netns(nstoken); + + nstoken = open_netns(ns1); + if (!ASSERT_OK_PTR(nstoken, "open ns1")) + goto close_server; + + cfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::1", CLIENT_PORT, NULL); + if (!ASSERT_OK_FD(cfd, "start client")) + goto close_server; + + close_netns(nstoken); + nstoken = NULL; + + /* Send a packet larger than MTU */ + server_addr.sin6_family = AF_INET6; + server_addr.sin6_port = htons(SERVER_PORT); + if (!ASSERT_EQ(inet_pton(AF_INET6, "fb00::6", &server_addr.sin6_addr), 1, + "build target addr")) + goto close_client; + + bytes = sendto(cfd, foobar, sizeof(foobar), 0, + (struct sockaddr *)&server_addr, sizeof(server_addr)); + if (!ASSERT_EQ(bytes, sizeof(foobar), "send packet")) + goto close_client; + + /* Verify we received all expected bytes */ + bytes = read(sfd, buf, sizeof(buf)); + if (!ASSERT_EQ(bytes, sizeof(buf), "receive packet")) + goto close_client; + ASSERT_STREQ(buf, foobar, "check udp packet"); + +close_client: + close(cfd); +close_server: + close(sfd); +close_netns: + close_netns(nstoken); + +out: + cleanup(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c new file mode 100644 index 000000000000..6bdc6d6de0da --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Google LLC. */ +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <test_progs.h> +#include <bpf/btf.h> + +#include "map_excl.skel.h" + +static void test_map_excl_allowed(void) +{ + struct map_excl *skel = map_excl__open(); + int err; + + err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access); + if (!ASSERT_OK(err, "bpf_map__set_exclusive_program")) + goto out; + + bpf_program__set_autoload(skel->progs.should_have_access, true); + bpf_program__set_autoload(skel->progs.should_not_have_access, false); + + err = map_excl__load(skel); + ASSERT_OK(err, "map_excl__load"); +out: + map_excl__destroy(skel); +} + +static void test_map_excl_denied(void) +{ + struct map_excl *skel = map_excl__open(); + int err; + + err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access); + if (!ASSERT_OK(err, "bpf_map__make_exclusive")) + goto out; + + bpf_program__set_autoload(skel->progs.should_have_access, false); + bpf_program__set_autoload(skel->progs.should_not_have_access, true); + + err = map_excl__load(skel); + ASSERT_EQ(err, -EACCES, "exclusive map access not denied\n"); +out: + map_excl__destroy(skel); + +} + +void test_map_excl(void) +{ + if (test__start_subtest("map_excl_allowed")) + test_map_excl_allowed(); + if (test__start_subtest("map_excl_denied")) + test_map_excl_denied(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index 8743df599567..03b46f17cf53 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -118,19 +118,39 @@ exit: static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu) { - long gp_seq = READ_ONCE(rcu->bss->gp_seq); LIBBPF_OPTS(bpf_test_run_opts, opts); + int ret; - if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace), - &opts), "do_call_rcu_tasks_trace")) + WRITE_ONCE(rcu->bss->done, 0); + ret = bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.call_rcu_tasks_trace), &opts); + if (!ASSERT_OK(ret, "call_rcu_tasks_trace")) return -EFAULT; - if (!ASSERT_OK(opts.retval, "opts.retval == 0")) + if (!ASSERT_OK(opts.retval, "call_rcu_tasks_trace retval")) return -EFAULT; - while (gp_seq == READ_ONCE(rcu->bss->gp_seq)) + while (!READ_ONCE(rcu->bss->done)) sched_yield(); return 0; } +static void wait_for_map_release(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, lopts); + struct map_kptr *skel; + int ret; + + skel = map_kptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) + return; + + do { + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.count_ref), &lopts); + ASSERT_OK(ret, "count_ref ret"); + ASSERT_OK(lopts.retval, "count_ref retval"); + } while (skel->bss->num_of_refs != 2); + + map_kptr__destroy(skel); +} + void serial_test_map_kptr(void) { struct rcu_tasks_trace_gp *skel; @@ -140,24 +160,24 @@ void serial_test_map_kptr(void) skel = rcu_tasks_trace_gp__open_and_load(); if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load")) return; - if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach")) - goto end; if (test__start_subtest("success-map")) { test_map_kptr_success(true); ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); ASSERT_OK(kern_sync_rcu(), "sync rcu"); + wait_for_map_release(); + /* Observe refcount dropping to 1 on bpf_map_free_deferred */ test_map_kptr_success(false); ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); ASSERT_OK(kern_sync_rcu(), "sync rcu"); + wait_for_map_release(); + /* Observe refcount dropping to 1 on synchronous delete elem */ test_map_kptr_success(true); } -end: rcu_tasks_trace_gp__destroy(skel); - return; } diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c b/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c new file mode 100644 index 000000000000..506ed55e8528 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <network_helpers.h> + +#include "map_kptr_race.skel.h" + +static int get_map_id(int map_fd) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + + if (!ASSERT_OK(bpf_map_get_info_by_fd(map_fd, &info, &len), "get_map_info")) + return -1; + return info.id; +} + +static int read_refs(struct map_kptr_race *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + int ret; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.count_ref), &opts); + if (!ASSERT_OK(ret, "count_ref run")) + return -1; + if (!ASSERT_OK(opts.retval, "count_ref retval")) + return -1; + return skel->bss->num_of_refs; +} + +static void test_htab_leak(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct map_kptr_race *skel, *watcher; + int ret, map_id; + + skel = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_htab_leak), &opts); + if (!ASSERT_OK(ret, "test_htab_leak run")) + goto out_skel; + if (!ASSERT_OK(opts.retval, "test_htab_leak retval")) + goto out_skel; + + map_id = get_map_id(bpf_map__fd(skel->maps.race_hash_map)); + if (!ASSERT_GE(map_id, 0, "map_id")) + goto out_skel; + + watcher = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(watcher, "watcher open_and_load")) + goto out_skel; + + watcher->bss->target_map_id = map_id; + watcher->links.map_put = bpf_program__attach(watcher->progs.map_put); + if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry")) + goto out_watcher; + watcher->links.htab_map_free = bpf_program__attach(watcher->progs.htab_map_free); + if (!ASSERT_OK_PTR(watcher->links.htab_map_free, "attach fexit")) + goto out_watcher; + + map_kptr_race__destroy(skel); + skel = NULL; + + kern_sync_rcu(); + + while (!READ_ONCE(watcher->bss->map_freed)) + sched_yield(); + + ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed"); + ASSERT_EQ(read_refs(watcher), 2, "htab refcount"); + +out_watcher: + map_kptr_race__destroy(watcher); +out_skel: + map_kptr_race__destroy(skel); +} + +static void test_percpu_htab_leak(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct map_kptr_race *skel, *watcher; + int ret, map_id; + + skel = map_kptr_race__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + skel->rodata->nr_cpus = libbpf_num_possible_cpus(); + if (skel->rodata->nr_cpus > 16) + skel->rodata->nr_cpus = 16; + + ret = map_kptr_race__load(skel); + if (!ASSERT_OK(ret, "load")) + goto out_skel; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_percpu_htab_leak), &opts); + if (!ASSERT_OK(ret, "test_percpu_htab_leak run")) + goto out_skel; + if (!ASSERT_OK(opts.retval, "test_percpu_htab_leak retval")) + goto out_skel; + + map_id = get_map_id(bpf_map__fd(skel->maps.race_percpu_hash_map)); + if (!ASSERT_GE(map_id, 0, "map_id")) + goto out_skel; + + watcher = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(watcher, "watcher open_and_load")) + goto out_skel; + + watcher->bss->target_map_id = map_id; + watcher->links.map_put = bpf_program__attach(watcher->progs.map_put); + if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry")) + goto out_watcher; + watcher->links.htab_map_free = bpf_program__attach(watcher->progs.htab_map_free); + if (!ASSERT_OK_PTR(watcher->links.htab_map_free, "attach fexit")) + goto out_watcher; + + map_kptr_race__destroy(skel); + skel = NULL; + + kern_sync_rcu(); + + while (!READ_ONCE(watcher->bss->map_freed)) + sched_yield(); + + ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed"); + ASSERT_EQ(read_refs(watcher), 2, "percpu_htab refcount"); + +out_watcher: + map_kptr_race__destroy(watcher); +out_skel: + map_kptr_race__destroy(skel); +} + +static void test_sk_ls_leak(void) +{ + struct map_kptr_race *skel, *watcher; + int listen_fd = -1, client_fd = -1, map_id; + + skel = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (!ASSERT_OK(map_kptr_race__attach(skel), "attach")) + goto out_skel; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (!ASSERT_GE(listen_fd, 0, "start_server")) + goto out_skel; + + client_fd = connect_to_fd(listen_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto out_skel; + + if (!ASSERT_EQ(skel->bss->sk_ls_leak_done, 1, "sk_ls_leak_done")) + goto out_skel; + + close(client_fd); + client_fd = -1; + close(listen_fd); + listen_fd = -1; + + map_id = get_map_id(bpf_map__fd(skel->maps.race_sk_ls_map)); + if (!ASSERT_GE(map_id, 0, "map_id")) + goto out_skel; + + watcher = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(watcher, "watcher open_and_load")) + goto out_skel; + + watcher->bss->target_map_id = map_id; + watcher->links.map_put = bpf_program__attach(watcher->progs.map_put); + if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry")) + goto out_watcher; + watcher->links.sk_map_free = bpf_program__attach(watcher->progs.sk_map_free); + if (!ASSERT_OK_PTR(watcher->links.sk_map_free, "attach fexit")) + goto out_watcher; + + map_kptr_race__destroy(skel); + skel = NULL; + + kern_sync_rcu(); + + while (!READ_ONCE(watcher->bss->map_freed)) + sched_yield(); + + ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed"); + ASSERT_EQ(read_refs(watcher), 2, "sk_ls refcount"); + +out_watcher: + map_kptr_race__destroy(watcher); +out_skel: + if (client_fd >= 0) + close(client_fd); + if (listen_fd >= 0) + close(listen_fd); + map_kptr_race__destroy(skel); +} + +void serial_test_map_kptr_race(void) +{ + if (test__start_subtest("htab_leak")) + test_htab_leak(); + if (test__start_subtest("percpu_htab_leak")) + test_percpu_htab_leak(); + if (test__start_subtest("sk_ls_leak")) + test_sk_ls_leak(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c new file mode 100644 index 000000000000..40d4f687bd9c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <test_progs.h> +#include "mem_rdonly_untrusted.skel.h" + +void test_mem_rdonly_untrusted(void) +{ + RUN_TESTS(mem_rdonly_untrusted); +} diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c index 653b0a20fab9..c62907732c19 100644 --- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c @@ -7,24 +7,29 @@ * 3. call listen() for 1 server socket. (migration target) * 4. update a map to migrate all child sockets * to the last server socket (migrate_map[cookie] = 4) - * 5. call shutdown() for first 4 server sockets + * 5. for TCP_ESTABLISHED and TCP_SYN_RECV cases, verify via epoll + * that the last server socket is not ready before migration. + * 6. call shutdown() for first 4 server sockets * and migrate the requests in the accept queue * to the last server socket. - * 6. call listen() for the second server socket. - * 7. call shutdown() for the last server + * 7. for TCP_ESTABLISHED and TCP_SYN_RECV cases, verify via epoll + * that the last server socket is ready after migration. + * 8. call listen() for the second server socket. + * 9. call shutdown() for the last server * and migrate the requests in the accept queue * to the second server socket. - * 8. call listen() for the last server. - * 9. call shutdown() for the second server + * 10. call listen() for the last server. + * 11. call shutdown() for the second server * and migrate the requests in the accept queue * to the last server socket. - * 10. call accept() for the last server socket. + * 12. call accept() for the last server socket. * * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> */ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include <sys/epoll.h> #include "test_progs.h" #include "test_migrate_reuseport.skel.h" @@ -350,21 +355,51 @@ static int update_maps(struct migrate_reuseport_test_case *test_case, static int migrate_dance(struct migrate_reuseport_test_case *test_case) { + struct epoll_event ev = { + .events = EPOLLIN, + }; + int epoll = -1, nfds; int i, err; + if (test_case->state != BPF_TCP_NEW_SYN_RECV) { + epoll = epoll_create1(0); + if (!ASSERT_NEQ(epoll, -1, "epoll_create1")) + return -1; + + ev.data.fd = test_case->servers[MIGRATED_TO]; + if (!ASSERT_OK(epoll_ctl(epoll, EPOLL_CTL_ADD, + test_case->servers[MIGRATED_TO], &ev), + "epoll_ctl")) + goto close_epoll; + + nfds = epoll_wait(epoll, &ev, 1, 0); + if (!ASSERT_EQ(nfds, 0, "epoll_wait 1")) + goto close_epoll; + } + /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests * to the last listener based on eBPF. */ for (i = 0; i < MIGRATED_TO; i++) { err = shutdown(test_case->servers[i], SHUT_RDWR); if (!ASSERT_OK(err, "shutdown")) - return -1; + goto close_epoll; } /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */ if (test_case->state == BPF_TCP_NEW_SYN_RECV) return 0; + nfds = epoll_wait(epoll, &ev, 1, 0); + if (!ASSERT_EQ(nfds, 1, "epoll_wait 2")) { +close_epoll: + if (epoll >= 0) + close(epoll); + return -1; + } + + close(epoll); + /* Note that we use the second listener instead of the * first one here. * diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c index a70c99c2f8c8..4661d77ebdfc 100644 --- a/tools/testing/selftests/bpf/prog_tests/modify_return.c +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -5,6 +5,7 @@ */ #include <test_progs.h> +#include <unistd.h> #include "modify_return.skel.h" #define LOWER(x) ((x) & 0xffff) @@ -23,11 +24,13 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) if (!ASSERT_OK_PTR(skel, "skel_load")) goto cleanup; + skel->bss->input_retval = input_retval; + skel->bss->test_pid = getpid(); + err = modify_return__attach(skel); if (!ASSERT_OK(err, "modify_return__attach failed")) goto cleanup; - skel->bss->input_retval = input_retval; prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); @@ -49,8 +52,7 @@ cleanup: modify_return__destroy(skel); } -/* TODO: conflict with get_func_ip_test */ -void serial_test_modify_return(void) +void test_modify_return(void) { run_test(0 /* input_retval */, 2 /* want_side_effect */, diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 6d391d95f96e..92c336333fcb 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -6,7 +6,22 @@ #include "test_module_attach.skel.h" #include "testing_helpers.h" -static int duration; +static const char * const read_tests[] = { + "handle_raw_tp", + "handle_tp_btf", + "handle_fentry", + "handle_fentry_explicit", + "handle_fmod_ret", +}; + +static const char * const detach_tests[] = { + "handle_fentry", + "handle_fexit", + "kprobe_multi", +}; + +static const int READ_SZ = 456; +static const int WRITE_SZ = 457; static int trigger_module_test_writable(int *val) { @@ -33,53 +48,73 @@ static int trigger_module_test_writable(int *val) return 0; } -void test_module_attach(void) +static void test_module_attach_prog(const char *prog_name, int sz, + const char *attach_target, int ret) { - const int READ_SZ = 456; - const int WRITE_SZ = 457; - struct test_module_attach* skel; - struct test_module_attach__bss *bss; - struct bpf_link *link; + struct test_module_attach *skel; + struct bpf_program *prog; int err; - int writable_val = 0; skel = test_module_attach__open(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "module_attach open")) return; - err = bpf_program__set_attach_target(skel->progs.handle_fentry_manual, - 0, "bpf_testmod_test_read"); - ASSERT_OK(err, "set_attach_target"); + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "module_attach find_program")) + goto cleanup; + bpf_program__set_autoload(prog, true); - err = bpf_program__set_attach_target(skel->progs.handle_fentry_explicit_manual, - 0, "bpf_testmod:bpf_testmod_test_read"); - ASSERT_OK(err, "set_attach_target_explicit"); + if (attach_target) { + err = bpf_program__set_attach_target(prog, 0, attach_target); + if (!ASSERT_OK(err, attach_target)) + goto cleanup; + } err = test_module_attach__load(skel); - if (CHECK(err, "skel_load", "failed to load skeleton\n")) + if (!ASSERT_OK(err, "module_attach load")) + goto cleanup; + + err = test_module_attach__attach(skel); + if (!ASSERT_OK(err, "module_attach attach")) + goto cleanup; + + if (sz) { + /* trigger both read and write though each test uses only one */ + ASSERT_OK(trigger_module_test_read(sz), "trigger_read"); + ASSERT_OK(trigger_module_test_write(sz), "trigger_write"); + + ASSERT_EQ(skel->bss->sz, sz, prog_name); + } + + if (ret) + ASSERT_EQ(skel->bss->retval, ret, "ret"); +cleanup: + test_module_attach__destroy(skel); +} + +static void test_module_attach_writable(void) +{ + struct test_module_attach__bss *bss; + struct test_module_attach *skel; + int writable_val = 0; + int err; + + skel = test_module_attach__open(); + if (!ASSERT_OK_PTR(skel, "module_attach open")) return; + bpf_program__set_autoload(skel->progs.handle_raw_tp_writable_bare, true); + + err = test_module_attach__load(skel); + if (!ASSERT_OK(err, "module_attach load")) + goto cleanup; + bss = skel->bss; err = test_module_attach__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "module_attach attach")) goto cleanup; - /* trigger tracepoint */ - ASSERT_OK(trigger_module_test_read(READ_SZ), "trigger_read"); - ASSERT_OK(trigger_module_test_write(WRITE_SZ), "trigger_write"); - - ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp"); - ASSERT_EQ(bss->raw_tp_bare_write_sz, WRITE_SZ, "raw_tp_bare"); - ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf"); - ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry"); - ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual"); - ASSERT_EQ(bss->fentry_explicit_read_sz, READ_SZ, "fentry_explicit"); - ASSERT_EQ(bss->fentry_explicit_manual_read_sz, READ_SZ, "fentry_explicit_manual"); - ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit"); - ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet"); - ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret"); - bss->raw_tp_writable_bare_early_ret = true; bss->raw_tp_writable_bare_out_val = 0xf1f2f3f4; ASSERT_OK(trigger_module_test_writable(&writable_val), @@ -87,31 +122,73 @@ void test_module_attach(void) ASSERT_EQ(bss->raw_tp_writable_bare_in_val, 1024, "writable_test_in"); ASSERT_EQ(bss->raw_tp_writable_bare_out_val, writable_val, "writable_test_out"); +cleanup: + test_module_attach__destroy(skel); +} - test_module_attach__detach(skel); - - /* attach fentry/fexit and make sure it get's module reference */ - link = bpf_program__attach(skel->progs.handle_fentry); - if (!ASSERT_OK_PTR(link, "attach_fentry")) - goto cleanup; +static void test_module_attach_detach(const char *prog_name) +{ + struct test_module_attach *skel; + struct bpf_program *prog; + struct bpf_link *link; + int err; - ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); - bpf_link__destroy(link); + skel = test_module_attach__open(); + if (!ASSERT_OK_PTR(skel, "module_attach open")) + return; - link = bpf_program__attach(skel->progs.handle_fexit); - if (!ASSERT_OK_PTR(link, "attach_fexit")) + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "module_attach find_program")) goto cleanup; + bpf_program__set_autoload(prog, true); - ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); - bpf_link__destroy(link); + err = test_module_attach__load(skel); + if (!ASSERT_OK(err, "module_attach load")) + goto cleanup; - link = bpf_program__attach(skel->progs.kprobe_multi); - if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) + /* attach and make sure it gets module reference */ + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "module_attach attach")) goto cleanup; - ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); + ASSERT_ERR(try_unload_module("bpf_testmod", 1, false), "try_unload_module"); bpf_link__destroy(link); - cleanup: test_module_attach__destroy(skel); } + +void test_module_attach(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(read_tests); i++) { + if (!test__start_subtest(read_tests[i])) + continue; + test_module_attach_prog(read_tests[i], READ_SZ, NULL, 0); + } + if (test__start_subtest("handle_raw_tp_bare")) + test_module_attach_prog("handle_raw_tp_bare", WRITE_SZ, NULL, 0); + if (test__start_subtest("handle_raw_tp_writable_bare")) + test_module_attach_writable(); + if (test__start_subtest("handle_fentry_manual")) { + test_module_attach_prog("handle_fentry_manual", READ_SZ, + "bpf_testmod_test_read", 0); + } + if (test__start_subtest("handle_fentry_explicit_manual")) { + test_module_attach_prog("handle_fentry_explicit_manual", + READ_SZ, + "bpf_testmod:bpf_testmod_test_read", 0); + } + if (test__start_subtest("handle_fexit")) + test_module_attach_prog("handle_fexit", READ_SZ, NULL, -EIO); + if (test__start_subtest("handle_fexit_ret")) + test_module_attach_prog("handle_fexit_ret", 0, NULL, 0); + for (i = 0; i < ARRAY_SIZE(detach_tests); i++) { + char test_name[50]; + + snprintf(test_name, sizeof(test_name), "%s_detach", detach_tests[i]); + if (!test__start_subtest(test_name)) + continue; + test_module_attach_detach(detach_tests[i]); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index f8eb7f9d4fd2..8fade8bdc451 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -6,11 +6,13 @@ #include <netinet/in.h> #include <test_progs.h> #include <unistd.h> +#include <errno.h> #include "cgroup_helpers.h" #include "network_helpers.h" #include "mptcp_sock.skel.h" #include "mptcpify.skel.h" #include "mptcp_subflow.skel.h" +#include "mptcp_sockmap.skel.h" #define NS_TEST "mptcp_ns" #define ADDR_1 "10.0.1.1" @@ -436,6 +438,142 @@ close_cgroup: close(cgroup_fd); } +/* Test sockmap on MPTCP server handling non-mp-capable clients. */ +static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, client_fd1 = -1, client_fd2 = -1; + int server_fd1 = -1, server_fd2 = -1, sent, recvd; + char snd[9] = "123456789"; + char rcv[10]; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client without MPTCP enabled */ + client_fd1 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd1 = accept(listen_fd, NULL, 0); + skel->bss->sk_index = 1; + client_fd2 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd2 = accept(listen_fd, NULL, 0); + /* test normal redirect behavior: data sent by client_fd1 can be + * received by client_fd2 + */ + skel->bss->redirect_idx = 1; + sent = send(client_fd1, snd, sizeof(snd), 0); + if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)")) + goto end; + + /* try to recv more bytes to avoid truncation check */ + recvd = recv(client_fd2, rcv, sizeof(rcv), 0); + if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)")) + goto end; + +end: + if (client_fd1 >= 0) + close(client_fd1); + if (client_fd2 >= 0) + close(client_fd2); + if (server_fd1 >= 0) + close(server_fd1); + if (server_fd2 >= 0) + close(server_fd2); + close(listen_fd); +} + +/* Test sockmap rejection of MPTCP sockets - both server and client sides. */ +static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, server_fd = -1, client_fd1 = -1; + int err, zero = 0; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client with MPTCP enabled */ + client_fd1 = connect_to_fd(listen_fd, 0); + if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1")) + goto end; + + /* bpf_sock_map_update() called from sockops should reject MPTCP sk */ + if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject")) + goto end; + + server_fd = accept(listen_fd, NULL, 0); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &server_fd, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed")) + goto end; + + /* MPTCP client should also be disallowed */ + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &client_fd1, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed")) + goto end; +end: + if (client_fd1 >= 0) + close(client_fd1); + if (server_fd >= 0) + close(server_fd); + close(listen_fd); +} + +static void test_mptcp_sockmap(void) +{ + struct mptcp_sockmap *skel; + struct netns_obj *netns; + int cgroup_fd, err; + + cgroup_fd = test__join_cgroup("/mptcp_sockmap"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap")) + return; + + skel = mptcp_sockmap__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap")) + goto close_cgroup; + + skel->links.mptcp_sockmap_inject = + bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap")) + goto skel_destroy; + + err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect), + bpf_map__fd(skel->maps.sock_map), + BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto skel_destroy; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap")) + goto skel_destroy; + + if (endpoint_init("subflow") < 0) + goto close_netns; + + test_sockmap_with_mptcp_fallback(skel); + test_sockmap_reject_mptcp(skel); + +close_netns: + netns_free(netns); +skel_destroy: + mptcp_sockmap__destroy(skel); +close_cgroup: + close(cgroup_fd); +} + void test_mptcp(void) { if (test__start_subtest("base")) @@ -444,4 +582,6 @@ void test_mptcp(void) test_mptcpify(); if (test__start_subtest("subflow")) test_subflow(); + if (test__start_subtest("sockmap")) + test_mptcp_sockmap(); } diff --git a/tools/testing/selftests/bpf/prog_tests/net_timestamping.c b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c new file mode 100644 index 000000000000..dbfd87499b6b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c @@ -0,0 +1,239 @@ +#include <linux/net_tstamp.h> +#include <sys/time.h> +#include <linux/errqueue.h> +#include "test_progs.h" +#include "network_helpers.h" +#include "net_timestamping.skel.h" + +#define CG_NAME "/net-timestamping-test" +#define NSEC_PER_SEC 1000000000LL + +static const char addr4_str[] = "127.0.0.1"; +static const char addr6_str[] = "::1"; +static struct net_timestamping *skel; +static const int cfg_payload_len = 30; +static struct timespec usr_ts; +static u64 delay_tolerance_nsec = 10000000000; /* 10 seconds */ +int SK_TS_SCHED; +int SK_TS_TXSW; +int SK_TS_ACK; + +static int64_t timespec_to_ns64(struct timespec *ts) +{ + return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec; +} + +static void validate_key(int tskey, int tstype) +{ + static int expected_tskey = -1; + + if (tstype == SCM_TSTAMP_SCHED) + expected_tskey = cfg_payload_len - 1; + + ASSERT_EQ(expected_tskey, tskey, "tskey mismatch"); + + expected_tskey = tskey; +} + +static void validate_timestamp(struct timespec *cur, struct timespec *prev) +{ + int64_t cur_ns, prev_ns; + + cur_ns = timespec_to_ns64(cur); + prev_ns = timespec_to_ns64(prev); + + ASSERT_LT(cur_ns - prev_ns, delay_tolerance_nsec, "latency"); +} + +static void test_socket_timestamp(struct scm_timestamping *tss, int tstype, + int tskey) +{ + static struct timespec prev_ts; + + validate_key(tskey, tstype); + + switch (tstype) { + case SCM_TSTAMP_SCHED: + validate_timestamp(&tss->ts[0], &usr_ts); + SK_TS_SCHED += 1; + break; + case SCM_TSTAMP_SND: + validate_timestamp(&tss->ts[0], &prev_ts); + SK_TS_TXSW += 1; + break; + case SCM_TSTAMP_ACK: + validate_timestamp(&tss->ts[0], &prev_ts); + SK_TS_ACK += 1; + break; + } + + prev_ts = tss->ts[0]; +} + +static void test_recv_errmsg_cmsg(struct msghdr *msg) +{ + struct sock_extended_err *serr = NULL; + struct scm_timestamping *tss = NULL; + struct cmsghdr *cm; + + for (cm = CMSG_FIRSTHDR(msg); + cm && cm->cmsg_len; + cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level == SOL_SOCKET && + cm->cmsg_type == SCM_TIMESTAMPING) { + tss = (void *)CMSG_DATA(cm); + } else if ((cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_RECVERR) || + (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_RECVERR) || + (cm->cmsg_level == SOL_PACKET && + cm->cmsg_type == PACKET_TX_TIMESTAMP)) { + serr = (void *)CMSG_DATA(cm); + ASSERT_EQ(serr->ee_origin, SO_EE_ORIGIN_TIMESTAMPING, + "cmsg type"); + } + + if (serr && tss) + test_socket_timestamp(tss, serr->ee_info, + serr->ee_data); + } +} + +static bool socket_recv_errmsg(int fd) +{ + static char ctrl[1024 /* overprovision*/]; + char data[cfg_payload_len]; + static struct msghdr msg; + struct iovec entry; + int n = 0; + + memset(&msg, 0, sizeof(msg)); + memset(&entry, 0, sizeof(entry)); + memset(ctrl, 0, sizeof(ctrl)); + + entry.iov_base = data; + entry.iov_len = cfg_payload_len; + msg.msg_iov = &entry; + msg.msg_iovlen = 1; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + n = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (n == -1) + ASSERT_EQ(errno, EAGAIN, "recvmsg MSG_ERRQUEUE"); + + if (n >= 0) + test_recv_errmsg_cmsg(&msg); + + return n == -1; +} + +static void test_socket_timestamping(int fd) +{ + while (!socket_recv_errmsg(fd)); + + ASSERT_EQ(SK_TS_SCHED, 1, "SCM_TSTAMP_SCHED"); + ASSERT_EQ(SK_TS_TXSW, 1, "SCM_TSTAMP_SND"); + ASSERT_EQ(SK_TS_ACK, 1, "SCM_TSTAMP_ACK"); + + SK_TS_SCHED = 0; + SK_TS_TXSW = 0; + SK_TS_ACK = 0; +} + +static void test_tcp(int family, bool enable_socket_timestamping) +{ + struct net_timestamping__bss *bss; + char buf[cfg_payload_len]; + int sfd = -1, cfd = -1; + unsigned int sock_opt; + struct netns_obj *ns; + int cg_fd; + int ret; + + cg_fd = test__join_cgroup(CG_NAME); + if (!ASSERT_OK_FD(cg_fd, "join cgroup")) + return; + + ns = netns_new("net_timestamping_ns", true); + if (!ASSERT_OK_PTR(ns, "create ns")) + goto out; + + skel = net_timestamping__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skel")) + goto out; + + if (!ASSERT_OK(net_timestamping__attach(skel), "attach skel")) + goto out; + + skel->links.skops_sockopt = + bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd); + if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup")) + goto out; + + bss = skel->bss; + memset(bss, 0, sizeof(*bss)); + + skel->bss->monitored_pid = getpid(); + + sfd = start_server(family, SOCK_STREAM, + family == AF_INET6 ? addr6_str : addr4_str, 0, 0); + if (!ASSERT_OK_FD(sfd, "start_server")) + goto out; + + cfd = connect_to_fd(sfd, 0); + if (!ASSERT_OK_FD(cfd, "connect_to_fd_server")) + goto out; + + if (enable_socket_timestamping) { + sock_opt = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID | + SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK; + ret = setsockopt(cfd, SOL_SOCKET, SO_TIMESTAMPING, + (char *) &sock_opt, sizeof(sock_opt)); + if (!ASSERT_OK(ret, "setsockopt SO_TIMESTAMPING")) + goto out; + + ret = clock_gettime(CLOCK_REALTIME, &usr_ts); + if (!ASSERT_OK(ret, "get user time")) + goto out; + } + + ret = write(cfd, buf, sizeof(buf)); + if (!ASSERT_EQ(ret, sizeof(buf), "send to server")) + goto out; + + if (enable_socket_timestamping) + test_socket_timestamping(cfd); + + ASSERT_EQ(bss->nr_active, 1, "nr_active"); + ASSERT_EQ(bss->nr_snd, 2, "nr_snd"); + ASSERT_EQ(bss->nr_sched, 1, "nr_sched"); + ASSERT_EQ(bss->nr_txsw, 1, "nr_txsw"); + ASSERT_EQ(bss->nr_ack, 1, "nr_ack"); + +out: + if (sfd >= 0) + close(sfd); + if (cfd >= 0) + close(cfd); + net_timestamping__destroy(skel); + netns_free(ns); + close(cg_fd); +} + +void test_net_timestamping(void) +{ + if (test__start_subtest("INET4: bpf timestamping")) + test_tcp(AF_INET, false); + if (test__start_subtest("INET4: bpf and socket timestamping")) + test_tcp(AF_INET, true); + if (test__start_subtest("INET6: bpf timestamping")) + test_tcp(AF_INET6, false); + if (test__start_subtest("INET6: bpf and socket timestamping")) + test_tcp(AF_INET6, true); +} diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c index ac3c3c097c0e..e00cd34586dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c @@ -33,20 +33,25 @@ void test_netns_cookie(void) skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup( skel->progs.get_netns_cookie_sockops, cgroup_fd); - if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach")) + if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach_sockops")) goto done; verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg); map = bpf_map__fd(skel->maps.sock_map); err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0); - if (!ASSERT_OK(err, "prog_attach")) + if (!ASSERT_OK(err, "prog_attach_sk_msg")) goto done; tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx); err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta); - if (!ASSERT_OK(err, "prog_attach")) + if (!ASSERT_OK(err, "prog_attach_tcx")) goto done; + skel->links.get_netns_cookie_cgroup_skb = bpf_program__attach_cgroup( + skel->progs.get_netns_cookie_cgroup_skb, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_cgroup_skb, "prog_attach_cgroup_skb")) + goto cleanup_tc; + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno)) goto cleanup_tc; @@ -69,16 +74,18 @@ void test_netns_cookie(void) if (!ASSERT_OK(err, "getsockopt")) goto cleanup_tc; - ASSERT_EQ(val, cookie_expected_value, "cookie_value"); + ASSERT_EQ(val, cookie_expected_value, "cookie_value_sockops"); err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies), &client_fd, &val); if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)")) goto cleanup_tc; - ASSERT_EQ(val, cookie_expected_value, "cookie_value"); - ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value"); - ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value"); + ASSERT_EQ(val, cookie_expected_value, "cookie_value_sk_msg"); + ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value_init_tcx"); + ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value_tcx"); + ASSERT_EQ(skel->bss->cgroup_skb_init_netns_cookie, cookie_expected_value, "cookie_value_init_cgroup_skb"); + ASSERT_EQ(skel->bss->cgroup_skb_netns_cookie, cookie_expected_value, "cookie_value_cgroup_skb"); cleanup_tc: err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd); diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 761ce24bce38..99c953f2be21 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -200,41 +200,28 @@ static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg) return; } -static void test_in_netns(int (*fn)(void *), void *arg) -{ - struct nstoken *nstoken = NULL; - - SYS(cleanup, "ip netns add ns_current_pid_tgid"); - SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up"); - - nstoken = open_netns("ns_current_pid_tgid"); - if (!ASSERT_OK_PTR(nstoken, "open_netns")) - goto cleanup; - - test_ns_current_pid_tgid_new_ns(fn, arg); - -cleanup: - if (nstoken) - close_netns(nstoken); - SYS_NOFAIL("ip netns del ns_current_pid_tgid"); -} - /* TODO: use a different tracepoint */ -void serial_test_ns_current_pid_tgid(void) +void serial_test_current_pid_tgid(void) { if (test__start_subtest("root_ns_tp")) test_current_pid_tgid_tp(NULL); if (test__start_subtest("new_ns_tp")) test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL); - if (test__start_subtest("new_ns_cgrp")) { - int cgroup_fd = -1; - - cgroup_fd = test__join_cgroup("/sock_addr"); - if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) { - test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd); - close(cgroup_fd); - } +} + +void test_ns_current_pid_tgid_cgrp(void) +{ + int cgroup_fd = test__join_cgroup("/sock_addr"); + + if (ASSERT_OK_FD(cgroup_fd, "join_cgroup")) { + test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_cgrp, &cgroup_fd); + close(cgroup_fd); } - if (test__start_subtest("new_ns_sk_msg")) - test_in_netns(test_current_pid_tgid_sk_msg, NULL); } + +void test_ns_current_pid_tgid_sk_msg(void) +{ + test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_sk_msg, NULL); +} + + diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c index 343da65864d6..a72ae0b29f6e 100644 --- a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c +++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "cgroup_helpers.h" #include "percpu_alloc_array.skel.h" #include "percpu_alloc_cgrp_local_storage.skel.h" #include "percpu_alloc_fail.skel.h" @@ -115,6 +116,328 @@ static void test_failure(void) { RUN_TESTS(percpu_alloc_fail); } +static void test_percpu_map_op_cpu_flag(struct bpf_map *map, void *keys, size_t key_sz, u32 entries, + int nr_cpus, bool test_batch) +{ + size_t value_sz = sizeof(u32), value_sz_cpus, value_sz_total; + u32 *values = NULL, *values_percpu = NULL; + const u32 value = 0xDEADC0DE; + int i, j, cpu, map_fd, err; + u64 batch = 0, flags; + void *values_row; + u32 count, v; + LIBBPF_OPTS(bpf_map_batch_opts, batch_opts); + + value_sz_cpus = value_sz * nr_cpus; + values = calloc(entries, value_sz_cpus); + if (!ASSERT_OK_PTR(values, "calloc values")) + return; + + values_percpu = calloc(entries, roundup(value_sz, 8) * nr_cpus); + if (!ASSERT_OK_PTR(values_percpu, "calloc values_percpu")) { + free(values); + return; + } + + value_sz_total = value_sz_cpus * entries; + memset(values, 0, value_sz_total); + + map_fd = bpf_map__fd(map); + flags = BPF_F_CPU | BPF_F_ALL_CPUS; + err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem_flags cpu|all_cpus")) + goto out; + + err = bpf_map_update_elem(map_fd, keys, values, flags); + if (!ASSERT_ERR(err, "bpf_map_update_elem cpu|all_cpus")) + goto out; + + flags = BPF_F_ALL_CPUS; + err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem_flags all_cpus")) + goto out; + + flags = BPF_F_LOCK | BPF_F_CPU; + err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem_flags BPF_F_LOCK")) + goto out; + + flags = BPF_F_LOCK | BPF_F_ALL_CPUS; + err = bpf_map_update_elem(map_fd, keys, values, flags); + if (!ASSERT_ERR(err, "bpf_map_update_elem BPF_F_LOCK")) + goto out; + + flags = (u64)nr_cpus << 32 | BPF_F_CPU; + err = bpf_map_update_elem(map_fd, keys, values, flags); + if (!ASSERT_EQ(err, -ERANGE, "bpf_map_update_elem -ERANGE")) + goto out; + + err = bpf_map__update_elem(map, keys, key_sz, values, value_sz, flags); + if (!ASSERT_EQ(err, -ERANGE, "bpf_map__update_elem -ERANGE")) + goto out; + + err = bpf_map_lookup_elem_flags(map_fd, keys, values, flags); + if (!ASSERT_EQ(err, -ERANGE, "bpf_map_lookup_elem_flags -ERANGE")) + goto out; + + err = bpf_map__lookup_elem(map, keys, key_sz, values, value_sz, flags); + if (!ASSERT_EQ(err, -ERANGE, "bpf_map__lookup_elem -ERANGE")) + goto out; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + /* clear value on all cpus */ + values[0] = 0; + flags = BPF_F_ALL_CPUS; + for (i = 0; i < entries; i++) { + err = bpf_map__update_elem(map, keys + i * key_sz, key_sz, values, + value_sz, flags); + if (!ASSERT_OK(err, "bpf_map__update_elem all_cpus")) + goto out; + } + + /* update value on specified cpu */ + for (i = 0; i < entries; i++) { + values[0] = value; + flags = (u64)cpu << 32 | BPF_F_CPU; + err = bpf_map__update_elem(map, keys + i * key_sz, key_sz, values, + value_sz, flags); + if (!ASSERT_OK(err, "bpf_map__update_elem specified cpu")) + goto out; + + /* lookup then check value on CPUs */ + for (j = 0; j < nr_cpus; j++) { + flags = (u64)j << 32 | BPF_F_CPU; + err = bpf_map__lookup_elem(map, keys + i * key_sz, key_sz, values, + value_sz, flags); + if (!ASSERT_OK(err, "bpf_map__lookup_elem specified cpu")) + goto out; + if (!ASSERT_EQ(values[0], j != cpu ? 0 : value, + "bpf_map__lookup_elem value on specified cpu")) + goto out; + } + } + } + + if (!test_batch) + goto out; + + count = entries; + batch_opts.elem_flags = (u64)nr_cpus << 32 | BPF_F_CPU; + err = bpf_map_update_batch(map_fd, keys, values, &count, &batch_opts); + if (!ASSERT_EQ(err, -ERANGE, "bpf_map_update_batch -ERANGE")) + goto out; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + memset(values, 0, value_sz_total); + + /* clear values across all CPUs */ + count = entries; + batch_opts.elem_flags = BPF_F_ALL_CPUS; + err = bpf_map_update_batch(map_fd, keys, values, &count, &batch_opts); + if (!ASSERT_OK(err, "bpf_map_update_batch all_cpus")) + goto out; + if (!ASSERT_EQ(count, entries, "bpf_map_update_batch count")) + goto out; + + /* update values on specified CPU */ + for (i = 0; i < entries; i++) + values[i] = value; + + count = entries; + batch_opts.elem_flags = (u64)cpu << 32 | BPF_F_CPU; + err = bpf_map_update_batch(map_fd, keys, values, &count, &batch_opts); + if (!ASSERT_OK(err, "bpf_map_update_batch specified cpu")) + goto out; + if (!ASSERT_EQ(count, entries, "bpf_map_update_batch count")) + goto out; + + /* lookup values on specified CPU */ + batch = 0; + count = entries; + memset(values, 0, entries * value_sz); + err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values, &count, &batch_opts); + if (!ASSERT_TRUE(!err || err == -ENOENT, "bpf_map_lookup_batch specified cpu")) + goto out; + if (!ASSERT_EQ(count, entries, "bpf_map_lookup_batch count")) + goto out; + + for (i = 0; i < entries; i++) + if (!ASSERT_EQ(values[i], value, + "bpf_map_lookup_batch value on specified cpu")) + goto out; + + /* lookup values from all CPUs */ + batch = 0; + count = entries; + batch_opts.elem_flags = 0; + memset(values_percpu, 0, roundup(value_sz, 8) * nr_cpus * entries); + err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values_percpu, &count, + &batch_opts); + if (!ASSERT_TRUE(!err || err == -ENOENT, "bpf_map_lookup_batch all_cpus")) + goto out; + if (!ASSERT_EQ(count, entries, "bpf_map_lookup_batch count")) + goto out; + + for (i = 0; i < entries; i++) { + values_row = (void *) values_percpu + + roundup(value_sz, 8) * i * nr_cpus; + for (j = 0; j < nr_cpus; j++) { + v = *(u32 *) (values_row + roundup(value_sz, 8) * j); + if (!ASSERT_EQ(v, j != cpu ? 0 : value, + "bpf_map_lookup_batch value all_cpus")) + goto out; + } + } + } + +out: + free(values_percpu); + free(values); +} + +static void test_percpu_map_cpu_flag(enum bpf_map_type map_type) +{ + struct percpu_alloc_array *skel; + size_t key_sz = sizeof(int); + int *keys, nr_cpus, i, err; + struct bpf_map *map; + u32 max_entries; + + nr_cpus = libbpf_num_possible_cpus(); + if (!ASSERT_GT(nr_cpus, 0, "libbpf_num_possible_cpus")) + return; + + max_entries = nr_cpus * 2; + keys = calloc(max_entries, key_sz); + if (!ASSERT_OK_PTR(keys, "calloc keys")) + return; + + for (i = 0; i < max_entries; i++) + keys[i] = i; + + skel = percpu_alloc_array__open(); + if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open")) { + free(keys); + return; + } + + map = skel->maps.percpu; + bpf_map__set_type(map, map_type); + bpf_map__set_max_entries(map, max_entries); + + err = percpu_alloc_array__load(skel); + if (!ASSERT_OK(err, "test_percpu_alloc__load")) + goto out; + + test_percpu_map_op_cpu_flag(map, keys, key_sz, nr_cpus, nr_cpus, true); +out: + percpu_alloc_array__destroy(skel); + free(keys); +} + +static void test_percpu_array_cpu_flag(void) +{ + test_percpu_map_cpu_flag(BPF_MAP_TYPE_PERCPU_ARRAY); +} + +static void test_percpu_hash_cpu_flag(void) +{ + test_percpu_map_cpu_flag(BPF_MAP_TYPE_PERCPU_HASH); +} + +static void test_lru_percpu_hash_cpu_flag(void) +{ + test_percpu_map_cpu_flag(BPF_MAP_TYPE_LRU_PERCPU_HASH); +} + +static void test_percpu_cgroup_storage_cpu_flag(void) +{ + struct percpu_alloc_array *skel = NULL; + struct bpf_cgroup_storage_key key; + int cgroup, prog_fd, nr_cpus, err; + struct bpf_map *map; + + nr_cpus = libbpf_num_possible_cpus(); + if (!ASSERT_GT(nr_cpus, 0, "libbpf_num_possible_cpus")) + return; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "setup_cgroup_environment")) + return; + + cgroup = create_and_get_cgroup("/cg_percpu"); + if (!ASSERT_GE(cgroup, 0, "create_and_get_cgroup")) { + cleanup_cgroup_environment(); + return; + } + + err = join_cgroup("/cg_percpu"); + if (!ASSERT_OK(err, "join_cgroup")) + goto out; + + skel = percpu_alloc_array__open_and_load(); + if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open_and_load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.cgroup_egress); + err = bpf_prog_attach(prog_fd, cgroup, BPF_CGROUP_INET_EGRESS, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + map = skel->maps.percpu_cgroup_storage; + err = bpf_map_get_next_key(bpf_map__fd(map), NULL, &key); + if (!ASSERT_OK(err, "bpf_map_get_next_key")) + goto out; + + test_percpu_map_op_cpu_flag(map, &key, sizeof(key), 1, nr_cpus, false); +out: + bpf_prog_detach2(-1, cgroup, BPF_CGROUP_INET_EGRESS); + close(cgroup); + cleanup_cgroup_environment(); + percpu_alloc_array__destroy(skel); +} + +static void test_map_op_cpu_flag(enum bpf_map_type map_type) +{ + u32 max_entries = 1, count = max_entries; + u64 flags, batch = 0, val = 0; + int err, map_fd, key = 0; + LIBBPF_OPTS(bpf_map_batch_opts, batch_opts); + + map_fd = bpf_map_create(map_type, "test_cpu_flag", sizeof(int), sizeof(u64), max_entries, + NULL); + if (!ASSERT_GE(map_fd, 0, "bpf_map_create")) + return; + + flags = BPF_F_ALL_CPUS; + err = bpf_map_update_elem(map_fd, &key, &val, flags); + ASSERT_ERR(err, "bpf_map_update_elem all_cpus"); + + batch_opts.elem_flags = BPF_F_ALL_CPUS; + err = bpf_map_update_batch(map_fd, &key, &val, &count, &batch_opts); + ASSERT_ERR(err, "bpf_map_update_batch all_cpus"); + + flags = BPF_F_CPU; + err = bpf_map_lookup_elem_flags(map_fd, &key, &val, flags); + ASSERT_ERR(err, "bpf_map_lookup_elem_flags cpu"); + + batch_opts.elem_flags = BPF_F_CPU; + err = bpf_map_lookup_batch(map_fd, NULL, &batch, &key, &val, &count, &batch_opts); + ASSERT_ERR(err, "bpf_map_lookup_batch cpu"); + + close(map_fd); +} + +static void test_array_cpu_flag(void) +{ + test_map_op_cpu_flag(BPF_MAP_TYPE_ARRAY); +} + +static void test_hash_cpu_flag(void) +{ + test_map_op_cpu_flag(BPF_MAP_TYPE_HASH); +} + void test_percpu_alloc(void) { if (test__start_subtest("array")) @@ -125,4 +448,16 @@ void test_percpu_alloc(void) test_cgrp_local_storage(); if (test__start_subtest("failure_tests")) test_failure(); + if (test__start_subtest("cpu_flag_percpu_array")) + test_percpu_array_cpu_flag(); + if (test__start_subtest("cpu_flag_percpu_hash")) + test_percpu_hash_cpu_flag(); + if (test__start_subtest("cpu_flag_lru_percpu_hash")) + test_lru_percpu_hash_cpu_flag(); + if (test__start_subtest("cpu_flag_percpu_cgroup_storage")) + test_percpu_cgroup_storage_cpu_flag(); + if (test__start_subtest("cpu_flag_array")) + test_array_cpu_flag(); + if (test__start_subtest("cpu_flag_hash")) + test_hash_cpu_flag(); } diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_array_inner_map.c b/tools/testing/selftests/bpf/prog_tests/percpu_array_inner_map.c new file mode 100644 index 000000000000..2a8b2381306b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/percpu_array_inner_map.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +/* + * Test that replacing an inner percpu array map with one that has different + * max_entries is rejected. percpu_array_map_gen_lookup() inlines the + * template's index_mask, so allowing a smaller replacement would cause OOB. + */ +void test_percpu_array_inner_map(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts); + int outer_fd, tmpl_fd, good_fd, bad_fd, err; + int zero = 0; + + /* Create template: percpu array with 8 entries */ + tmpl_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "tmpl", + sizeof(int), sizeof(long), 8, NULL); + if (!ASSERT_OK_FD(tmpl_fd, "create_tmpl")) + return; + + /* Create outer array-of-maps using template */ + opts.inner_map_fd = tmpl_fd; + outer_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer", + sizeof(int), sizeof(int), 1, &opts); + if (!ASSERT_OK_FD(outer_fd, "create_outer")) + goto close_tmpl; + + /* Insert template as initial inner map */ + err = bpf_map_update_elem(outer_fd, &zero, &tmpl_fd, 0); + if (!ASSERT_OK(err, "insert_tmpl")) + goto close_outer; + + /* Replacement with same max_entries should succeed */ + good_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "good", + sizeof(int), sizeof(long), 8, NULL); + if (!ASSERT_OK_FD(good_fd, "create_good")) + goto close_outer; + + err = bpf_map_update_elem(outer_fd, &zero, &good_fd, 0); + ASSERT_OK(err, "replace_same_max_entries"); + close(good_fd); + + /* Replacement with fewer max_entries must fail */ + bad_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "bad", + sizeof(int), sizeof(long), 2, NULL); + if (!ASSERT_OK_FD(bad_fd, "create_bad")) + goto close_outer; + + err = bpf_map_update_elem(outer_fd, &zero, &bad_fd, 0); + ASSERT_ERR(err, "replace_smaller_max_entries"); + close(bad_fd); + +close_outer: + close(outer_fd); +close_tmpl: + close(tmpl_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c index bc24f83339d6..0a7ef770c487 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c @@ -15,6 +15,10 @@ static void check_good_sample(struct test_perf_branches *skel) int pbe_size = sizeof(struct perf_branch_entry); int duration = 0; + if (CHECK(!skel->bss->run_cnt, "invalid run_cnt", + "checked sample validity before prog run")) + return; + if (CHECK(!skel->bss->valid, "output not valid", "no valid sample from prog")) return; @@ -45,6 +49,10 @@ static void check_bad_sample(struct test_perf_branches *skel) int written_stack = skel->bss->written_stack_out; int duration = 0; + if (CHECK(!skel->bss->run_cnt, "invalid run_cnt", + "checked sample validity before prog run")) + return; + if (CHECK(!skel->bss->valid, "output not valid", "no valid sample from prog")) return; @@ -83,8 +91,12 @@ static void test_perf_branches_common(int perf_fd, err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err)) goto out_destroy; - /* spin the loop for a while (random high number) */ - for (i = 0; i < 1000000; ++i) + + /* Spin the loop for a while by using a high iteration count, and by + * checking whether the specific run count marker has been explicitly + * incremented at least once by the backing perf_event BPF program. + */ + for (i = 0; i < 100000000 && !*(volatile int *)&skel->bss->run_cnt; ++i) ++j; test_perf_branches__detach(skel); @@ -116,11 +128,11 @@ static void test_perf_branches_hw(void) pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); /* - * Some setups don't support branch records (virtual machines, !x86), - * so skip test in this case. + * Some setups don't support LBR (virtual machines, !x86, AMD Milan Zen + * 3 which only supports BRS), so skip test in this case. */ if (pfd < 0) { - if (errno == ENOENT || errno == EOPNOTSUPP) { + if (errno == ENOENT || errno == EOPNOTSUPP || errno == EINVAL) { printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n", __func__); test__skip(); diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c index d940ff87fa08..9e3a0d217af8 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_link.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ #define _GNU_SOURCE -#include <pthread.h> -#include <sched.h> +#include <linux/compiler.h> #include <test_progs.h> #include "testing_helpers.h" #include "test_perf_link.skel.h" @@ -12,23 +11,14 @@ static void burn_cpu(void) { - volatile int j = 0; - cpu_set_t cpu_set; - int i, err; - - /* generate some branches on cpu 0 */ - CPU_ZERO(&cpu_set); - CPU_SET(0, &cpu_set); - err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); - ASSERT_OK(err, "set_thread_affinity"); + int i; /* spin the loop for a while (random high number) */ for (i = 0; i < 1000000; ++i) - ++j; + barrier(); } -/* TODO: often fails in concurrent mode */ -void serial_test_perf_link(void) +void test_perf_link(void) { struct test_perf_link *skel = NULL; struct perf_event_attr attr; @@ -45,7 +35,7 @@ void serial_test_perf_link(void) attr.config = PERF_COUNT_SW_CPU_CLOCK; attr.freq = 1; attr.sample_freq = 1000; - pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); if (!ASSERT_GE(pfd, 0, "perf_fd")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c new file mode 100644 index 000000000000..9ae49b587f3e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <test_progs.h> + + +#include "test_pinning_devmap.skel.h" + +void test_pinning_devmap_reuse(void) +{ + const char *pinpath1 = "/sys/fs/bpf/pinmap1"; + const char *pinpath2 = "/sys/fs/bpf/pinmap2"; + struct test_pinning_devmap *skel1 = NULL, *skel2 = NULL; + int err; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + + /* load the object a first time */ + skel1 = test_pinning_devmap__open_and_load(); + if (!ASSERT_OK_PTR(skel1, "skel_load1")) + goto out; + + /* load the object a second time, re-using the pinned map */ + skel2 = test_pinning_devmap__open_and_load(); + if (!ASSERT_OK_PTR(skel2, "skel_load2")) + goto out; + + /* we can close the reference safely without + * the map's refcount falling to 0 + */ + test_pinning_devmap__destroy(skel1); + skel1 = NULL; + + /* now, swap the pins */ + err = renameat2(0, pinpath1, 0, pinpath2, RENAME_EXCHANGE); + if (!ASSERT_OK(err, "swap pins")) + goto out; + + /* load the object again, this time the re-use should fail */ + skel1 = test_pinning_devmap__open_and_load(); + if (!ASSERT_ERR_PTR(skel1, "skel_load3")) + goto out; + +out: + unlink(pinpath1); + unlink(pinpath2); + test_pinning_devmap__destroy(skel1); + test_pinning_devmap__destroy(skel2); +} diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_htab.c b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c new file mode 100644 index 000000000000..16bd74be3dbe --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "test_pinning_htab.skel.h" + +static void unpin_map(const char *map_name, const char *pin_path) +{ + struct test_pinning_htab *skel; + struct bpf_map *map; + int err; + + skel = test_pinning_htab__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + return; + + map = bpf_object__find_map_by_name(skel->obj, map_name); + if (!ASSERT_OK_PTR(map, "bpf_object__find_map_by_name")) + goto out; + + err = bpf_map__pin(map, pin_path); + if (!ASSERT_OK(err, "bpf_map__pin")) + goto out; + + err = bpf_map__unpin(map, pin_path); + ASSERT_OK(err, "bpf_map__unpin"); +out: + test_pinning_htab__destroy(skel); +} + +void test_pinning_htab(void) +{ + if (test__start_subtest("timer_prealloc")) + unpin_map("timer_prealloc", "/sys/fs/bpf/timer_prealloc"); + if (test__start_subtest("timer_no_prealloc")) + unpin_map("timer_no_prealloc", "/sys/fs/bpf/timer_no_prealloc"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/prepare.c b/tools/testing/selftests/bpf/prog_tests/prepare.c new file mode 100644 index 000000000000..fb5cdad97116 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/prepare.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta */ + +#include <test_progs.h> +#include <network_helpers.h> +#include "prepare.skel.h" + +static bool check_prepared(struct bpf_object *obj) +{ + bool is_prepared = true; + const struct bpf_map *map; + + bpf_object__for_each_map(map, obj) { + if (bpf_map__fd(map) < 0) + is_prepared = false; + } + + return is_prepared; +} + +static void test_prepare_no_load(void) +{ + struct prepare *skel; + int err; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + ); + + skel = prepare__open(); + if (!ASSERT_OK_PTR(skel, "prepare__open")) + return; + + if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared")) + goto cleanup; + + err = bpf_object__prepare(skel->obj); + + if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared")) + goto cleanup; + + if (!ASSERT_OK(err, "bpf_object__prepare")) + goto cleanup; + +cleanup: + prepare__destroy(skel); +} + +static void test_prepare_load(void) +{ + struct prepare *skel; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + ); + + skel = prepare__open(); + if (!ASSERT_OK_PTR(skel, "prepare__open")) + return; + + if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared")) + goto cleanup; + + err = bpf_object__prepare(skel->obj); + if (!ASSERT_OK(err, "bpf_object__prepare")) + goto cleanup; + + err = prepare__load(skel); + if (!ASSERT_OK(err, "prepare__load")) + goto cleanup; + + if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.program); + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) + goto cleanup; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + goto cleanup; + + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + goto cleanup; + + ASSERT_EQ(skel->bss->err, 0, "err"); + +cleanup: + prepare__destroy(skel); +} + +void test_prepare(void) +{ + if (test__start_subtest("prepare_load")) + test_prepare_load(); + if (test__start_subtest("prepare_no_load")) + test_prepare_no_load(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c index 509883e6823a..5d3c00a08a88 100644 --- a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c +++ b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c @@ -6,6 +6,7 @@ #include "epilogue_tailcall.skel.h" #include "pro_epilogue_goto_start.skel.h" #include "epilogue_exit.skel.h" +#include "pro_epilogue_with_kfunc.skel.h" struct st_ops_args { __u64 a; @@ -55,6 +56,7 @@ void test_pro_epilogue(void) RUN_TESTS(pro_epilogue); RUN_TESTS(pro_epilogue_goto_start); RUN_TESTS(epilogue_exit); + RUN_TESTS(pro_epilogue_with_kfunc); if (test__start_subtest("tailcall")) test_tailcall(); } diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index 8721671321de..7a9d13aa2c87 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> -/* TODO: corrupts other tests uses connect() */ -void serial_test_probe_user(void) +void test_probe_user(void) { static const char *const prog_names[] = { "handle_sys_connect", @@ -20,6 +19,11 @@ void serial_test_probe_user(void) struct bpf_program *kprobe_progs[prog_count]; struct bpf_object *obj; static const int zero = 0; + struct test_pro_bss { + struct sockaddr_in old; + __u32 test_pid; + }; + struct test_pro_bss results = {}; size_t i; obj = bpf_object__open_file(obj_file, &opts); @@ -34,6 +38,23 @@ void serial_test_probe_user(void) goto cleanup; } + { + struct bpf_map *bss_map; + struct test_pro_bss bss_init = {}; + + bss_init.test_pid = getpid(); + bss_map = bpf_object__find_map_by_name(obj, "test_pro.bss"); + if (!ASSERT_OK_PTR(bss_map, "find_bss_map")) + goto cleanup; + if (!ASSERT_EQ(bpf_map__value_size(bss_map), sizeof(bss_init), + "bss_size")) + goto cleanup; + err = bpf_map__set_initial_value(bss_map, &bss_init, + sizeof(bss_init)); + if (!ASSERT_OK(err, "set_bss_init")) + goto cleanup; + } + err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) goto cleanup; @@ -62,11 +83,13 @@ void serial_test_probe_user(void) connect(sock_fd, &curr, sizeof(curr)); close(sock_fd); - err = bpf_map_lookup_elem(results_map_fd, &zero, &tmp); + err = bpf_map_lookup_elem(results_map_fd, &zero, &results); if (CHECK(err, "get_kprobe_res", "failed to get kprobe res: %d\n", err)) goto cleanup; + memcpy(&tmp, &results.old, sizeof(tmp)); + in = (struct sockaddr_in *)&tmp; if (CHECK(memcmp(&tmp, &orig, sizeof(orig)), "check_kprobe_res", "wrong kprobe res from probe read: %s:%u\n", diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c index 14f2796076e0..7607cfc2408c 100644 --- a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c +++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c @@ -54,3 +54,128 @@ void test_prog_tests_framework(void) return; clear_test_state(state); } + +static void dummy_emit(const char *buf, bool force) {} + +void test_prog_tests_framework_expected_msgs(void) +{ + struct expected_msgs msgs; + int i, j, error_cnt; + const struct { + const char *name; + const char *log; + const char *expected; + struct expect_msg *pats; + } cases[] = { + { + .name = "simple-ok", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "simple-fail", + .log = "aaabbbddd", + .expected = "MATCHED SUBSTR: 'aaa'\n" + "EXPECTED SUBSTR: 'ccc'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-ok-mid", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "foo", .negative = true }, + { .substr = "bar", .negative = true }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-ok-tail", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "foo", .negative = true }, + {} + } + }, + { + .name = "negative-ok-head", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "foo", .negative = true }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-fail-head", + .log = "aaabbbccc", + .expected = "UNEXPECTED SUBSTR: 'aaa'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa", .negative = true }, + { .substr = "bbb" }, + {} + } + }, + { + .name = "negative-fail-tail", + .log = "aaabbbccc", + .expected = "UNEXPECTED SUBSTR: 'ccc'\n", + .pats = (struct expect_msg[]) { + { .substr = "bbb" }, + { .substr = "ccc", .negative = true }, + {} + } + }, + { + .name = "negative-fail-mid-1", + .log = "aaabbbccc", + .expected = "UNEXPECTED SUBSTR: 'bbb'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "bbb", .negative = true }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-fail-mid-2", + .log = "aaabbb222ccc", + .expected = "UNEXPECTED SUBSTR: '222'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "222", .negative = true }, + { .substr = "bbb", .negative = true }, + { .substr = "ccc" }, + {} + } + } + }; + + for (i = 0; i < ARRAY_SIZE(cases); i++) { + if (test__start_subtest(cases[i].name)) { + error_cnt = env.subtest_state->error_cnt; + msgs.patterns = cases[i].pats; + msgs.cnt = 0; + for (j = 0; cases[i].pats[j].substr; j++) + msgs.cnt++; + validate_msgs(cases[i].log, &msgs, dummy_emit); + fflush(stderr); + env.subtest_state->error_cnt = error_cnt; + if (cases[i].expected) + ASSERT_HAS_SUBSTR(env.subtest_state->log_buf, cases[i].expected, "expected output"); + else + ASSERT_STREQ(env.subtest_state->log_buf, "", "expected no output"); + test__end_subtest(); + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index a043af9cd6d9..41441325e179 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -28,9 +28,9 @@ static void test_queue_stack_map_by_type(int type) vals[i] = rand(); if (type == QUEUE) - strncpy(file, "./test_queue_map.bpf.o", sizeof(file)); + strscpy(file, "./test_queue_map.bpf.o"); else if (type == STACK) - strncpy(file, "./test_stack_map.bpf.o", sizeof(file)); + strscpy(file, "./test_stack_map.bpf.o"); else return; diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c index 9818f06c97c5..a854fb38e418 100644 --- a/tools/testing/selftests/bpf/prog_tests/rbtree.c +++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c @@ -8,6 +8,8 @@ #include "rbtree_fail.skel.h" #include "rbtree_btf_fail__wrong_node_type.skel.h" #include "rbtree_btf_fail__add_wrong_type.skel.h" +#include "rbtree_search.skel.h" +#include "rbtree_search_kptr.skel.h" static void test_rbtree_add_nodes(void) { @@ -187,3 +189,13 @@ void test_rbtree_fail(void) { RUN_TESTS(rbtree_fail); } + +void test_rbtree_search(void) +{ + RUN_TESTS(rbtree_search); +} + +void test_rbtree_search_kptr(void) +{ + RUN_TESTS(rbtree_search_kptr); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index ebe0c12b5536..246eb259c08a 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -28,6 +28,7 @@ static void test_success(void) bpf_program__set_autoload(skel->progs.two_regions, true); bpf_program__set_autoload(skel->progs.non_sleepable_1, true); bpf_program__set_autoload(skel->progs.non_sleepable_2, true); + bpf_program__set_autoload(skel->progs.nested_rcu_region, true); bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true); bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true); bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true); @@ -78,9 +79,13 @@ static const char * const inproper_region_tests[] = { "non_sleepable_rcu_mismatch", "inproper_sleepable_helper", "inproper_sleepable_kfunc", - "nested_rcu_region", + "nested_rcu_region_unbalanced_1", + "nested_rcu_region_unbalanced_2", "rcu_read_lock_global_subprog_lock", "rcu_read_lock_global_subprog_unlock", + "rcu_read_lock_sleepable_helper_global_subprog", + "rcu_read_lock_sleepable_kfunc_global_subprog", + "rcu_read_lock_sleepable_global_subprog_indirect", }; static void test_inproper_region(void) diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c index c7b9ba8b1d06..a8d1eaa67020 100644 --- a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c @@ -24,6 +24,7 @@ struct read_ret_desc { { .name = "copy_from_user", .ret = -EFAULT }, { .name = "copy_from_user_task", .ret = -EFAULT }, { .name = "copy_from_user_str", .ret = -EFAULT }, + { .name = "copy_from_user_task_str", .ret = -EFAULT }, }; void test_read_vsyscall(void) diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c index 8100509e561b..0ffa01d54ce2 100644 --- a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c @@ -149,3 +149,70 @@ close_prog: fentry_recursive_target__destroy(target_skel); fentry_recursive__destroy(tracing_skel); } + +static void *fentry_target_test_run(void *arg) +{ + for (;;) { + int prog_fd = __atomic_load_n((int *)arg, __ATOMIC_SEQ_CST); + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err; + + if (prog_fd == -1) + break; + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "fentry_target test_run")) + break; + } + + return NULL; +} + +void test_fentry_attach_stress(void) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_skel = NULL; + struct bpf_program *prog; + int err, i, tgt_prog_fd; + pthread_t thread; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, + "fentry_recursive_target__open_and_load")) + goto close_prog; + tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target); + err = pthread_create(&thread, NULL, + fentry_target_test_run, &tgt_prog_fd); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + for (i = 0; i < 1000; i++) { + tracing_skel = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open")) + goto stop_thread; + + prog = tracing_skel->progs.recursive_attach; + err = bpf_program__set_attach_target(prog, tgt_prog_fd, + "fentry_target"); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto stop_thread; + + err = fentry_recursive__load(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto stop_thread; + + err = fentry_recursive__attach(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__attach")) + goto stop_thread; + + fentry_recursive__destroy(tracing_skel); + tracing_skel = NULL; + } + +stop_thread: + __atomic_store_n(&tgt_prog_fd, -1, __ATOMIC_SEQ_CST); + err = pthread_join(thread, NULL); + ASSERT_OK(err, "pthread_join"); +close_prog: + fentry_recursive__destroy(tracing_skel); + fentry_recursive_target__destroy(target_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c index d6bd5e16e637..d2c0542716a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c @@ -44,3 +44,59 @@ void test_refcounted_kptr_wrong_owner(void) ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval"); refcounted_kptr__destroy(skel); } + +void test_percpu_hash_refcounted_kptr_refcount_leak(void) +{ + struct refcounted_kptr *skel; + int cpu_nr, fd, err, key = 0; + struct bpf_map *map; + size_t values_sz; + u64 *values; + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + + cpu_nr = libbpf_num_possible_cpus(); + if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus")) + return; + + values = calloc(cpu_nr, sizeof(u64)); + if (!ASSERT_OK_PTR(values, "calloc values")) + return; + + skel = refcounted_kptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) { + free(values); + return; + } + + values_sz = cpu_nr * sizeof(u64); + memset(values, 0, values_sz); + + map = skel->maps.percpu_hash; + err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0); + if (!ASSERT_OK(err, "bpf_map__update_elem")) + goto out; + + fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak); + err = bpf_prog_test_run_opts(fd, &opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto out; + if (!ASSERT_EQ(opts.retval, 2, "opts.retval")) + goto out; + + err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0); + if (!ASSERT_OK(err, "bpf_map__update_elem")) + goto out; + + fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount); + err = bpf_prog_test_run_opts(fd, &opts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_EQ(opts.retval, 1, "opts.retval"); + +out: + refcounted_kptr__destroy(skel); + free(values); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 39d42271cc46..71f5240cc5b7 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -422,15 +422,69 @@ static bool is_valid_range(enum num_t t, struct range x) } } -static struct range range_improve(enum num_t t, struct range old, struct range new) +static struct range range_intersection(enum num_t t, struct range old, struct range new) { return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b)); } +/* + * Result is precise when 'x' and 'y' overlap or form a continuous range, + * result is an over-approximation if 'x' and 'y' do not overlap. + */ +static struct range range_union(enum num_t t, struct range x, struct range y) +{ + if (!is_valid_range(t, x)) + return y; + if (!is_valid_range(t, y)) + return x; + return range(t, min_t(t, x.a, y.a), max_t(t, x.b, y.b)); +} + +/* + * This function attempts to improve x range intersecting it with y. + * range_cast(... to_t ...) looses precision for ranges that pass to_t + * min/max boundaries. To avoid such precision loses this function + * splits both x and y into halves corresponding to non-overflowing + * sub-ranges: [0, smin] and [smax, -1]. + * Final result is computed as follows: + * + * ((x ∩ [0, smax]) ∩ (y ∩ [0, smax])) ∪ + * ((x ∩ [smin,-1]) ∩ (y ∩ [smin,-1])) + * + * Precision might still be lost if final union is not a continuous range. + */ +static struct range range_refine_in_halves(enum num_t x_t, struct range x, + enum num_t y_t, struct range y) +{ + struct range x_pos, x_neg, y_pos, y_neg, r_pos, r_neg; + u64 smax, smin, neg_one; + + if (t_is_32(x_t)) { + smax = (u64)(u32)S32_MAX; + smin = (u64)(u32)S32_MIN; + neg_one = (u64)(u32)(s32)(-1); + } else { + smax = (u64)S64_MAX; + smin = (u64)S64_MIN; + neg_one = U64_MAX; + } + x_pos = range_intersection(x_t, x, range(x_t, 0, smax)); + x_neg = range_intersection(x_t, x, range(x_t, smin, neg_one)); + y_pos = range_intersection(y_t, y, range(x_t, 0, smax)); + y_neg = range_intersection(y_t, y, range(y_t, smin, neg_one)); + r_pos = range_intersection(x_t, x_pos, range_cast(y_t, x_t, y_pos)); + r_neg = range_intersection(x_t, x_neg, range_cast(y_t, x_t, y_neg)); + return range_union(x_t, r_pos, r_neg); + +} + static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) { struct range y_cast; + if (t_is_32(x_t) == t_is_32(y_t)) + x = range_refine_in_halves(x_t, x, y_t, y); + y_cast = range_cast(y_t, x_t, y); /* If we know that @@ -444,7 +498,40 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, */ if (x_t == S64 && y_t == S32 && y_cast.a <= S32_MAX && y_cast.b <= S32_MAX && (s64)x.a >= S32_MIN && (s64)x.b <= S32_MAX) - return range_improve(x_t, x, y_cast); + return range_intersection(x_t, x, y_cast); + + if (y_t == U32 && x_t == U64) { + u64 xmin_swap, xmax_swap, xmin_lower32, xmax_lower32; + + xmin_lower32 = x.a & 0xffffffff; + xmax_lower32 = x.b & 0xffffffff; + if (xmin_lower32 < y.a || xmin_lower32 > y.b) { + /* The 32 lower bits of the umin64 are outside the u32 + * range. Let's update umin64 to match the u32 range. + * We want to *increase* the umin64 to the *minimum* + * value that matches the u32 range. + */ + xmin_swap = swap_low32(x.a, y.a); + /* We should always only increase the minimum, so if + * the new value is lower than before, we need to + * increase the 32 upper bits by 1. + */ + if (xmin_swap < x.a) + xmin_swap += 0x100000000; + if (xmin_swap == x.b) + return range(x_t, x.b, x.b); + } else if (xmax_lower32 < y.a || xmax_lower32 > y.b) { + /* Same for the umax64, but we want to *decrease* + * umax64 to the *maximum* value that matches the u32 + * range. + */ + xmax_swap = swap_low32(x.b, y.b); + if (xmax_swap > x.b) + xmax_swap -= 0x100000000; + if (xmax_swap == x.a) + return range(x_t, x.a, x.a); + } + } /* the case when new range knowledge, *y*, is a 32-bit subregister * range, while previous range knowledge, *x*, is a full register @@ -462,11 +549,11 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); if (!is_valid_range(x_t, x_swap)) return x; - return range_improve(x_t, x, x_swap); + return range_intersection(x_t, x, x_swap); } /* otherwise, plain range cast and intersection works */ - return range_improve(x_t, x, y_cast); + return range_intersection(x_t, x, y_cast); } /* ======================= @@ -609,7 +696,7 @@ static void range_cond(enum num_t t, struct range x, struct range y, *newx = range(t, x.a, x.b); *newy = range(t, y.a + 1, y.b); } else if (x.a == x.b && x.b == y.b) { - /* X is a constant matching rigth side of Y */ + /* X is a constant matching right side of Y */ *newx = range(t, x.a, x.b); *newy = range(t, y.a, y.b - 1); } else if (y.a == y.b && x.a == y.a) { @@ -617,7 +704,7 @@ static void range_cond(enum num_t t, struct range x, struct range y, *newx = range(t, x.a + 1, x.b); *newy = range(t, y.a, y.b); } else if (y.a == y.b && x.b == y.b) { - /* Y is a constant matching rigth side of X */ + /* Y is a constant matching right side of X */ *newx = range(t, x.a, x.b - 1); *newy = range(t, y.a, y.b); } else { @@ -1163,7 +1250,23 @@ static int parse_range_cmp_log(const char *log_buf, struct case_spec spec, spec.compare_subregs ? "w0" : "r0", spec.compare_subregs ? "w" : "r", specs[i].reg_idx); - q = strstr(p, buf); + /* + * In the verifier log look for lines: + * 18: (bf) r0 = r6 ; R0=... R6=... + * Different verifier passes may print + * 18: (bf) r0 = r6 + * as well, but never followed by ';'. + */ + q = p; + while ((q = strstr(q, buf)) != NULL) { + const char *s = q + strlen(buf); + + while (*s == ' ' || *s == '\t') + s++; + if (*s == ';') + break; + q = s; + } if (!q) { *specs[i].state = (struct reg_state){.valid = false}; continue; @@ -2075,9 +2178,11 @@ static struct subtest_case crafted_cases[] = { {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}}, {U64, S64, {0, 0xffffffffULL}, {1, 1}}, {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}}, + {U64, S32, {0xfffffffe00000001, 0xffffffff00000000}, {S64_MIN, S64_MIN}}, + {U64, U32, {0xfffffffe00000000, U64_MAX - 1}, {U64_MAX, U64_MAX}}, {U64, U32, {0, 0x100000000}, {0, 0}}, - {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}}, + {U64, U32, {0xfffffffe, 0x300000000}, {0x80000000, 0x80000000}}, {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}}, /* these are tricky cases where lower 32 bits allow to tighten 64 diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c new file mode 100644 index 000000000000..f0a8c828f8f1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <network_helpers.h> +#include <sys/sysinfo.h> + +#include "res_spin_lock.skel.h" +#include "res_spin_lock_fail.skel.h" + +void test_res_spin_lock_failure(void) +{ + RUN_TESTS(res_spin_lock_fail); +} + +static volatile int skip; + +static void *spin_lock_thread(void *arg) +{ + int err, prog_fd = *(u32 *) arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10000, + ); + + while (!READ_ONCE(skip)) { + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (err || topts.retval) { + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + break; + } + } + pthread_exit(arg); +} + +void test_res_spin_lock_success(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct res_spin_lock *skel; + pthread_t thread_id[16]; + int prog_fd, i, err; + void *ret; + + if (get_nprocs() < 2) { + test__skip(); + return; + } + + skel = res_spin_lock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "res_spin_lock__open_and_load")) + return; + /* AA deadlock */ + prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "error"); + ASSERT_OK(topts.retval, "retval"); + + prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_held_lock_max); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "error"); + ASSERT_OK(topts.retval, "retval"); + + /* Multi-threaded ABBA deadlock. */ + + prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_AB); + for (i = 0; i < 16; i++) { + int err; + + err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + goto end; + } + + topts.retval = 0; + topts.repeat = 1000; + int fd = bpf_program__fd(skel->progs.res_spin_lock_test_BA); + while (!topts.retval && !err && !READ_ONCE(skel->bss->err)) { + err = bpf_prog_test_run_opts(fd, &topts); + } + + WRITE_ONCE(skip, true); + + for (i = 0; i < 16; i++) { + if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) + goto end; + if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) + goto end; + } + + ASSERT_EQ(READ_ONCE(skel->bss->err), -EDEADLK, "timeout err"); + ASSERT_OK(err, "err"); + ASSERT_EQ(topts.retval, -EDEADLK, "timeout"); +end: + res_spin_lock__destroy(skel); + return; +} + +void serial_test_res_spin_lock_stress(void) +{ + if (libbpf_num_possible_cpus() < 3) { + test__skip(); + return; + } + + ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA"); + sleep(5); + unload_module("bpf_test_rqspinlock", false); + /* + * Insert bpf_test_rqspinlock.ko manually with test_mode=[1|2] to test + * other cases (ABBA, ABBCCA). + */ +} diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index 51544372f52e..41dfaaabb73f 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -101,9 +101,9 @@ static int resolve_symbols(void) int type_id; __u32 nr; - btf = btf__parse_elf("btf_data.bpf.o", NULL); + btf = btf__parse_raw("resolve_btfids.test.o.BTF"); if (CHECK(libbpf_get_error(btf), "resolve", - "Failed to load BTF from btf_data.bpf.o\n")) + "Failed to load BTF from resolve_btfids.test.o.BTF\n")) return -1; nr = btf__type_cnt(btf); diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index da430df45aa4..64520684d2cb 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -17,6 +17,7 @@ #include "test_ringbuf_n.lskel.h" #include "test_ringbuf_map_key.lskel.h" #include "test_ringbuf_write.lskel.h" +#include "test_ringbuf_overwrite.lskel.h" #define EDONE 7777 @@ -97,7 +98,7 @@ static void ringbuf_write_subtest(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->maps.ringbuf.max_entries = 0x4000; + skel->maps.ringbuf.max_entries = 0x40000; err = test_ringbuf_write_lskel__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -108,7 +109,7 @@ static void ringbuf_write_subtest(void) mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) goto cleanup; - *mmap_ptr = 0x3000; + *mmap_ptr = 0x30000; ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); skel->bss->pid = getpid(); @@ -497,6 +498,68 @@ cleanup: test_ringbuf_map_key_lskel__destroy(skel_map_key); } +static void ringbuf_overwrite_mode_subtest(void) +{ + unsigned long size, len1, len2, len3, len4, len5; + unsigned long expect_avail_data, expect_prod_pos, expect_over_pos; + struct test_ringbuf_overwrite_lskel *skel; + int page_size = getpagesize(); + int err; + + skel = test_ringbuf_overwrite_lskel__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + size = page_size; + len1 = page_size / 2; + len2 = page_size / 4; + len3 = size - len1 - len2 - BPF_RINGBUF_HDR_SZ * 3; + len4 = len3 - 8; + len5 = len3; /* retry with len3 */ + + skel->maps.ringbuf.max_entries = size; + skel->rodata->LEN1 = len1; + skel->rodata->LEN2 = len2; + skel->rodata->LEN3 = len3; + skel->rodata->LEN4 = len4; + skel->rodata->LEN5 = len5; + + skel->bss->pid = getpid(); + + err = test_ringbuf_overwrite_lskel__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + err = test_ringbuf_overwrite_lskel__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + syscall(__NR_getpgid); + + ASSERT_EQ(skel->bss->reserve1_fail, 0, "reserve 1"); + ASSERT_EQ(skel->bss->reserve2_fail, 0, "reserve 2"); + ASSERT_EQ(skel->bss->reserve3_fail, 1, "reserve 3"); + ASSERT_EQ(skel->bss->reserve4_fail, 0, "reserve 4"); + ASSERT_EQ(skel->bss->reserve5_fail, 0, "reserve 5"); + + ASSERT_EQ(skel->bss->ring_size, size, "check_ring_size"); + + expect_avail_data = len2 + len4 + len5 + 3 * BPF_RINGBUF_HDR_SZ; + ASSERT_EQ(skel->bss->avail_data, expect_avail_data, "check_avail_size"); + + ASSERT_EQ(skel->bss->cons_pos, 0, "check_cons_pos"); + + expect_prod_pos = len1 + len2 + len4 + len5 + 4 * BPF_RINGBUF_HDR_SZ; + ASSERT_EQ(skel->bss->prod_pos, expect_prod_pos, "check_prod_pos"); + + expect_over_pos = len1 + BPF_RINGBUF_HDR_SZ; + ASSERT_EQ(skel->bss->over_pos, expect_over_pos, "check_over_pos"); + + test_ringbuf_overwrite_lskel__detach(skel); +cleanup: + test_ringbuf_overwrite_lskel__destroy(skel); +} + void test_ringbuf(void) { if (test__start_subtest("ringbuf")) @@ -507,4 +570,6 @@ void test_ringbuf(void) ringbuf_map_key_subtest(); if (test__start_subtest("ringbuf_write")) ringbuf_write_subtest(); + if (test__start_subtest("ringbuf_overwrite_mode")) + ringbuf_overwrite_mode_subtest(); } diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 036d4760d2c1..3dbcc091f16c 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -41,11 +41,7 @@ static struct bpf_object *obj; static __u32 index_zero; static int epfd; -static union sa46 { - struct sockaddr_in6 v6; - struct sockaddr_in v4; - sa_family_t family; -} srv_sa; +static struct sockaddr_storage srv_sa; #define RET_IF(condition, tag, format...) ({ \ if (CHECK_FAIL(condition)) { \ @@ -135,24 +131,24 @@ static int prepare_bpf_obj(void) return 0; } -static void sa46_init_loopback(union sa46 *sa, sa_family_t family) +static void ss_init_loopback(struct sockaddr_storage *sa, sa_family_t family) { memset(sa, 0, sizeof(*sa)); - sa->family = family; - if (sa->family == AF_INET6) - sa->v6.sin6_addr = in6addr_loopback; + sa->ss_family = family; + if (sa->ss_family == AF_INET6) + ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_loopback; else - sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + ((struct sockaddr_in *)sa)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); } -static void sa46_init_inany(union sa46 *sa, sa_family_t family) +static void ss_init_inany(struct sockaddr_storage *sa, sa_family_t family) { memset(sa, 0, sizeof(*sa)); - sa->family = family; - if (sa->family == AF_INET6) - sa->v6.sin6_addr = in6addr_any; + sa->ss_family = family; + if (sa->ss_family == AF_INET6) + ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_any; else - sa->v4.sin_addr.s_addr = INADDR_ANY; + ((struct sockaddr_in *)sa)->sin_addr.s_addr = INADDR_ANY; } static int read_int_sysctl(const char *sysctl) @@ -228,7 +224,7 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, int cli_fd) { struct data_check expected = {}, result; - union sa46 cli_sa; + struct sockaddr_storage cli_sa; socklen_t addrlen; int err; @@ -251,26 +247,32 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, } if (family == AF_INET6) { + struct sockaddr_in6 *srv_v6 = (struct sockaddr_in6 *)&srv_sa; + struct sockaddr_in6 *cli_v6 = (struct sockaddr_in6 *)&cli_sa; + expected.eth_protocol = htons(ETH_P_IPV6); - expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] && - !srv_sa.v6.sin6_addr.s6_addr32[2] && - !srv_sa.v6.sin6_addr.s6_addr32[1] && - !srv_sa.v6.sin6_addr.s6_addr32[0]; + expected.bind_inany = !srv_v6->sin6_addr.s6_addr32[3] && + !srv_v6->sin6_addr.s6_addr32[2] && + !srv_v6->sin6_addr.s6_addr32[1] && + !srv_v6->sin6_addr.s6_addr32[0]; - memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32, - sizeof(cli_sa.v6.sin6_addr)); + memcpy(&expected.skb_addrs[0], cli_v6->sin6_addr.s6_addr32, + sizeof(cli_v6->sin6_addr)); memcpy(&expected.skb_addrs[4], &in6addr_loopback, sizeof(in6addr_loopback)); - expected.skb_ports[0] = cli_sa.v6.sin6_port; - expected.skb_ports[1] = srv_sa.v6.sin6_port; + expected.skb_ports[0] = cli_v6->sin6_port; + expected.skb_ports[1] = srv_v6->sin6_port; } else { + struct sockaddr_in *srv_v4 = (struct sockaddr_in *)&srv_sa; + struct sockaddr_in *cli_v4 = (struct sockaddr_in *)&cli_sa; + expected.eth_protocol = htons(ETH_P_IP); - expected.bind_inany = !srv_sa.v4.sin_addr.s_addr; + expected.bind_inany = !srv_v4->sin_addr.s_addr; - expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr; + expected.skb_addrs[0] = cli_v4->sin_addr.s_addr; expected.skb_addrs[1] = htonl(INADDR_LOOPBACK); - expected.skb_ports[0] = cli_sa.v4.sin_port; - expected.skb_ports[1] = srv_sa.v4.sin_port; + expected.skb_ports[0] = cli_v4->sin_port; + expected.skb_ports[1] = srv_v4->sin_port; } if (memcmp(&result, &expected, offsetof(struct data_check, @@ -364,16 +366,15 @@ static void check_results(void) static int send_data(int type, sa_family_t family, void *data, size_t len, enum result expected) { - union sa46 cli_sa; + struct sockaddr_storage cli_sa; int fd, err; fd = socket(family, type, 0); RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno); - sa46_init_loopback(&cli_sa, family); + ss_init_loopback(&cli_sa, family); err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa)); RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno); - err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa, sizeof(srv_sa)); RET_ERR(err != len && expected >= PASS, @@ -589,9 +590,9 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany) socklen_t addrlen; if (inany) - sa46_init_inany(&srv_sa, family); + ss_init_inany(&srv_sa, family); else - sa46_init_loopback(&srv_sa, family); + ss_init_loopback(&srv_sa, family); addrlen = sizeof(srv_sa); /* diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 1702aa592c2c..7ac4d5a488aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -206,6 +206,11 @@ destroy_skel: skel_open_load_failure: close(pipe_c2p[0]); close(pipe_p2c[1]); + /* + * Child is either about to exit cleanly or stuck in case of errors. + * Nudge it to exit. + */ + kill(pid, SIGKILL); wait(NULL); } diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c index e12255121c15..77fe1bfb7504 100644 --- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c @@ -202,7 +202,7 @@ err_out: void test_setget_sockopt(void) { cg_fd = test__join_cgroup(CG_NAME); - if (cg_fd < 0) + if (!ASSERT_OK_FD(cg_fd, "join cgroup")) return; if (create_netns()) @@ -212,7 +212,7 @@ void test_setget_sockopt(void) if (!ASSERT_OK_PTR(skel, "open skel")) goto done; - strcpy(skel->rodata->veth, "binddevtest1"); + strscpy(skel->rodata->veth, "binddevtest1"); skel->rodata->veth_ifindex = if_nametoindex("binddevtest1"); if (!ASSERT_GT(skel->rodata->veth_ifindex, 0, "if_nametoindex")) goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/sha256.c b/tools/testing/selftests/bpf/prog_tests/sha256.c new file mode 100644 index 000000000000..604a0b1423d5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sha256.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright 2025 Google LLC */ + +#include <test_progs.h> +#include "bpf/libbpf_internal.h" + +#define MAX_LEN 4096 + +/* Test libbpf_sha256() for all lengths from 0 to MAX_LEN inclusively. */ +void test_sha256(void) +{ + /* + * The correctness of this value was verified by running this test with + * libbpf_sha256() replaced by OpenSSL's SHA256(). + */ + static const __u8 expected_digest_of_digests[SHA256_DIGEST_LENGTH] = { + 0x62, 0x30, 0x0e, 0x1d, 0xea, 0x7f, 0xc4, 0x74, + 0xfd, 0x8e, 0x64, 0x0b, 0xd8, 0x5f, 0xea, 0x04, + 0xf3, 0xef, 0x77, 0x42, 0xc2, 0x01, 0xb8, 0x90, + 0x6e, 0x19, 0x91, 0x1b, 0xca, 0xb3, 0x28, 0x42, + }; + __u64 seed = 0; + __u8 *data = NULL, *digests = NULL; + __u8 digest_of_digests[SHA256_DIGEST_LENGTH]; + size_t i; + + data = malloc(MAX_LEN); + if (!ASSERT_OK_PTR(data, "malloc")) + goto out; + digests = malloc((MAX_LEN + 1) * SHA256_DIGEST_LENGTH); + if (!ASSERT_OK_PTR(digests, "malloc")) + goto out; + + /* Generate MAX_LEN bytes of "random" data deterministically. */ + for (i = 0; i < MAX_LEN; i++) { + seed = (seed * 25214903917 + 11) & ((1ULL << 48) - 1); + data[i] = (__u8)(seed >> 16); + } + + /* Calculate a digest for each length 0 through MAX_LEN inclusively. */ + for (i = 0; i <= MAX_LEN; i++) + libbpf_sha256(data, i, &digests[i * SHA256_DIGEST_LENGTH]); + + /* Calculate and verify the digest of all the digests. */ + libbpf_sha256(digests, (MAX_LEN + 1) * SHA256_DIGEST_LENGTH, + digest_of_digests); + ASSERT_MEMEQ(digest_of_digests, expected_digest_of_digests, + SHA256_DIGEST_LENGTH, "digest_of_digests"); +out: + free(data); + free(digests); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index 0b9bd1d6f7cc..10a0ab954b8a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -37,8 +37,10 @@ configure_stack(void) tc = popen("tc -V", "r"); if (CHECK_FAIL(!tc)) return false; - if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) + if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) { + pclose(tc); return false; + } if (strstr(tc_version, ", libbpf ")) prog = "test_sk_assign_libbpf.bpf.o"; else diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c new file mode 100644 index 000000000000..e2c867fd5244 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <test_progs.h> +#include "sk_bypass_prot_mem.skel.h" +#include "network_helpers.h" + +#ifndef PAGE_SIZE +#include <unistd.h> +#define PAGE_SIZE getpagesize() +#endif + +#define NR_PAGES 32 +#define NR_SOCKETS 2 +#define BUF_TOTAL (NR_PAGES * PAGE_SIZE / NR_SOCKETS) +#define BUF_SINGLE 1024 +#define NR_SEND (BUF_TOTAL / BUF_SINGLE) + +struct test_case { + char name[8]; + int family; + int type; + int (*create_sockets)(struct test_case *test_case, int sk[], int len); + long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel); +}; + +static int tcp_create_sockets(struct test_case *test_case, int sk[], int len) +{ + int server, i, err = 0; + + server = start_server(test_case->family, test_case->type, NULL, 0, 0); + if (!ASSERT_GE(server, 0, "start_server_str")) + return server; + + /* Keep for-loop so we can change NR_SOCKETS easily. */ + for (i = 0; i < len; i += 2) { + sk[i] = connect_to_fd(server, 0); + if (sk[i] < 0) { + ASSERT_GE(sk[i], 0, "connect_to_fd"); + err = sk[i]; + break; + } + + sk[i + 1] = accept(server, NULL, NULL); + if (sk[i + 1] < 0) { + ASSERT_GE(sk[i + 1], 0, "accept"); + err = sk[i + 1]; + break; + } + } + + close(server); + + return err; +} + +static int udp_create_sockets(struct test_case *test_case, int sk[], int len) +{ + int i, j, err, rcvbuf = BUF_TOTAL; + + /* Keep for-loop so we can change NR_SOCKETS easily. */ + for (i = 0; i < len; i += 2) { + sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0); + if (sk[i] < 0) { + ASSERT_GE(sk[i], 0, "start_server"); + return sk[i]; + } + + sk[i + 1] = connect_to_fd(sk[i], 0); + if (sk[i + 1] < 0) { + ASSERT_GE(sk[i + 1], 0, "connect_to_fd"); + return sk[i + 1]; + } + + err = connect_fd_to_fd(sk[i], sk[i + 1], 0); + if (err) { + ASSERT_EQ(err, 0, "connect_fd_to_fd"); + return err; + } + + for (j = 0; j < 2; j++) { + err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int)); + if (err) { + ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)"); + return err; + } + } + } + + return 0; +} + +static long get_memory_allocated(struct test_case *test_case, + bool *activated, long *memory_allocated) +{ + int sk; + + *activated = true; + + /* AF_INET and AF_INET6 share the same memory_allocated. + * tcp_init_sock() is called by AF_INET and AF_INET6, + * but udp_lib_init_sock() is inline. + */ + sk = socket(AF_INET, test_case->type, 0); + if (!ASSERT_GE(sk, 0, "get_memory_allocated")) + return -1; + + close(sk); + + return *memory_allocated; +} + +static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel) +{ + return get_memory_allocated(test_case, + &skel->bss->tcp_activated, + &skel->bss->tcp_memory_allocated); +} + +static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel) +{ + return get_memory_allocated(test_case, + &skel->bss->udp_activated, + &skel->bss->udp_memory_allocated); +} + +static int check_bypass(struct test_case *test_case, + struct sk_bypass_prot_mem *skel, bool bypass) +{ + char buf[BUF_SINGLE] = {}; + long memory_allocated[2]; + int sk[NR_SOCKETS]; + int err, i, j; + + for (i = 0; i < ARRAY_SIZE(sk); i++) + sk[i] = -1; + + err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk)); + if (err) + goto close; + + memory_allocated[0] = test_case->get_memory_allocated(test_case, skel); + + /* allocate pages >= NR_PAGES */ + for (i = 0; i < ARRAY_SIZE(sk); i++) { + for (j = 0; j < NR_SEND; j++) { + int bytes = send(sk[i], buf, sizeof(buf), 0); + + /* Avoid too noisy logs when something failed. */ + if (bytes != sizeof(buf)) { + ASSERT_EQ(bytes, sizeof(buf), "send"); + if (bytes < 0) { + err = bytes; + goto drain; + } + } + } + } + + memory_allocated[1] = test_case->get_memory_allocated(test_case, skel); + + if (bypass) + ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass"); + else + ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass"); + +drain: + if (test_case->type == SOCK_DGRAM) { + /* UDP starts purging sk->sk_receive_queue after one RCU + * grace period, then udp_memory_allocated goes down, + * so drain the queue before close(). + */ + for (i = 0; i < ARRAY_SIZE(sk); i++) { + for (j = 0; j < NR_SEND; j++) { + int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC); + + if (bytes == sizeof(buf)) + continue; + if (bytes != -1 || errno != EAGAIN) + PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno)); + break; + } + } + } + +close: + for (i = 0; i < ARRAY_SIZE(sk); i++) { + if (sk[i] < 0) + break; + + close(sk[i]); + } + + return err; +} + +static void run_test(struct test_case *test_case) +{ + struct sk_bypass_prot_mem *skel; + struct nstoken *nstoken; + int cgroup, err; + + skel = sk_bypass_prot_mem__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + skel->bss->nr_cpus = libbpf_num_possible_cpus(); + + err = sk_bypass_prot_mem__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto destroy_skel; + + cgroup = test__join_cgroup("/sk_bypass_prot_mem"); + if (!ASSERT_GE(cgroup, 0, "join_cgroup")) + goto destroy_skel; + + err = make_netns("sk_bypass_prot_mem"); + if (!ASSERT_EQ(err, 0, "make_netns")) + goto close_cgroup; + + nstoken = open_netns("sk_bypass_prot_mem"); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto remove_netns; + + err = check_bypass(test_case, skel, false); + if (!ASSERT_EQ(err, 0, "test_bypass(false)")) + goto close_netns; + + err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1"); + if (!ASSERT_EQ(err, 0, "write_sysctl(1)")) + goto close_netns; + + err = check_bypass(test_case, skel, true); + if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)")) + goto close_netns; + + err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0"); + if (!ASSERT_EQ(err, 0, "write_sysctl(0)")) + goto close_netns; + + skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup); + if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)")) + goto close_netns; + + err = check_bypass(test_case, skel, true); + ASSERT_EQ(err, 0, "test_bypass(true by bpf)"); + +close_netns: + close_netns(nstoken); +remove_netns: + remove_netns("sk_bypass_prot_mem"); +close_cgroup: + close(cgroup); +destroy_skel: + sk_bypass_prot_mem__destroy(skel); +} + +static struct test_case test_cases[] = { + { + .name = "TCP ", + .family = AF_INET, + .type = SOCK_STREAM, + .create_sockets = tcp_create_sockets, + .get_memory_allocated = tcp_get_memory_allocated, + }, + { + .name = "UDP ", + .family = AF_INET, + .type = SOCK_DGRAM, + .create_sockets = udp_create_sockets, + .get_memory_allocated = udp_get_memory_allocated, + }, + { + .name = "TCPv6", + .family = AF_INET6, + .type = SOCK_STREAM, + .create_sockets = tcp_create_sockets, + .get_memory_allocated = tcp_get_memory_allocated, + }, + { + .name = "UDPv6", + .family = AF_INET6, + .type = SOCK_DGRAM, + .create_sockets = udp_create_sockets, + .get_memory_allocated = udp_get_memory_allocated, + }, +}; + +void serial_test_sk_bypass_prot_mem(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + if (test__start_subtest(test_cases[i].name)) + run_test(&test_cases[i]); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c index 3eefdfed1db9..657d897958b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c +++ b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c @@ -34,7 +34,7 @@ void test_skc_to_unix_sock(void) memset(&sockaddr, 0, sizeof(sockaddr)); sockaddr.sun_family = AF_UNIX; - strncpy(sockaddr.sun_path, sock_path, strlen(sock_path)); + strscpy(sockaddr.sun_path, sock_path); sockaddr.sun_path[0] = '\0'; err = bind(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)); diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 4be6fdb78c6a..4e4a82d54f79 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -114,8 +114,11 @@ static void test_snprintf_negative(void) ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5"); ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6"); ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7"); - ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character"); + ASSERT_OK(load_single_snprintf("\x80"), "non ascii plain text"); + ASSERT_ERR(load_single_snprintf("%\x80"), "non ascii in specifier"); ASSERT_ERR(load_single_snprintf("\x1"), "non printable character"); + ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8"); + ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9"); } void test_snprintf(void) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index d56e18b25528..27781df8f2fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -1,20 +1,875 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2024 Meta +#include <poll.h> #include <test_progs.h> #include "network_helpers.h" #include "sock_iter_batch.skel.h" #define TEST_NS "sock_iter_batch_netns" +#define TEST_CHILD_NS "sock_iter_batch_child_netns" +static const int init_batch_size = 16; static const int nr_soreuse = 4; +struct iter_out { + int idx; + __u64 cookie; +} __packed; + +struct sock_count { + __u64 cookie; + int count; +}; + +static int insert(__u64 cookie, struct sock_count counts[], int counts_len) +{ + int insert = -1; + int i = 0; + + for (; i < counts_len; i++) { + if (!counts[i].cookie) { + insert = i; + } else if (counts[i].cookie == cookie) { + insert = i; + break; + } + } + if (insert < 0) + return insert; + + counts[insert].cookie = cookie; + counts[insert].count++; + + return counts[insert].count; +} + +static int read_n(int iter_fd, int n, struct sock_count counts[], + int counts_len) +{ + struct iter_out out; + int nread = 1; + int i = 0; + + for (; nread > 0 && (n < 0 || i < n); i++) { + nread = read(iter_fd, &out, sizeof(out)); + if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread")) + break; + ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert"); + } + + ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n"); + + return i; +} + +static __u64 socket_cookie(int fd) +{ + __u64 cookie; + socklen_t cookie_len = sizeof(cookie); + + if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, + &cookie_len), "getsockopt(SO_COOKIE)")) + return 0; + return cookie; +} + +static bool was_seen(int fd, struct sock_count counts[], int counts_len) +{ + __u64 cookie = socket_cookie(fd); + int i = 0; + + for (; cookie && i < counts_len; i++) + if (cookie == counts[i].cookie) + return true; + + return false; +} + +static int get_seen_socket(int *fds, struct sock_count counts[], int n) +{ + int i = 0; + + for (; i < n; i++) + if (was_seen(fds[i], counts, n)) + return i; + return -1; +} + +static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n) +{ + int i, nread, iter_fd; + int nth_sock_idx = -1; + struct iter_out out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) + return -1; + + for (; n >= 0; n--) { + nread = read(iter_fd, &out, sizeof(out)); + if (!nread || !ASSERT_GE(nread, 1, "nread")) + goto done; + } + + for (i = 0; i < fds_len && nth_sock_idx < 0; i++) + if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie) + nth_sock_idx = i; +done: + close(iter_fd); + return nth_sock_idx; +} + +static void destroy(int fd) +{ + struct sock_iter_batch *skel = NULL; + __u64 cookie = socket_cookie(fd); + struct bpf_link *link = NULL; + int iter_fd = -1; + int nread; + __u64 out; + + skel = sock_iter_batch__open(); + if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) + goto done; + + skel->rodata->destroy_cookie = cookie; + + if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load")) + goto done; + + link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto done; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) + goto done; + + /* Delete matching socket. */ + nread = read(iter_fd, &out, sizeof(out)); + ASSERT_GE(nread, 0, "nread"); + if (nread) + ASSERT_EQ(out, cookie, "cookie matches"); +done: + if (iter_fd >= 0) + close(iter_fd); + bpf_link__destroy(link); + sock_iter_batch__destroy(skel); + close(fd); +} + +static int get_seen_count(int fd, struct sock_count counts[], int n) +{ + __u64 cookie = socket_cookie(fd); + int count = 0; + int i = 0; + + for (; cookie && !count && i < n; i++) + if (cookie == counts[i].cookie) + count = counts[i].count; + + return count; +} + +static void check_n_were_seen_once(int *fds, int fds_len, int n, + struct sock_count counts[], int counts_len) +{ + int seen_once = 0; + int seen_cnt; + int i = 0; + + for (; i < fds_len; i++) { + /* Skip any sockets that were closed or that weren't seen + * exactly once. + */ + if (fds[i] < 0) + continue; + seen_cnt = get_seen_count(fds[i], counts, counts_len); + if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt")) + seen_once++; + } + + ASSERT_EQ(seen_once, n, "seen_once"); +} + +static int accept_from_one(struct pollfd *server_poll_fds, + int server_poll_fds_len) +{ + static const int poll_timeout_ms = 5000; /* 5s */ + int ret; + int i; + + ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms); + if (!ASSERT_EQ(ret, 1, "poll")) + return -1; + + for (i = 0; i < server_poll_fds_len; i++) + if (server_poll_fds[i].revents & POLLIN) + return accept(server_poll_fds[i].fd, NULL, NULL); + + return -1; +} + +static int *connect_to_server(int family, int sock_type, const char *addr, + __u16 port, int nr_connects, int *server_fds, + int server_fds_len) +{ + struct pollfd *server_poll_fds = NULL; + int *established_socks = NULL; + int i; + + server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds)); + if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds")) + return NULL; + + for (i = 0; i < server_fds_len; i++) { + server_poll_fds[i].fd = server_fds[i]; + server_poll_fds[i].events = POLLIN; + } + + i = 0; + + established_socks = malloc(sizeof(*established_socks) * nr_connects*2); + if (!ASSERT_OK_PTR(established_socks, "established_socks")) + goto error; + + while (nr_connects--) { + established_socks[i] = connect_to_addr_str(family, sock_type, + addr, port, NULL); + if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str")) + goto error; + i++; + established_socks[i] = accept_from_one(server_poll_fds, + server_fds_len); + if (!ASSERT_OK_FD(established_socks[i], "accept_from_one")) + goto error; + i++; + } + + free(server_poll_fds); + return established_socks; +error: + free_fds(established_socks, i); + free(server_poll_fds); + return NULL; +} + +static void remove_seen(int family, int sock_type, const char *addr, __u16 port, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, + int counts_len, struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through the first socks_len - 1 sockets. */ + read_n(iter_fd, socks_len - 1, counts, counts_len); + + /* Make sure we saw socks_len - 1 sockets exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); + + /* Close a socket we've already seen to remove it from the bucket. */ + close_idx = get_seen_socket(socks, counts, counts_len); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + close(socks[close_idx]); + socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the last socket wasn't skipped and that there were no + * repeats. + */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); +} + +static void remove_seen_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Leave one established socket. */ + read_n(iter_fd, established_socks_len - 1, counts, counts_len); + + /* Close a socket we've already seen to remove it from the bucket. */ + close_idx = get_nth_socket(established_socks, established_socks_len, + link, listen_socks_len + 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + destroy(established_socks[close_idx]); + established_socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the last socket wasn't skipped and that there were no + * repeats. + */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); +} + +static void remove_unseen(int family, int sock_type, const char *addr, + __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through the first socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw a socket from fds. */ + check_n_were_seen_once(socks, socks_len, 1, counts, counts_len); + + /* Close what would be the next socket in the bucket to exercise the + * condition where we need to skip past the first cookie we remembered. + */ + close_idx = get_nth_socket(socks, socks_len, link, 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + close(socks[close_idx]); + socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the remaining sockets were seen exactly once and that we + * didn't repeat the socket that was already seen. + */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); +} + +static void remove_unseen_established(int family, int sock_type, + const char *addr, __u16 port, + int *listen_socks, int listen_socks_len, + int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw one established socks. */ + check_n_were_seen_once(established_socks, established_socks_len, 1, + counts, counts_len); + + /* Close what would be the next socket in the bucket to exercise the + * condition where we need to skip past the first cookie we remembered. + */ + close_idx = get_nth_socket(established_socks, established_socks_len, + link, listen_socks_len + 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + + destroy(established_socks[close_idx]); + established_socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the remaining sockets were seen exactly once and that we + * didn't repeat the socket that was already seen. + */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); +} + +static void remove_all(int family, int sock_type, const char *addr, + __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx, i; + + /* Iterate through the first socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw a socket from fds. */ + check_n_were_seen_once(socks, socks_len, 1, counts, counts_len); + + /* Close all remaining sockets to exhaust the list of saved cookies and + * exit without putting any sockets into the batch on the next read. + */ + for (i = 0; i < socks_len - 1; i++) { + close_idx = get_nth_socket(socks, socks_len, link, 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + close(socks[close_idx]); + socks[close_idx] = -1; + } + + /* Make sure there are no more sockets returned */ + ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); +} + +static void remove_all_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int *close_idx = NULL; + int i; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw one established socks. */ + check_n_were_seen_once(established_socks, established_socks_len, 1, + counts, counts_len); + + /* Close all remaining sockets to exhaust the list of saved cookies and + * exit without putting any sockets into the batch on the next read. + */ + close_idx = malloc(sizeof(int) * (established_socks_len - 1)); + if (!ASSERT_OK_PTR(close_idx, "close_idx malloc")) + return; + for (i = 0; i < established_socks_len - 1; i++) { + close_idx[i] = get_nth_socket(established_socks, + established_socks_len, link, + listen_socks_len + i); + if (!ASSERT_GE(close_idx[i], 0, "close_idx")) + return; + } + + for (i = 0; i < established_socks_len - 1; i++) { + destroy(established_socks[close_idx[i]]); + established_socks[close_idx[i]] = -1; + } + + /* Make sure there are no more sockets returned */ + ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); + free(close_idx); +} + +static void add_some(int family, int sock_type, const char *addr, __u16 port, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, + int counts_len, struct bpf_link *link, int iter_fd) +{ + int *new_socks = NULL; + + /* Iterate through the first socks_len - 1 sockets. */ + read_n(iter_fd, socks_len - 1, counts, counts_len); + + /* Make sure we saw socks_len - 1 sockets exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); + + /* Double the number of sockets in the bucket. */ + new_socks = start_reuseport_server(family, sock_type, addr, port, 0, + socks_len); + if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server")) + goto done; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each of the original sockets was seen exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len, counts, + counts_len); +done: + free_fds(new_socks, socks_len); +} + +static void add_some_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, + int counts_len, struct bpf_link *link, + int iter_fd) +{ + int *new_socks = NULL; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established_socks_len - 1 sockets. */ + read_n(iter_fd, established_socks_len - 1, counts, counts_len); + + /* Make sure we saw established_socks_len - 1 sockets exactly once. */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); + + /* Double the number of established sockets in the bucket. */ + new_socks = connect_to_server(family, sock_type, addr, port, + established_socks_len / 2, listen_socks, + listen_socks_len); + if (!ASSERT_OK_PTR(new_socks, "connect_to_server")) + goto done; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each of the original sockets was seen exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len, counts, counts_len); +done: + free_fds(new_socks, established_socks_len); +} + +static void force_realloc(int family, int sock_type, const char *addr, + __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int *new_socks = NULL; + + /* Iterate through the first socket just to initialize the batch. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Double the number of sockets in the bucket to force a realloc on the + * next read. + */ + new_socks = start_reuseport_server(family, sock_type, addr, port, 0, + socks_len); + if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server")) + goto done; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each socket from the first set was seen exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len, counts, + counts_len); +done: + free_fds(new_socks, socks_len); +} + +static void force_realloc_established(int family, int sock_type, + const char *addr, __u16 port, + int *listen_socks, int listen_socks_len, + int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + /* Iterate through all sockets to trigger a realloc. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each socket was seen exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len, counts, counts_len); +} + +struct test_case { + void (*test)(int family, int sock_type, const char *addr, __u16 port, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, + int counts_len, struct bpf_link *link, int iter_fd); + const char *description; + int ehash_buckets; + int connections; + int init_socks; + int max_socks; + int sock_type; + int family; +}; + +static struct test_case resume_tests[] = { + { + .description = "udp: resume after removing a seen socket", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + .family = AF_INET6, + .test = remove_seen, + }, + { + .description = "udp: resume after removing one unseen socket", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + .family = AF_INET6, + .test = remove_unseen, + }, + { + .description = "udp: resume after removing all unseen sockets", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + .family = AF_INET6, + .test = remove_all, + }, + { + .description = "udp: resume after adding a few sockets", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + /* Use AF_INET so that new sockets are added to the head of the + * bucket's list. + */ + .family = AF_INET, + .test = add_some, + }, + { + .description = "udp: force a realloc to occur", + .init_socks = init_batch_size, + .max_socks = init_batch_size * 2, + .sock_type = SOCK_DGRAM, + /* Use AF_INET6 so that new sockets are added to the tail of the + * bucket's list, needing to be added to the next batch to force + * a realloc. + */ + .family = AF_INET6, + .test = force_realloc, + }, + { + .description = "tcp: resume after removing a seen socket (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_seen, + }, + { + .description = "tcp: resume after removing one unseen socket (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_unseen, + }, + { + .description = "tcp: resume after removing all unseen sockets (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_all, + }, + { + .description = "tcp: resume after adding a few sockets (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + /* Use AF_INET so that new sockets are added to the head of the + * bucket's list. + */ + .family = AF_INET, + .test = add_some, + }, + { + .description = "tcp: force a realloc to occur (listening)", + .init_socks = init_batch_size, + .max_socks = init_batch_size * 2, + .sock_type = SOCK_STREAM, + /* Use AF_INET6 so that new sockets are added to the tail of the + * bucket's list, needing to be added to the next batch to force + * a realloc. + */ + .family = AF_INET6, + .test = force_realloc, + }, + { + .description = "tcp: resume after removing a seen socket (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_seen_established, + }, + { + .description = "tcp: resume after removing one unseen socket (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_unseen_established, + }, + { + .description = "tcp: resume after removing all unseen sockets (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_all_established, + }, + { + .description = "tcp: resume after adding a few sockets (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = add_some_established, + }, + { + .description = "tcp: force a realloc to occur (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + /* Bucket size will need to double when going from listening to + * established sockets. + */ + .connections = init_batch_size, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse + (init_batch_size * 2), + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = force_realloc_established, + }, +}; + +static void do_resume_test(struct test_case *tc) +{ + struct sock_iter_batch *skel = NULL; + struct sock_count *counts = NULL; + static const __u16 port = 10001; + struct nstoken *nstoken = NULL; + struct bpf_link *link = NULL; + int *established_fds = NULL; + int err, iter_fd = -1; + const char *addr; + int *fds = NULL; + + if (tc->ehash_buckets) { + SYS_NOFAIL("ip netns del " TEST_CHILD_NS); + SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d", + tc->ehash_buckets); + SYS(done, "ip netns add %s", TEST_CHILD_NS); + SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS); + nstoken = open_netns(TEST_CHILD_NS); + if (!ASSERT_OK_PTR(nstoken, "open_child_netns")) + goto done; + } + + counts = calloc(tc->max_socks, sizeof(*counts)); + if (!ASSERT_OK_PTR(counts, "counts")) + goto done; + skel = sock_iter_batch__open(); + if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) + goto done; + + /* Prepare a bucket of sockets in the kernel hashtable */ + addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1"; + fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0, + tc->init_socks); + if (!ASSERT_OK_PTR(fds, "start_reuseport_server")) + goto done; + if (tc->connections) { + established_fds = connect_to_server(tc->family, tc->sock_type, + addr, port, + tc->connections, fds, + tc->init_socks); + if (!ASSERT_OK_PTR(established_fds, "connect_to_server")) + goto done; + } + skel->rodata->ports[0] = 0; + skel->rodata->ports[1] = 0; + skel->rodata->sf = tc->family; + skel->rodata->ss = 0; + + err = sock_iter_batch__load(skel); + if (!ASSERT_OK(err, "sock_iter_batch__load")) + goto done; + + link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ? + skel->progs.iter_tcp_soreuse : + skel->progs.iter_udp_soreuse, + NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto done; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) + goto done; + + tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks, + established_fds, tc->connections*2, counts, tc->max_socks, + link, iter_fd); +done: + close_netns(nstoken); + SYS_NOFAIL("ip netns del " TEST_CHILD_NS); + SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0"); + free(counts); + free_fds(fds, tc->init_socks); + free_fds(established_fds, tc->connections*2); + if (iter_fd >= 0) + close(iter_fd); + bpf_link__destroy(link); + sock_iter_batch__destroy(skel); +} + +static void do_resume_tests(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(resume_tests); i++) { + if (test__start_subtest(resume_tests[i].description)) { + do_resume_test(&resume_tests[i]); + } + } +} + static void do_test(int sock_type, bool onebyone) { int err, i, nread, to_read, total_read, iter_fd = -1; - int first_idx, second_idx, indices[nr_soreuse]; + struct iter_out outputs[nr_soreuse]; struct bpf_link *link = NULL; struct sock_iter_batch *skel; + int first_idx, second_idx; int *fds[2] = {}; skel = sock_iter_batch__open(); @@ -34,6 +889,9 @@ static void do_test(int sock_type, bool onebyone) goto done; skel->rodata->ports[i] = ntohs(local_port); } + skel->rodata->sf = AF_INET6; + if (sock_type == SOCK_STREAM) + skel->rodata->ss = TCP_LISTEN; err = sock_iter_batch__load(skel); if (!ASSERT_OK(err, "sock_iter_batch__load")) @@ -55,38 +913,38 @@ static void do_test(int sock_type, bool onebyone) * from a bucket and leave one socket out from * that bucket on purpose. */ - to_read = (nr_soreuse - 1) * sizeof(*indices); + to_read = (nr_soreuse - 1) * sizeof(*outputs); total_read = 0; first_idx = -1; do { - nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); - if (nread <= 0 || nread % sizeof(*indices)) + nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read); + if (nread <= 0 || nread % sizeof(*outputs)) break; total_read += nread; if (first_idx == -1) - first_idx = indices[0]; - for (i = 0; i < nread / sizeof(*indices); i++) - ASSERT_EQ(indices[i], first_idx, "first_idx"); + first_idx = outputs[0].idx; + for (i = 0; i < nread / sizeof(*outputs); i++) + ASSERT_EQ(outputs[i].idx, first_idx, "first_idx"); } while (total_read < to_read); - ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread"); + ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread"); ASSERT_EQ(total_read, to_read, "total_read"); free_fds(fds[first_idx], nr_soreuse); fds[first_idx] = NULL; /* Read the "whole" second bucket */ - to_read = nr_soreuse * sizeof(*indices); + to_read = nr_soreuse * sizeof(*outputs); total_read = 0; second_idx = !first_idx; do { - nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); - if (nread <= 0 || nread % sizeof(*indices)) + nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read); + if (nread <= 0 || nread % sizeof(*outputs)) break; total_read += nread; - for (i = 0; i < nread / sizeof(*indices); i++) - ASSERT_EQ(indices[i], second_idx, "second_idx"); + for (i = 0; i < nread / sizeof(*outputs); i++) + ASSERT_EQ(outputs[i].idx, second_idx, "second_idx"); } while (total_read <= to_read); ASSERT_EQ(nread, 0, "nread"); /* Both so_reuseport ports should be in different buckets, so @@ -128,6 +986,7 @@ void test_sock_iter_batch(void) do_test(SOCK_DGRAM, true); do_test(SOCK_DGRAM, false); } + do_resume_tests(); close_netns(nstoken); done: diff --git a/tools/testing/selftests/bpf/prog_tests/sock_ops_get_sk.c b/tools/testing/selftests/bpf/prog_tests/sock_ops_get_sk.c new file mode 100644 index 000000000000..343d92c4df30 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_ops_get_sk.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "sock_ops_get_sk.skel.h" + +/* See progs/sock_ops_get_sk.c for the bug description. */ +static void run_sock_ops_test(int cgroup_fd, int prog_fd) +{ + int server_fd, client_fd, err; + + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); + if (!ASSERT_OK(err, "prog_attach")) + return; + + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_OK_FD(server_fd, "start_server")) + goto detach; + + /* Trigger TCP handshake which causes TCP_NEW_SYN_RECV state where + * is_fullsock == 0 and is_locked_tcp_sock == 0. + */ + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_OK_FD(client_fd, "connect_to_fd")) + goto close_server; + + close(client_fd); + +close_server: + close(server_fd); +detach: + bpf_prog_detach(cgroup_fd, BPF_CGROUP_SOCK_OPS); +} + +void test_ns_sock_ops_get_sk(void) +{ + struct sock_ops_get_sk *skel; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/sock_ops_get_sk"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup")) + return; + + skel = sock_ops_get_sk__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load")) + goto close_cgroup; + + /* Test SOCK_OPS_GET_SK with same src/dst register */ + if (test__start_subtest("get_sk")) { + run_sock_ops_test(cgroup_fd, + bpf_program__fd(skel->progs.sock_ops_get_sk_same_reg)); + ASSERT_EQ(skel->bss->null_seen, 1, "null_seen"); + ASSERT_EQ(skel->bss->bug_detected, 0, "bug_not_detected"); + } + + /* Test SOCK_OPS_GET_FIELD with same src/dst register */ + if (test__start_subtest("get_field")) { + run_sock_ops_test(cgroup_fd, + bpf_program__fd(skel->progs.sock_ops_get_field_same_reg)); + ASSERT_EQ(skel->bss->field_null_seen, 1, "field_null_seen"); + ASSERT_EQ(skel->bss->field_bug_detected, 0, "field_bug_not_detected"); + } + + /* Test SOCK_OPS_GET_SK with different src/dst register */ + if (test__start_subtest("get_sk_diff_reg")) { + run_sock_ops_test(cgroup_fd, + bpf_program__fd(skel->progs.sock_ops_get_sk_diff_reg)); + ASSERT_EQ(skel->bss->diff_reg_null_seen, 1, "diff_reg_null_seen"); + ASSERT_EQ(skel->bss->diff_reg_bug_detected, 0, "diff_reg_bug_not_detected"); + } + + sock_ops_get_sk__destroy(skel); +close_cgroup: + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h index 1bdfb79ef009..0d59503a0c73 100644 --- a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h @@ -3,6 +3,7 @@ #ifndef __SOCKET_HELPERS__ #define __SOCKET_HELPERS__ +#include <sys/un.h> #include <linux/vm_sockets.h> /* include/linux/net.h */ @@ -16,11 +17,16 @@ #define VMADDR_CID_LOCAL 1 #endif +/* include/linux/compiler_types.h */ +#if __STDC_VERSION__ < 202311L && !defined(auto) +# define auto __auto_type +#endif + /* include/linux/cleanup.h */ #define __get_and_null(p, nullvalue) \ ({ \ - __auto_type __ptr = &(p); \ - __auto_type __val = *__ptr; \ + auto __ptr = &(p); \ + auto __val = *__ptr; \ *__ptr = nullvalue; \ __val; \ }) @@ -169,6 +175,15 @@ static inline void init_addr_loopback6(struct sockaddr_storage *ss, *len = sizeof(*addr6); } +static inline void init_addr_loopback_unix(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_un *addr = memset(ss, 0, sizeof(*ss)); + + addr->sun_family = AF_UNIX; + *len = sizeof(sa_family_t); +} + static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len) { @@ -190,6 +205,9 @@ static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, case AF_INET6: init_addr_loopback6(ss, len); return; + case AF_UNIX: + init_addr_loopback_unix(ss, len); + return; case AF_VSOCK: init_addr_loopback_vsock(ss, len); return; @@ -315,21 +333,27 @@ static inline int create_pair(int family, int sotype, int *p0, int *p1) { __close_fd int s, c = -1, p = -1; struct sockaddr_storage addr; - socklen_t len = sizeof(addr); + socklen_t len; int err; s = socket_loopback(family, sotype); if (s < 0) return s; - err = xgetsockname(s, sockaddr(&addr), &len); - if (err) - return err; - c = xsocket(family, sotype, 0); if (c < 0) return c; + init_addr_loopback(family, &addr, &len); + err = xbind(c, sockaddr(&addr), len); + if (err) + return err; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + return err; + err = connect(c, sockaddr(&addr), len); if (err) { if (errno != EINPROGRESS) { @@ -391,4 +415,59 @@ static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, return err; } +static inline const char *socket_kind_to_str(int sock_fd) +{ + socklen_t opt_len; + int domain, type; + + opt_len = sizeof(domain); + if (getsockopt(sock_fd, SOL_SOCKET, SO_DOMAIN, &domain, &opt_len)) + FAIL_ERRNO("getsockopt(SO_DOMAIN)"); + + opt_len = sizeof(type); + if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &type, &opt_len)) + FAIL_ERRNO("getsockopt(SO_TYPE)"); + + switch (domain) { + case AF_INET: + switch (type) { + case SOCK_STREAM: + return "tcp4"; + case SOCK_DGRAM: + return "udp4"; + } + break; + case AF_INET6: + switch (type) { + case SOCK_STREAM: + return "tcp6"; + case SOCK_DGRAM: + return "udp6"; + } + break; + case AF_UNIX: + switch (type) { + case SOCK_STREAM: + return "u_str"; + case SOCK_DGRAM: + return "u_dgr"; + case SOCK_SEQPACKET: + return "u_seq"; + } + break; + case AF_VSOCK: + switch (type) { + case SOCK_STREAM: + return "v_str"; + case SOCK_DGRAM: + return "v_dgr"; + case SOCK_SEQPACKET: + return "v_seq"; + } + break; + } + + return "???"; +} + #endif // __SOCKET_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 1e3e4392dcca..d2846579285f 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Cloudflare #include <error.h> -#include <netinet/tcp.h> +#include <linux/tcp.h> +#include <linux/socket.h> #include <sys/epoll.h> #include "test_progs.h" @@ -22,6 +23,15 @@ #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ +/** + * SOL_TCP is defined in <netinet/tcp.h> (glibc), but the copybuf_address + * field of tcp_zerocopy_receive is not yet included in older versions. + * This workaround remains necessary until the glibc update propagates. + */ +#ifndef SOL_TCP +#define SOL_TCP 6 +#endif + static int connected_socket_v4(void) { struct sockaddr_in addr = { @@ -194,7 +204,7 @@ static void test_skmsg_helpers_with_link(enum bpf_map_type map_type) /* Fail since bpf_link for the same prog type has been created. */ link2 = bpf_program__attach_sockmap(prog_clone, map); if (!ASSERT_ERR_PTR(link2, "bpf_program__attach_sockmap")) { - bpf_link__detach(link2); + bpf_link__destroy(link2); goto out; } @@ -220,7 +230,7 @@ static void test_skmsg_helpers_with_link(enum bpf_map_type map_type) if (!ASSERT_OK(err, "bpf_link_update")) goto out; out: - bpf_link__detach(link); + bpf_link__destroy(link); test_skmsg_load_helpers__destroy(skel); } @@ -407,7 +417,7 @@ static void test_sockmap_skb_verdict_attach_with_link(void) if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) goto out; - bpf_link__detach(link); + bpf_link__destroy(link); err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0); if (!ASSERT_OK(err, "bpf_prog_attach")) @@ -416,7 +426,7 @@ static void test_sockmap_skb_verdict_attach_with_link(void) /* Fail since attaching with the same prog/map has been done. */ link = bpf_program__attach_sockmap(prog, map); if (!ASSERT_ERR_PTR(link, "bpf_program__attach_sockmap")) - bpf_link__detach(link); + bpf_link__destroy(link); err = bpf_prog_detach2(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT); if (!ASSERT_OK(err, "bpf_prog_detach2")) @@ -536,13 +546,14 @@ out: } -static void test_sockmap_skb_verdict_fionread(bool pass_prog) +static void do_test_sockmap_skb_verdict_fionread(int sotype, bool pass_prog) { int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1; int expected, zero = 0, sent, recvd, avail; struct test_sockmap_pass_prog *pass = NULL; struct test_sockmap_drop_prog *drop = NULL; char buf[256] = "0123456789"; + int split_len = sizeof(buf) / 2; if (pass_prog) { pass = test_sockmap_pass_prog__open_and_load(); @@ -550,7 +561,10 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog) return; verdict = bpf_program__fd(pass->progs.prog_skb_verdict); map = bpf_map__fd(pass->maps.sock_map_rx); - expected = sizeof(buf); + if (sotype == SOCK_DGRAM) + expected = split_len; /* FIONREAD for UDP is different from TCP */ + else + expected = sizeof(buf); } else { drop = test_sockmap_drop_prog__open_and_load(); if (!ASSERT_OK_PTR(drop, "open_and_load")) @@ -566,7 +580,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog) if (!ASSERT_OK(err, "bpf_prog_attach")) goto out; - err = create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1); + err = create_socket_pairs(AF_INET, sotype, &c0, &c1, &p0, &p1); if (!ASSERT_OK(err, "create_socket_pairs()")) goto out; @@ -574,8 +588,9 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog) if (!ASSERT_OK(err, "bpf_map_update_elem(c1)")) goto out_close; - sent = xsend(p1, &buf, sizeof(buf), 0); - ASSERT_EQ(sent, sizeof(buf), "xsend(p0)"); + sent = xsend(p1, &buf, split_len, 0); + sent += xsend(p1, &buf, sizeof(buf) - split_len, 0); + ASSERT_EQ(sent, sizeof(buf), "xsend(p1)"); err = ioctl(c1, FIONREAD, &avail); ASSERT_OK(err, "ioctl(FIONREAD) error"); ASSERT_EQ(avail, expected, "ioctl(FIONREAD)"); @@ -597,6 +612,12 @@ out: test_sockmap_drop_prog__destroy(drop); } +static void test_sockmap_skb_verdict_fionread(bool pass_prog) +{ + do_test_sockmap_skb_verdict_fionread(SOCK_STREAM, pass_prog); + do_test_sockmap_skb_verdict_fionread(SOCK_DGRAM, pass_prog); +} + static void test_sockmap_skb_verdict_change_tail(void) { struct test_sockmap_change_tail *skel; @@ -726,13 +747,13 @@ static void test_sockmap_skb_verdict_peek_with_link(void) test_sockmap_skb_verdict_peek_helper(map); ASSERT_EQ(pass->bss->clone_called, 1, "clone_called"); out: - bpf_link__detach(link); + bpf_link__destroy(link); test_sockmap_pass_prog__destroy(pass); } static void test_sockmap_unconnected_unix(void) { - int err, map, stream = 0, dgram = 0, zero = 0; + int err, map, stream = -1, dgram = -1, zero = 0; struct test_sockmap_pass_prog *skel; skel = test_sockmap_pass_prog__open_and_load(); @@ -743,22 +764,22 @@ static void test_sockmap_unconnected_unix(void) stream = xsocket(AF_UNIX, SOCK_STREAM, 0); if (stream < 0) - return; + goto out; dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); - if (dgram < 0) { - close(stream); - return; - } + if (dgram < 0) + goto out; err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY); - ASSERT_ERR(err, "bpf_map_update_elem(stream)"); + if (!ASSERT_ERR(err, "bpf_map_update_elem(stream)")) + goto out; err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); ASSERT_OK(err, "bpf_map_update_elem(dgram)"); - +out: close(stream); close(dgram); + test_sockmap_pass_prog__destroy(skel); } static void test_sockmap_many_socket(void) @@ -1006,7 +1027,7 @@ static void test_sockmap_skb_verdict_vsock_poll(void) if (xrecv_nonblock(conn, &buf, 1, 0) != 1) FAIL("xrecv_nonblock"); detach: - bpf_link__detach(link); + bpf_link__destroy(link); close: xclose(conn); xclose(peer); @@ -1042,6 +1063,270 @@ close_map: xclose(map); } +/* it is used to reproduce WARNING */ +static void test_sockmap_zc(void) +{ + int map, err, sent, recvd, zero = 0, one = 1, on = 1; + char buf[10] = "0123456789", rcv[11], addr[100]; + struct test_sockmap_pass_prog *skel = NULL; + int c0 = -1, p0 = -1, c1 = -1, p1 = -1; + struct tcp_zerocopy_receive zc; + socklen_t zc_len = sizeof(zc); + struct bpf_program *prog; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1)) + goto end; + + prog = skel->progs.prog_skb_verdict_ingress; + map = bpf_map__fd(skel->maps.sock_map_rx); + + err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto end; + + err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto end; + + err = bpf_map_update_elem(map, &one, &p1, BPF_ANY); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto end; + + sent = xsend(c0, buf, sizeof(buf), 0); + if (!ASSERT_EQ(sent, sizeof(buf), "xsend")) + goto end; + + /* trigger tcp_bpf_recvmsg_parser and inc copied_seq of p1 */ + recvd = recv_timeout(p1, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1)")) + goto end; + + /* uninstall sockmap of p1 */ + bpf_map_delete_elem(map, &one); + + /* trigger tcp stack and the rcv_nxt of p1 is less than copied_seq */ + sent = xsend(c1, buf, sizeof(buf) - 1, 0); + if (!ASSERT_EQ(sent, sizeof(buf) - 1, "xsend")) + goto end; + + err = setsockopt(p1, SOL_SOCKET, SO_ZEROCOPY, &on, sizeof(on)); + if (!ASSERT_OK(err, "setsockopt")) + goto end; + + memset(&zc, 0, sizeof(zc)); + zc.copybuf_address = (__u64)((unsigned long)addr); + zc.copybuf_len = sizeof(addr); + + err = getsockopt(p1, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len); + if (!ASSERT_OK(err, "getsockopt")) + goto end; + +end: + if (c0 >= 0) + close(c0); + if (p0 >= 0) + close(p0); + if (c1 >= 0) + close(c1); + if (p1 >= 0) + close(p1); + test_sockmap_pass_prog__destroy(skel); +} + +/* it is used to check whether copied_seq of sk is correct */ +static void test_sockmap_copied_seq(bool strp) +{ + int i, map, err, sent, recvd, zero = 0, one = 1; + struct test_sockmap_pass_prog *skel = NULL; + int c0 = -1, p0 = -1, c1 = -1, p1 = -1; + char buf[10] = "0123456789", rcv[11]; + struct bpf_program *prog; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1)) + goto end; + + prog = skel->progs.prog_skb_verdict_ingress; + map = bpf_map__fd(skel->maps.sock_map_rx); + + err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach verdict")) + goto end; + + if (strp) { + prog = skel->progs.prog_skb_verdict_ingress_strp; + err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_PARSER, 0); + if (!ASSERT_OK(err, "bpf_prog_attach parser")) + goto end; + } + + err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY); + if (!ASSERT_OK(err, "bpf_map_update_elem(p0)")) + goto end; + + err = bpf_map_update_elem(map, &one, &p1, BPF_ANY); + if (!ASSERT_OK(err, "bpf_map_update_elem(p1)")) + goto end; + + /* just trigger sockamp: data sent by c0 will be received by p1 */ + sent = xsend(c0, buf, sizeof(buf), 0); + if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c0), bpf")) + goto end; + + /* do partial read */ + recvd = recv_timeout(p1, rcv, 1, MSG_DONTWAIT, 1); + recvd += recv_timeout(p1, rcv + 1, sizeof(rcv) - 1, MSG_DONTWAIT, 1); + if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1), bpf") || + !ASSERT_OK(memcmp(buf, rcv, recvd), "data mismatch")) + goto end; + + /* uninstall sockmap of p1 and p0 */ + err = bpf_map_delete_elem(map, &one); + if (!ASSERT_OK(err, "bpf_map_delete_elem(1)")) + goto end; + + err = bpf_map_delete_elem(map, &zero); + if (!ASSERT_OK(err, "bpf_map_delete_elem(0)")) + goto end; + + /* now all sockets become plain socket, they should still work */ + for (i = 0; i < 5; i++) { + /* test copied_seq of p1 by running tcp native stack */ + sent = xsend(c1, buf, sizeof(buf), 0); + if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c1), native")) + goto end; + + recvd = recv(p1, rcv, sizeof(rcv), MSG_DONTWAIT); + if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1), native")) + goto end; + + /* p0 previously redirected skb to p1, we also check copied_seq of p0 */ + sent = xsend(c0, buf, sizeof(buf), 0); + if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c0), native")) + goto end; + + recvd = recv(p0, rcv, sizeof(rcv), MSG_DONTWAIT); + if (!ASSERT_EQ(recvd, sent, "recv_timeout(p0), native")) + goto end; + } + +end: + if (c0 >= 0) + close(c0); + if (p0 >= 0) + close(p0); + if (c1 >= 0) + close(c1); + if (p1 >= 0) + close(p1); + test_sockmap_pass_prog__destroy(skel); +} + +/* Wait until FIONREAD returns the expected value or timeout */ +static int wait_for_fionread(int fd, int expected, unsigned int timeout_ms) +{ + unsigned int elapsed = 0; + int avail = 0; + + while (elapsed < timeout_ms) { + if (ioctl(fd, FIONREAD, &avail) < 0) + return -errno; + if (avail >= expected) + return avail; + usleep(1000); + elapsed++; + } + return avail; +} + +/* it is used to send data to via native stack and BPF redirecting */ +static void test_sockmap_multi_channels(int sotype) +{ + int map, err, sent, recvd, zero = 0, one = 1, avail = 0, expected; + struct test_sockmap_pass_prog *skel = NULL; + int c0 = -1, p0 = -1, c1 = -1, p1 = -1; + char buf[10] = "0123456789", rcv[11]; + struct bpf_program *prog; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + err = create_socket_pairs(AF_INET, sotype, &c0, &c1, &p0, &p1); + if (err) + goto end; + + prog = skel->progs.prog_skb_verdict_ingress; + map = bpf_map__fd(skel->maps.sock_map_rx); + + err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach verdict")) + goto end; + + err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY); + if (!ASSERT_OK(err, "bpf_map_update_elem(p0)")) + goto end; + + err = bpf_map_update_elem(map, &one, &p1, BPF_ANY); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto end; + + /* send data to p1 via native stack */ + sent = xsend(c1, buf, 2, 0); + if (!ASSERT_EQ(sent, 2, "xsend(2)")) + goto end; + + avail = wait_for_fionread(p1, 2, IO_TIMEOUT_SEC); + ASSERT_EQ(avail, 2, "ioctl(FIONREAD) partial return"); + + /* send data to p1 via bpf redirecting */ + sent = xsend(c0, buf + 2, sizeof(buf) - 2, 0); + if (!ASSERT_EQ(sent, sizeof(buf) - 2, "xsend(remain-data)")) + goto end; + + /* Poll FIONREAD until expected bytes arrive, poll_read() is unreliable + * here since it may return immediately if prior data is already queued. + */ + expected = sotype == SOCK_DGRAM ? 2 : sizeof(buf); + avail = wait_for_fionread(p1, expected, IO_TIMEOUT_SEC); + ASSERT_EQ(avail, expected, "ioctl(FIONREAD) full return"); + + recvd = recv_timeout(p1, rcv, expected, MSG_DONTWAIT, 1); + if (!ASSERT_EQ(recvd, expected, "recv_timeout(p1)") || + !ASSERT_OK(memcmp(buf, rcv, recvd), "data mismatch")) + goto end; + + /* process remaining data for udp if secondary data is available */ + expected = sizeof(buf) - expected; + if (expected) { + avail = wait_for_fionread(p1, expected, IO_TIMEOUT_SEC); + ASSERT_EQ(avail, expected, "second ioctl(FIONREAD) full return"); + + recvd = recv_timeout(p1, rcv, expected, MSG_DONTWAIT, 1); + if (!ASSERT_EQ(recvd, expected, "second recv_timeout(p1)") || + !ASSERT_OK(memcmp(buf + sizeof(buf) - expected, rcv, recvd), + "second data mismatch")) + goto end; + } +end: + if (c0 >= 0) + close(c0); + if (p0 >= 0) + close(p0); + if (c1 >= 0) + close(c1); + if (p1 >= 0) + close(p1); + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -1108,4 +1393,14 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_vsock_poll(); if (test__start_subtest("sockmap vsock unconnected")) test_sockmap_vsock_unconnected(); + if (test__start_subtest("sockmap with zc")) + test_sockmap_zc(); + if (test__start_subtest("sockmap recover")) + test_sockmap_copied_seq(false); + if (test__start_subtest("sockmap recover with strp")) + test_sockmap_copied_seq(true); + if (test__start_subtest("sockmap tcp multi channels")) + test_sockmap_multi_channels(SOCK_STREAM); + if (test__start_subtest("sockmap udp multi channels")) + test_sockmap_multi_channels(SOCK_DGRAM); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index 3e5571dd578d..d815efac52fd 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -5,12 +5,15 @@ #define MAX_TEST_NAME 80 +#define u32(v) ((u32){(v)}) +#define u64(v) ((u64){(v)}) + #define __always_unused __attribute__((__unused__)) #define xbpf_map_delete_elem(fd, key) \ ({ \ int __ret = bpf_map_delete_elem((fd), (key)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("map_delete"); \ __ret; \ }) @@ -18,7 +21,7 @@ #define xbpf_map_lookup_elem(fd, key, val) \ ({ \ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("map_lookup"); \ __ret; \ }) @@ -26,7 +29,7 @@ #define xbpf_map_update_elem(fd, key, val, flags) \ ({ \ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("map_update"); \ __ret; \ }) @@ -35,7 +38,7 @@ ({ \ int __ret = \ bpf_prog_attach((prog), (target), (type), (flags)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("prog_attach(" #type ")"); \ __ret; \ }) @@ -43,7 +46,7 @@ #define xbpf_prog_detach2(prog, target, type) \ ({ \ int __ret = bpf_prog_detach2((prog), (target), (type)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("prog_detach2(" #type ")"); \ __ret; \ }) @@ -66,21 +69,15 @@ __ret; \ }) -static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) +static inline int add_to_sockmap(int mapfd, int fd1, int fd2) { - u64 value; - u32 key; int err; - key = 0; - value = fd1; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + err = xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1), BPF_NOEXIST); if (err) return err; - key = 1; - value = fd2; - return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + return xbpf_map_update_elem(mapfd, &u32(1), &u64(fd2), BPF_NOEXIST); } #endif // __SOCKMAP_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 2d0796314862..6ed8e149e3d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -3,77 +3,62 @@ /* * Tests for sockmap/sockhash holding kTLS sockets. */ - +#include <error.h> #include <netinet/tcp.h> +#include <linux/tls.h> #include "test_progs.h" +#include "sockmap_helpers.h" +#include "test_skmsg_load_helpers.skel.h" +#include "test_sockmap_ktls.skel.h" #define MAX_TEST_NAME 80 #define TCP_ULP 31 -static int tcp_server(int family) +static int init_ktls_pairs(int c, int p) { - int err, s; - - s = socket(family, SOCK_STREAM, 0); - if (!ASSERT_GE(s, 0, "socket")) - return -1; + int err; + struct tls12_crypto_info_aes_gcm_128 crypto_rx; + struct tls12_crypto_info_aes_gcm_128 crypto_tx; - err = listen(s, SOMAXCONN); - if (!ASSERT_OK(err, "listen")) - return -1; - - return s; -} - -static int disconnect(int fd) -{ - struct sockaddr unspec = { AF_UNSPEC }; + err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto out; - return connect(fd, &unspec, sizeof(unspec)); + err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto out; + + memset(&crypto_rx, 0, sizeof(crypto_rx)); + memset(&crypto_tx, 0, sizeof(crypto_tx)); + crypto_rx.info.version = TLS_1_2_VERSION; + crypto_tx.info.version = TLS_1_2_VERSION; + crypto_rx.info.cipher_type = TLS_CIPHER_AES_GCM_128; + crypto_tx.info.cipher_type = TLS_CIPHER_AES_GCM_128; + + err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_tx, sizeof(crypto_tx)); + if (!ASSERT_OK(err, "setsockopt(TLS_TX)")) + goto out; + + err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_rx, sizeof(crypto_rx)); + if (!ASSERT_OK(err, "setsockopt(TLS_RX)")) + goto out; + return 0; +out: + return -1; } -/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */ -static void test_sockmap_ktls_disconnect_after_delete(int family, int map) +static int create_ktls_pairs(int family, int sotype, int *c, int *p) { - struct sockaddr_storage addr = {0}; - socklen_t len = sizeof(addr); - int err, cli, srv, zero = 0; - - srv = tcp_server(family); - if (srv == -1) - return; - - err = getsockname(srv, (struct sockaddr *)&addr, &len); - if (!ASSERT_OK(err, "getsockopt")) - goto close_srv; - - cli = socket(family, SOCK_STREAM, 0); - if (!ASSERT_GE(cli, 0, "socket")) - goto close_srv; - - err = connect(cli, (struct sockaddr *)&addr, len); - if (!ASSERT_OK(err, "connect")) - goto close_cli; - - err = bpf_map_update_elem(map, &zero, &cli, 0); - if (!ASSERT_OK(err, "bpf_map_update_elem")) - goto close_cli; + int err; - err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); - if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) - goto close_cli; - - err = bpf_map_delete_elem(map, &zero); - if (!ASSERT_OK(err, "bpf_map_delete_elem")) - goto close_cli; - - err = disconnect(cli); - ASSERT_OK(err, "disconnect"); + err = create_pair(family, sotype, c, p); + if (!ASSERT_OK(err, "create_pair()")) + return -1; -close_cli: - close(cli); -close_srv: - close(srv); + err = init_ktls_pairs(*c, *p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + return -1; + return 0; } static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map) @@ -146,6 +131,278 @@ static const char *fmt_test_name(const char *subtest_name, int family, return test_name; } +static void test_sockmap_ktls_offload(int family, int sotype) +{ + int err; + int c = 0, p = 0, sent, recvd; + char msg[12] = "hello world\0"; + char rcv[13]; + + err = create_ktls_pairs(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_ktls_pairs()")) + goto out; + + sent = send(c, msg, sizeof(msg), 0); + if (!ASSERT_OK(err, "send(msg)")) + goto out; + + recvd = recv(p, rcv, sizeof(rcv), 0); + if (!ASSERT_OK(err, "recv(msg)") || + !ASSERT_EQ(recvd, sent, "length mismatch")) + goto out; + + ASSERT_OK(memcmp(msg, rcv, sizeof(msg)), "data mismatch"); + +out: + if (c) + close(c); + if (p) + close(p); +} + +static void test_sockmap_ktls_tx_cork(int family, int sotype, bool push) +{ + int err, off; + int i, j; + int start_push = 0, push_len = 0; + int c = 0, p = 0, one = 1, sent, recvd; + int prog_fd, map_fd; + char msg[12] = "hello world\0"; + char rcv[20] = {0}; + struct test_sockmap_ktls *skel; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open ktls skel")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy); + map_fd = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) + goto out; + + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) + goto out; + + err = init_ktls_pairs(c, p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + goto out; + + skel->bss->cork_byte = sizeof(msg); + if (push) { + start_push = 1; + push_len = 2; + } + skel->bss->push_start = start_push; + skel->bss->push_end = push_len; + + off = sizeof(msg) / 2; + sent = send(c, msg, off, 0); + if (!ASSERT_EQ(sent, off, "send(msg)")) + goto out; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(-1, recvd, "expected no data")) + goto out; + + /* send remaining msg */ + sent = send(c, msg + off, sizeof(msg) - off, 0); + if (!ASSERT_EQ(sent, sizeof(msg) - off, "send remaining data")) + goto out; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_OK(err, "recv(msg)") || + !ASSERT_EQ(recvd, sizeof(msg) + push_len, "check length mismatch")) + goto out; + + for (i = 0, j = 0; i < recvd;) { + /* skip checking the data that has been pushed in */ + if (i >= start_push && i <= start_push + push_len - 1) { + i++; + continue; + } + if (!ASSERT_EQ(rcv[i], msg[j], "data mismatch")) + goto out; + i++; + j++; + } +out: + if (c) + close(c); + if (p) + close(p); + test_sockmap_ktls__destroy(skel); +} + +static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push) +{ + int c = -1, p = -1, one = 1, two = 2; + struct test_sockmap_ktls *skel; + unsigned char *data = NULL; + struct msghdr msg = {0}; + struct iovec iov[2]; + int prog_fd, map_fd; + int txrx_buf = 1024; + int iov_length = 8192; + int err; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open ktls skel")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + + err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int)); + err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int)); + if (!ASSERT_OK(err, "set buf limit")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir); + map_fd = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) + goto out; + + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) + goto out; + + err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) + goto out; + + skel->bss->apply_bytes = 1024; + + err = init_ktls_pairs(c, p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + goto out; + + data = calloc(iov_length, sizeof(char)); + if (!data) + goto out; + + iov[0].iov_base = data; + iov[0].iov_len = iov_length; + iov[1].iov_base = data; + iov[1].iov_len = iov_length; + msg.msg_iov = iov; + msg.msg_iovlen = 2; + + for (;;) { + err = sendmsg(c, &msg, MSG_DONTWAIT); + if (err <= 0) + break; + } + +out: + if (data) + free(data); + if (c != -1) + close(c); + if (p != -1) + close(p); + + test_sockmap_ktls__destroy(skel); +} + +static void test_sockmap_ktls_tx_pop(int family, int sotype) +{ + char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0"; + int c = 0, p = 0, one = 1, sent, recvd; + struct test_sockmap_ktls *skel; + int prog_fd, map_fd; + char rcv[50] = {0}; + int err; + int i, m, r; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open ktls skel")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy); + map_fd = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) + goto out; + + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) + goto out; + + err = init_ktls_pairs(c, p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + goto out; + + struct { + int pop_start; + int pop_len; + } pop_policy[] = { + /* trim the start */ + {0, 2}, + {0, 10}, + {1, 2}, + {1, 10}, + /* trim the end */ + {35, 2}, + /* New entries should be added before this line */ + {-1, -1}, + }; + + i = 0; + while (pop_policy[i].pop_start >= 0) { + skel->bss->pop_start = pop_policy[i].pop_start; + skel->bss->pop_end = pop_policy[i].pop_len; + + sent = send(c, msg, sizeof(msg), 0); + if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)")) + goto out; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch")) + goto out; + + /* verify the data + * msg: 0123456789a bcdefghij klmnopqrstuvwxyz + * | | + * popped data + */ + for (m = 0, r = 0; m < sizeof(msg);) { + /* skip checking the data that has been popped */ + if (m >= pop_policy[i].pop_start && + m <= pop_policy[i].pop_start + pop_policy[i].pop_len - 1) { + m++; + continue; + } + + if (!ASSERT_EQ(msg[m], rcv[r], "data mismatch")) + goto out; + m++; + r++; + } + i++; + } +out: + if (c) + close(c); + if (p) + close(p); + test_sockmap_ktls__destroy(skel); +} + static void run_tests(int family, enum bpf_map_type map_type) { int map; @@ -154,18 +411,135 @@ static void run_tests(int family, enum bpf_map_type map_type) if (!ASSERT_GE(map, 0, "bpf_map_create")) return; - if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type))) - test_sockmap_ktls_disconnect_after_delete(family, map); if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type))) test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map); close(map); } +/* + * Regression test for the KTLS + sockmap (verdict) reverse-order UAF. + * + * Vulnerable sequence: + * 1. Insert receiver socket into sockmap with BPF_SK_SKB_VERDICT program. + * sk->sk_data_ready becomes sk_psock_verdict_data_ready. + * 2. Configure TLS RX: tls_sw_strparser_arm() saves + * sk_psock_verdict_data_ready as rx_ctx->saved_data_ready. + * + * When data arrives, tls_rx_msg_ready() calls saved_data_ready() = + * sk_psock_verdict_data_ready(), which calls tcp_read_skb() and drains + * sk_receive_queue via __skb_unlink() without advancing copied_seq. + * tls_strp_msg_load() then finds the queue empty while tcp_inq() is still + * non-zero, hits WARN_ON_ONCE(!first), and leaves a dangling frag_list + * pointer that tls_decrypt_sg() walks — a use-after-free. + * + * The fix adds a tls_sw_has_ctx_rx() check to sk_psock_verdict_data_ready(), + * mirroring what sk_psock_strp_data_ready() already does: when a TLS RX + * context is present, defer to psock->saved_data_ready (sock_def_readable) + * instead of calling tcp_read_skb(), so TLS retains sole ownership of the + * receive queue. Data is then decrypted and returned correctly by + * tls_sw_recvmsg(). + */ +static void test_sockmap_ktls_verdict_with_tls_rx(int family, int sotype) +{ + struct tls12_crypto_info_aes_gcm_128 crypto_info = {}; + char send_buf[] = "hello ktls sockmap reverse order"; + char recv_buf[sizeof(send_buf)] = {}; + struct test_sockmap_ktls *skel; + int c = -1, p = -1, zero = 0; + int prog_fd, map_fd; + ssize_t n; + int err; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open_and_load")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_skb_verdict_pass); + map_fd = bpf_map__fd(skel->maps.sock_map_verdict); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_SKB_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk_skb verdict")) + goto out; + + /* Step 1: configure TLS TX on sender (no sockmap involvement) */ + err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP) client")) + goto out; + + crypto_info.info.version = TLS_1_2_VERSION; + crypto_info.info.cipher_type = TLS_CIPHER_AES_GCM_128; + memset(crypto_info.key, 0x01, sizeof(crypto_info.key)); + memset(crypto_info.salt, 0x02, sizeof(crypto_info.salt)); + + err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info)); + if (!ASSERT_OK(err, "setsockopt(TLS_TX)")) + goto out; + + /* Step 2: insert receiver into sockmap BEFORE TLS RX */ + err = bpf_map_update_elem(map_fd, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto out; + + /* Step 3: configure TLS RX AFTER sockmap insertion */ + err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP) server")) + goto out; + + err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_info, sizeof(crypto_info)); + if (!ASSERT_OK(err, "setsockopt(TLS_RX)")) + goto out; + + /* + * A buggy kernel hits WARN_ON_ONCE in tls_strp_load_anchor_with_queue + * and may UAF in tls_decrypt_sg here. With the fix, + * sk_psock_verdict_data_ready defers to sock_def_readable and TLS + * decrypts the record normally. + */ + n = send(c, send_buf, sizeof(send_buf), 0); + if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "send")) + goto out; + + n = recv_timeout(p, recv_buf, sizeof(recv_buf), 0, 5); + if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "recv")) + goto out; + + ASSERT_OK(memcmp(send_buf, recv_buf, sizeof(send_buf)), "data integrity"); + +out: + if (c != -1) + close(c); + if (p != -1) + close(p); + test_sockmap_ktls__destroy(skel); +} + +static void run_ktls_test(int family, int sotype) +{ + if (test__start_subtest("tls simple offload")) + test_sockmap_ktls_offload(family, sotype); + if (test__start_subtest("tls tx cork")) + test_sockmap_ktls_tx_cork(family, sotype, false); + if (test__start_subtest("tls tx cork with push")) + test_sockmap_ktls_tx_cork(family, sotype, true); + if (test__start_subtest("tls tx egress with no buf")) + test_sockmap_ktls_tx_no_buf(family, sotype, true); + if (test__start_subtest("tls tx with pop")) + test_sockmap_ktls_tx_pop(family, sotype); + if (test__start_subtest("tls verdict with tls rx")) + test_sockmap_ktls_verdict_with_tls_rx(family, sotype); +} + void test_sockmap_ktls(void) { run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP); run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH); run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP); run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH); + run_ktls_test(AF_INET, SOCK_STREAM); + run_ktls_test(AF_INET6, SOCK_STREAM); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 4ee1148d22be..cc0c68bab907 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -899,7 +899,7 @@ static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *sk redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); - bpf_link__detach(link); + bpf_link__destroy(link); } static void redir_partial(int family, int sotype, int sock_map, int parser_map) @@ -924,6 +924,8 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map) goto close; n = xsend(c1, buf, sizeof(buf), 0); + if (n == -1) + goto close; if (n < sizeof(buf)) FAIL("incomplete write"); @@ -1366,237 +1368,6 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } } -static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, - int sock_mapfd, int nop_mapfd, - int verd_mapfd, enum redir_mode mode, - int send_flags) -{ - const char *log_prefix = redir_mode_str(mode); - unsigned int pass; - int err, n; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); - - err = add_to_sockmap(sock_mapfd, peer0, peer1); - if (err) - return; - - if (nop_mapfd >= 0) { - err = add_to_sockmap(nop_mapfd, cli0, cli1); - if (err) - return; - } - - /* Last byte is OOB data when send_flags has MSG_OOB bit set */ - n = xsend(cli1, "ab", 2, send_flags); - if (n >= 0 && n < 2) - FAIL("%s: incomplete send", log_prefix); - if (n < 2) - return; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - return; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); - - if (send_flags & MSG_OOB) { - /* Check that we can't read OOB while in sockmap */ - errno = 0; - n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT); - if (n != -1 || errno != EOPNOTSUPP) - FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d", - log_prefix, n, errno); - - /* Remove peer1 from sockmap */ - xbpf_map_delete_elem(sock_mapfd, &(int){ 1 }); - - /* Check that OOB was dropped on redirect */ - errno = 0; - n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT); - if (n != -1 || errno != EINVAL) - FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d", - log_prefix, n, errno); - } -} - -static void unix_redir_to_connected(int sotype, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) -{ - int c0, c1, p0, p1; - int sfd[2]; - - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - return; - c0 = sfd[0], p0 = sfd[1]; - - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - goto close0; - c1 = sfd[0], p1 = sfd[1]; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, - mode, NO_FLAGS); - - xclose(c1); - xclose(p1); -close0: - xclose(c0); - xclose(p0); -} - -static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int sotype) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS); - skel->bss->test_ingress = true; - unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int sotype) -{ - const char *family_name, *map_name; - char s[MAX_TEST_NAME]; - - family_name = family_str(AF_UNIX); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - unix_skb_redir_to_connected(skel, map, sotype); -} - -/* Returns two connected loopback vsock sockets */ -static int vsock_socketpair_connectible(int sotype, int *v0, int *v1) -{ - return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1); -} - -static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd, - enum redir_mode mode, int sotype) -{ - const char *log_prefix = redir_mode_str(mode); - char a = 'a', b = 'b'; - int u0, u1, v0, v1; - int sfd[2]; - unsigned int pass; - int err, n; - u32 key; - - zero_verdict_count(verd_mapfd); - - if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd)) - return; - - u0 = sfd[0]; - u1 = sfd[1]; - - err = vsock_socketpair_connectible(sotype, &v0, &v1); - if (err) { - FAIL("vsock_socketpair_connectible() failed"); - goto close_uds; - } - - err = add_to_sockmap(sock_mapfd, u0, v0); - if (err) { - FAIL("add_to_sockmap failed"); - goto close_vsock; - } - - n = write(v1, &a, sizeof(a)); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto out; - - n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0); - if (n < 0) - FAIL("%s: recv() err, errno=%d", log_prefix, errno); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); - if (b != a) - FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b); - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto out; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); -out: - key = 0; - bpf_map_delete_elem(sock_mapfd, &key); - key = 1; - bpf_map_delete_elem(sock_mapfd, &key); - -close_vsock: - close(v0); - close(v1); - -close_uds: - close(u0); - close(u1); -} - -static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, - int sotype) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype); - skel->bss->test_ingress = true; - vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map) -{ - const char *family_name, *map_name; - char s[MAX_TEST_NAME]; - - family_name = family_str(AF_VSOCK); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - - vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM); - vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET); -} - static void test_reuseport(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { @@ -1637,224 +1408,6 @@ static void test_reuseport(struct test_sockmap_listen *skel, } } -static int inet_socketpair(int family, int type, int *s, int *c) -{ - return create_pair(family, type | SOCK_NONBLOCK, s, c); -} - -static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, - enum redir_mode mode) -{ - int c0, c1, p0, p1; - int err; - - err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); - if (err) - return; - err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1); - if (err) - goto close_cli0; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, - mode, NO_FLAGS); - - xclose(c1); - xclose(p1); -close_cli0: - xclose(c0); - xclose(p0); -} - -static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS); - skel->bss->test_ingress = true; - udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int family) -{ - const char *family_name, *map_name; - char s[MAX_TEST_NAME]; - - family_name = family_str(family); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - udp_skb_redir_to_connected(skel, map, family); -} - -static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) -{ - int c0, c1, p0, p1; - int sfd[2]; - int err; - - if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) - return; - c0 = sfd[0], p0 = sfd[1]; - - err = inet_socketpair(family, type, &p1, &c1); - if (err) - goto close; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, - mode, NO_FLAGS); - - xclose(c1); - xclose(p1); -close: - xclose(c0); - xclose(p0); -} - -static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, - REDIR_EGRESS); - inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, - REDIR_EGRESS); - skel->bss->test_ingress = true; - inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, - REDIR_INGRESS); - inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, - REDIR_INGRESS); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, - int nop_mapfd, int verd_mapfd, - enum redir_mode mode, int send_flags) -{ - int c0, c1, p0, p1; - int sfd[2]; - int err; - - err = inet_socketpair(family, type, &p0, &c0); - if (err) - return; - - if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) - goto close_cli0; - c1 = sfd[0], p1 = sfd[1]; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd, - verd_mapfd, mode, send_flags); - - xclose(c1); - xclose(p1); -close_cli0: - xclose(c0); - xclose(p0); -} - -static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int nop_map = bpf_map__fd(skel->maps.nop_map); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, -1, verdict_map, - REDIR_EGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, -1, verdict_map, - REDIR_EGRESS, NO_FLAGS); - - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, nop_map, verdict_map, - REDIR_EGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_EGRESS, NO_FLAGS); - - /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */ - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_EGRESS, MSG_OOB); - - skel->bss->test_ingress = true; - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, -1, verdict_map, - REDIR_INGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, -1, verdict_map, - REDIR_INGRESS, NO_FLAGS); - - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, nop_map, verdict_map, - REDIR_INGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_INGRESS, NO_FLAGS); - - /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */ - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_INGRESS, MSG_OOB); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int family) -{ - const char *family_name, *map_name; - struct netns_obj *netns; - char s[MAX_TEST_NAME]; - - family_name = family_str(family); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - - netns = netns_new("sockmap_listen", true); - if (!ASSERT_OK_PTR(netns, "netns_new")) - return; - - inet_unix_skb_redir_to_connected(skel, map, family); - unix_inet_skb_redir_to_connected(skel, map, family); - - netns_free(netns); -} - static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, int family) { @@ -1863,8 +1416,6 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, test_redir(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_DGRAM); - test_udp_redir(skel, map, family); - test_udp_unix_redir(skel, map, family); } void serial_test_sockmap_listen(void) @@ -1880,16 +1431,10 @@ void serial_test_sockmap_listen(void) skel->bss->test_sockmap = true; run_tests(skel, skel->maps.sock_map, AF_INET); run_tests(skel, skel->maps.sock_map, AF_INET6); - test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM); - test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM); - test_vsock_redir(skel, skel->maps.sock_map); skel->bss->test_sockmap = false; run_tests(skel, skel->maps.sock_hash, AF_INET); run_tests(skel, skel->maps.sock_hash, AF_INET6); - test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM); - test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM); - test_vsock_redir(skel, skel->maps.sock_hash); test_sockmap_listen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c new file mode 100644 index 000000000000..9c461d93113d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for sockmap/sockhash redirection. + * + * BPF_MAP_TYPE_SOCKMAP + * BPF_MAP_TYPE_SOCKHASH + * x + * sk_msg-to-egress + * sk_msg-to-ingress + * sk_skb-to-egress + * sk_skb-to-ingress + * x + * AF_INET, SOCK_STREAM + * AF_INET6, SOCK_STREAM + * AF_INET, SOCK_DGRAM + * AF_INET6, SOCK_DGRAM + * AF_UNIX, SOCK_STREAM + * AF_UNIX, SOCK_DGRAM + * AF_VSOCK, SOCK_STREAM + * AF_VSOCK, SOCK_SEQPACKET + */ + +#include <errno.h> +#include <error.h> +#include <sched.h> +#include <stdio.h> +#include <unistd.h> + +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <linux/string.h> +#include <linux/vm_sockets.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "linux/const.h" +#include "test_progs.h" +#include "sockmap_helpers.h" +#include "test_sockmap_redir.skel.h" + +/* The meaning of SUPPORTED is "will redirect packet as expected". + */ +#define SUPPORTED _BITUL(0) + +/* Note on sk_skb-to-ingress ->af_vsock: + * + * Peer socket may receive the packet some time after the return from sendmsg(). + * In a typical usage scenario, recvmsg() will block until the redirected packet + * appears in the destination queue, or timeout if the packet was dropped. By + * that point, the verdict map has already been updated to reflect what has + * happened. + * + * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg() + * takes place. Which means we may race the execution of the verdict logic and + * read map_verd before it has been updated, i.e. we might observe + * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1. + * + * This confuses the selftest logic: if there was no packet dropped, where's the + * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0 + * (which implies the verdict program has not been ran) just re-read the verdict + * map again. + */ +#define UNSUPPORTED_RACY_VERD _BITUL(1) + +enum prog_type { + SK_MSG_EGRESS, + SK_MSG_INGRESS, + SK_SKB_EGRESS, + SK_SKB_INGRESS, +}; + +enum { + SEND_INNER = 0, + SEND_OUTER, +}; + +enum { + RECV_INNER = 0, + RECV_OUTER, +}; + +struct maps { + int in; + int out; + int verd; +}; + +struct combo_spec { + enum prog_type prog_type; + const char *in, *out; +}; + +struct redir_spec { + const char *name; + int idx_send; + int idx_recv; + enum prog_type prog_type; +}; + +struct socket_spec { + int family; + int sotype; + int send_flags; + int in[2]; + int out[2]; +}; + +static int socket_spec_pairs(struct socket_spec *s) +{ + return create_socket_pairs(s->family, s->sotype, + &s->in[0], &s->out[0], + &s->in[1], &s->out[1]); +} + +static void socket_spec_close(struct socket_spec *s) +{ + xclose(s->in[0]); + xclose(s->in[1]); + xclose(s->out[0]); + xclose(s->out[1]); +} + +static void get_redir_params(struct redir_spec *redir, + struct test_sockmap_redir *skel, int *prog_fd, + enum bpf_attach_type *attach_type, + int *redirect_flags) +{ + enum prog_type type = redir->prog_type; + struct bpf_program *prog; + bool sk_msg; + + sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS; + prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict; + + *prog_fd = bpf_program__fd(prog); + *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT; + + if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS) + *redirect_flags = BPF_F_INGRESS; + else + *redirect_flags = 0; +} + +static void try_recv(const char *prefix, int fd, int flags, bool expect_success) +{ + ssize_t n; + char buf; + + errno = 0; + n = recv(fd, &buf, 1, flags); + if (n < 0 && expect_success) + FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n); + if (!n && !expect_success) + FAIL("%s: expected failure: retval=%zd", prefix, n); +} + +static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out, + int sd_recv, int map_verd, int status) +{ + unsigned int drop, pass; + char recv_buf; + ssize_t n; + +get_verdict: + if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) || + xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass)) + return; + + if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) { + sched_yield(); + goto get_verdict; + } + + if (pass != 0) { + FAIL("unsupported: wanted verdict pass 0, have %u", pass); + return; + } + + /* If nothing was dropped, packet should have reached the peer */ + if (drop == 0) { + errno = 0; + n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC); + if (n != 1) + FAIL_ERRNO("unsupported: packet missing, retval=%zd", n); + } + + /* Ensure queues are empty */ + try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false); + if (sd_in != sd_send) + try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false); + + try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false); + if (sd_recv != sd_out) + try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false); +} + +static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer, + int sd_in, int sd_out, int sd_recv, + struct maps *maps, int status) +{ + unsigned int drop, pass; + char *send_buf = "ab"; + char recv_buf = '\0'; + ssize_t n, len = 1; + + /* Zero out the verdict map */ + if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) || + xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY)) + return; + + if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST)) + return; + + if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST)) + goto del_in; + + /* Last byte is OOB data when send_flags has MSG_OOB bit set */ + if (send_flags & MSG_OOB) + len++; + n = send(sd_send, send_buf, len, send_flags); + if (n >= 0 && n < len) + FAIL("incomplete send"); + if (n < 0) { + /* sk_msg redirect combo not supported? */ + if (status & SUPPORTED || errno != EACCES) + FAIL_ERRNO("send"); + goto out; + } + + if (!(status & SUPPORTED)) { + handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv, + maps->verd, status); + goto out; + } + + errno = 0; + n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC); + if (n != 1) { + FAIL_ERRNO("recv_timeout()"); + goto out; + } + + /* Check verdict _after_ recv(); af_vsock may need time to catch up */ + if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) || + xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass)) + goto out; + + if (drop != 0 || pass != 1) + FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u", + drop, pass); + + if (recv_buf != send_buf[0]) + FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]); + + if (send_flags & MSG_OOB) { + /* Fail reading OOB while in sockmap */ + try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out, + MSG_OOB | MSG_DONTWAIT, false); + + /* Remove sd_out from sockmap */ + xbpf_map_delete_elem(maps->out, &u32(0)); + + /* Check that OOB was dropped on redirect */ + try_recv("recv(sd_out, MSG_OOB)", sd_out, + MSG_OOB | MSG_DONTWAIT, false); + + goto del_in; + } +out: + xbpf_map_delete_elem(maps->out, &u32(0)); +del_in: + xbpf_map_delete_elem(maps->in, &u32(0)); +} + +static int is_redir_supported(enum prog_type type, const char *in, + const char *out) +{ + /* Matching based on strings returned by socket_kind_to_str(): + * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq + * Plus a wildcard: any + * Not in use: u_seq, v_dgr + */ + struct combo_spec *c, combos[] = { + /* Send to local: TCP -> any, but vsock */ + { SK_MSG_INGRESS, "tcp", "tcp" }, + { SK_MSG_INGRESS, "tcp", "udp" }, + { SK_MSG_INGRESS, "tcp", "u_str" }, + { SK_MSG_INGRESS, "tcp", "u_dgr" }, + + /* Send to egress: TCP -> TCP */ + { SK_MSG_EGRESS, "tcp", "tcp" }, + + /* Ingress to egress: any -> any */ + { SK_SKB_EGRESS, "any", "any" }, + + /* Ingress to local: any -> any, but vsock */ + { SK_SKB_INGRESS, "any", "tcp" }, + { SK_SKB_INGRESS, "any", "udp" }, + { SK_SKB_INGRESS, "any", "u_str" }, + { SK_SKB_INGRESS, "any", "u_dgr" }, + }; + + for (c = combos; c < combos + ARRAY_SIZE(combos); c++) { + if (c->prog_type == type && + (!strcmp(c->in, "any") || strstarts(in, c->in)) && + (!strcmp(c->out, "any") || strstarts(out, c->out))) + return SUPPORTED; + } + + return 0; +} + +static int get_support_status(enum prog_type type, const char *in, + const char *out) +{ + int status = is_redir_supported(type, in, out); + + if (type == SK_SKB_INGRESS && strstarts(out, "v_")) + status |= UNSUPPORTED_RACY_VERD; + + return status; +} + +static void test_socket(enum bpf_map_type type, struct redir_spec *redir, + struct maps *maps, struct socket_spec *s_in, + struct socket_spec *s_out) +{ + int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status; + const char *in_str, *out_str; + char s[MAX_TEST_NAME]; + + fd_in = s_in->in[0]; + fd_out = s_out->out[0]; + fd_send = s_in->in[redir->idx_send]; + fd_peer = s_in->in[redir->idx_send ^ 1]; + fd_recv = s_out->out[redir->idx_recv]; + flags = s_in->send_flags; + + in_str = socket_kind_to_str(fd_in); + out_str = socket_kind_to_str(fd_out); + status = get_support_status(redir->prog_type, in_str, out_str); + + snprintf(s, sizeof(s), + "%-4s %-17s %-5s %s %-5s%6s", + /* hash sk_skb-to-ingress u_str → v_str (OOB) */ + type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash", + redir->name, + in_str, + status & SUPPORTED ? "→" : " ", + out_str, + (flags & MSG_OOB) ? "(OOB)" : ""); + + if (!test__start_subtest(s)) + return; + + test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv, + maps, status); +} + +static void test_redir(enum bpf_map_type type, struct redir_spec *redir, + struct maps *maps) +{ + struct socket_spec *s, sockets[] = { + { AF_INET, SOCK_STREAM }, + // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */ + { AF_INET6, SOCK_STREAM }, + { AF_INET, SOCK_DGRAM }, + { AF_INET6, SOCK_DGRAM }, + { AF_UNIX, SOCK_STREAM }, + { AF_UNIX, SOCK_STREAM, MSG_OOB }, + { AF_UNIX, SOCK_DGRAM }, + // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */ + { AF_VSOCK, SOCK_STREAM }, + // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */ + { AF_VSOCK, SOCK_SEQPACKET }, + }; + + for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) + if (socket_spec_pairs(s)) + goto out; + + /* Intra-proto */ + for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) + test_socket(type, redir, maps, s, s); + + /* Cross-proto */ + for (int i = 0; i < ARRAY_SIZE(sockets); i++) { + for (int j = 0; j < ARRAY_SIZE(sockets); j++) { + struct socket_spec *out = &sockets[j]; + struct socket_spec *in = &sockets[i]; + + /* Skip intra-proto and between variants */ + if (out->send_flags || + (in->family == out->family && + in->sotype == out->sotype)) + continue; + + test_socket(type, redir, maps, in, out); + } + } +out: + while (--s >= sockets) + socket_spec_close(s); +} + +static void test_map(enum bpf_map_type type) +{ + struct redir_spec *r, redirs[] = { + { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS }, + { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS }, + { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS }, + { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS }, + }; + + for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) { + enum bpf_attach_type attach_type; + struct test_sockmap_redir *skel; + struct maps maps; + int prog_fd; + + skel = test_sockmap_redir__open_and_load(); + if (!skel) { + FAIL("open_and_load"); + return; + } + + switch (type) { + case BPF_MAP_TYPE_SOCKMAP: + maps.in = bpf_map__fd(skel->maps.nop_map); + maps.out = bpf_map__fd(skel->maps.sock_map); + break; + case BPF_MAP_TYPE_SOCKHASH: + maps.in = bpf_map__fd(skel->maps.nop_hash); + maps.out = bpf_map__fd(skel->maps.sock_hash); + break; + default: + FAIL("Unsupported bpf_map_type"); + return; + } + + skel->bss->redirect_type = type; + maps.verd = bpf_map__fd(skel->maps.verdict_map); + get_redir_params(r, skel, &prog_fd, &attach_type, + &skel->bss->redirect_flags); + + if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0)) + return; + + test_redir(type, r, &maps); + + if (xbpf_prog_detach2(prog_fd, maps.in, attach_type)) + return; + + test_sockmap_redir__destroy(skel); + } +} + +void serial_test_sockmap_redir(void) +{ + test_map(BPF_MAP_TYPE_SOCKMAP); + test_map(BPF_MAP_TYPE_SOCKHASH); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index ba6b3ec1156a..3a41c517b918 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -142,7 +142,7 @@ static int getsetsockopt(void) /* TCP_CONGESTION can extend the string */ - strcpy(buf.cc, "nv"); + strscpy(buf.cc, "nv"); err = setsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, strlen("nv")); if (err) { log_err("Failed to call setsockopt(TCP_CONGESTION)"); @@ -190,7 +190,7 @@ static int getsetsockopt(void) fd = socket(AF_NETLINK, SOCK_RAW, 0); if (fd < 0) { log_err("Failed to create AF_NETLINK socket"); - return -1; + goto err; } buf.u32 = 1; @@ -211,6 +211,21 @@ static int getsetsockopt(void) } ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value"); + /* Trick bpf_tcp_sock() with IPPROTO_TCP */ + close(fd); + fd = socket(AF_INET, SOCK_RAW, IPPROTO_TCP); + if (!ASSERT_OK_FD(fd, "socket")) + goto err; + + /* The BPF prog intercepts this before the kernel sees it, any + * optlen works. Go with 4 bytes for simplicity. + */ + buf.u32 = 1; + optlen = sizeof(buf.u32); + err = setsockopt(fd, SOL_TCP, TCP_SAVED_SYN, &buf, optlen); + if (!ASSERT_ERR(err, "setsockopt(TCP_SAVED_SYN)")) + goto err; + free(big_buf); close(fd); return 0; diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index 2b0068742ef9..bbe476f4c47d 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -13,22 +13,23 @@ static struct { const char *err_msg; } spin_lock_fail_tests[] = { { "lock_id_kptr_preserve", - "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) " - "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "[0-9]\\+: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2)" + " R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n" + "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", - "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)" - " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n" + " R0=map_value(id=1,map=array_map,ks=4,vs=8)" + " R1=map_value(id=1,map=array_map,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" " R0=map_value(id=2,ks=4,vs=8)" - " R1_w=map_value(id=2,ks=4,vs=8)\n" + " R1=map_value(id=2,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, @@ -50,6 +51,9 @@ static struct { { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, { "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" }, { "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" }, + { "lock_global_sleepable_helper_subprog", "global function calls are not allowed while holding a lock" }, + { "lock_global_sleepable_kfunc_subprog", "global function calls are not allowed while holding a lock" }, + { "lock_global_sleepable_subprog_indirect", "global function calls are not allowed while holding a lock" }, }; static int match_regex(const char *pattern, const char *string) diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c index b7ba5cd47d96..271b5cc9fc01 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c @@ -39,7 +39,7 @@ retry: bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 0832fd787457..b277dddd5af7 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -66,7 +66,7 @@ retry: bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c new file mode 100644 index 000000000000..da42b00e3d1f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "stacktrace_ips.skel.h" + +#ifdef __x86_64__ +static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...) +{ + __u64 ips[PERF_MAX_STACK_DEPTH]; + struct ksyms *ksyms = NULL; + int i, err = 0; + va_list args; + + /* sorted by addr */ + ksyms = load_kallsyms_local(); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local")) + return -1; + + /* unlikely, but... */ + if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max")) + return -1; + + err = bpf_map_lookup_elem(fd, &key, ips); + if (err) + goto out; + + /* + * Compare all symbols provided via arguments with stacktrace ips, + * and their related symbol addresses.t + */ + va_start(args, cnt); + + for (i = 0; i < cnt; i++) { + unsigned long val; + struct ksym *ksym; + + val = va_arg(args, unsigned long); + ksym = ksym_search_local(ksyms, ips[i]); + if (!ASSERT_OK_PTR(ksym, "ksym_search_local")) + break; + ASSERT_EQ(ksym->addr, val, "stack_cmp"); + } + + va_end(args); + +out: + free_kallsyms_local(ksyms); + return err; +} + +static void test_stacktrace_ips_kprobe_multi(bool retprobe) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, + .retprobe = retprobe + ); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct stacktrace_ips *skel; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts( + skel->progs.kprobe_multi_test, + "bpf_testmod_stacktrace_test", &opts); + if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + if (retprobe) { + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4, + ksym_get_addr("bpf_testmod_stacktrace_test_3"), + ksym_get_addr("bpf_testmod_stacktrace_test_2"), + ksym_get_addr("bpf_testmod_stacktrace_test_1"), + ksym_get_addr("bpf_testmod_test_read")); + } else { + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5, + ksym_get_addr("bpf_testmod_stacktrace_test"), + ksym_get_addr("bpf_testmod_stacktrace_test_3"), + ksym_get_addr("bpf_testmod_stacktrace_test_2"), + ksym_get_addr("bpf_testmod_stacktrace_test_1"), + ksym_get_addr("bpf_testmod_test_read")); + } + +cleanup: + stacktrace_ips__destroy(skel); +} + +static void test_stacktrace_ips_raw_tp(void) +{ + __u32 info_len = sizeof(struct bpf_prog_info); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_prog_info info = {}; + struct stacktrace_ips *skel; + __u64 bpf_prog_ksym = 0; + int err; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.rawtp_test = bpf_program__attach_raw_tracepoint( + skel->progs.rawtp_test, + "bpf_testmod_test_read"); + if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint")) + goto cleanup; + + /* get bpf program address */ + info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym); + info.nr_jited_ksyms = 1; + err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test), + &info, &info_len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2, + bpf_prog_ksym, + ksym_get_addr("bpf_trace_run2")); + +cleanup: + stacktrace_ips__destroy(skel); +} + +static void test_stacktrace_ips_kprobe(bool retprobe) +{ + LIBBPF_OPTS(bpf_kprobe_opts, opts, + .retprobe = retprobe + ); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct stacktrace_ips *skel; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.kprobe_test = bpf_program__attach_kprobe_opts( + skel->progs.kprobe_test, + "bpf_testmod_stacktrace_test", &opts); + if (!ASSERT_OK_PTR(skel->links.kprobe_test, "bpf_program__attach_kprobe_opts")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + if (retprobe) { + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4, + ksym_get_addr("bpf_testmod_stacktrace_test_3"), + ksym_get_addr("bpf_testmod_stacktrace_test_2"), + ksym_get_addr("bpf_testmod_stacktrace_test_1"), + ksym_get_addr("bpf_testmod_test_read")); + } else { + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5, + ksym_get_addr("bpf_testmod_stacktrace_test"), + ksym_get_addr("bpf_testmod_stacktrace_test_3"), + ksym_get_addr("bpf_testmod_stacktrace_test_2"), + ksym_get_addr("bpf_testmod_stacktrace_test_1"), + ksym_get_addr("bpf_testmod_test_read")); + } + +cleanup: + stacktrace_ips__destroy(skel); +} + +static void test_stacktrace_ips_trampoline(bool retprobe) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct stacktrace_ips *skel; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + if (retprobe) { + skel->links.fexit_test = bpf_program__attach_trace(skel->progs.fexit_test); + if (!ASSERT_OK_PTR(skel->links.fexit_test, "bpf_program__attach_trace")) + goto cleanup; + } else { + skel->links.fentry_test = bpf_program__attach_trace(skel->progs.fentry_test); + if (!ASSERT_OK_PTR(skel->links.fentry_test, "bpf_program__attach_trace")) + goto cleanup; + } + + trigger_module_test_read(1); + + load_kallsyms(); + + if (retprobe) { + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4, + ksym_get_addr("bpf_testmod_stacktrace_test_3"), + ksym_get_addr("bpf_testmod_stacktrace_test_2"), + ksym_get_addr("bpf_testmod_stacktrace_test_1"), + ksym_get_addr("bpf_testmod_test_read")); + } else { + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5, + ksym_get_addr("bpf_testmod_stacktrace_test"), + ksym_get_addr("bpf_testmod_stacktrace_test_3"), + ksym_get_addr("bpf_testmod_stacktrace_test_2"), + ksym_get_addr("bpf_testmod_stacktrace_test_1"), + ksym_get_addr("bpf_testmod_test_read")); + } + +cleanup: + stacktrace_ips__destroy(skel); +} + +static void __test_stacktrace_ips(void) +{ + if (test__start_subtest("kprobe_multi")) + test_stacktrace_ips_kprobe_multi(false); + if (test__start_subtest("kretprobe_multi")) + test_stacktrace_ips_kprobe_multi(true); + if (test__start_subtest("raw_tp")) + test_stacktrace_ips_raw_tp(); + if (test__start_subtest("kprobe")) + test_stacktrace_ips_kprobe(false); + if (test__start_subtest("kretprobe")) + test_stacktrace_ips_kprobe(true); + if (test__start_subtest("fentry")) + test_stacktrace_ips_trampoline(false); + if (test__start_subtest("fexit")) + test_stacktrace_ips_trampoline(true); +} +#else +static void __test_stacktrace_ips(void) +{ + test__skip(); +} +#endif + +void test_stacktrace_ips(void) +{ + __test_stacktrace_ips(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c index df59e4ae2951..c23b97414813 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c @@ -1,46 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "stacktrace_map.skel.h" void test_stacktrace_map(void) { + struct stacktrace_map *skel; int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; - const char *prog_name = "oncpu"; - int err, prog_fd, stack_trace_len; - const char *file = "./test_stacktrace_map.bpf.o"; - __u32 key, val, duration = 0; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_link *link; + int err, stack_trace_len; + __u32 key, val, stack_id, duration = 0; + __u64 stack[PERF_MAX_STACK_DEPTH]; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); - if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + skel = stacktrace_map__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; - prog = bpf_object__find_program_by_name(obj, prog_name); - if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) - goto close_prog; - - link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch"); - if (!ASSERT_OK_PTR(link, "attach_tp")) - goto close_prog; - - /* find map fds */ - control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (CHECK_FAIL(control_map_fd < 0)) - goto disable_pmu; - - stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (CHECK_FAIL(stackid_hmap_fd < 0)) - goto disable_pmu; - - stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (CHECK_FAIL(stackmap_fd < 0)) - goto disable_pmu; - - stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); - if (CHECK_FAIL(stack_amap_fd < 0)) - goto disable_pmu; + control_map_fd = bpf_map__fd(skel->maps.control_map); + stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap); + stackmap_fd = bpf_map__fd(skel->maps.stackmap); + stack_amap_fd = bpf_map__fd(skel->maps.stack_amap); + err = stacktrace_map__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; /* give some time for bpf program run */ sleep(1); @@ -50,26 +31,32 @@ void test_stacktrace_map(void) bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto out; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto out; stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap", "err %d errno %d\n", err, errno)) - goto disable_pmu; - -disable_pmu: - bpf_link__destroy(link); -close_prog: - bpf_object__close(obj); + goto out; + + stack_id = skel->bss->stack_id; + err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, stack); + if (!ASSERT_OK(err, "lookup and delete target stack_id")) + goto out; + + err = bpf_map_lookup_elem(stackmap_fd, &stack_id, stack); + if (!ASSERT_EQ(err, -ENOENT, "lookup deleted stack_id")) + goto out; +out: + stacktrace_map__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c index c6ef06f55cdb..e985d51d3d47 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c @@ -5,7 +5,7 @@ void test_stacktrace_map_raw_tp(void) { const char *prog_name = "oncpu"; int control_map_fd, stackid_hmap_fd, stackmap_fd; - const char *file = "./test_stacktrace_map.bpf.o"; + const char *file = "./stacktrace_map.bpf.o"; __u32 key, val, duration = 0; int err, prog_fd; struct bpf_program *prog; @@ -46,7 +46,7 @@ void test_stacktrace_map_raw_tp(void) bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c index 1932b1e0685c..dc2ccf6a14d1 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c @@ -40,7 +40,7 @@ void test_stacktrace_map_skip(void) skel->bss->control = 1; /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap")) diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c new file mode 100644 index 000000000000..c3cce5c292bd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stream.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <sys/mman.h> + +#include "stream.skel.h" +#include "stream_fail.skel.h" + +void test_stream_failure(void) +{ + RUN_TESTS(stream_fail); +} + +void test_stream_success(void) +{ + RUN_TESTS(stream); + return; +} + +void test_stream_syscall(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + struct stream *skel; + int ret, prog_fd; + char buf[64]; + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.stream_syscall); + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + + ASSERT_LT(bpf_prog_stream_read(0, BPF_STREAM_STDOUT, buf, sizeof(buf), &ropts), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -EINVAL, "bad prog_fd"); + + ASSERT_LT(bpf_prog_stream_read(prog_fd, 0, buf, sizeof(buf), &ropts), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -ENOENT, "bad stream id"); + + ASSERT_LT(bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, NULL, sizeof(buf), NULL), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -EFAULT, "bad stream buf"); + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL); + ASSERT_EQ(ret, 2, "bytes"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL); + ASSERT_EQ(ret, 1, "bytes"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 1, &ropts); + ASSERT_EQ(ret, 0, "no bytes stdout"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, 1, &ropts); + ASSERT_EQ(ret, 0, "no bytes stderr"); + + stream__destroy(skel); +} + +static void test_address(struct bpf_program *prog, unsigned long *fault_addr_p) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + int ret, prog_fd; + char fault_addr[64]; + char buf[1024]; + + prog_fd = bpf_program__fd(prog); + + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + + sprintf(fault_addr, "0x%lx", *fault_addr_p); + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts); + ASSERT_GT(ret, 0, "stream read"); + ASSERT_LE(ret, 1023, "len for buf"); + buf[ret] = '\0'; + + if (!ASSERT_HAS_SUBSTR(buf, fault_addr, "fault_addr")) { + fprintf(stderr, "Output from stream:\n%s\n", buf); + fprintf(stderr, "Fault Addr: %s\n", fault_addr); + } +} + +void test_stream_arena_fault_address(void) +{ + struct stream *skel; + +#if !defined(__x86_64__) && !defined(__aarch64__) + printf("%s:SKIP: arena fault reporting not supported\n", __func__); + test__skip(); + return; +#endif + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + if (test__start_subtest("read_fault")) + test_address(skel->progs.stream_arena_read_fault, &skel->bss->fault_addr); + if (test__start_subtest("write_fault")) + test_address(skel->progs.stream_arena_write_fault, &skel->bss->fault_addr); + + stream__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c new file mode 100644 index 000000000000..300032a19445 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include <test_progs.h> +#include "string_kfuncs_success.skel.h" +#include "string_kfuncs_failure1.skel.h" +#include "string_kfuncs_failure2.skel.h" +#include <sys/mman.h> + +static const char * const test_cases[] = { + "strcmp", + "strcasecmp", + "strncasecmp", + "strchr", + "strchrnul", + "strnchr", + "strrchr", + "strlen", + "strnlen", + "strspn_str", + "strspn_accept", + "strcspn_str", + "strcspn_reject", + "strstr", + "strcasestr", + "strnstr", + "strncasestr", +}; + +void run_too_long_tests(void) +{ + struct string_kfuncs_failure2 *skel; + struct bpf_program *prog; + char test_name[256]; + int err, i; + + skel = string_kfuncs_failure2__open_and_load(); + if (!ASSERT_OK_PTR(skel, "string_kfuncs_failure2__open_and_load")) + return; + + memset(skel->bss->long_str, 'a', sizeof(skel->bss->long_str)); + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + sprintf(test_name, "test_%s_too_long", test_cases[i]); + if (!test__start_subtest(test_name)) + continue; + + prog = bpf_object__find_program_by_name(skel->obj, test_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + if (!ASSERT_OK(err, "bpf_prog_test_run")) + goto cleanup; + + ASSERT_EQ(topts.retval, -E2BIG, "reading too long string fails with -E2BIG"); + } + +cleanup: + string_kfuncs_failure2__destroy(skel); +} + +void test_string_kfuncs(void) +{ + RUN_TESTS(string_kfuncs_success); + RUN_TESTS(string_kfuncs_failure1); + + run_too_long_tests(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c index 4006879ca3fe..98db9bafa44b 100644 --- a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c +++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c @@ -5,6 +5,7 @@ #include "struct_ops_private_stack_fail.skel.h" #include "struct_ops_private_stack_recur.skel.h" +#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) static void test_private_stack(void) { struct struct_ops_private_stack *skel; @@ -15,11 +16,6 @@ static void test_private_stack(void) if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack__open")) return; - if (skel->data->skip) { - test__skip(); - goto cleanup; - } - err = struct_ops_private_stack__load(skel); if (!ASSERT_OK(err, "struct_ops_private_stack__load")) goto cleanup; @@ -48,17 +44,9 @@ static void test_private_stack_fail(void) if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_fail__open")) return; - if (skel->data->skip) { - test__skip(); - goto cleanup; - } - err = struct_ops_private_stack_fail__load(skel); - if (!ASSERT_ERR(err, "struct_ops_private_stack_fail__load")) - goto cleanup; - return; + ASSERT_ERR(err, "struct_ops_private_stack_fail__load"); -cleanup: struct_ops_private_stack_fail__destroy(skel); } @@ -72,11 +60,6 @@ static void test_private_stack_recur(void) if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_recur__open")) return; - if (skel->data->skip) { - test__skip(); - goto cleanup; - } - err = struct_ops_private_stack_recur__load(skel); if (!ASSERT_OK(err, "struct_ops_private_stack_recur__load")) goto cleanup; @@ -95,7 +78,7 @@ cleanup: struct_ops_private_stack_recur__destroy(skel); } -void test_struct_ops_private_stack(void) +static void __test_struct_ops_private_stack(void) { if (test__start_subtest("private_stack")) test_private_stack(); @@ -104,3 +87,14 @@ void test_struct_ops_private_stack(void) if (test__start_subtest("private_stack_recur")) test_private_stack_recur(); } +#else +static void __test_struct_ops_private_stack(void) +{ + test__skip(); +} +#endif + +void test_struct_ops_private_stack(void) +{ + __test_struct_ops_private_stack(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/summarization.c b/tools/testing/selftests/bpf/prog_tests/summarization.c new file mode 100644 index 000000000000..6951786044ca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/summarization.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bpf/libbpf.h" +#include "summarization_freplace.skel.h" +#include "summarization.skel.h" +#include <test_progs.h> + +static void print_verifier_log(const char *log) +{ + if (env.verbosity >= VERBOSE_VERY) + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); +} + +static void test_aux(const char *main_prog_name, + const char *to_be_replaced, + const char *replacement, + bool expect_load, + const char *err_msg) +{ + struct summarization_freplace *freplace = NULL; + struct bpf_program *freplace_prog = NULL; + struct bpf_program *main_prog = NULL; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct summarization *main = NULL; + char log[16*1024]; + int err; + + opts.kernel_log_buf = log; + opts.kernel_log_size = sizeof(log); + if (env.verbosity >= VERBOSE_SUPER) + opts.kernel_log_level = 1 | 2 | 4; + main = summarization__open_opts(&opts); + if (!ASSERT_OK_PTR(main, "summarization__open")) + goto out; + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); + if (!ASSERT_OK_PTR(main_prog, "main_prog")) + goto out; + bpf_program__set_autoload(main_prog, true); + err = summarization__load(main); + print_verifier_log(log); + if (!ASSERT_OK(err, "summarization__load")) + goto out; + freplace = summarization_freplace__open_opts(&opts); + if (!ASSERT_OK_PTR(freplace, "summarization_freplace__open")) + goto out; + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) + goto out; + bpf_program__set_autoload(freplace_prog, true); + bpf_program__set_autoattach(freplace_prog, true); + bpf_program__set_attach_target(freplace_prog, + bpf_program__fd(main_prog), + to_be_replaced); + err = summarization_freplace__load(freplace); + print_verifier_log(log); + + /* The might_sleep extension doesn't work yet as sleepable calls are not + * allowed, but preserve the check in case it's supported later and then + * this particular combination can be enabled. + */ + if (!strcmp("might_sleep", replacement) && err) { + ASSERT_HAS_SUBSTR(log, "sleepable helper bpf_copy_from_user#", "error log"); + ASSERT_EQ(err, -EINVAL, "err"); + test__skip(); + goto out; + } + + if (expect_load) { + ASSERT_OK(err, "summarization_freplace__load"); + } else { + ASSERT_ERR(err, "summarization_freplace__load"); + ASSERT_HAS_SUBSTR(log, err_msg, "error log"); + } + +out: + summarization_freplace__destroy(freplace); + summarization__destroy(main); +} + +/* There are two global subprograms in both summarization.skel.h: + * - one changes packet data; + * - another does not. + * It is ok to freplace subprograms that change packet data with those + * that either do or do not. It is only ok to freplace subprograms + * that do not change packet data with those that do not as well. + * The below tests check outcomes for each combination of such freplace. + * Also test a case when main subprogram itself is replaced and is a single + * subprogram in a program. + * + * This holds for might_sleep programs. It is ok to replace might_sleep with + * might_sleep and with does_not_sleep, but does_not_sleep cannot be replaced + * with might_sleep. + */ +void test_summarization_freplace(void) +{ + struct { + const char *main; + const char *to_be_replaced; + bool has_side_effect; + } mains[2][4] = { + { + { "main_changes_with_subprogs", "changes_pkt_data", true }, + { "main_changes_with_subprogs", "does_not_change_pkt_data", false }, + { "main_changes", "main_changes", true }, + { "main_does_not_change", "main_does_not_change", false }, + }, + { + { "main_might_sleep_with_subprogs", "might_sleep", true }, + { "main_might_sleep_with_subprogs", "does_not_sleep", false }, + { "main_might_sleep", "main_might_sleep", true }, + { "main_does_not_sleep", "main_does_not_sleep", false }, + }, + }; + const char *pkt_err = "Extension program changes packet data"; + const char *slp_err = "Extension program may sleep"; + struct { + const char *func; + bool has_side_effect; + const char *err_msg; + } replacements[2][2] = { + { + { "changes_pkt_data", true, pkt_err }, + { "does_not_change_pkt_data", false, pkt_err }, + }, + { + { "might_sleep", true, slp_err }, + { "does_not_sleep", false, slp_err }, + }, + }; + char buf[64]; + + for (int t = 0; t < 2; t++) { + for (int i = 0; i < ARRAY_SIZE(mains); ++i) { + for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { + snprintf(buf, sizeof(buf), "%s_with_%s", + mains[t][i].to_be_replaced, replacements[t][j].func); + if (!test__start_subtest(buf)) + continue; + test_aux(mains[t][i].main, mains[t][i].to_be_replaced, replacements[t][j].func, + mains[t][i].has_side_effect || !replacements[t][j].has_side_effect, + replacements[t][j].err_msg); + } + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 544144620ca6..7d534fde0af9 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -8,6 +8,7 @@ #include "tailcall_freplace.skel.h" #include "tc_bpf2bpf.skel.h" #include "tailcall_fail.skel.h" +#include "tailcall_sleepable.skel.h" /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -1195,7 +1196,7 @@ static void test_tailcall_hierarchy_count(const char *which, bool test_fentry, bool test_fexit, bool test_fentry_entry) { - int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val; + int err, map_fd, prog_fd, main_data_fd, fentry_data_fd = 0, fexit_data_fd = 0, i, val; struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL; struct bpf_link *fentry_link = NULL, *fexit_link = NULL; struct bpf_program *prog, *fentry_prog; @@ -1600,6 +1601,7 @@ static void test_tailcall_bpf2bpf_freplace(void) goto out; err = bpf_link__destroy(freplace_link); + freplace_link = NULL; if (!ASSERT_OK(err, "destroy link")) goto out; @@ -1652,6 +1654,77 @@ static void test_tailcall_failure() RUN_TESTS(tailcall_fail); } +noinline void uprobe_sleepable_trigger(void) +{ + asm volatile (""); +} + +static void test_tailcall_sleepable(void) +{ + LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct tailcall_sleepable *skel; + int prog_fd, map_fd; + int err, key; + + skel = tailcall_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "tailcall_sleepable__open")) + return; + + /* + * Test that we can't load uprobe_normal and uprobe_sleepable_1, + * because they share tailcall map. + */ + bpf_program__set_autoload(skel->progs.uprobe_normal, true); + bpf_program__set_autoload(skel->progs.uprobe_sleepable_1, true); + + err = tailcall_sleepable__load(skel); + if (!ASSERT_ERR(err, "tailcall_sleepable__load")) + goto out; + + tailcall_sleepable__destroy(skel); + + /* + * Test that we can tail call from sleepable to sleepable program. + */ + skel = tailcall_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "tailcall_sleepable__open")) + return; + + bpf_program__set_autoload(skel->progs.uprobe_sleepable_1, true); + bpf_program__set_autoload(skel->progs.uprobe_sleepable_2, true); + + err = tailcall_sleepable__load(skel); + if (!ASSERT_OK(err, "tailcall_sleepable__load")) + goto out; + + /* Add sleepable uprobe_sleepable_2 to jmp_table[0]. */ + key = 0; + prog_fd = bpf_program__fd(skel->progs.uprobe_sleepable_2); + map_fd = bpf_map__fd(skel->maps.jmp_table); + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "update jmp_table")) + goto out; + + skel->bss->my_pid = getpid(); + + /* Attach uprobe_sleepable_1 to uprobe_sleepable_trigger and hit it. */ + opts.func_name = "uprobe_sleepable_trigger"; + skel->links.uprobe_sleepable_1 = bpf_program__attach_uprobe_opts( + skel->progs.uprobe_sleepable_1, + -1, + "/proc/self/exe", + 0 /* offset */, + &opts); + if (!ASSERT_OK_PTR(skel->links.uprobe_sleepable_1, "bpf_program__attach_uprobe_opts")) + goto out; + + uprobe_sleepable_trigger(); + ASSERT_EQ(skel->bss->executed, 1, "executed"); + +out: + tailcall_sleepable__destroy(skel); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -1706,4 +1779,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_freplace(); if (test__start_subtest("tailcall_failure")) test_tailcall_failure(); + if (test__start_subtest("tailcall_sleepable")) + test_tailcall_sleepable(); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h new file mode 100644 index 000000000000..8ae4fb2027f7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h @@ -0,0 +1,389 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __TASK_LOCAL_DATA_H +#define __TASK_LOCAL_DATA_H + +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdatomic.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT +#include <pthread.h> +#endif + +#include <bpf/bpf.h> + +/* + * OPTIONS + * + * Define the option before including the header. Using different options in + * different translation units is strongly discouraged. + * + * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically + * + * Thread-specific memory for storing TLD is allocated lazily on the first call to + * tld_get_data(). The thread that calls it must also call tld_free() on thread exit + * to prevent memory leak. Pthread will be included if the option is defined. A pthread + * key will be registered with a destructor that calls tld_free(). Enabled only when + * the option is defined and TLD_DEFINE_KEY/tld_create_key() is called in the same + * translation unit. + * + * + * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically + * (default: 64 bytes) + * + * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using + * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be + * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated + * for these TLDs. This additional memory is allocated for every thread that calls + * tld_get_data() even if no tld_create_key are actually called, so be mindful of + * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory + * will be allocated for TLDs created with it. + * + * + * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62) + * + * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store, + * TLD_MAX_DATA_CNT. Must be consistent with task_local_data.bpf.h. + * + * + * TLD_DONT_ROUND_UP_DATA_SIZE - Don't round up memory size allocated for data if + * the memory allocator has low overhead aligned_alloc() implementation. + * + * For some memory allocators, when calling aligned_alloc(alignment, size), size + * does not need to be an integral multiple of alignment and it can be fulfilled + * without using round_up(size, alignment) bytes of memory. Enable this option to + * reduce memory usage. + */ + +#define TLD_PAGE_SIZE getpagesize() +#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1)) + +#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1)) +#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1) + +#define TLD_ROUND_UP_POWER_OF_TWO(x) (1UL << (sizeof(x) * 8 - __builtin_clzl(x - 1))) + +#ifndef TLD_DYN_DATA_SIZE +#define TLD_DYN_DATA_SIZE 64 +#endif + +#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1) + +#ifndef TLD_NAME_LEN +#define TLD_NAME_LEN 62 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + __s16 off; +} tld_key_t; + +struct tld_metadata { + char name[TLD_NAME_LEN]; + _Atomic __u16 size; /* size of tld_data_u->data */ +}; + +struct tld_meta_u { + _Atomic __u16 cnt; + __u16 size; + struct tld_metadata metadata[]; +}; + +/* + * The unused field ensures map_val.start > 0. On the BPF side, __tld_fetch_key() + * calculates off by summing map_val.start and tld_key_t.off and treats off == 0 + * as key not cached. + */ +struct tld_data_u { + __u64 unused; + char data[] __attribute__((aligned(8))); +}; + +struct tld_map_value { + void *data; + struct tld_meta_u *meta; + __u16 start; /* offset of tld_data_u->data in a page */ +}; + +struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak)); +__thread struct tld_data_u *tld_data_p __attribute__((weak)); + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT +bool _Atomic tld_pthread_key_init __attribute__((weak)); +pthread_key_t tld_pthread_key __attribute__((weak)); + +static void tld_free(void); + +static void __tld_thread_exit_handler(void *unused) +{ + (void)unused; + tld_free(); +} +#endif + +static int __tld_init_meta_p(void) +{ + struct tld_meta_u *meta, *uninit = NULL; + int err = 0; + + meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE); + if (!meta) { + err = -ENOMEM; + goto out; + } + + memset(meta, 0, TLD_PAGE_SIZE); + meta->size = TLD_DYN_DATA_SIZE; + + if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) { + free(meta); + goto out; + } + +out: + return err; +} + +static int __tld_init_data_p(int map_fd) +{ + struct tld_map_value map_val; + struct tld_data_u *data; + int err, tid_fd = -1; + size_t size, size_pot; + + tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL); + if (tid_fd < 0) { + err = -errno; + goto out; + } + + /* + * tld_meta_p->size = TLD_DYN_DATA_SIZE + + * total size of TLDs defined via TLD_DEFINE_KEY() + */ + size = tld_meta_p->size + sizeof(struct tld_data_u); + size_pot = TLD_ROUND_UP_POWER_OF_TWO(size); +#ifdef TLD_DONT_ROUND_UP_DATA_SIZE + data = (struct tld_data_u *)aligned_alloc(size_pot, size); +#else + data = (struct tld_data_u *)aligned_alloc(size_pot, size_pot); +#endif + if (!data) { + err = -ENOMEM; + goto out; + } + + /* + * Always pass a page-aligned address to UPTR since the size of tld_map_value::data + * is a page in BTF. + */ + map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data); + map_val.start = (~TLD_PAGE_MASK & (intptr_t)data) + sizeof(struct tld_data_u); + map_val.meta = tld_meta_p; + + err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0); + if (err) { + free(data); + goto out; + } + + tld_data_p = data; +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT + pthread_setspecific(tld_pthread_key, (void *)1); +#endif +out: + if (tid_fd >= 0) + close(tid_fd); + return err; +} + +static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data) +{ + int err, i, sz, off = 0; + bool uninit = false; + __u16 cnt; + + if (!tld_meta_p) { + err = __tld_init_meta_p(); + if (err) + return (tld_key_t){(__s16)err}; + } + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT + if (atomic_compare_exchange_strong(&tld_pthread_key_init, &uninit, true)) { + err = pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler); + if (err) + return (tld_key_t){(__s16)err}; + } +#endif + + for (i = 0; i < (int)TLD_MAX_DATA_CNT; i++) { +retry: + cnt = atomic_load(&tld_meta_p->cnt); + if (i < cnt) { + /* A metadata is not ready until size is updated with a non-zero value */ + while (!(sz = atomic_load(&tld_meta_p->metadata[i].size))) + sched_yield(); + + if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN)) + return (tld_key_t){-EEXIST}; + + off += TLD_ROUND_UP(sz, 8); + continue; + } + + /* + * TLD_DEFINE_KEY() is given memory upto a page while at most + * TLD_DYN_DATA_SIZE is allocated for tld_create_key() + */ + if (dyn_data) { + if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size || + tld_meta_p->size > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) + return (tld_key_t){-E2BIG}; + } else { + if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) + return (tld_key_t){-E2BIG}; + tld_meta_p->size += TLD_ROUND_UP(size, 8); + } + + /* + * Only one tld_create_key() can increase the current cnt by one and + * takes the latest available slot. Other threads will check again if a new + * TLD can still be added, and then compete for the new slot after the + * succeeding thread update the size. + */ + if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1)) + goto retry; + + strscpy(tld_meta_p->metadata[i].name, name); + atomic_store(&tld_meta_p->metadata[i].size, size); + return (tld_key_t){(__s16)off}; + } + + return (tld_key_t){-ENOSPC}; +} + +/** + * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD. + * + * @name: The name of the TLD + * @size: The size of the TLD + * @key: The variable name of the key. Cannot exceed TLD_NAME_LEN + * + * The macro can only be used in file scope. + * + * A global variable key of opaque type, tld_key_t, will be declared and initialized before + * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key + * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD. + * bpf programs can also fetch the same key by name. + * + * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just + * enough memory will be allocated for each thread on the first call to tld_get_data(). + */ +#define TLD_DEFINE_KEY(key, name, size) \ +tld_key_t key; \ + \ +__attribute__((constructor(101))) \ +void __tld_define_key_##key(void) \ +{ \ + key = __tld_create_key(name, size, false); \ +} + +/** + * tld_create_key() - Create a TLD and return a key associated with the TLD. + * + * @name: The name the TLD + * @size: The size of the TLD + * + * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check + * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to + * locate the TLD. bpf programs can also fetch the same key by name. + * + * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is + * not known statically or a TLD needs to be created conditionally) + * + * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs + * created dynamically with tld_create_key(). Since only a user page is pinned to the + * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE - + * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page. + */ +__attribute__((unused)) +static tld_key_t tld_create_key(const char *name, size_t size) +{ + return __tld_create_key(name, size, true); +} + +__attribute__((unused)) +static inline bool tld_key_is_err(tld_key_t key) +{ + return key.off < 0; +} + +__attribute__((unused)) +static inline int tld_key_err_or_zero(tld_key_t key) +{ + return tld_key_is_err(key) ? key.off : 0; +} + +/** + * tld_get_data() - Get a pointer to the TLD associated with the given key of the + * calling thread. + * + * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map + * of task local data. + * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key(). + * + * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD + * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte + * aligned. + * + * Threads that call tld_get_data() must call tld_free() on exit to prevent + * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined. + */ +__attribute__((unused)) +static void *tld_get_data(int map_fd, tld_key_t key) +{ + if (!tld_meta_p) + return NULL; + + /* tld_data_p is allocated on the first invocation of tld_get_data() */ + if (!tld_data_p && __tld_init_data_p(map_fd)) + return NULL; + + return tld_data_p->data + key.off; +} + +/** + * tld_free() - Free task local data memory of the calling thread + * + * For the calling thread, all pointers to TLDs acquired before will become invalid. + * + * Users must call tld_free() on thread exit to prevent memory leak. Alternatively, + * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered + * to free the memory automatically. Calling tld_free() before thread exit is + * undefined behavior, which may lead to null-pointer dereference. + */ +__attribute__((unused)) +static void tld_free(void) +{ + if (tld_data_p) { + free(tld_data_p); + tld_data_p = NULL; + } +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __TASK_LOCAL_DATA_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 42e822ea352f..1b26c12f255a 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -25,24 +25,30 @@ static void test_sys_enter_exit(void) { struct task_local_storage *skel; + pid_t pid = sys_gettid(); int err; skel = task_local_storage__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; - skel->bss->target_pid = sys_gettid(); - err = task_local_storage__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; + /* Set target_pid after attach so that syscalls made during + * attach are not counted. + */ + skel->bss->target_pid = pid; + sys_gettid(); sys_gettid(); - /* 3x syscalls: 1x attach and 2x gettid */ - ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt"); - ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt"); + skel->bss->target_pid = 0; + + /* 2x gettid syscalls */ + ASSERT_EQ(skel->bss->enter_cnt, 2, "enter_cnt"); + ASSERT_EQ(skel->bss->exit_cnt, 2, "exit_cnt"); ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt"); out: task_local_storage__destroy(skel); @@ -112,24 +118,24 @@ static void test_recursion(void) task_ls_recursion__detach(skel); /* Refer to the comment in BPF_PROG(on_update) for - * the explanation on the value 201 and 100. + * the explanation on the value 200 and 1. */ map_fd = bpf_map__fd(skel->maps.map_a); err = bpf_map_lookup_elem(map_fd, &task_fd, &value); ASSERT_OK(err, "lookup map_a"); - ASSERT_EQ(value, 201, "map_a value"); - ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy"); + ASSERT_EQ(value, 200, "map_a value"); + ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy"); map_fd = bpf_map__fd(skel->maps.map_b); err = bpf_map_lookup_elem(map_fd, &task_fd, &value); ASSERT_OK(err, "lookup map_b"); - ASSERT_EQ(value, 100, "map_b value"); + ASSERT_EQ(value, 1, "map_b value"); prog_fd = bpf_program__fd(skel->progs.on_update); memset(&info, 0, sizeof(info)); err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); ASSERT_OK(err, "get prog info"); - ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion"); + ASSERT_EQ(info.recursion_misses, 2, "on_update prog recursion"); prog_fd = bpf_program__fd(skel->progs.on_enter); memset(&info, 0, sizeof(info)); diff --git a/tools/testing/selftests/bpf/prog_tests/task_work_stress.c b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c new file mode 100644 index 000000000000..450d17d91a56 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <string.h> +#include <stdio.h> +#include "task_work_stress.skel.h" +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <sys/syscall.h> +#include <time.h> +#include <stdlib.h> +#include <stdatomic.h> + +struct test_data { + int prog_fd; + atomic_int exit; +}; + +void *runner(void *test_data) +{ + struct test_data *td = test_data; + int err = 0; + LIBBPF_OPTS(bpf_test_run_opts, opts); + + while (!err && !atomic_load(&td->exit)) + err = bpf_prog_test_run_opts(td->prog_fd, &opts); + + return NULL; +} + +static int get_env_int(const char *str, int def) +{ + const char *s = getenv(str); + char *end; + int retval; + + if (!s || !*s) + return def; + errno = 0; + retval = strtol(s, &end, 10); + if (errno || *end || retval < 0) + return def; + return retval; +} + +static void task_work_run(bool enable_delete) +{ + struct task_work_stress *skel; + struct bpf_program *scheduler, *deleter; + int nthreads = 16; + int test_time_s = get_env_int("BPF_TASK_WORK_TEST_TIME", 1); + pthread_t tid[nthreads], tid_del; + bool started[nthreads], started_del = false; + struct test_data td_sched = { .exit = 0 }, td_del = { .exit = 1 }; + int i, err; + + skel = task_work_stress__open(); + if (!ASSERT_OK_PTR(skel, "task_work__open")) + return; + + scheduler = bpf_object__find_program_by_name(skel->obj, "schedule_task_work"); + bpf_program__set_autoload(scheduler, true); + + deleter = bpf_object__find_program_by_name(skel->obj, "delete_task_work"); + bpf_program__set_autoload(deleter, true); + + err = task_work_stress__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + for (i = 0; i < nthreads; ++i) + started[i] = false; + + td_sched.prog_fd = bpf_program__fd(scheduler); + for (i = 0; i < nthreads; ++i) { + if (pthread_create(&tid[i], NULL, runner, &td_sched) != 0) { + fprintf(stderr, "could not start thread"); + goto cancel; + } + started[i] = true; + } + + if (enable_delete) + atomic_store(&td_del.exit, 0); + + td_del.prog_fd = bpf_program__fd(deleter); + if (pthread_create(&tid_del, NULL, runner, &td_del) != 0) { + fprintf(stderr, "could not start thread"); + goto cancel; + } + started_del = true; + + /* Run stress test for some time */ + sleep(test_time_s); + +cancel: + atomic_store(&td_sched.exit, 1); + atomic_store(&td_del.exit, 1); + for (i = 0; i < nthreads; ++i) { + if (started[i]) + pthread_join(tid[i], NULL); + } + + if (started_del) + pthread_join(tid_del, NULL); + + ASSERT_GT(skel->bss->callback_scheduled, 0, "work scheduled"); + /* Some scheduling attempts should have failed due to contention */ + ASSERT_GT(skel->bss->schedule_error, 0, "schedule error"); + + if (enable_delete) { + /* If delete thread is enabled, it has cancelled some callbacks */ + ASSERT_GT(skel->bss->delete_success, 0, "delete success"); + ASSERT_LT(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks"); + } else { + /* Without delete thread number of scheduled callbacks is the same as fired */ + ASSERT_EQ(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks"); + } + +cleanup: + task_work_stress__destroy(skel); +} + +void test_task_work_stress(void) +{ + if (test__start_subtest("no_delete")) + task_work_run(false); + if (test__start_subtest("with_delete")) + task_work_run(true); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index 924d0e25320c..d52a62af77bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -8,34 +8,6 @@ # define loopback 1 #endif -static inline __u32 id_from_prog_fd(int fd) -{ - struct bpf_prog_info prog_info = {}; - __u32 prog_info_len = sizeof(prog_info); - int err; - - err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len); - if (!ASSERT_OK(err, "id_from_prog_fd")) - return 0; - - ASSERT_NEQ(prog_info.id, 0, "prog_info.id"); - return prog_info.id; -} - -static inline __u32 id_from_link_fd(int fd) -{ - struct bpf_link_info link_info = {}; - __u32 link_info_len = sizeof(link_info); - int err; - - err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); - if (!ASSERT_OK(err, "id_from_link_fd")) - return 0; - - ASSERT_NEQ(link_info.id, 0, "link_info.id"); - return link_info.id; -} - static inline __u32 ifindex_from_link_fd(int fd) { struct bpf_link_info link_info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index 1af9ec1149aa..2186a24e7d8a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -13,7 +13,7 @@ #include "netlink_helpers.h" #include "tc_helpers.h" -void serial_test_tc_links_basic(void) +void test_ns_tc_links_basic(void) { LIBBPF_OPTS(bpf_prog_query_opts, optq); LIBBPF_OPTS(bpf_tcx_opts, optl); @@ -260,7 +260,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_before(void) +void test_ns_tc_links_before(void) { test_tc_links_before_target(BPF_TCX_INGRESS); test_tc_links_before_target(BPF_TCX_EGRESS); @@ -414,7 +414,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_after(void) +void test_ns_tc_links_after(void) { test_tc_links_after_target(BPF_TCX_INGRESS); test_tc_links_after_target(BPF_TCX_EGRESS); @@ -514,7 +514,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_revision(void) +void test_ns_tc_links_revision(void) { test_tc_links_revision_target(BPF_TCX_INGRESS); test_tc_links_revision_target(BPF_TCX_EGRESS); @@ -618,7 +618,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_chain_classic(void) +void test_ns_tc_links_chain_classic(void) { test_tc_chain_classic(BPF_TCX_INGRESS, false); test_tc_chain_classic(BPF_TCX_EGRESS, false); @@ -846,7 +846,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_replace(void) +void test_ns_tc_links_replace(void) { test_tc_links_replace_target(BPF_TCX_INGRESS); test_tc_links_replace_target(BPF_TCX_EGRESS); @@ -1158,7 +1158,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_invalid(void) +void test_ns_tc_links_invalid(void) { test_tc_links_invalid_target(BPF_TCX_INGRESS); test_tc_links_invalid_target(BPF_TCX_EGRESS); @@ -1314,7 +1314,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_prepend(void) +void test_ns_tc_links_prepend(void) { test_tc_links_prepend_target(BPF_TCX_INGRESS); test_tc_links_prepend_target(BPF_TCX_EGRESS); @@ -1470,7 +1470,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_append(void) +void test_ns_tc_links_append(void) { test_tc_links_append_target(BPF_TCX_INGRESS); test_tc_links_append_target(BPF_TCX_EGRESS); @@ -1568,7 +1568,7 @@ cleanup: ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); } -void serial_test_tc_links_dev_cleanup(void) +void test_ns_tc_links_dev_cleanup(void) { test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS); test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS); @@ -1672,7 +1672,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_links_chain_mixed(void) +void test_ns_tc_links_chain_mixed(void) { test_tc_chain_mixed(BPF_TCX_INGRESS); test_tc_chain_mixed(BPF_TCX_EGRESS); @@ -1782,7 +1782,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_links_ingress(void) +void test_ns_tc_links_ingress(void) { test_tc_links_ingress(BPF_TCX_INGRESS, true, true); test_tc_links_ingress(BPF_TCX_INGRESS, true, false); @@ -1823,7 +1823,7 @@ static int qdisc_replace(int ifindex, const char *kind, bool block) return err; } -void serial_test_tc_links_dev_chain0(void) +void test_ns_tc_links_dev_chain0(void) { int err, ifindex; @@ -1955,7 +1955,7 @@ cleanup: ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); } -void serial_test_tc_links_dev_mixed(void) +void test_ns_tc_links_dev_mixed(void) { test_tc_links_dev_mixed(BPF_TCX_INGRESS); test_tc_links_dev_mixed(BPF_TCX_EGRESS); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index f77f604389aa..2955750ddada 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -10,7 +10,7 @@ #include "test_tc_link.skel.h" #include "tc_helpers.h" -void serial_test_tc_opts_basic(void) +void test_ns_tc_opts_basic(void) { LIBBPF_OPTS(bpf_prog_attach_opts, opta); LIBBPF_OPTS(bpf_prog_detach_opts, optd); @@ -254,7 +254,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_before(void) +void test_ns_tc_opts_before(void) { test_tc_opts_before_target(BPF_TCX_INGRESS); test_tc_opts_before_target(BPF_TCX_EGRESS); @@ -445,7 +445,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_after(void) +void test_ns_tc_opts_after(void) { test_tc_opts_after_target(BPF_TCX_INGRESS); test_tc_opts_after_target(BPF_TCX_EGRESS); @@ -554,7 +554,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_revision(void) +void test_ns_tc_opts_revision(void) { test_tc_opts_revision_target(BPF_TCX_INGRESS); test_tc_opts_revision_target(BPF_TCX_EGRESS); @@ -655,7 +655,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_opts_chain_classic(void) +void test_ns_tc_opts_chain_classic(void) { test_tc_chain_classic(BPF_TCX_INGRESS, false); test_tc_chain_classic(BPF_TCX_EGRESS, false); @@ -864,7 +864,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_replace(void) +void test_ns_tc_opts_replace(void) { test_tc_opts_replace_target(BPF_TCX_INGRESS); test_tc_opts_replace_target(BPF_TCX_EGRESS); @@ -1017,7 +1017,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_invalid(void) +void test_ns_tc_opts_invalid(void) { test_tc_opts_invalid_target(BPF_TCX_INGRESS); test_tc_opts_invalid_target(BPF_TCX_EGRESS); @@ -1157,7 +1157,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_prepend(void) +void test_ns_tc_opts_prepend(void) { test_tc_opts_prepend_target(BPF_TCX_INGRESS); test_tc_opts_prepend_target(BPF_TCX_EGRESS); @@ -1297,7 +1297,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_append(void) +void test_ns_tc_opts_append(void) { test_tc_opts_append_target(BPF_TCX_INGRESS); test_tc_opts_append_target(BPF_TCX_EGRESS); @@ -1360,10 +1360,8 @@ static void test_tc_opts_dev_cleanup_target(int target) assert_mprog_count_ifindex(ifindex, target, 4); - ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); - ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); - ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); - return; + goto cleanup; + cleanup3: err = bpf_prog_detach_opts(fd3, loopback, target, &optd); ASSERT_OK(err, "prog_detach"); @@ -1387,7 +1385,7 @@ cleanup: ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); } -void serial_test_tc_opts_dev_cleanup(void) +void test_ns_tc_opts_dev_cleanup(void) { test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS); test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS); @@ -1563,7 +1561,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_opts_mixed(void) +void test_ns_tc_opts_mixed(void) { test_tc_opts_mixed_target(BPF_TCX_INGRESS); test_tc_opts_mixed_target(BPF_TCX_EGRESS); @@ -1642,7 +1640,7 @@ cleanup: assert_mprog_count(target, 0); } -void serial_test_tc_opts_demixed(void) +void test_ns_tc_opts_demixed(void) { test_tc_opts_demixed_target(BPF_TCX_INGRESS); test_tc_opts_demixed_target(BPF_TCX_EGRESS); @@ -1813,7 +1811,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_detach(void) +void test_ns_tc_opts_detach(void) { test_tc_opts_detach_target(BPF_TCX_INGRESS); test_tc_opts_detach_target(BPF_TCX_EGRESS); @@ -2020,7 +2018,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_detach_before(void) +void test_ns_tc_opts_detach_before(void) { test_tc_opts_detach_before_target(BPF_TCX_INGRESS); test_tc_opts_detach_before_target(BPF_TCX_EGRESS); @@ -2236,7 +2234,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_detach_after(void) +void test_ns_tc_opts_detach_after(void) { test_tc_opts_detach_after_target(BPF_TCX_INGRESS); test_tc_opts_detach_after_target(BPF_TCX_EGRESS); @@ -2265,7 +2263,7 @@ static void test_tc_opts_delete_empty(int target, bool chain_tc_old) assert_mprog_count(target, 0); } -void serial_test_tc_opts_delete_empty(void) +void test_ns_tc_opts_delete_empty(void) { test_tc_opts_delete_empty(BPF_TCX_INGRESS, false); test_tc_opts_delete_empty(BPF_TCX_EGRESS, false); @@ -2372,7 +2370,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_chain_mixed(void) +void test_ns_tc_opts_chain_mixed(void) { test_tc_chain_mixed(BPF_TCX_INGRESS); test_tc_chain_mixed(BPF_TCX_EGRESS); @@ -2446,7 +2444,7 @@ cleanup: ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); } -void serial_test_tc_opts_max(void) +void test_ns_tc_opts_max(void) { test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false); test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false); @@ -2748,7 +2746,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_query(void) +void test_ns_tc_opts_query(void) { test_tc_opts_query_target(BPF_TCX_INGRESS); test_tc_opts_query_target(BPF_TCX_EGRESS); @@ -2807,7 +2805,7 @@ cleanup: test_tc_link__destroy(skel); } -void serial_test_tc_opts_query_attach(void) +void test_ns_tc_opts_query_attach(void) { test_tc_opts_query_attach_target(BPF_TCX_INGRESS); test_tc_opts_query_attach_target(BPF_TCX_EGRESS); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index c85798966aec..64fbda082309 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -56,6 +56,8 @@ #define MAC_DST_FWD "00:11:22:33:44:55" #define MAC_DST "00:22:33:44:55:66" +#define MAC_SRC_FWD "00:33:44:55:66:77" +#define MAC_SRC "00:44:55:66:77:88" #define IFADDR_STR_LEN 18 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q" @@ -207,11 +209,10 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) int err; if (result->dev_mode == MODE_VETH) { - SYS(fail, "ip link add src type veth peer name src_fwd"); - SYS(fail, "ip link add dst type veth peer name dst_fwd"); - - SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD); - SYS(fail, "ip link set dst address " MAC_DST); + SYS(fail, "ip link add src address " MAC_SRC " type veth " + "peer name src_fwd address " MAC_SRC_FWD); + SYS(fail, "ip link add dst address " MAC_DST " type veth " + "peer name dst_fwd address " MAC_DST_FWD); } else if (result->dev_mode == MODE_NETKIT) { err = create_netkit(NETKIT_L3, "src", "src_fwd"); if (!ASSERT_OK(err, "create_ifindex_src")) @@ -1094,7 +1095,7 @@ static int tun_open(char *name) ifr.ifr_flags = IFF_TUN | IFF_NO_PI; if (*name) - strncpy(ifr.ifr_name, name, IFNAMSIZ); + strscpy(ifr.ifr_name, name); err = ioctl(fd, TUNSETIFF, &ifr); if (!ASSERT_OK(err, "ioctl TUNSETIFF")) diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 56685fc03c7e..80e6315da2a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -507,6 +507,10 @@ static void misc(void) ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp"); + ASSERT_TRUE(misc_skel->bss->nodelay_est_ok, "nodelay_est_ok"); + ASSERT_TRUE(misc_skel->bss->nodelay_hdr_len_reject, "nodelay_hdr_len_reject"); + ASSERT_TRUE(misc_skel->bss->nodelay_write_hdr_reject, "nodelay_write_hdr_reject"); + check_linum: ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum"); sk_fds_close(&sk_fds); diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c new file mode 100644 index 000000000000..40d38280c091 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <linux/genetlink.h> +#include "network_helpers.h" +#include "bpf_smc.skel.h" + +#ifndef IPPROTO_SMC +#define IPPROTO_SMC 256 +#endif + +#define CLIENT_IP "127.0.0.1" +#define SERVER_IP "127.0.1.0" +#define SERVER_IP_VIA_RISK_PATH "127.0.2.0" + +#define SERVICE_1 80 +#define SERVICE_2 443 +#define SERVICE_3 8443 + +#define TEST_NS "bpf_smc_netns" + +static struct netns_obj *test_netns; + +struct smc_policy_ip_key { + __u32 sip; + __u32 dip; +}; + +struct smc_policy_ip_value { + __u8 mode; +}; + +#if defined(__s390x__) +/* s390x has default seid */ +static bool setup_ueid(void) { return true; } +static void cleanup_ueid(void) {} +#else +enum { + SMC_NETLINK_ADD_UEID = 10, + SMC_NETLINK_REMOVE_UEID +}; + +enum { + SMC_NLA_EID_TABLE_UNSPEC, + SMC_NLA_EID_TABLE_ENTRY, /* string */ +}; + +struct msgtemplate { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[1024]; +}; + +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) +#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN) + +#define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK" +#define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID" + +static uint16_t smc_nl_family_id = -1; + +static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type, + void *nla_data, int nla_len) +{ + struct nlattr *na; + struct sockaddr_nl nladdr; + int r, buflen; + char *buf; + + struct msgtemplate msg = {0}; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = nlmsg_flags; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid = nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 1; + na = (struct nlattr *)GENLMSG_DATA(&msg); + na->nla_type = nla_type; + na->nla_len = nla_len + 1 + NLA_HDRLEN; + memcpy(NLA_DATA(na), nla_data, nla_len); + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + + buf = (char *)&msg; + buflen = msg.n.nlmsg_len; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) { + return -1; + } + } + return 0; +} + +static bool get_smc_nl_family_id(void) +{ + struct sockaddr_nl nl_src; + struct msgtemplate msg; + struct nlattr *nl; + int fd, ret; + pid_t pid; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (!ASSERT_OK_FD(fd, "nl_family socket")) + return false; + + pid = getpid(); + + memset(&nl_src, 0, sizeof(nl_src)); + nl_src.nl_family = AF_NETLINK; + nl_src.nl_pid = pid; + + ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); + if (!ASSERT_OK(ret, "nl_family bind")) + goto fail; + + ret = send_cmd(fd, GENL_ID_CTRL, pid, + NLM_F_REQUEST, CTRL_CMD_GETFAMILY, + CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME, + strlen(SMC_GENL_FAMILY_NAME)); + if (!ASSERT_OK(ret, "nl_family query")) + goto fail; + + ret = recv(fd, &msg, sizeof(msg), 0); + if (msg.n.nlmsg_type == NLMSG_ERROR) + goto fail; + if (!ASSERT_FALSE(ret < 0 || !NLMSG_OK(&msg.n, ret), + "nl_family response")) + goto fail; + + nl = (struct nlattr *)GENLMSG_DATA(&msg); + nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len)); + if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type")) + goto fail; + + smc_nl_family_id = *(uint16_t *)NLA_DATA(nl); + close(fd); + return true; +fail: + close(fd); + return false; +} + +static bool smc_ueid(int op) +{ + struct sockaddr_nl nl_src; + struct msgtemplate msg; + struct nlmsgerr *err; + char test_ueid[32]; + int fd, ret; + pid_t pid; + + /* UEID required */ + memset(test_ueid, '\x20', sizeof(test_ueid)); + memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID)); + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (!ASSERT_OK_FD(fd, "ueid socket")) + return false; + + pid = getpid(); + memset(&nl_src, 0, sizeof(nl_src)); + nl_src.nl_family = AF_NETLINK; + nl_src.nl_pid = pid; + + ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); + if (!ASSERT_OK(ret, "ueid bind")) + goto fail; + + ret = send_cmd(fd, smc_nl_family_id, pid, + NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY, + (void *)test_ueid, sizeof(test_ueid)); + if (!ASSERT_OK(ret, "ueid cmd")) + goto fail; + + ret = recv(fd, &msg, sizeof(msg), 0); + if (!ASSERT_FALSE(ret < 0 || + !NLMSG_OK(&msg.n, ret), "ueid response")) + goto fail; + + if (msg.n.nlmsg_type == NLMSG_ERROR) { + err = NLMSG_DATA(&msg); + switch (op) { + case SMC_NETLINK_REMOVE_UEID: + if (!ASSERT_FALSE((err->error && err->error != -ENOENT), + "ueid remove")) + goto fail; + break; + case SMC_NETLINK_ADD_UEID: + if (!ASSERT_OK(err->error, "ueid add")) + goto fail; + break; + default: + break; + } + } + close(fd); + return true; +fail: + close(fd); + return false; +} + +static bool setup_ueid(void) +{ + /* get smc nl id */ + if (!get_smc_nl_family_id()) + return false; + /* clear old ueid for bpftest */ + smc_ueid(SMC_NETLINK_REMOVE_UEID); + /* smc-loopback required ueid */ + return smc_ueid(SMC_NETLINK_ADD_UEID); +} + +static void cleanup_ueid(void) +{ + smc_ueid(SMC_NETLINK_REMOVE_UEID); +} +#endif /* __s390x__ */ + +static bool setup_netns(void) +{ + test_netns = netns_new(TEST_NS, true); + if (!ASSERT_OK_PTR(test_netns, "open net namespace")) + goto fail_netns; + + SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo"); + SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo"); + + return true; +fail_ip: + netns_free(test_netns); +fail_netns: + return false; +} + +static void cleanup_netns(void) +{ + netns_free(test_netns); +} + +static bool setup_smc(void) +{ + if (!setup_ueid()) + return false; + + if (!setup_netns()) + goto fail_netns; + + return true; +fail_netns: + cleanup_ueid(); + return false; +} + +static int set_client_addr_cb(int fd, void *opts) +{ + const char *src = (const char *)opts; + struct sockaddr_in localaddr; + + localaddr.sin_family = AF_INET; + localaddr.sin_port = htons(0); + localaddr.sin_addr.s_addr = inet_addr(src); + return !ASSERT_OK(bind(fd, &localaddr, sizeof(localaddr)), "client bind"); +} + +static void run_link(const char *src, const char *dst, int port) +{ + struct network_helper_opts opts = {0}; + int server, client; + + server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL); + if (!ASSERT_OK_FD(server, "start service_1")) + return; + + opts.proto = IPPROTO_TCP; + opts.post_socket_cb = set_client_addr_cb; + opts.cb_opts = (void *)src; + + client = connect_to_fd_opts(server, &opts); + if (!ASSERT_OK_FD(client, "start connect")) + goto fail_client; + + close(client); +fail_client: + close(server); +} + +static void block_link(int map_fd, const char *src, const char *dst) +{ + struct smc_policy_ip_value val = { .mode = /* block */ 0 }; + struct smc_policy_ip_key key = { + .sip = inet_addr(src), + .dip = inet_addr(dst), + }; + + bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); +} + +/* + * This test describes a real-life service topology as follows: + * + * +-------------> service_1 + * link 1 | | + * +--------------------> server | link 2 + * | | V + * | +-------------> service_2 + * | link 3 + * client -------------------> server_via_unsafe_path -> service_3 + * + * Among them, + * 1. link-1 is very suitable for using SMC. + * 2. link-2 is not suitable for using SMC, because the mode of this link is + * kind of short-link services. + * 3. link-3 is also not suitable for using SMC, because the RDMA link is + * unavailable and needs to go through a long timeout before it can fallback + * to TCP. + * To achieve this goal, we use a customized SMC ip strategy via smc_hs_ctrl. + */ +static void test_topo(void) +{ + struct bpf_smc *skel; + int rc, map_fd; + + skel = bpf_smc__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load")) + return; + + rc = bpf_smc__attach(skel); + if (!ASSERT_OK(rc, "bpf_smc__attach")) + goto fail; + + map_fd = bpf_map__fd(skel->maps.smc_policy_ip); + if (!ASSERT_OK_FD(map_fd, "bpf_map__fd")) + goto fail; + + /* Mock the process of transparent replacement, since we will modify + * protocol to ipproto_smc accropding to it via + * fmod_ret/update_socket_protocol. + */ + write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck"); + + /* Configure ip strat */ + block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH); + block_link(map_fd, SERVER_IP, SERVER_IP); + + /* should go with smc */ + run_link(CLIENT_IP, SERVER_IP, SERVICE_1); + /* should go with smc fallback */ + run_link(SERVER_IP, SERVER_IP, SERVICE_2); + + ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count"); + + /* should go with smc */ + run_link(CLIENT_IP, SERVER_IP, SERVICE_2); + + ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count"); + + /* should go with smc fallback */ + run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3); + + ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count"); + +fail: + bpf_smc__destroy(skel); +} + +void test_bpf_smc(void) +{ + if (!setup_smc()) { + printf("setup for smc test failed, test SKIP:\n"); + test__skip(); + return; + } + + if (test__start_subtest("topo")) + test_topo(); + + cleanup_ueid(); + cleanup_netns(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c new file mode 100644 index 000000000000..7d1b478c99a0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms Inc. */ +#include <test_progs.h> +#include "test_btf_ext.skel.h" +#include "btf_helpers.h" + +static void subtest_line_func_info(void) +{ + struct test_btf_ext *skel; + struct bpf_prog_info info; + struct bpf_line_info line_info[128], *libbpf_line_info; + struct bpf_func_info func_info[128], *libbpf_func_info; + __u32 info_len = sizeof(info), libbbpf_line_info_cnt, libbbpf_func_info_cnt; + int err, fd; + + skel = test_btf_ext__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + fd = bpf_program__fd(skel->progs.global_func); + + memset(&info, 0, sizeof(info)); + info.line_info = ptr_to_u64(&line_info); + info.nr_line_info = sizeof(line_info); + info.line_info_rec_size = sizeof(*line_info); + err = bpf_prog_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(err, "prog_line_info")) + goto out; + + libbpf_line_info = bpf_program__line_info(skel->progs.global_func); + libbbpf_line_info_cnt = bpf_program__line_info_cnt(skel->progs.global_func); + + memset(&info, 0, sizeof(info)); + info.func_info = ptr_to_u64(&func_info); + info.nr_func_info = sizeof(func_info); + info.func_info_rec_size = sizeof(*func_info); + err = bpf_prog_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(err, "prog_func_info")) + goto out; + + libbpf_func_info = bpf_program__func_info(skel->progs.global_func); + libbbpf_func_info_cnt = bpf_program__func_info_cnt(skel->progs.global_func); + + if (!ASSERT_OK_PTR(libbpf_line_info, "bpf_program__line_info")) + goto out; + if (!ASSERT_EQ(libbbpf_line_info_cnt, info.nr_line_info, "line_info_cnt")) + goto out; + if (!ASSERT_OK_PTR(libbpf_func_info, "bpf_program__func_info")) + goto out; + if (!ASSERT_EQ(libbbpf_func_info_cnt, info.nr_func_info, "func_info_cnt")) + goto out; + ASSERT_MEMEQ(libbpf_line_info, line_info, libbbpf_line_info_cnt * sizeof(*line_info), + "line_info"); + ASSERT_MEMEQ(libbpf_func_info, func_info, libbbpf_func_info_cnt * sizeof(*func_info), + "func_info"); +out: + test_btf_ext__destroy(skel); +} + +void test_btf_ext(void) +{ + if (test__start_subtest("line_func_info")) + subtest_line_func_info(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c b/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c new file mode 100644 index 000000000000..7c35ca6f4539 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <net/if.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_dst_clear.skel.h" + +#define IPV4_IFACE_ADDR "1.0.0.1" +#define UDP_TEST_PORT 7777 + +void test_ns_dst_clear(void) +{ + LIBBPF_OPTS(bpf_tcx_opts, tcx_opts); + struct test_dst_clear *skel; + struct sockaddr_in addr; + struct bpf_link *link; + socklen_t addrlen; + char buf[128] = {}; + int sockfd, err; + + skel = test_dst_clear__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + return; + + SYS(fail, "ip addr add %s/8 dev lo", IPV4_IFACE_ADDR); + + link = bpf_program__attach_tcx(skel->progs.dst_clear, + if_nametoindex("lo"), &tcx_opts); + if (!ASSERT_OK_PTR(link, "attach_tcx")) + goto fail; + skel->links.dst_clear = link; + + addrlen = sizeof(addr); + err = make_sockaddr(AF_INET, IPV4_IFACE_ADDR, UDP_TEST_PORT, + (void *)&addr, &addrlen); + if (!ASSERT_OK(err, "make_sockaddr")) + goto fail; + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (!ASSERT_NEQ(sockfd, -1, "socket")) + goto fail; + err = sendto(sockfd, buf, sizeof(buf), 0, (void *)&addr, addrlen); + close(sockfd); + if (!ASSERT_EQ(err, sizeof(buf), "send")) + goto fail; + + ASSERT_TRUE(skel->bss->had_dst, "had_dst"); + ASSERT_TRUE(skel->bss->dst_cleared, "dst_cleared"); + +fail: + test_dst_clear__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index e905cbaf6b3d..500446808908 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -18,6 +18,7 @@ #include "test_global_func15.skel.h" #include "test_global_func16.skel.h" #include "test_global_func17.skel.h" +#include "test_global_func_deep_stack.skel.h" #include "test_global_func_ctx_args.skel.h" #include "bpf/libbpf_internal.h" @@ -155,6 +156,7 @@ void test_test_global_funcs(void) RUN_TESTS(test_global_func15); RUN_TESTS(test_global_func16); RUN_TESTS(test_global_func17); + RUN_TESTS(test_global_func_deep_stack); RUN_TESTS(test_global_func_ctx_args); if (test__start_subtest("ctx_arg_rewrite")) diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index 2a27f3714f5c..bdc4fc06bc5a 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -139,7 +139,7 @@ static void test_lsm_tailcall(void) if (CHECK_FAIL(!err)) goto close_prog; - prog_fd = bpf_program__fd(skel->progs.lsm_file_alloc_security_prog); + prog_fd = bpf_program__fd(skel->progs.lsm_kernfs_init_security_prog); if (CHECK_FAIL(prog_fd < 0)) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c new file mode 100644 index 000000000000..461ded722351 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_assoc.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "struct_ops_assoc.skel.h" +#include "struct_ops_assoc_reuse.skel.h" +#include "struct_ops_assoc_in_timer.skel.h" + +static void test_st_ops_assoc(void) +{ + struct struct_ops_assoc *skel = NULL; + int err, pid; + + skel = struct_ops_assoc__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_assoc__open")) + goto out; + + /* cannot explicitly associate struct_ops program */ + err = bpf_program__assoc_struct_ops(skel->progs.test_1_a, + skel->maps.st_ops_map_a, NULL); + ASSERT_ERR(err, "bpf_program__assoc_struct_ops(test_1_a, st_ops_map_a)"); + + err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_a, + skel->maps.st_ops_map_a, NULL); + ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_a, st_ops_map_a)"); + + err = bpf_program__assoc_struct_ops(skel->progs.sys_enter_prog_a, + skel->maps.st_ops_map_a, NULL); + ASSERT_OK(err, "bpf_program__assoc_struct_ops(sys_enter_prog_a, st_ops_map_a)"); + + err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_b, + skel->maps.st_ops_map_b, NULL); + ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_b, st_ops_map_b)"); + + err = bpf_program__assoc_struct_ops(skel->progs.sys_enter_prog_b, + skel->maps.st_ops_map_b, NULL); + ASSERT_OK(err, "bpf_program__assoc_struct_ops(sys_enter_prog_b, st_ops_map_b)"); + + /* sys_enter_prog_a already associated with map_a */ + err = bpf_program__assoc_struct_ops(skel->progs.sys_enter_prog_a, + skel->maps.st_ops_map_b, NULL); + ASSERT_ERR(err, "bpf_program__assoc_struct_ops(sys_enter_prog_a, st_ops_map_b)"); + + err = struct_ops_assoc__attach(skel); + if (!ASSERT_OK(err, "struct_ops_assoc__attach")) + goto out; + + /* run tracing prog that calls .test_1 and checks return */ + pid = getpid(); + skel->bss->test_pid = pid; + sys_gettid(); + skel->bss->test_pid = 0; + + ASSERT_EQ(skel->bss->test_err_a, 0, "skel->bss->test_err_a"); + ASSERT_EQ(skel->bss->test_err_b, 0, "skel->bss->test_err_b"); + + /* run syscall_prog that calls .test_1 and checks return */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_a), NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_b), NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + ASSERT_EQ(skel->bss->test_err_a, 0, "skel->bss->test_err_a"); + ASSERT_EQ(skel->bss->test_err_b, 0, "skel->bss->test_err_b"); + +out: + struct_ops_assoc__destroy(skel); +} + +static void test_st_ops_assoc_reuse(void) +{ + struct struct_ops_assoc_reuse *skel = NULL; + int err; + + skel = struct_ops_assoc_reuse__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_assoc_reuse__open")) + goto out; + + err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_a, + skel->maps.st_ops_map_a, NULL); + ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_a, st_ops_map_a)"); + + err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog_b, + skel->maps.st_ops_map_b, NULL); + ASSERT_OK(err, "bpf_program__assoc_struct_ops(syscall_prog_b, st_ops_map_b)"); + + err = struct_ops_assoc_reuse__attach(skel); + if (!ASSERT_OK(err, "struct_ops_assoc__attach")) + goto out; + + /* run syscall_prog that calls .test_1 and checks return */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_a), NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog_b), NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + ASSERT_EQ(skel->bss->test_err_a, 0, "skel->bss->test_err_a"); + ASSERT_EQ(skel->bss->test_err_b, 0, "skel->bss->test_err_b"); + +out: + struct_ops_assoc_reuse__destroy(skel); +} + +static void test_st_ops_assoc_in_timer(void) +{ + struct struct_ops_assoc_in_timer *skel = NULL; + int err; + + skel = struct_ops_assoc_in_timer__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_assoc_in_timer__open")) + goto out; + + err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog, + skel->maps.st_ops_map, NULL); + ASSERT_OK(err, "bpf_program__assoc_struct_ops"); + + err = struct_ops_assoc_in_timer__attach(skel); + if (!ASSERT_OK(err, "struct_ops_assoc__attach")) + goto out; + + /* + * Run .test_1 by calling kfunc bpf_kfunc_multi_st_ops_test_1_prog_arg() and checks + * the return value. .test_1 will also schedule timer_cb that runs .test_1 again + * immediately. + */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog), NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + /* Check the return of the kfunc after timer_cb runs */ + while (!READ_ONCE(skel->bss->timer_cb_run)) + sched_yield(); + ASSERT_EQ(skel->bss->timer_test_1_ret, 1234, "skel->bss->timer_test_1_ret"); + ASSERT_EQ(skel->bss->test_err, 0, "skel->bss->test_err_a"); +out: + struct_ops_assoc_in_timer__destroy(skel); +} + +static void test_st_ops_assoc_in_timer_no_uref(void) +{ + struct struct_ops_assoc_in_timer *skel = NULL; + struct bpf_link *link; + int err; + + skel = struct_ops_assoc_in_timer__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_assoc_in_timer__open")) + goto out; + + err = bpf_program__assoc_struct_ops(skel->progs.syscall_prog, + skel->maps.st_ops_map, NULL); + ASSERT_OK(err, "bpf_program__assoc_struct_ops"); + + link = bpf_map__attach_struct_ops(skel->maps.st_ops_map); + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) + goto out; + + /* + * Run .test_1 by calling kfunc bpf_kfunc_multi_st_ops_test_1_prog_arg() and checks + * the return value. .test_1 will also schedule timer_cb that runs .test_1 again. + * timer_cb will run 500ms after syscall_prog runs, when the user space no longer + * holds a reference to st_ops_map. + */ + skel->bss->timer_ns = 500000000; + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.syscall_prog), NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + /* Detach and close struct_ops map to cause it to be freed */ + bpf_link__destroy(link); + close(bpf_program__fd(skel->progs.syscall_prog)); + close(bpf_map__fd(skel->maps.st_ops_map)); + + /* Check the return of the kfunc after timer_cb runs */ + while (!READ_ONCE(skel->bss->timer_cb_run)) + sched_yield(); + ASSERT_EQ(skel->bss->timer_test_1_ret, -1, "skel->bss->timer_test_1_ret"); + ASSERT_EQ(skel->bss->test_err, 0, "skel->bss->test_err_a"); +out: + struct_ops_assoc_in_timer__destroy(skel); +} + +void test_struct_ops_assoc(void) +{ + if (test__start_subtest("st_ops_assoc")) + test_st_ops_assoc(); + if (test__start_subtest("st_ops_assoc_reuse")) + test_st_ops_assoc_reuse(); + if (test__start_subtest("st_ops_assoc_in_timer")) + test_st_ops_assoc_in_timer(); + if (test__start_subtest("st_ops_assoc_in_timer_no_uref")) + test_st_ops_assoc_in_timer_no_uref(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c new file mode 100644 index 000000000000..fd8762ba4b67 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "struct_ops_id_ops_mapping1.skel.h" +#include "struct_ops_id_ops_mapping2.skel.h" + +static void test_st_ops_id_ops_mapping(void) +{ + struct struct_ops_id_ops_mapping1 *skel1 = NULL; + struct struct_ops_id_ops_mapping2 *skel2 = NULL; + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int err, pid, prog1_fd, prog2_fd; + + skel1 = struct_ops_id_ops_mapping1__open_and_load(); + if (!ASSERT_OK_PTR(skel1, "struct_ops_id_ops_mapping1__open")) + goto out; + + skel2 = struct_ops_id_ops_mapping2__open_and_load(); + if (!ASSERT_OK_PTR(skel2, "struct_ops_id_ops_mapping2__open")) + goto out; + + err = bpf_map_get_info_by_fd(bpf_map__fd(skel1->maps.st_ops_map), + &info, &len); + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) + goto out; + + skel1->bss->st_ops_id = info.id; + + err = bpf_map_get_info_by_fd(bpf_map__fd(skel2->maps.st_ops_map), + &info, &len); + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) + goto out; + + skel2->bss->st_ops_id = info.id; + + err = struct_ops_id_ops_mapping1__attach(skel1); + if (!ASSERT_OK(err, "struct_ops_id_ops_mapping1__attach")) + goto out; + + err = struct_ops_id_ops_mapping2__attach(skel2); + if (!ASSERT_OK(err, "struct_ops_id_ops_mapping2__attach")) + goto out; + + /* run tracing prog that calls .test_1 and checks return */ + pid = getpid(); + skel1->bss->test_pid = pid; + skel2->bss->test_pid = pid; + sys_gettid(); + skel1->bss->test_pid = 0; + skel2->bss->test_pid = 0; + + /* run syscall_prog that calls .test_1 and checks return */ + prog1_fd = bpf_program__fd(skel1->progs.syscall_prog); + err = bpf_prog_test_run_opts(prog1_fd, NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + prog2_fd = bpf_program__fd(skel2->progs.syscall_prog); + err = bpf_prog_test_run_opts(prog2_fd, NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + ASSERT_EQ(skel1->bss->test_err, 0, "skel1->bss->test_err"); + ASSERT_EQ(skel2->bss->test_err, 0, "skel2->bss->test_err"); + +out: + struct_ops_id_ops_mapping1__destroy(skel1); + struct_ops_id_ops_mapping2__destroy(skel2); +} + +void test_struct_ops_id_ops_mapping(void) +{ + if (test__start_subtest("st_ops_id_ops_mapping")) + test_st_ops_id_ops_mapping(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c new file mode 100644 index 000000000000..467cc72a3588 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c @@ -0,0 +1,16 @@ +#include <test_progs.h> + +#include "struct_ops_kptr_return.skel.h" +#include "struct_ops_kptr_return_fail__wrong_type.skel.h" +#include "struct_ops_kptr_return_fail__invalid_scalar.skel.h" +#include "struct_ops_kptr_return_fail__nonzero_offset.skel.h" +#include "struct_ops_kptr_return_fail__local_kptr.skel.h" + +void test_struct_ops_kptr_return(void) +{ + RUN_TESTS(struct_ops_kptr_return); + RUN_TESTS(struct_ops_kptr_return_fail__wrong_type); + RUN_TESTS(struct_ops_kptr_return_fail__invalid_scalar); + RUN_TESTS(struct_ops_kptr_return_fail__nonzero_offset); + RUN_TESTS(struct_ops_kptr_return_fail__local_kptr); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_args.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_args.c new file mode 100644 index 000000000000..0f321e889862 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_args.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "struct_ops_multi_args.skel.h" + +void test_struct_ops_multi_args(void) +{ + RUN_TESTS(struct_ops_multi_args); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c new file mode 100644 index 000000000000..da60c715fc59 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c @@ -0,0 +1,14 @@ +#include <test_progs.h> + +#include "struct_ops_refcounted.skel.h" +#include "struct_ops_refcounted_fail__ref_leak.skel.h" +#include "struct_ops_refcounted_fail__global_subprog.skel.h" +#include "struct_ops_refcounted_fail__tail_call.skel.h" + +void test_struct_ops_refcounted(void) +{ + RUN_TESTS(struct_ops_refcounted); + RUN_TESTS(struct_ops_refcounted_fail__ref_leak); + RUN_TESTS(struct_ops_refcounted_fail__global_subprog); + RUN_TESTS(struct_ops_refcounted_fail__tail_call); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c new file mode 100644 index 000000000000..2a1ff821bc97 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c @@ -0,0 +1,1615 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include "test_progs.h" +#include "cgroup_helpers.h" + +#define CG_PATH "/foo" +#define MAX_INSNS 512 +#define FIXUP_SYSCTL_VALUE 0 + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +struct sysctl_test { + const char *descr; + size_t fixup_value_insn; + struct bpf_insn insns[MAX_INSNS]; + const char *prog_file; + enum bpf_attach_type attach_type; + const char *sysctl; + int open_flags; + int seek; + const char *newval; + const char *oldval; + enum { + LOAD_REJECT, + ATTACH_REJECT, + OP_EPERM, + SUCCESS, + } result; + struct bpf_object *obj; +}; + +static struct sysctl_test tests[] = { + { + .descr = "sysctl wrong attach_type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = 0, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = ATTACH_REJECT, + }, + { + .descr = "sysctl:read allow all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl:read deny all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = OP_EPERM, + }, + { + .descr = "ctx:write sysctl:read read ok", + .insns = { + /* If (write) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "ctx:write sysctl:write read ok", + .insns = { + /* If (write) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/domainname", + .open_flags = O_WRONLY, + .newval = "(none)", /* same as default, should fail anyway */ + .result = OP_EPERM, + }, + { + .descr = "ctx:write sysctl:write read ok narrow", + .insns = { + /* u64 w = (u16)write & 1; */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write) + 2), +#endif + BPF_ALU64_IMM(BPF_AND, BPF_REG_7, 1), + /* return 1 - w; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/domainname", + .open_flags = O_WRONLY, + .newval = "(none)", /* same as default, should fail anyway */ + .result = OP_EPERM, + }, + { + .descr = "ctx:write sysctl:read write reject", + .insns = { + /* write = X */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sysctl, write)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = LOAD_REJECT, + }, + { + .descr = "ctx:file_pos sysctl:read read ok", + .insns = { + /* If (file_pos == X) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .seek = 3, + .result = SUCCESS, + }, + { + .descr = "ctx:file_pos sysctl:read read ok narrow", + .insns = { + /* If (file_pos == X) */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos)), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos) + 3), +#endif + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 4, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .seek = 4, + .result = SUCCESS, + }, + { + .descr = "ctx:file_pos sysctl:read write ok", + .insns = { + /* file_pos = X */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sysctl, file_pos)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .oldval = "nux\n", + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl_value:base ok", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6), + /* buf == "tcp_mem\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x7463705f6d656d00ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl_value:base E2BIG truncated", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) too small */ + BPF_MOV64_IMM(BPF_REG_3, 7), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), + + /* buf[0:7] == "tcp_me\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x7463705f6d650000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl:full ok", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 17), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, 0), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14), + + /* buf[0:8] == "net/ipv4" && */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69707634ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), + + /* buf[8:16] == "/tcp_mem" && */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x2f7463705f6d656dULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), + + /* buf[16:24] == "\0") */ + BPF_LD_IMM64(BPF_REG_8, 0x0ULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl:full E2BIG truncated", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 16), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, 0), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10), + + /* buf[0:8] == "net/ipv4" && */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69707634ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), + + /* buf[8:16] == "/tcp_me\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x2f7463705f6d6500ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_name sysctl:full E2BIG truncated small", + .insns = { + /* sysctl_get_name arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_name arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 7), + + /* sysctl_get_name arg4 (flags) */ + BPF_MOV64_IMM(BPF_REG_4, 0), + + /* sysctl_get_name(ctx, buf, buf_len, flags) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), + + /* buf[0:8] == "net/ip\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69700000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read ok, gt", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), + + /* buf[0:6] == "Linux\n\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e75780a0000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read ok, eq", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 7), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 7), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), + + /* buf[0:6] == "Linux\n\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e75780a0000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read E2BIG truncated", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_7, BPF_REG_0, 6), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 6), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), + + /* buf[0:6] == "Linux\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e7578000000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/ostype", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_current_value sysctl:read EINVAL", + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 4), + + /* buf[0:8] is NUL-filled) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv6/conf/lo/stable_secret", /* -EIO */ + .open_flags = O_RDONLY, + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_current_value sysctl:write ok", + .fixup_value_insn = 6, + .insns = { + /* sysctl_get_current_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_current_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_current_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 6), + + /* buf[0:4] == expected) */ + BPF_LD_IMM64(BPF_REG_8, FIXUP_SYSCTL_VALUE), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "600", /* same as default, should fail anyway */ + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_new_value sysctl:read EINVAL", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_get_new_value sysctl:write ok", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 4), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + + /* buf[0:4] == "606\0") */ + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, + bpf_ntohl(0x36303600), 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "606", + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_new_value sysctl:write ok long", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 24), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14), + + /* buf[0:8] == "3000000 " && */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3330303030303020ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), + + /* buf[8:16] == "4000000 " && */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3430303030303020ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), + + /* buf[16:24] == "6000000\0") */ + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3630303030303000ULL)), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_WRONLY, + .newval = "3000000 4000000 6000000", + .result = OP_EPERM, + }, + { + .descr = "sysctl_get_new_value sysctl:write E2BIG", + .insns = { + /* sysctl_get_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 3), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_get_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* sysctl_get_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 4), + + /* buf[0:3] == "60\0") */ + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, + bpf_ntohl(0x36300000), 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "606", + .result = OP_EPERM, + }, + { + .descr = "sysctl_set_new_value sysctl:read EINVAL", + .insns = { + /* sysctl_set_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_set_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* sysctl_set_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + .descr = "sysctl_set_new_value sysctl:write ok", + .fixup_value_insn = 2, + .insns = { + /* sysctl_set_new_value arg2 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_LD_IMM64(BPF_REG_0, FIXUP_SYSCTL_VALUE), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + + /* sysctl_set_new_value arg3 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* sysctl_set_new_value(ctx, buf, buf_len) */ + BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_WRONLY, + .newval = "606", + .result = SUCCESS, + }, + { + "bpf_strtoul one number string", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul multi number string", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* "600 602\0" */ + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3630302036303200ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 8), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 18), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 16), + + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 602, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul buf_len = 0, reject", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 0), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = LOAD_REJECT, + }, + { + "bpf_strtoul supported base, ok", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x30373700)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 8), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 63, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul unsupported base, EINVAL", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 3), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul buf with spaces only, EINVAL", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0d0c0a09)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtoul negative number, EINVAL", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* " -6\0" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0a2d3600)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtoul), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol negative number, ok", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* " -6\0" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0a2d3600)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 10), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, -6, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol hex number, ok", + .insns = { + /* arg1 (buf) */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* "0xfe" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x30786665)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 4), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4), + /* res == expected) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 254, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol max long", + .insns = { + /* arg1 (buf) 9223372036854775807 */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3932323333373230ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3336383534373735ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3830370000000000ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 19), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected && */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 19, 6), + /* res == expected) */ + BPF_LD_IMM64(BPF_REG_8, 0x7fffffffffffffffULL), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "bpf_strtol overflow, ERANGE", + .insns = { + /* arg1 (buf) 9223372036854775808 */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3932323333373230ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3336383534373735ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3830380000000000ULL)), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + + /* arg2 (buf_len) */ + BPF_MOV64_IMM(BPF_REG_2, 19), + + /* arg3 (flags) */ + BPF_MOV64_IMM(BPF_REG_3, 0), + + /* arg4 (res) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_7), + + BPF_EMIT_CALL(BPF_FUNC_strtol), + + /* if (ret == expected) */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -ERANGE, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, + { + "C prog: deny all writes", + .prog_file = "./test_sysctl_prog.bpf.o", + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_WRONLY, + .newval = "123 456 789", + .result = OP_EPERM, + }, + { + "C prog: deny access by name", + .prog_file = "./test_sysctl_prog.bpf.o", + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/route/mtu_expires", + .open_flags = O_RDONLY, + .result = OP_EPERM, + }, + { + "C prog: read tcp_mem", + .prog_file = "./test_sysctl_prog.bpf.o", + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "net/ipv4/tcp_mem", + .open_flags = O_RDONLY, + .result = SUCCESS, + }, +}; + +static size_t probe_prog_length(const struct bpf_insn *fp) +{ + size_t len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static int fixup_sysctl_value(const char *buf, size_t buf_len, + struct bpf_insn *prog, size_t insn_num) +{ + union { + uint8_t raw[sizeof(uint64_t)]; + uint64_t num; + } value = {}; + + if (buf_len > sizeof(value)) { + log_err("Value is too big (%zd) to use in fixup", buf_len); + return -1; + } + if (prog[insn_num].code != (BPF_LD | BPF_DW | BPF_IMM)) { + log_err("Can fixup only BPF_LD_IMM64 insns"); + return -1; + } + + memcpy(value.raw, buf, buf_len); + prog[insn_num].imm = (uint32_t)value.num; + prog[insn_num + 1].imm = (uint32_t)(value.num >> 32); + + return 0; +} + +static int load_sysctl_prog_insns(struct sysctl_test *test, + const char *sysctl_path) +{ + struct bpf_insn *prog = test->insns; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + int ret, insn_cnt; + + insn_cnt = probe_prog_length(prog); + + if (test->fixup_value_insn) { + char buf[128]; + ssize_t len; + int fd; + + fd = open(sysctl_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + log_err("open(%s) failed", sysctl_path); + return -1; + } + len = read(fd, buf, sizeof(buf)); + if (len == -1) { + log_err("read(%s) failed", sysctl_path); + close(fd); + return -1; + } + close(fd); + if (fixup_sysctl_value(buf, len, prog, test->fixup_value_insn)) + return -1; + } + + opts.log_buf = bpf_log_buf; + opts.log_size = BPF_LOG_BUF_SIZE; + + ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SYSCTL, NULL, "GPL", prog, insn_cnt, &opts); + if (ret < 0 && test->result != LOAD_REJECT) { + log_err(">>> Loading program error.\n" + ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); + } + + return ret; +} + +static int load_sysctl_prog_file(struct sysctl_test *test) +{ + struct bpf_object *obj; + int prog_fd; + + if (bpf_prog_test_load(test->prog_file, BPF_PROG_TYPE_CGROUP_SYSCTL, &obj, &prog_fd)) { + if (test->result != LOAD_REJECT) + log_err(">>> Loading program (%s) error.\n", + test->prog_file); + return -1; + } + + test->obj = obj; + return prog_fd; +} + +static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path) +{ + return test->prog_file + ? load_sysctl_prog_file(test) + : load_sysctl_prog_insns(test, sysctl_path); +} + +static int access_sysctl(const char *sysctl_path, + const struct sysctl_test *test) +{ + int err = 0; + int fd; + + fd = open(sysctl_path, test->open_flags | O_CLOEXEC); + if (fd < 0) + return fd; + + if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) { + log_err("lseek(%d) failed", test->seek); + goto err; + } + + if (test->open_flags == O_RDONLY) { + char buf[128]; + + if (read(fd, buf, sizeof(buf)) == -1) + goto err; + if (test->oldval && + strncmp(buf, test->oldval, strlen(test->oldval))) { + log_err("Read value %s != %s", buf, test->oldval); + goto err; + } + } else if (test->open_flags == O_WRONLY) { + if (!test->newval) { + log_err("New value for sysctl is not set"); + goto err; + } + if (write(fd, test->newval, strlen(test->newval)) == -1) + goto err; + } else { + log_err("Unexpected sysctl access: neither read nor write"); + goto err; + } + + goto out; +err: + err = -1; +out: + close(fd); + return err; +} + +static int run_test_case(int cgfd, struct sysctl_test *test) +{ + enum bpf_attach_type atype = test->attach_type; + char sysctl_path[128]; + int progfd = -1; + int err = 0; + + printf("Test case: %s .. ", test->descr); + + snprintf(sysctl_path, sizeof(sysctl_path), "/proc/sys/%s", + test->sysctl); + + progfd = load_sysctl_prog(test, sysctl_path); + if (progfd < 0) { + if (test->result == LOAD_REJECT) + goto out; + else + goto err; + } + + if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) { + if (test->result == ATTACH_REJECT) + goto out; + else + goto err; + } + + errno = 0; + if (access_sysctl(sysctl_path, test) == -1) { + if (test->result == OP_EPERM && errno == EPERM) + goto out; + else + goto err; + } + + if (test->result != SUCCESS) { + log_err("Unexpected success"); + goto err; + } + + goto out; +err: + err = -1; +out: + /* Detaching w/o checking return code: best effort attempt. */ + if (progfd != -1) + bpf_prog_detach(cgfd, atype); + bpf_object__close(test->obj); + close(progfd); + printf("[%s]\n", err ? "FAIL" : "PASS"); + return err; +} + +static int run_tests(int cgfd) +{ + int passes = 0; + int fails = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + if (run_test_case(cgfd, &tests[i])) + ++fails; + else + ++passes; + } + printf("Summary: %d PASSED, %d FAILED\n", passes, fails); + return fails ? -1 : 0; +} + +void test_sysctl(void) +{ + int cgfd; + + cgfd = cgroup_setup_and_join(CG_PATH); + if (!ASSERT_OK_FD(cgfd < 0, "create_cgroup")) + goto out; + + if (!ASSERT_OK(run_tests(cgfd), "run_tests")) + goto out; + +out: + close(cgfd); + cleanup_cgroup_environment(); + return; +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c new file mode 100644 index 000000000000..6a5806b36113 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c @@ -0,0 +1,375 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <pthread.h> +#include <bpf/btf.h> +#include <test_progs.h> + +/* + * Only a page is pinned to kernel, so the maximum amount of dynamic data + * allowed is page_size - sizeof(struct tld_data_u) - static TLD fields. + */ +#define TLD_DYN_DATA_SIZE_MAX (getpagesize() - sizeof(struct tld_data_u) - 8) + +#define TLD_FREE_DATA_ON_THREAD_EXIT +#define TLD_DYN_DATA_SIZE TLD_DYN_DATA_SIZE_MAX +#include "task_local_data.h" + +struct test_tld_struct { + __u64 a; + __u64 b; + __u64 c; + __u64 d; +}; + +#include "test_task_local_data.skel.h" + +TLD_DEFINE_KEY(value0_key, "value0", sizeof(int)); + +/* + * Reset task local data between subtests by clearing metadata other + * than the statically defined value0. This is safe as subtests run + * sequentially. Users of task local data library should not touch + * library internal. + */ +static void reset_tld(__u16 dyn_data_size) +{ + if (tld_meta_p) { + /* Remove TLDs created by tld_create_key() */ + tld_meta_p->cnt = 1; + tld_meta_p->size = dyn_data_size + 8; + memset(&tld_meta_p->metadata[1], 0, + (TLD_MAX_DATA_CNT - 1) * sizeof(struct tld_metadata)); + } +} + +/* Serialize access to bpf program's global variables */ +static pthread_mutex_t global_mutex; + +static tld_key_t *tld_keys; + +#define TEST_BASIC_THREAD_NUM 32 + +void *test_task_local_data_basic_thread(void *arg) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct test_task_local_data *skel = (struct test_task_local_data *)arg; + int fd, err, tid, *value0, *value1; + struct test_tld_struct *value2; + + fd = bpf_map__fd(skel->maps.tld_data_map); + + value0 = tld_get_data(fd, value0_key); + if (!ASSERT_OK_PTR(value0, "tld_get_data")) + goto out; + + value1 = tld_get_data(fd, tld_keys[1]); + if (!ASSERT_OK_PTR(value1, "tld_get_data")) + goto out; + + value2 = tld_get_data(fd, tld_keys[2]); + if (!ASSERT_OK_PTR(value2, "tld_get_data")) + goto out; + + tid = sys_gettid(); + + *value0 = tid + 0; + *value1 = tid + 1; + value2->a = tid + 2; + value2->b = tid + 3; + value2->c = tid + 4; + value2->d = tid + 5; + + pthread_mutex_lock(&global_mutex); + /* Run task_main that read task local data and save to global variables */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + + ASSERT_EQ(skel->bss->test_value0, tid + 0, "tld_get_data value0"); + ASSERT_EQ(skel->bss->test_value1, tid + 1, "tld_get_data value1"); + ASSERT_EQ(skel->bss->test_value2.a, tid + 2, "tld_get_data value2.a"); + ASSERT_EQ(skel->bss->test_value2.b, tid + 3, "tld_get_data value2.b"); + ASSERT_EQ(skel->bss->test_value2.c, tid + 4, "tld_get_data value2.c"); + ASSERT_EQ(skel->bss->test_value2.d, tid + 5, "tld_get_data value2.d"); + pthread_mutex_unlock(&global_mutex); + + /* Make sure valueX are indeed local to threads */ + ASSERT_EQ(*value0, tid + 0, "value0"); + ASSERT_EQ(*value1, tid + 1, "value1"); + ASSERT_EQ(value2->a, tid + 2, "value2.a"); + ASSERT_EQ(value2->b, tid + 3, "value2.b"); + ASSERT_EQ(value2->c, tid + 4, "value2.c"); + ASSERT_EQ(value2->d, tid + 5, "value2.d"); + + *value0 = tid + 5; + *value1 = tid + 4; + value2->a = tid + 3; + value2->b = tid + 2; + value2->c = tid + 1; + value2->d = tid + 0; + + /* Run task_main again */ + pthread_mutex_lock(&global_mutex); + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + + ASSERT_EQ(skel->bss->test_value0, tid + 5, "tld_get_data value0"); + ASSERT_EQ(skel->bss->test_value1, tid + 4, "tld_get_data value1"); + ASSERT_EQ(skel->bss->test_value2.a, tid + 3, "tld_get_data value2.a"); + ASSERT_EQ(skel->bss->test_value2.b, tid + 2, "tld_get_data value2.b"); + ASSERT_EQ(skel->bss->test_value2.c, tid + 1, "tld_get_data value2.c"); + ASSERT_EQ(skel->bss->test_value2.d, tid + 0, "tld_get_data value2.d"); + pthread_mutex_unlock(&global_mutex); + +out: + pthread_exit(NULL); +} + +static void test_task_local_data_basic(void) +{ + struct test_task_local_data *skel; + pthread_t thread[TEST_BASIC_THREAD_NUM]; + char dummy_key_name[TLD_NAME_LEN]; + tld_key_t key; + int i, err; + + reset_tld(TLD_DYN_DATA_SIZE_MAX); + + ASSERT_OK(pthread_mutex_init(&global_mutex, NULL), "pthread_mutex_init"); + + skel = test_task_local_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t)); + if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys")) + goto out; + + ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY"); + tld_keys[1] = tld_create_key("value1", sizeof(int)); + ASSERT_FALSE(tld_key_is_err(tld_keys[1]), "tld_create_key"); + tld_keys[2] = tld_create_key("value2", sizeof(struct test_tld_struct)); + ASSERT_FALSE(tld_key_is_err(tld_keys[2]), "tld_create_key"); + + /* + * Shouldn't be able to store data exceed a page. Create a TLD just big + * enough to exceed a page. Data already contains struct tld_data_u, + * value0 and value1 of int type, and value 2 of struct test_tld_struct. + */ + key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1 - + sizeof(struct tld_data_u) - + TLD_ROUND_UP(sizeof(int), 8) * 2 - + TLD_ROUND_UP(sizeof(struct test_tld_struct), 8)); + ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key"); + + key = tld_create_key("value2", sizeof(struct test_tld_struct)); + ASSERT_EQ(tld_key_err_or_zero(key), -EEXIST, "tld_create_key"); + + /* Shouldn't be able to create the (TLD_MAX_DATA_CNT+1)-th TLD */ + for (i = 3; i < TLD_MAX_DATA_CNT; i++) { + snprintf(dummy_key_name, TLD_NAME_LEN, "dummy_value%d", i); + tld_keys[i] = tld_create_key(dummy_key_name, sizeof(int)); + ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key"); + } + key = tld_create_key("value_not_exist", sizeof(struct test_tld_struct)); + ASSERT_EQ(tld_key_err_or_zero(key), -ENOSPC, "tld_create_key"); + + /* Access TLDs from multiple threads and check if they are thread-specific */ + for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) { + err = pthread_create(&thread[i], NULL, test_task_local_data_basic_thread, skel); + if (!ASSERT_OK(err, "pthread_create")) + goto out; + } + +out: + for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) + pthread_join(thread[i], NULL); + + if (tld_keys) { + free(tld_keys); + tld_keys = NULL; + } + tld_free(); + test_task_local_data__destroy(skel); +} + +#define TEST_RACE_THREAD_NUM (TLD_MAX_DATA_CNT - 3) + +void *test_task_local_data_race_thread(void *arg) +{ + int err = 0, id = (intptr_t)arg; + char key_name[32]; + tld_key_t key; + + key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1); + if (tld_key_err_or_zero(key) != -E2BIG) { + err = 1; + goto out; + } + + /* Only one thread will succeed in creating value1 */ + key = tld_create_key("value1", sizeof(int)); + if (!tld_key_is_err(key)) + tld_keys[1] = key; + + /* Only one thread will succeed in creating value2 */ + key = tld_create_key("value2", sizeof(struct test_tld_struct)); + if (!tld_key_is_err(key)) + tld_keys[2] = key; + + snprintf(key_name, 32, "thread_%d", id); + tld_keys[id] = tld_create_key(key_name, sizeof(int)); + if (tld_key_is_err(tld_keys[id])) + err = 2; +out: + return (void *)(intptr_t)err; +} + +static void test_task_local_data_race(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + pthread_t thread[TEST_RACE_THREAD_NUM]; + struct test_task_local_data *skel; + int fd, i, j, err, *data; + void *ret = NULL; + + skel = test_task_local_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t)); + if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys")) + goto out; + + fd = bpf_map__fd(skel->maps.tld_data_map); + + ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY"); + tld_keys[0] = value0_key; + + for (j = 0; j < 100; j++) { + reset_tld(TLD_DYN_DATA_SIZE_MAX); + + for (i = 0; i < TEST_RACE_THREAD_NUM; i++) { + /* + * Try to make tld_create_key() race with each other. Call + * tld_create_key(), both valid and invalid, from different threads. + */ + err = pthread_create(&thread[i], NULL, test_task_local_data_race_thread, + (void *)(intptr_t)(i + 3)); + if (CHECK_FAIL(err)) + break; + } + + /* Wait for all tld_create_key() to return */ + for (i = 0; i < TEST_RACE_THREAD_NUM; i++) { + pthread_join(thread[i], &ret); + if (CHECK_FAIL(ret)) + break; + } + + /* Write a unique number to each TLD */ + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { + data = tld_get_data(fd, tld_keys[i]); + if (CHECK_FAIL(!data)) + break; + *data = i; + } + + /* Read TLDs and check the value to see if any address collides with another */ + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { + data = tld_get_data(fd, tld_keys[i]); + if (CHECK_FAIL(*data != i)) + break; + } + + /* Run task_main to make sure no invalid TLDs are added */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + } +out: + if (tld_keys) { + free(tld_keys); + tld_keys = NULL; + } + tld_free(); + test_task_local_data__destroy(skel); +} + +static void test_task_local_data_dyn_size(__u16 dyn_data_size) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct test_task_local_data *skel; + int max_keys, i, err, fd, *data; + char name[TLD_NAME_LEN]; + tld_key_t key; + + reset_tld(dyn_data_size); + + skel = test_task_local_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t)); + if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys")) + goto out; + + fd = bpf_map__fd(skel->maps.tld_data_map); + + /* Create as many int-sized TLDs as the dynamic data size allows */ + max_keys = dyn_data_size / TLD_ROUND_UP(sizeof(int), 8); + for (i = 0; i < max_keys; i++) { + snprintf(name, TLD_NAME_LEN, "value_%d", i); + tld_keys[i] = tld_create_key(name, sizeof(int)); + if (!ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key")) + goto out; + + data = tld_get_data(fd, tld_keys[i]); + if (!ASSERT_OK_PTR(data, "tld_get_data")) + goto out; + *data = i; + } + + /* The next key should fail with E2BIG */ + key = tld_create_key("overflow", sizeof(int)); + ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key overflow"); + + /* Verify data for value_i do not overlap */ + for (i = 0; i < max_keys; i++) { + data = tld_get_data(fd, tld_keys[i]); + if (!ASSERT_OK_PTR(data, "tld_get_data")) + goto out; + + ASSERT_EQ(*data, i, "tld_get_data value_i"); + } + + /* Verify BPF side can still read the static key */ + data = tld_get_data(fd, value0_key); + if (!ASSERT_OK_PTR(data, "tld_get_data value0")) + goto out; + *data = 0xdeadbeef; + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_EQ(skel->bss->test_value0, 0xdeadbeef, "tld_get_data value0"); + +out: + if (tld_keys) { + free(tld_keys); + tld_keys = NULL; + } + tld_free(); + test_task_local_data__destroy(skel); +} + +void test_task_local_data(void) +{ + if (test__start_subtest("task_local_data_basic")) + test_task_local_data_basic(); + if (test__start_subtest("task_local_data_race")) + test_task_local_data_race(); + if (test__start_subtest("task_local_data_dyn_size_small")) + test_task_local_data_dyn_size(64); + if (test__start_subtest("task_local_data_dyn_size_zero")) + test_task_local_data_dyn_size(0); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_work.c b/tools/testing/selftests/bpf/prog_tests/test_task_work.c new file mode 100644 index 000000000000..774b31a5f6ca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_task_work.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <string.h> +#include <stdio.h> +#include "task_work.skel.h" +#include "task_work_fail.skel.h" +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <sys/syscall.h> +#include <time.h> + +static int perf_event_open(__u32 type, __u64 config, int pid) +{ + struct perf_event_attr attr = { + .type = type, + .config = config, + .size = sizeof(struct perf_event_attr), + .sample_period = 100000, + }; + + return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0); +} + +struct elem { + char data[128]; + struct bpf_task_work tw; +}; + +static int verify_map(struct bpf_map *map, const char *expected_data) +{ + int err; + struct elem value; + int processed_values = 0; + int k, sz; + + sz = bpf_map__max_entries(map); + for (k = 0; k < sz; ++k) { + err = bpf_map__lookup_elem(map, &k, sizeof(int), &value, sizeof(struct elem), 0); + if (err) + continue; + if (!ASSERT_EQ(strcmp(expected_data, value.data), 0, "map data")) { + fprintf(stderr, "expected '%s', found '%s' in %s map", expected_data, + value.data, bpf_map__name(map)); + return 2; + } + processed_values++; + } + + return processed_values == 0; +} + +static void task_work_run(const char *prog_name, const char *map_name) +{ + struct task_work *skel; + struct bpf_program *prog; + struct bpf_map *map; + struct bpf_link *link = NULL; + int err, pe_fd = -1, pid, status, pipefd[2]; + char user_string[] = "hello world"; + + if (!ASSERT_NEQ(pipe(pipefd), -1, "pipe")) + return; + + pid = fork(); + if (pid == 0) { + __u64 num = 1; + int i; + char buf; + + close(pipefd[1]); + read(pipefd[0], &buf, sizeof(buf)); + close(pipefd[0]); + + for (i = 0; i < 10000; ++i) + num *= time(0) % 7; + (void)num; + exit(0); + } + if (!ASSERT_GT(pid, 0, "fork() failed")) { + close(pipefd[0]); + close(pipefd[1]); + return; + } + + skel = task_work__open(); + if (!ASSERT_OK_PTR(skel, "task_work__open")) + return; + + bpf_object__for_each_program(prog, skel->obj) { + bpf_program__set_autoload(prog, false); + } + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "prog_name")) + goto cleanup; + bpf_program__set_autoload(prog, true); + skel->bss->user_ptr = (char *)user_string; + + err = task_work__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pe_fd = perf_event_open(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, pid); + if (pe_fd == -1 && (errno == ENOENT || errno == EOPNOTSUPP)) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); + test__skip(); + goto cleanup; + } + if (!ASSERT_NEQ(pe_fd, -1, "pe_fd")) { + fprintf(stderr, "perf_event_open errno: %d, pid: %d\n", errno, pid); + goto cleanup; + } + + link = bpf_program__attach_perf_event(prog, pe_fd); + if (!ASSERT_OK_PTR(link, "attach_perf_event")) + goto cleanup; + + /* perf event fd ownership is passed to bpf_link */ + pe_fd = -1; + close(pipefd[0]); + write(pipefd[1], user_string, 1); + close(pipefd[1]); + /* Wait to collect some samples */ + waitpid(pid, &status, 0); + pid = 0; + map = bpf_object__find_map_by_name(skel->obj, map_name); + if (!ASSERT_OK_PTR(map, "find map_name")) + goto cleanup; + if (!ASSERT_OK(verify_map(map, user_string), "verify map")) + goto cleanup; +cleanup: + if (pe_fd >= 0) + close(pe_fd); + bpf_link__destroy(link); + task_work__destroy(skel); + if (pid > 0) { + close(pipefd[0]); + write(pipefd[1], user_string, 1); + close(pipefd[1]); + waitpid(pid, &status, 0); + } +} + +void test_task_work(void) +{ + if (test__start_subtest("test_task_work_hash_map")) + task_work_run("oncpu_hash_map", "hmap"); + + if (test__start_subtest("test_task_work_array_map")) + task_work_run("oncpu_array_map", "arrmap"); + + if (test__start_subtest("test_task_work_lru_map")) + task_work_run("oncpu_lru_map", "lrumap"); + + RUN_TESTS(task_work_fail); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c new file mode 100644 index 000000000000..462512fb191f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * BPF-based flow shaping + * + * The test brings up two veth in two isolated namespaces, attach some flow + * shaping program onto it, and ensures that a manual speedtest maximum + * value matches the rate set in the BPF shapers. + */ + +#include <asm-generic/socket.h> +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <math.h> +#include <sys/time.h> +#include <sys/socket.h> +#include <bpf/libbpf.h> +#include <pthread.h> +#include "test_progs.h" +#include "network_helpers.h" +#include "test_tc_edt.skel.h" + +#define SERVER_NS "tc-edt-server-ns" +#define CLIENT_NS "tc-edt-client-ns" +#define IP4_ADDR_VETH1 "192.168.1.1" +#define IP4_ADDR_VETH2 "192.168.1.2" +#define IP4_ADDR_VETH2_HEX 0xC0A80102 + +#define TIMEOUT_MS 2000 +#define TEST_PORT 9000 +#define TARGET_RATE_MBPS 5.0 +#define TX_BYTES_COUNT (1 * 1000 * 1000) +#define RATE_ERROR_PERCENT 2.0 + +struct connection { + int server_listen_fd; + int server_conn_fd; + int client_conn_fd; +}; + +static int setup(struct test_tc_edt *skel) +{ + struct nstoken *nstoken_client, *nstoken_server; + int ret; + + if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns")) + goto fail; + if (!ASSERT_OK(make_netns(SERVER_NS), "create server ns")) + goto fail_delete_client_ns; + + nstoken_client = open_netns(CLIENT_NS); + if (!ASSERT_OK_PTR(nstoken_client, "open client ns")) + goto fail_delete_server_ns; + SYS(fail_close_client_ns, "ip link add veth1 type veth peer name %s", + "veth2 netns " SERVER_NS); + SYS(fail_close_client_ns, "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1"); + SYS(fail_close_client_ns, "ip link set veth1 up"); + + nstoken_server = open_netns(SERVER_NS); + if (!ASSERT_OK_PTR(nstoken_server, "enter server ns")) + goto fail_close_client_ns; + SYS(fail_close_server_ns, "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2"); + SYS(fail_close_server_ns, "ip link set veth2 up"); + SYS(fail_close_server_ns, "tc qdisc add dev veth2 root fq"); + ret = tc_prog_attach("veth2", -1, bpf_program__fd(skel->progs.tc_prog)); + if (!ASSERT_OK(ret, "attach bpf prog")) + goto fail_close_server_ns; + skel->bss->target_rate = TARGET_RATE_MBPS * 1000 * 1000; + close_netns(nstoken_server); + close_netns(nstoken_client); + + return 0; + +fail_close_server_ns: + close_netns(nstoken_server); +fail_close_client_ns: + close_netns(nstoken_client); +fail_delete_server_ns: + remove_netns(SERVER_NS); +fail_delete_client_ns: + remove_netns(CLIENT_NS); +fail: + return -1; +} + +static void cleanup(void) +{ + remove_netns(CLIENT_NS); + remove_netns(SERVER_NS); +} + +static void run_test(void) +{ + int server_fd, client_fd, err; + double rate_mbps, rate_error; + struct nstoken *nstoken; + __u64 ts_start, ts_end; + + nstoken = open_netns(SERVER_NS); + if (!ASSERT_OK_PTR(nstoken, "open server ns")) + return; + server_fd = start_server(AF_INET, SOCK_STREAM, IP4_ADDR_VETH2, + TEST_PORT, TIMEOUT_MS); + if (!ASSERT_OK_FD(server_fd, "start server")) + return; + + close_netns(nstoken); + nstoken = open_netns(CLIENT_NS); + if (!ASSERT_OK_PTR(nstoken, "open client ns")) + return; + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_OK_FD(client_fd, "connect client")) + return; + + ts_start = get_time_ns(); + err = send_recv_data(server_fd, client_fd, TX_BYTES_COUNT); + ts_end = get_time_ns(); + close_netns(nstoken); + ASSERT_OK(err, "send_recv_data"); + + rate_mbps = TX_BYTES_COUNT / ((ts_end - ts_start) / 1000.0); + rate_error = + fabs((rate_mbps - TARGET_RATE_MBPS) * 100.0 / TARGET_RATE_MBPS); + + ASSERT_LE(rate_error, RATE_ERROR_PERCENT, + "rate error is lower than threshold"); +} + +void test_tc_edt(void) +{ + struct test_tc_edt *skel; + + skel = test_tc_edt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open and load")) + return; + + if (!ASSERT_OK(setup(skel), "global setup")) + return; + + run_test(); + + cleanup(); + test_tc_edt__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c new file mode 100644 index 000000000000..1aa7c9463980 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c @@ -0,0 +1,712 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * End-to-end eBPF tunnel test suite + * The file tests BPF network tunnels implementation. For each tunnel + * type, the test validates that: + * - basic communication can first be established between the two veths + * - when adding a BPF-based encapsulation on client egress, it now fails + * to communicate with the server + * - when adding a kernel-based decapsulation on server ingress, client + * can now connect + * - when replacing the kernel-based decapsulation with a BPF-based one, + * the client can still connect + */ + +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <bpf/libbpf.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_tc_tunnel.skel.h" + +#define SERVER_NS "tc-tunnel-server-ns" +#define CLIENT_NS "tc-tunnel-client-ns" +#define MAC_ADDR_VETH1 "00:11:22:33:44:55" +#define IP4_ADDR_VETH1 "192.168.1.1" +#define IP6_ADDR_VETH1 "fd::1" +#define MAC_ADDR_VETH2 "66:77:88:99:AA:BB" +#define IP4_ADDR_VETH2 "192.168.1.2" +#define IP6_ADDR_VETH2 "fd::2" + +#define TEST_NAME_MAX_LEN 64 +#define PROG_NAME_MAX_LEN 64 +#define TUNNEL_ARGS_MAX_LEN 128 +#define BUFFER_LEN 2000 +#define DEFAULT_TEST_DATA_SIZE 100 +#define GSO_TEST_DATA_SIZE BUFFER_LEN + +#define TIMEOUT_MS 1000 +#define TEST_PORT 8000 +#define UDP_PORT 5555 +#define MPLS_UDP_PORT 6635 +#define FOU_MPLS_PROTO 137 +#define VXLAN_ID 1 +#define VXLAN_PORT 8472 +#define MPLS_TABLE_ENTRIES_COUNT 65536 + +static char tx_buffer[BUFFER_LEN], rx_buffer[BUFFER_LEN]; + +struct subtest_cfg { + char *ebpf_tun_type; + char *iproute_tun_type; + char *mac_tun_type; + int ipproto; + void (*extra_decap_mod_args_cb)(struct subtest_cfg *cfg, char *dst); + bool tunnel_need_veth_mac; + bool configure_fou_rx_port; + char *tmode; + bool expect_kern_decap_failure; + bool configure_mpls; + bool test_gso; + char *tunnel_client_addr; + char *tunnel_server_addr; + char name[TEST_NAME_MAX_LEN]; + char *server_addr; + int client_egress_prog_fd; + int server_ingress_prog_fd; + char extra_decap_mod_args[TUNNEL_ARGS_MAX_LEN]; + int server_fd; +}; + +struct connection { + int client_fd; + int server_fd; +}; + +static int build_subtest_name(struct subtest_cfg *cfg, char *dst, size_t size) +{ + int ret; + + ret = snprintf(dst, size, "%s_%s", cfg->ebpf_tun_type, + cfg->mac_tun_type); + + return ret < 0 ? ret : 0; +} + +static int set_subtest_progs(struct subtest_cfg *cfg, struct test_tc_tunnel *skel) +{ + char prog_name[PROG_NAME_MAX_LEN]; + struct bpf_program *prog; + int ret; + + ret = snprintf(prog_name, PROG_NAME_MAX_LEN, "__encap_"); + if (ret < 0) + return ret; + ret = build_subtest_name(cfg, prog_name + ret, PROG_NAME_MAX_LEN - ret); + if (ret < 0) + return ret; + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!prog) + return -1; + + cfg->client_egress_prog_fd = bpf_program__fd(prog); + cfg->server_ingress_prog_fd = bpf_program__fd(skel->progs.decap_f); + return 0; +} + +static void set_subtest_addresses(struct subtest_cfg *cfg) +{ + if (cfg->ipproto == 6) + cfg->server_addr = IP6_ADDR_VETH2; + else + cfg->server_addr = IP4_ADDR_VETH2; + + /* Some specific tunnel types need specific addressing, it then + * has been already set in the configuration table. Otherwise, + * deduce the relevant addressing from the ipproto + */ + if (cfg->tunnel_client_addr && cfg->tunnel_server_addr) + return; + + if (cfg->ipproto == 6) { + cfg->tunnel_client_addr = IP6_ADDR_VETH1; + cfg->tunnel_server_addr = IP6_ADDR_VETH2; + } else { + cfg->tunnel_client_addr = IP4_ADDR_VETH1; + cfg->tunnel_server_addr = IP4_ADDR_VETH2; + } +} + +static int run_server(struct subtest_cfg *cfg) +{ + int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET; + struct nstoken *nstoken; + struct network_helper_opts opts = { + .timeout_ms = TIMEOUT_MS + }; + + nstoken = open_netns(SERVER_NS); + if (!ASSERT_OK_PTR(nstoken, "open server ns")) + return -1; + + cfg->server_fd = start_server_str(family, SOCK_STREAM, cfg->server_addr, + TEST_PORT, &opts); + close_netns(nstoken); + if (!ASSERT_OK_FD(cfg->server_fd, "start server")) + return -1; + + return 0; +} + +static int check_server_rx_data(struct subtest_cfg *cfg, + struct connection *conn, int len) +{ + int err; + + memset(rx_buffer, 0, BUFFER_LEN); + err = recv(conn->server_fd, rx_buffer, len, 0); + if (!ASSERT_EQ(err, len, "check rx data len")) + return 1; + if (!ASSERT_MEMEQ(tx_buffer, rx_buffer, len, "check received data")) + return 1; + return 0; +} + +static struct connection *connect_client_to_server(struct subtest_cfg *cfg) +{ + struct network_helper_opts opts = {.timeout_ms = 1000}; + int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET; + struct connection *conn = NULL; + int client_fd, server_fd; + + conn = malloc(sizeof(struct connection)); + if (!conn) + return conn; + + client_fd = connect_to_addr_str(family, SOCK_STREAM, cfg->server_addr, + TEST_PORT, &opts); + + if (client_fd < 0) { + free(conn); + return NULL; + } + + server_fd = accept(cfg->server_fd, NULL, NULL); + if (server_fd < 0) { + close(client_fd); + free(conn); + return NULL; + } + + conn->server_fd = server_fd; + conn->client_fd = client_fd; + + return conn; +} + +static void disconnect_client_from_server(struct subtest_cfg *cfg, + struct connection *conn) +{ + close(conn->server_fd); + close(conn->client_fd); + free(conn); +} + +static int send_and_test_data(struct subtest_cfg *cfg) +{ + struct connection *conn; + int err, res = -1; + + conn = connect_client_to_server(cfg); + if (!ASSERT_OK_PTR(conn, "connect to server")) + return -1; + + err = send(conn->client_fd, tx_buffer, DEFAULT_TEST_DATA_SIZE, 0); + if (!ASSERT_EQ(err, DEFAULT_TEST_DATA_SIZE, "send data from client")) + goto end; + if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE)) + goto end; + + if (!cfg->test_gso) { + res = 0; + goto end; + } + + err = send(conn->client_fd, tx_buffer, GSO_TEST_DATA_SIZE, 0); + if (!ASSERT_EQ(err, GSO_TEST_DATA_SIZE, "send (large) data from client")) + goto end; + if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE)) + goto end; + + res = 0; +end: + disconnect_client_from_server(cfg, conn); + return res; +} + +static void vxlan_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst) +{ + snprintf(dst, TUNNEL_ARGS_MAX_LEN, "id %d dstport %d udp6zerocsumrx", + VXLAN_ID, VXLAN_PORT); +} + +static void udp_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst) +{ + bool is_mpls = !strcmp(cfg->mac_tun_type, "mpls"); + + snprintf(dst, TUNNEL_ARGS_MAX_LEN, + "encap fou encap-sport auto encap-dport %d", + is_mpls ? MPLS_UDP_PORT : UDP_PORT); +} + +static int configure_fou_rx_port(struct subtest_cfg *cfg, bool add) +{ + bool is_mpls = strcmp(cfg->mac_tun_type, "mpls") == 0; + int fou_proto; + + if (is_mpls) + fou_proto = FOU_MPLS_PROTO; + else + fou_proto = cfg->ipproto == 6 ? 41 : 4; + + SYS(fail, "ip fou %s port %d ipproto %d%s", add ? "add" : "del", + is_mpls ? MPLS_UDP_PORT : UDP_PORT, fou_proto, + cfg->ipproto == 6 ? " -6" : ""); + + return 0; +fail: + return 1; +} + +static int add_fou_rx_port(struct subtest_cfg *cfg) +{ + return configure_fou_rx_port(cfg, true); +} + +static int del_fou_rx_port(struct subtest_cfg *cfg) +{ + return configure_fou_rx_port(cfg, false); +} + +static int update_tunnel_intf_addr(struct subtest_cfg *cfg) +{ + SYS(fail, "ip link set dev testtun0 address " MAC_ADDR_VETH2); + return 0; +fail: + return -1; +} + +static int configure_kernel_for_mpls(struct subtest_cfg *cfg) +{ + SYS(fail, "sysctl -qw net.mpls.platform_labels=%d", + MPLS_TABLE_ENTRIES_COUNT); + SYS(fail, "ip -f mpls route add 1000 dev lo"); + SYS(fail, "ip link set lo up"); + SYS(fail, "sysctl -qw net.mpls.conf.testtun0.input=1"); + SYS(fail, "sysctl -qw net.ipv4.conf.lo.rp_filter=0"); + return 0; +fail: + return -1; +} + +static int configure_encapsulation(struct subtest_cfg *cfg) +{ + int ret; + + ret = tc_prog_attach("veth1", -1, cfg->client_egress_prog_fd); + + return ret; +} + +static int configure_kernel_decapsulation(struct subtest_cfg *cfg) +{ + struct nstoken *nstoken = open_netns(SERVER_NS); + int ret = -1; + + if (!ASSERT_OK_PTR(nstoken, "open server ns")) + return ret; + + if (cfg->configure_fou_rx_port && + !ASSERT_OK(add_fou_rx_port(cfg), "configure FOU RX port")) + goto fail; + SYS(fail, "ip link add name testtun0 type %s %s remote %s local %s %s", + cfg->iproute_tun_type, cfg->tmode ? cfg->tmode : "", + cfg->tunnel_client_addr, cfg->tunnel_server_addr, + cfg->extra_decap_mod_args); + if (cfg->tunnel_need_veth_mac && + !ASSERT_OK(update_tunnel_intf_addr(cfg), "update testtun0 mac")) + goto fail; + if (cfg->configure_mpls && + (!ASSERT_OK(configure_kernel_for_mpls(cfg), + "configure MPLS decap"))) + goto fail; + SYS(fail, "sysctl -qw net.ipv4.conf.all.rp_filter=0"); + SYS(fail, "sysctl -qw net.ipv4.conf.testtun0.rp_filter=0"); + SYS(fail, "ip link set dev testtun0 up"); + + ret = 0; +fail: + close_netns(nstoken); + return ret; +} + +static void remove_kernel_decapsulation(struct subtest_cfg *cfg) +{ + SYS_NOFAIL("ip link del testtun0"); + if (cfg->configure_mpls) + SYS_NOFAIL("ip -f mpls route del 1000 dev lo"); + if (cfg->configure_fou_rx_port) + del_fou_rx_port(cfg); +} + +static int configure_ebpf_decapsulation(struct subtest_cfg *cfg) +{ + struct nstoken *nstoken = open_netns(SERVER_NS); + int ret = -1; + + if (!ASSERT_OK_PTR(nstoken, "open server ns")) + return ret; + + if (!cfg->expect_kern_decap_failure) + SYS(fail, "ip link del testtun0"); + + if (!ASSERT_OK(tc_prog_attach("veth2", cfg->server_ingress_prog_fd, -1), + "attach_program")) + goto fail; + + ret = 0; +fail: + close_netns(nstoken); + return ret; +} + +static void run_test(struct subtest_cfg *cfg) +{ + struct nstoken *nstoken; + + if (!ASSERT_OK(run_server(cfg), "run server")) + return; + + nstoken = open_netns(CLIENT_NS); + if (!ASSERT_OK_PTR(nstoken, "open client ns")) + goto fail; + + /* Basic communication must work */ + if (!ASSERT_OK(send_and_test_data(cfg), "connect without any encap")) + goto fail; + + /* Attach encapsulation program to client */ + if (!ASSERT_OK(configure_encapsulation(cfg), "configure encapsulation")) + goto fail; + + /* If supported, insert kernel decap module, connection must succeed */ + if (!cfg->expect_kern_decap_failure) { + if (!ASSERT_OK(configure_kernel_decapsulation(cfg), + "configure kernel decapsulation")) + goto fail; + if (!ASSERT_OK(send_and_test_data(cfg), + "connect with encap prog and kern decap")) + goto fail; + } + + /* Replace kernel decapsulation with BPF decapsulation, test must pass */ + if (!ASSERT_OK(configure_ebpf_decapsulation(cfg), "configure ebpf decapsulation")) + goto fail; + ASSERT_OK(send_and_test_data(cfg), "connect with encap and decap progs"); + +fail: + close_netns(nstoken); + close(cfg->server_fd); +} + +static int setup(void) +{ + struct nstoken *nstoken_client, *nstoken_server; + int fd, err; + + fd = open("/dev/urandom", O_RDONLY); + if (!ASSERT_OK_FD(fd, "open urandom")) + goto fail; + err = read(fd, tx_buffer, BUFFER_LEN); + close(fd); + + if (!ASSERT_EQ(err, BUFFER_LEN, "read random bytes")) + goto fail; + + /* Configure the testing network */ + if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns") || + !ASSERT_OK(make_netns(SERVER_NS), "create server ns")) + goto fail; + + nstoken_client = open_netns(CLIENT_NS); + if (!ASSERT_OK_PTR(nstoken_client, "open client ns")) + goto fail_delete_ns; + SYS(fail_close_ns_client, "ip link add %s type veth peer name %s", + "veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1, + "veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2); + SYS(fail_close_ns_client, "ethtool -K veth1 tso off"); + SYS(fail_close_ns_client, "ip link set veth1 up"); + nstoken_server = open_netns(SERVER_NS); + if (!ASSERT_OK_PTR(nstoken_server, "open server ns")) + goto fail_close_ns_client; + SYS(fail_close_ns_server, "ip link set veth2 up"); + + close_netns(nstoken_server); + close_netns(nstoken_client); + return 0; + +fail_close_ns_server: + close_netns(nstoken_server); +fail_close_ns_client: + close_netns(nstoken_client); +fail_delete_ns: + SYS_NOFAIL("ip netns del " CLIENT_NS); + SYS_NOFAIL("ip netns del " SERVER_NS); +fail: + return -1; +} + +static int subtest_setup(struct test_tc_tunnel *skel, struct subtest_cfg *cfg) +{ + struct nstoken *nstoken_client, *nstoken_server; + int ret = -1; + + set_subtest_addresses(cfg); + if (!ASSERT_OK(set_subtest_progs(cfg, skel), + "find subtest progs")) + goto fail; + if (cfg->extra_decap_mod_args_cb) + cfg->extra_decap_mod_args_cb(cfg, cfg->extra_decap_mod_args); + + nstoken_client = open_netns(CLIENT_NS); + if (!ASSERT_OK_PTR(nstoken_client, "open client ns")) + goto fail; + SYS(fail_close_client_ns, + "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1"); + SYS(fail_close_client_ns, "ip -4 route flush table main"); + SYS(fail_close_client_ns, + "ip -4 route add " IP4_ADDR_VETH2 " mtu 1450 dev veth1"); + SYS(fail_close_client_ns, + "ip -6 addr add " IP6_ADDR_VETH1 "/64 dev veth1 nodad"); + SYS(fail_close_client_ns, "ip -6 route flush table main"); + SYS(fail_close_client_ns, + "ip -6 route add " IP6_ADDR_VETH2 " mtu 1430 dev veth1"); + nstoken_server = open_netns(SERVER_NS); + if (!ASSERT_OK_PTR(nstoken_server, "open server ns")) + goto fail_close_client_ns; + SYS(fail_close_server_ns, + "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2"); + SYS(fail_close_server_ns, + "ip -6 addr add " IP6_ADDR_VETH2 "/64 dev veth2 nodad"); + + ret = 0; + +fail_close_server_ns: + close_netns(nstoken_server); +fail_close_client_ns: + close_netns(nstoken_client); +fail: + return ret; +} + + +static void subtest_cleanup(struct subtest_cfg *cfg) +{ + struct nstoken *nstoken; + + nstoken = open_netns(CLIENT_NS); + if (ASSERT_OK_PTR(nstoken, "open clien ns")) { + SYS_NOFAIL("tc qdisc delete dev veth1 parent ffff:fff1"); + SYS_NOFAIL("ip a flush veth1"); + close_netns(nstoken); + } + nstoken = open_netns(SERVER_NS); + if (ASSERT_OK_PTR(nstoken, "open clien ns")) { + SYS_NOFAIL("tc qdisc delete dev veth2 parent ffff:fff1"); + SYS_NOFAIL("ip a flush veth2"); + if (!cfg->expect_kern_decap_failure) + remove_kernel_decapsulation(cfg); + close_netns(nstoken); + } +} + +static void cleanup(void) +{ + remove_netns(CLIENT_NS); + remove_netns(SERVER_NS); +} + +static struct subtest_cfg subtests_cfg[] = { + { + .ebpf_tun_type = "ipip", + .mac_tun_type = "none", + .iproute_tun_type = "ipip", + .ipproto = 4, + }, + { + .ebpf_tun_type = "ipip6", + .mac_tun_type = "none", + .iproute_tun_type = "ip6tnl", + .ipproto = 4, + .tunnel_client_addr = IP6_ADDR_VETH1, + .tunnel_server_addr = IP6_ADDR_VETH2, + }, + { + .ebpf_tun_type = "ip6tnl", + .iproute_tun_type = "ip6tnl", + .mac_tun_type = "none", + .ipproto = 6, + }, + { + .mac_tun_type = "none", + .ebpf_tun_type = "sit", + .iproute_tun_type = "sit", + .ipproto = 6, + .tunnel_client_addr = IP4_ADDR_VETH1, + .tunnel_server_addr = IP4_ADDR_VETH2, + }, + { + .ebpf_tun_type = "vxlan", + .mac_tun_type = "eth", + .iproute_tun_type = "vxlan", + .ipproto = 4, + .extra_decap_mod_args_cb = vxlan_decap_mod_args_cb, + .tunnel_need_veth_mac = true + }, + { + .ebpf_tun_type = "ip6vxlan", + .mac_tun_type = "eth", + .iproute_tun_type = "vxlan", + .ipproto = 6, + .extra_decap_mod_args_cb = vxlan_decap_mod_args_cb, + .tunnel_need_veth_mac = true + }, + { + .ebpf_tun_type = "gre", + .mac_tun_type = "none", + .iproute_tun_type = "gre", + .ipproto = 4, + .test_gso = true + }, + { + .ebpf_tun_type = "gre", + .mac_tun_type = "eth", + .iproute_tun_type = "gretap", + .ipproto = 4, + .tunnel_need_veth_mac = true, + .test_gso = true + }, + { + .ebpf_tun_type = "gre", + .mac_tun_type = "mpls", + .iproute_tun_type = "gre", + .ipproto = 4, + .configure_mpls = true, + .test_gso = true + }, + { + .ebpf_tun_type = "ip6gre", + .mac_tun_type = "none", + .iproute_tun_type = "ip6gre", + .ipproto = 6, + .test_gso = true, + }, + { + .ebpf_tun_type = "ip6gre", + .mac_tun_type = "eth", + .iproute_tun_type = "ip6gretap", + .ipproto = 6, + .tunnel_need_veth_mac = true, + .test_gso = true + }, + { + .ebpf_tun_type = "ip6gre", + .mac_tun_type = "mpls", + .iproute_tun_type = "ip6gre", + .ipproto = 6, + .configure_mpls = true, + .test_gso = true + }, + { + .ebpf_tun_type = "udp", + .mac_tun_type = "none", + .iproute_tun_type = "ipip", + .ipproto = 4, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .test_gso = true + }, + { + .ebpf_tun_type = "udp", + .mac_tun_type = "eth", + .iproute_tun_type = "ipip", + .ipproto = 4, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .expect_kern_decap_failure = true, + .test_gso = true + }, + { + .ebpf_tun_type = "udp", + .mac_tun_type = "mpls", + .iproute_tun_type = "ipip", + .ipproto = 4, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .tmode = "mode any ttl 255", + .configure_mpls = true, + .test_gso = true + }, + { + .ebpf_tun_type = "ip6udp", + .mac_tun_type = "none", + .iproute_tun_type = "ip6tnl", + .ipproto = 6, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .test_gso = true + }, + { + .ebpf_tun_type = "ip6udp", + .mac_tun_type = "eth", + .iproute_tun_type = "ip6tnl", + .ipproto = 6, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .expect_kern_decap_failure = true, + .test_gso = true + }, + { + .ebpf_tun_type = "ip6udp", + .mac_tun_type = "mpls", + .iproute_tun_type = "ip6tnl", + .ipproto = 6, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .tmode = "mode any ttl 255", + .expect_kern_decap_failure = true, + .test_gso = true + }, +}; + +void test_tc_tunnel(void) +{ + struct test_tc_tunnel *skel; + struct subtest_cfg *cfg; + int i, ret; + + skel = test_tc_tunnel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open and load")) + return; + + if (!ASSERT_OK(setup(), "global setup")) + goto out; + + for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) { + cfg = &subtests_cfg[i]; + ret = build_subtest_name(cfg, cfg->name, TEST_NAME_MAX_LEN); + if (ret < 0 || !test__start_subtest(cfg->name)) + continue; + if (subtest_setup(skel, cfg) == 0) + run_test(cfg); + subtest_cleanup(cfg); + } + cleanup(); + +out: + test_tc_tunnel__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index cec746e77cd3..eb9309931272 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -71,6 +71,8 @@ #define IP4_ADDR2_VETH1 "172.16.1.20" #define IP4_ADDR_TUNL_DEV0 "10.1.1.100" #define IP4_ADDR_TUNL_DEV1 "10.1.1.200" +#define IP6_ADDR_TUNL_DEV0 "fc80::100" +#define IP6_ADDR_TUNL_DEV1 "fc80::200" #define IP6_ADDR_VETH0 "::11" #define IP6_ADDR1_VETH1 "::22" @@ -98,6 +100,27 @@ #define XFRM_SPI_IN_TO_OUT 0x1 #define XFRM_SPI_OUT_TO_IN 0x2 +#define GRE_TUNL_DEV0 "gre00" +#define GRE_TUNL_DEV1 "gre11" + +#define IP6GRE_TUNL_DEV0 "ip6gre00" +#define IP6GRE_TUNL_DEV1 "ip6gre11" + +#define ERSPAN_TUNL_DEV0 "erspan00" +#define ERSPAN_TUNL_DEV1 "erspan11" + +#define IP6ERSPAN_TUNL_DEV0 "ip6erspan00" +#define IP6ERSPAN_TUNL_DEV1 "ip6erspan11" + +#define GENEVE_TUNL_DEV0 "geneve00" +#define GENEVE_TUNL_DEV1 "geneve11" + +#define IP6GENEVE_TUNL_DEV0 "ip6geneve00" +#define IP6GENEVE_TUNL_DEV1 "ip6geneve11" + +#define IP6TNL_TUNL_DEV0 "ip6tnl00" +#define IP6TNL_TUNL_DEV1 "ip6tnl11" + #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" static int config_device(void) @@ -216,6 +239,18 @@ fail: return -1; } +static int set_ipv4_addr(const char *dev0, const char *dev1) +{ + SYS(fail, "ip -n at_ns0 link set dev %s up", dev0); + SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip link set dev %s up", dev1); + SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1); + + return 0; +fail: + return 1; +} + static int add_ipip_tunnel(enum ipip_encap encap) { int err; @@ -356,6 +391,99 @@ static void delete_xfrm_tunnel(void) IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); } +static int add_ipv4_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s", + dev0, type, opt, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s external", dev1, type); + + return set_ipv4_addr(dev0, dev1); +fail: + return -1; +} + +static void delete_tunnel(const char *dev0, const char *dev1) +{ + if (!dev0 || !dev1) + return; + + SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", dev0); + SYS_NOFAIL("ip link delete dev %s", dev1); +} + +static int set_ipv6_addr(const char *dev0, const char *dev1) +{ + /* disable IPv6 DAD because it might take too long and fail tests */ + SYS(fail, "ip -n at_ns0 addr add %s/96 dev veth0 nodad", IP6_ADDR_VETH0); + SYS(fail, "ip -n at_ns0 link set dev veth0 up"); + SYS(fail, "ip addr add %s/96 dev veth1 nodad", IP6_ADDR1_VETH1); + SYS(fail, "ip link set dev veth1 up"); + + SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip -n at_ns0 addr add dev %s %s/96 nodad", dev0, IP6_ADDR_TUNL_DEV0); + SYS(fail, "ip -n at_ns0 link set dev %s up", dev0); + + SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip addr add dev %s %s/96 nodad", dev1, IP6_ADDR_TUNL_DEV1); + SYS(fail, "ip link set dev %s up", dev1); + return 0; +fail: + return 1; +} + +static int add_ipv6_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s", + dev0, type, opt, IP6_ADDR_VETH0, IP6_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s external", dev1, type); + + return set_ipv6_addr(dev0, dev1); +fail: + return -1; +} + +static int add_geneve_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s id 2 %s remote %s", + dev0, type, opt, IP4_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt); + + return set_ipv4_addr(dev0, dev1); +fail: + return -1; +} + +static int add_ip6geneve_tunnel(const char *dev0, const char *dev1, + const char *type, const char *opt) +{ + if (!type || !opt || !dev0 || !dev1) + return -1; + + SYS(fail, "ip -n at_ns0 link add dev %s type %s id 22 %s remote %s", + dev0, type, opt, IP6_ADDR1_VETH1); + + SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt); + + return set_ipv6_addr(dev0, dev1); +fail: + return -1; +} + static int test_ping(int family, const char *addr) { SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); @@ -364,37 +492,46 @@ fail: return -1; } -static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) +static void ping_dev0(void) { - DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, - .priority = 1, .prog_fd = igr_fd); - DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, - .priority = 1, .prog_fd = egr_fd); - int ret; + /* ping from root namespace test */ + test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); +} - ret = bpf_tc_hook_create(hook); - if (!ASSERT_OK(ret, "create tc hook")) - return ret; +static void ping_dev1(void) +{ + struct nstoken *nstoken; - if (igr_fd >= 0) { - hook->attach_point = BPF_TC_INGRESS; - ret = bpf_tc_attach(hook, &opts1); - if (!ASSERT_OK(ret, "bpf_tc_attach")) { - bpf_tc_hook_destroy(hook); - return ret; - } - } + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns")) + return; - if (egr_fd >= 0) { - hook->attach_point = BPF_TC_EGRESS; - ret = bpf_tc_attach(hook, &opts2); - if (!ASSERT_OK(ret, "bpf_tc_attach")) { - bpf_tc_hook_destroy(hook); - return ret; - } - } + test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); + close_netns(nstoken); +} - return 0; +static void ping6_veth0(void) +{ + test_ping(AF_INET6, IP6_ADDR_VETH0); +} + +static void ping6_dev0(void) +{ + test_ping(AF_INET6, IP6_ADDR_TUNL_DEV0); +} + +static void ping6_dev1(void) +{ + struct nstoken *nstoken; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns")) + return; + + test_ping(AF_INET, IP6_ADDR_TUNL_DEV1); + close_netns(nstoken); } static void test_vxlan_tunnel(void) @@ -404,11 +541,9 @@ static void test_vxlan_tunnel(void) int local_ip_map_fd = -1; int set_src_prog_fd, get_src_prog_fd; int set_dst_prog_fd; - int key = 0, ifindex = -1; + int key = 0; uint local_ip; int err; - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); /* add vxlan tunnel */ err = add_vxlan_tunnel(); @@ -419,42 +554,22 @@ static void test_vxlan_tunnel(void) skel = test_tunnel_kern__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex(VXLAN_TUNL_DEV1); - if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex")) - goto done; - tc_hook.ifindex = ifindex; get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src); set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src); - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + if (tc_prog_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; /* load and attach bpf prog to veth dev tc hook point */ - ifindex = if_nametoindex("veth1"); - if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) - goto done; - tc_hook.ifindex = ifindex; set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst); - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1)) + if (tc_prog_attach("veth1", set_dst_prog_fd, -1)) goto done; /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto done; - ifindex = if_nametoindex(VXLAN_TUNL_DEV0); - if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex")) - goto done; - tc_hook.ifindex = ifindex; set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst); - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) + if (tc_prog_attach(VXLAN_TUNL_DEV0, -1, set_dst_prog_fd)) goto done; close_netns(nstoken); @@ -468,9 +583,7 @@ static void test_vxlan_tunnel(void) goto done; /* ping test */ - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); - if (!ASSERT_OK(err, "test_ping")) - goto done; + ping_dev0(); done: /* delete vxlan tunnel */ @@ -488,11 +601,9 @@ static void test_ip6vxlan_tunnel(void) int local_ip_map_fd = -1; int set_src_prog_fd, get_src_prog_fd; int set_dst_prog_fd; - int key = 0, ifindex = -1; + int key = 0; uint local_ip; int err; - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); /* add vxlan tunnel */ err = add_ip6vxlan_tunnel(); @@ -503,31 +614,17 @@ static void test_ip6vxlan_tunnel(void) skel = test_tunnel_kern__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1); - if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex")) - goto done; - tc_hook.ifindex = ifindex; get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src); set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src); - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + if (tc_prog_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto done; - ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0); - if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex")) - goto done; - tc_hook.ifindex = ifindex; set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst); - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) + if (tc_prog_attach(IP6VXLAN_TUNL_DEV0, -1, set_dst_prog_fd)) goto done; close_netns(nstoken); @@ -541,9 +638,7 @@ static void test_ip6vxlan_tunnel(void) goto done; /* ping test */ - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); - if (!ASSERT_OK(err, "test_ping")) - goto done; + ping_dev0(); done: /* delete ipv6 vxlan tunnel */ @@ -557,12 +652,8 @@ done: static void test_ipip_tunnel(enum ipip_encap encap) { struct test_tunnel_kern *skel = NULL; - struct nstoken *nstoken; int set_src_prog_fd, get_src_prog_fd; - int ifindex = -1; int err; - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); /* add ipip tunnel */ err = add_ipip_tunnel(encap); @@ -573,10 +664,6 @@ static void test_ipip_tunnel(enum ipip_encap encap) skel = test_tunnel_kern__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex(IPIP_TUNL_DEV1); - if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex")) - goto done; - tc_hook.ifindex = ifindex; switch (encap) { case FOU: @@ -598,26 +685,11 @@ static void test_ipip_tunnel(enum ipip_encap encap) skel->progs.ipip_set_tunnel); } - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) - goto done; - - /* ping from root namespace test */ - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); - if (!ASSERT_OK(err, "test_ping")) + if (tc_prog_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; - /* ping from at_ns0 namespace test */ - nstoken = open_netns("at_ns0"); - if (!ASSERT_OK_PTR(nstoken, "setns")) - goto done; - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); - if (!ASSERT_OK(err, "test_ping")) - goto done; - close_netns(nstoken); + ping_dev0(); + ping_dev1(); done: /* delete ipip tunnel */ @@ -628,11 +700,8 @@ done: static void test_xfrm_tunnel(void) { - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, - .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_xdp_attach_opts, opts); struct test_tunnel_kern *skel = NULL; - struct nstoken *nstoken; int xdp_prog_fd; int tc_prog_fd; int ifindex; @@ -646,19 +715,16 @@ static void test_xfrm_tunnel(void) if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) goto done; - ifindex = if_nametoindex("veth1"); - if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) - goto done; /* attach tc prog to tunnel dev */ - tc_hook.ifindex = ifindex; tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); - if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd")) - goto done; - if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) + if (tc_prog_attach("veth1", tc_prog_fd, -1)) goto done; /* attach xdp prog to tunnel dev */ + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp); if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd")) goto done; @@ -666,14 +732,7 @@ static void test_xfrm_tunnel(void) if (!ASSERT_OK(err, "bpf_xdp_attach")) goto done; - /* ping from at_ns0 namespace test */ - nstoken = open_netns("at_ns0"); - if (!ASSERT_OK_PTR(nstoken, "setns")) - goto done; - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); - close_netns(nstoken); - if (!ASSERT_OK(err, "test_ping")) - goto done; + ping_dev1(); if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id")) goto done; @@ -690,6 +749,281 @@ done: test_tunnel_kern__destroy(skel); } +enum gre_test { + GRE, + GRE_NOKEY, + GRETAP, + GRETAP_NOKEY, +}; + +static void test_gre_tunnel(enum gre_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case GRE: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + case GRE_NOKEY: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq key 2"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + case GRETAP: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + case GRETAP_NOKEY: + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq key 2"); + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel); + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + if (tc_prog_attach(GRE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); + +done: + delete_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +enum ip6gre_test { + IP6GRE, + IP6GRETAP +}; + +static void test_ip6gre_tunnel(enum ip6gre_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case IP6GRE: + err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1, + "ip6gre", "flowlabel 0xbcdef key 2"); + break; + case IP6GRETAP: + err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1, + "ip6gretap", "flowlabel 0xbcdef key 2"); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel); + if (tc_prog_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping6_veth0(); + ping6_dev1(); + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +enum erspan_test { + V1, + V2 +}; + +static void test_erspan_tunnel(enum erspan_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case V1: + err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1, + "erspan", "seq key 2 erspan_ver 1 erspan 123"); + break; + case V2: + err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1, + "erspan", + "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 3"); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel); + get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel); + if (tc_prog_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +static void test_ip6erspan_tunnel(enum erspan_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + switch (test) { + case V1: + err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1, + "ip6erspan", "seq key 2 erspan_ver 1 erspan 123"); + break; + case V2: + err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1, + "ip6erspan", + "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 7"); + break; + } + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel); + if (tc_prog_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping6_veth0(); + ping_dev1(); +done: + delete_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +static void test_geneve_tunnel(void) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + err = add_geneve_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1, + "geneve", "dstport 6081"); + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel); + get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel); + if (tc_prog_attach(GENEVE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +static void test_ip6geneve_tunnel(void) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + err = add_ip6geneve_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1, + "geneve", ""); + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel); + if (tc_prog_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping_dev0(); + ping_dev1(); +done: + delete_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + +enum ip6tnl_test { + IPIP6, + IP6IP6 +}; + +static void test_ip6tnl_tunnel(enum ip6tnl_test test) +{ + struct test_tunnel_kern *skel; + int set_fd, get_fd; + int err; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + return; + + err = add_ipv6_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1, "ip6tnl", ""); + if (!ASSERT_OK(err, "add tunnel")) + goto done; + + switch (test) { + case IPIP6: + set_fd = bpf_program__fd(skel->progs.ipip6_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ipip6_get_tunnel); + break; + case IP6IP6: + set_fd = bpf_program__fd(skel->progs.ip6ip6_set_tunnel); + get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel); + break; + } + if (tc_prog_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd)) + goto done; + + ping6_veth0(); + switch (test) { + case IPIP6: + ping_dev0(); + ping_dev1(); + break; + case IP6IP6: + ping6_dev0(); + ping6_dev1(); + break; + } + +done: + delete_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1); + test_tunnel_kern__destroy(skel); +} + #define RUN_TEST(name, ...) \ ({ \ if (test__start_subtest(#name)) { \ @@ -707,6 +1041,20 @@ static void *test_tunnel_run_tests(void *arg) RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); RUN_TEST(xfrm_tunnel); + RUN_TEST(gre_tunnel, GRE); + RUN_TEST(gre_tunnel, GRE_NOKEY); + RUN_TEST(gre_tunnel, GRETAP); + RUN_TEST(gre_tunnel, GRETAP_NOKEY); + RUN_TEST(ip6gre_tunnel, IP6GRE); + RUN_TEST(ip6gre_tunnel, IP6GRETAP); + RUN_TEST(erspan_tunnel, V1); + RUN_TEST(erspan_tunnel, V2); + RUN_TEST(ip6erspan_tunnel, V1); + RUN_TEST(ip6erspan_tunnel, V2); + RUN_TEST(geneve_tunnel); + RUN_TEST(ip6geneve_tunnel); + RUN_TEST(ip6tnl_tunnel, IPIP6); + RUN_TEST(ip6tnl_tunnel, IP6IP6); return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c new file mode 100644 index 000000000000..9aff08ac55c0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <string.h> +#include <stdio.h> + +#define __CHECK_STR(str, name) \ + do { \ + if (!ASSERT_HAS_SUBSTR(fix->output, (str), (name))) \ + goto out; \ + } while (0) + +struct fixture { + char tmpfile[80]; + int fd; + char *output; + size_t sz; + char veristat[80]; +}; + +static struct fixture *init_fixture(void) +{ + struct fixture *fix = malloc(sizeof(struct fixture)); + + /* for no_alu32 and cpuv4 veristat is in parent folder */ + if (access("./veristat", F_OK) == 0) + strscpy(fix->veristat, "./veristat"); + else if (access("../veristat", F_OK) == 0) + strscpy(fix->veristat, "../veristat"); + else + PRINT_FAIL("Can't find veristat binary"); + + snprintf(fix->tmpfile, sizeof(fix->tmpfile), "/tmp/test_veristat.XXXXXX"); + fix->fd = mkstemp(fix->tmpfile); + fix->sz = 1000000; + fix->output = malloc(fix->sz); + return fix; +} + +static void teardown_fixture(struct fixture *fix) +{ + free(fix->output); + close(fix->fd); + remove(fix->tmpfile); + free(fix); +} + +static void test_set_global_vars_succeeds(void) +{ + struct fixture *fix = init_fixture(); + + SYS(out, + "%s set_global_vars.bpf.o"\ + " -G \"var_s64 = 0xf000000000000001\" "\ + " -G \"var_u64 = 0xfedcba9876543210\" "\ + " -G \"var_s32 = -0x80000000\" "\ + " -G \"var_u32 = 0x76543210\" "\ + " -G \"var_s16 = -32768\" "\ + " -G \"var_u16 = 60652\" "\ + " -G \"var_s8 = -128\" "\ + " -G \"var_u8 = 255\" "\ + " -G \"var_ea = EA2\" "\ + " -G \"var_eb = EB2\" "\ + " -G \"var_ec=EC2\" "\ + " -G \"var_b = 1\" "\ + " -G \"struct1[2].struct2[1][2].u.var_u8[2]=170\" "\ + " -G \"union1.struct3.var_u8_l = 0xaa\" "\ + " -G \"union1.struct3.var_u8_h = 0xaa\" "\ + " -G \"arr[3]= 171\" " \ + " -G \"arr[EA2] =172\" " \ + " -G \"enum_arr[EC2]=EA3\" " \ + " -G \"three_d[31][7][EA2]=173\"" \ + " -G \"struct1[2].struct2[1][2].u.mat[5][3]=174\" " \ + " -G \"struct11 [ 7 ] [ 5 ] .struct2[0][1].u.mat[3][0] = 175\" " \ + " -vl2 > %s", fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); + __CHECK_STR("=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210"); + __CHECK_STR("=0x80000000 ", "var_s32 = -0x80000000"); + __CHECK_STR("=0x76543210 ", "var_u32 = 0x76543210"); + __CHECK_STR("=0x8000 ", "var_s16 = -32768"); + __CHECK_STR("=0xecec ", "var_u16 = 60652"); + __CHECK_STR("=128 ", "var_s8 = -128"); + __CHECK_STR("=255 ", "var_u8 = 255"); + __CHECK_STR("=11 ", "var_ea = EA2"); + __CHECK_STR("=12 ", "var_eb = EB2"); + __CHECK_STR("=13 ", "var_ec = EC2"); + __CHECK_STR("=1 ", "var_b = 1"); + __CHECK_STR("=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170"); + __CHECK_STR("=0xaaaa ", "union1.var_u16 = 0xaaaa"); + __CHECK_STR("=171 ", "arr[3]= 171"); + __CHECK_STR("=172 ", "arr[EA2] =172"); + __CHECK_STR("=10 ", "enum_arr[EC2]=EA3"); + __CHECK_STR("=173 ", "matrix[31][7][11]=173"); + __CHECK_STR("=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174"); + __CHECK_STR("=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175"); + +out: + teardown_fixture(fix); +} + +static void test_set_global_vars_from_file_succeeds(void) +{ + struct fixture *fix = init_fixture(); + char input_file[80]; + const char *vars = "var_s16 = -32768\nvar_u16 = 60652"; + int fd; + + snprintf(input_file, sizeof(input_file), "/tmp/veristat_input.XXXXXX"); + fd = mkstemp(input_file); + if (!ASSERT_GE(fd, 0, "valid fd")) + goto out; + + write(fd, vars, strlen(vars)); + syncfs(fd); + SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s", + fix->veristat, input_file, fix->tmpfile); + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("=0x8000 ", "var_s16 = -32768"); + __CHECK_STR("=0xecec ", "var_u16 = 60652"); + +out: + close(fd); + remove(input_file); + teardown_fixture(fix); +} + +static void test_set_global_vars_out_of_range(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"var_s32 = 2147483648\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("is out of range [-2147483648; 2147483647]", "out of range"); + +out: + teardown_fixture(fix); +} + +static void test_unsupported_ptr_array_type(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"ptr_arr[0] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Can't set ptr_arr[0]. Only ints and enums are supported", "ptr_arr"); + +out: + teardown_fixture(fix); +} + +static void test_array_out_of_bounds(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr[99] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Array index 99 is out of bounds", "arr[99]"); + +out: + teardown_fixture(fix); +} + +static void test_array_index_not_found(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr[EG2] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Can't resolve enum value EG2", "arr[EG2]"); + +out: + teardown_fixture(fix); +} + +static void test_array_index_for_non_array(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"var_b[0] = 1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Array index is not expected for var_b", "var_b[0] = 1"); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"union1.struct3[0].var_u8_l=1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Array index is not expected for struct3", "union1.struct3[0].var_u8_l=1"); + +out: + teardown_fixture(fix); +} + +static void test_no_array_index_for_array(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr = 1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Can't set arr. Only ints and enums are supported", "arr = 1"); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"struct1[0].struct2.u.var_u8[2]=1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Can't resolve field u for non-composite type", "struct1[0].struct2.u.var_u8[2]=1"); + +out: + teardown_fixture(fix); +} + +void test_veristat(void) +{ + if (test__start_subtest("set_global_vars_succeeds")) + test_set_global_vars_succeeds(); + + if (test__start_subtest("set_global_vars_out_of_range")) + test_set_global_vars_out_of_range(); + + if (test__start_subtest("set_global_vars_from_file_succeeds")) + test_set_global_vars_from_file_succeeds(); + + if (test__start_subtest("test_unsupported_ptr_array_type")) + test_unsupported_ptr_array_type(); + + if (test__start_subtest("test_array_out_of_bounds")) + test_array_out_of_bounds(); + + if (test__start_subtest("test_array_index_not_found")) + test_array_index_not_found(); + + if (test__start_subtest("test_array_index_for_non_array")) + test_array_index_for_non_array(); + + if (test__start_subtest("test_no_array_index_for_array")) + test_no_array_index_for_array(); + +} + +#undef __CHECK_STR diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c index 8d75424fe6bc..3e98a1665936 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c +++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c @@ -3,17 +3,50 @@ /* Create 3 namespaces with 3 veth peers, and forward packets in-between using * native XDP * - * XDP_TX - * NS1(veth11) NS2(veth22) NS3(veth33) - * | | | - * | | | - * (veth1, (veth2, (veth3, - * id:111) id:122) id:133) - * ^ | ^ | ^ | - * | | XDP_REDIRECT | | XDP_REDIRECT | | - * | ------------------ ------------------ | - * ----------------------------------------- - * XDP_REDIRECT + * Network topology: + * ---------- ---------- ---------- + * | NS1 | | NS2 | | NS3 | + * | veth11 | | veth22 | | veth33 | + * ----|----- -----|---- -----|---- + * | | | + * ----|------------------|----------------|---- + * | veth1 veth2 veth3 | + * | | + * | NSO | + * --------------------------------------------- + * + * Test cases: + * - [test_xdp_veth_redirect] : ping veth33 from veth11 + * + * veth11 veth22 veth33 + * (XDP_PASS) (XDP_TX) (XDP_PASS) + * | | | + * | | | + * veth1 veth2 veth3 + * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT) + * ^ | ^ | ^ | + * | | | | | | + * | ------------------ ------------------ | + * ----------------------------------------- + * + * - [test_xdp_veth_broadcast_redirect]: broadcast from veth11 + * - IPv4 ping : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS + * -> echo request received by all except veth11 + * - IPv4 ping : BPF_F_BROADCAST + * -> echo request received by all veth + * - [test_xdp_veth_egress]: + * - all src mac should be the magic mac + * + * veth11 veth22 veth33 + * (XDP_PASS) (XDP_PASS) (XDP_PASS) + * | | | + * | | | + * veth1 veth2 veth3 + * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT) + * | ^ ^ + * | | | + * ---------------------------------------- + * */ #define _GNU_SOURCE @@ -22,192 +55,545 @@ #include "network_helpers.h" #include "xdp_dummy.skel.h" #include "xdp_redirect_map.skel.h" +#include "xdp_redirect_multi_kern.skel.h" #include "xdp_tx.skel.h" +#include <uapi/linux/if_link.h> #define VETH_PAIRS_COUNT 3 -#define NS_SUFFIX_LEN 6 -#define VETH_NAME_MAX_LEN 16 +#define VETH_NAME_MAX_LEN 32 +#define IP_MAX_LEN 16 #define IP_SRC "10.1.1.11" #define IP_DST "10.1.1.33" -#define IP_CMD_MAX_LEN 128 - -struct skeletons { - struct xdp_dummy *xdp_dummy; - struct xdp_tx *xdp_tx; - struct xdp_redirect_map *xdp_redirect_maps; -}; +#define IP_NEIGH "10.1.1.253" +#define PROG_NAME_MAX_LEN 128 +#define NS_NAME_MAX_LEN 32 struct veth_configuration { char local_veth[VETH_NAME_MAX_LEN]; /* Interface in main namespace */ char remote_veth[VETH_NAME_MAX_LEN]; /* Peer interface in dedicated namespace*/ - const char *namespace; /* Namespace for the remote veth */ - char next_veth[VETH_NAME_MAX_LEN]; /* Local interface to redirect traffic to */ - char *remote_addr; /* IP address of the remote veth */ + char namespace[NS_NAME_MAX_LEN]; /* Namespace for the remote veth */ + int next_veth; /* Local interface to redirect traffic to */ + char remote_addr[IP_MAX_LEN]; /* IP address of the remote veth */ }; -static struct veth_configuration config[VETH_PAIRS_COUNT] = { - { - .local_veth = "veth1", - .remote_veth = "veth11", - .next_veth = "veth2", - .remote_addr = IP_SRC, - .namespace = "ns-veth11" - }, - { - .local_veth = "veth2", - .remote_veth = "veth22", - .next_veth = "veth3", - .remote_addr = NULL, - .namespace = "ns-veth22" - }, +struct net_configuration { + char ns0_name[NS_NAME_MAX_LEN]; + struct veth_configuration veth_cfg[VETH_PAIRS_COUNT]; +}; + +static const struct net_configuration default_config = { + .ns0_name = "ns0-", { - .local_veth = "veth3", - .remote_veth = "veth33", - .next_veth = "veth1", - .remote_addr = IP_DST, - .namespace = "ns-veth33" + { + .local_veth = "veth1-", + .remote_veth = "veth11", + .next_veth = 1, + .remote_addr = IP_SRC, + .namespace = "ns-veth11-" + }, + { + .local_veth = "veth2-", + .remote_veth = "veth22", + .next_veth = 2, + .remote_addr = "", + .namespace = "ns-veth22-" + }, + { + .local_veth = "veth3-", + .remote_veth = "veth33", + .next_veth = 0, + .remote_addr = IP_DST, + .namespace = "ns-veth33-" + } } }; -static int attach_programs_to_veth_pair(struct skeletons *skeletons, int index) +struct prog_configuration { + char local_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to local_veth */ + char remote_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to remote_veth */ + u32 local_flags; /* XDP flags to use on local_veth */ + u32 remote_flags; /* XDP flags to use on remote_veth */ +}; + +static int attach_programs_to_veth_pair(struct bpf_object **objs, size_t nb_obj, + struct net_configuration *net_config, + struct prog_configuration *prog, int index) { struct bpf_program *local_prog, *remote_prog; - struct bpf_link **local_link, **remote_link; struct nstoken *nstoken; - struct bpf_link *link; - int interface; - - switch (index) { - case 0: - local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_0; - local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_0; - remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog; - remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog; - break; - case 1: - local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_1; - local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_1; - remote_prog = skeletons->xdp_tx->progs.xdp_tx; - remote_link = &skeletons->xdp_tx->links.xdp_tx; - break; - case 2: - local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_2; - local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_2; - remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog; - remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog; - break; + int interface, ret, i; + + for (i = 0; i < nb_obj; i++) { + local_prog = bpf_object__find_program_by_name(objs[i], prog[index].local_name); + if (local_prog) + break; } - interface = if_nametoindex(config[index].local_veth); + if (!ASSERT_OK_PTR(local_prog, "find local program")) + return -1; + + for (i = 0; i < nb_obj; i++) { + remote_prog = bpf_object__find_program_by_name(objs[i], prog[index].remote_name); + if (remote_prog) + break; + } + if (!ASSERT_OK_PTR(remote_prog, "find remote program")) + return -1; + + interface = if_nametoindex(net_config->veth_cfg[index].local_veth); if (!ASSERT_NEQ(interface, 0, "non zero interface index")) return -1; - link = bpf_program__attach_xdp(local_prog, interface); - if (!ASSERT_OK_PTR(link, "attach xdp program to local veth")) + + ret = bpf_xdp_attach(interface, bpf_program__fd(local_prog), + prog[index].local_flags, NULL); + if (!ASSERT_OK(ret, "attach xdp program to local veth")) return -1; - *local_link = link; - nstoken = open_netns(config[index].namespace); + + nstoken = open_netns(net_config->veth_cfg[index].namespace); if (!ASSERT_OK_PTR(nstoken, "switch to remote veth namespace")) return -1; - interface = if_nametoindex(config[index].remote_veth); + + interface = if_nametoindex(net_config->veth_cfg[index].remote_veth); if (!ASSERT_NEQ(interface, 0, "non zero interface index")) { close_netns(nstoken); return -1; } - link = bpf_program__attach_xdp(remote_prog, interface); - *remote_link = link; - close_netns(nstoken); - if (!ASSERT_OK_PTR(link, "attach xdp program to remote veth")) + + ret = bpf_xdp_attach(interface, bpf_program__fd(remote_prog), + prog[index].remote_flags, NULL); + if (!ASSERT_OK(ret, "attach xdp program to remote veth")) { + close_netns(nstoken); return -1; + } + close_netns(nstoken); return 0; } -static int configure_network(struct skeletons *skeletons) +static int create_network(struct net_configuration *net_config) { - int interface_id; - int map_fd; - int err; - int i = 0; + struct nstoken *nstoken = NULL; + int i, err; + + memcpy(net_config, &default_config, sizeof(struct net_configuration)); + + /* Create unique namespaces */ + err = append_tid(net_config->ns0_name, NS_NAME_MAX_LEN); + if (!ASSERT_OK(err, "append TID to ns0 name")) + goto fail; + SYS(fail, "ip netns add %s", net_config->ns0_name); - /* First create and configure all interfaces */ for (i = 0; i < VETH_PAIRS_COUNT; i++) { - SYS(fail, "ip netns add %s", config[i].namespace); - SYS(fail, "ip link add %s type veth peer name %s netns %s", - config[i].local_veth, config[i].remote_veth, config[i].namespace); - SYS(fail, "ip link set dev %s up", config[i].local_veth); - if (config[i].remote_addr) - SYS(fail, "ip -n %s addr add %s/24 dev %s", config[i].namespace, - config[i].remote_addr, config[i].remote_veth); - SYS(fail, "ip -n %s link set dev %s up", config[i].namespace, - config[i].remote_veth); + err = append_tid(net_config->veth_cfg[i].namespace, NS_NAME_MAX_LEN); + if (!ASSERT_OK(err, "append TID to ns name")) + goto fail; + SYS(fail, "ip netns add %s", net_config->veth_cfg[i].namespace); } - /* Then configure the redirect map and attach programs to interfaces */ - map_fd = bpf_map__fd(skeletons->xdp_redirect_maps->maps.tx_port); - if (!ASSERT_GE(map_fd, 0, "open redirect map")) + /* Create interfaces */ + nstoken = open_netns(net_config->ns0_name); + if (!nstoken) goto fail; + for (i = 0; i < VETH_PAIRS_COUNT; i++) { - interface_id = if_nametoindex(config[i].next_veth); - if (!ASSERT_NEQ(interface_id, 0, "non zero interface index")) - goto fail; - err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY); - if (!ASSERT_OK(err, "configure interface redirection through map")) - goto fail; - if (attach_programs_to_veth_pair(skeletons, i)) - goto fail; + SYS(fail, "ip link add %s type veth peer name %s netns %s", + net_config->veth_cfg[i].local_veth, net_config->veth_cfg[i].remote_veth, + net_config->veth_cfg[i].namespace); + SYS(fail, "ip link set dev %s up", net_config->veth_cfg[i].local_veth); + if (net_config->veth_cfg[i].remote_addr[0]) + SYS(fail, "ip -n %s addr add %s/24 dev %s", + net_config->veth_cfg[i].namespace, + net_config->veth_cfg[i].remote_addr, + net_config->veth_cfg[i].remote_veth); + SYS(fail, "ip -n %s link set dev %s up", net_config->veth_cfg[i].namespace, + net_config->veth_cfg[i].remote_veth); } + close_netns(nstoken); return 0; fail: + close_netns(nstoken); return -1; } -static void cleanup_network(void) +static void cleanup_network(struct net_configuration *net_config) { int i; - /* Deleting namespaces is enough to automatically remove veth pairs as well - */ + SYS_NOFAIL("ip netns del %s", net_config->ns0_name); for (i = 0; i < VETH_PAIRS_COUNT; i++) - SYS_NOFAIL("ip netns del %s", config[i].namespace); + SYS_NOFAIL("ip netns del %s", net_config->veth_cfg[i].namespace); } -static int check_ping(struct skeletons *skeletons) +#define VETH_REDIRECT_SKEL_NB 3 +static void xdp_veth_redirect(u32 flags) { + struct prog_configuration ping_config[VETH_PAIRS_COUNT] = { + { + .local_name = "xdp_redirect_map_0", + .remote_name = "xdp_dummy_prog", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_1", + .remote_name = "xdp_tx", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_2", + .remote_name = "xdp_dummy_prog", + .local_flags = flags, + .remote_flags = flags, + } + }; + struct bpf_object *bpf_objs[VETH_REDIRECT_SKEL_NB]; + struct xdp_redirect_map *xdp_redirect_map; + struct net_configuration net_config; + struct nstoken *nstoken = NULL; + struct xdp_dummy *xdp_dummy; + struct xdp_tx *xdp_tx; + int map_fd; + int i; + + xdp_dummy = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load")) + return; + + xdp_tx = xdp_tx__open_and_load(); + if (!ASSERT_OK_PTR(xdp_tx, "xdp_tx__open_and_load")) + goto destroy_xdp_dummy; + + xdp_redirect_map = xdp_redirect_map__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) + goto destroy_xdp_tx; + + if (!ASSERT_OK(create_network(&net_config), "create network")) + goto destroy_xdp_redirect_map; + + /* Then configure the redirect map and attach programs to interfaces */ + map_fd = bpf_map__fd(xdp_redirect_map->maps.tx_port); + if (!ASSERT_OK_FD(map_fd, "open redirect map")) + goto destroy_xdp_redirect_map; + + bpf_objs[0] = xdp_dummy->obj; + bpf_objs[1] = xdp_tx->obj; + bpf_objs[2] = xdp_redirect_map->obj; + + nstoken = open_netns(net_config.ns0_name); + if (!ASSERT_OK_PTR(nstoken, "open NS0")) + goto destroy_xdp_redirect_map; + + for (i = 0; i < VETH_PAIRS_COUNT; i++) { + int next_veth = net_config.veth_cfg[i].next_veth; + int interface_id; + int err; + + interface_id = if_nametoindex(net_config.veth_cfg[next_veth].local_veth); + if (!ASSERT_NEQ(interface_id, 0, "non zero interface index")) + goto destroy_xdp_redirect_map; + err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY); + if (!ASSERT_OK(err, "configure interface redirection through map")) + goto destroy_xdp_redirect_map; + if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB, + &net_config, ping_config, i)) + goto destroy_xdp_redirect_map; + } + /* Test: if all interfaces are properly configured, we must be able to ping * veth33 from veth11 */ - return SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null", - config[0].namespace, IP_DST); + ASSERT_OK(SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null", + net_config.veth_cfg[0].namespace, IP_DST), "ping"); + +destroy_xdp_redirect_map: + close_netns(nstoken); + xdp_redirect_map__destroy(xdp_redirect_map); +destroy_xdp_tx: + xdp_tx__destroy(xdp_tx); +destroy_xdp_dummy: + xdp_dummy__destroy(xdp_dummy); + + cleanup_network(&net_config); } -void test_xdp_veth_redirect(void) +#define BROADCAST_REDIRECT_SKEL_NB 2 +static void xdp_veth_broadcast_redirect(u32 attach_flags, u64 redirect_flags) { - struct skeletons skeletons = {}; + struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = { + { + .local_name = "xdp_redirect_map_multi_prog", + .remote_name = "xdp_count_0", + .local_flags = attach_flags, + .remote_flags = attach_flags, + }, + { + .local_name = "xdp_redirect_map_multi_prog", + .remote_name = "xdp_count_1", + .local_flags = attach_flags, + .remote_flags = attach_flags, + }, + { + .local_name = "xdp_redirect_map_multi_prog", + .remote_name = "xdp_count_2", + .local_flags = attach_flags, + .remote_flags = attach_flags, + } + }; + struct bpf_object *bpf_objs[BROADCAST_REDIRECT_SKEL_NB]; + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; + struct xdp_redirect_map *xdp_redirect_map; + struct bpf_devmap_val devmap_val = {}; + struct net_configuration net_config; + struct nstoken *nstoken = NULL; + u16 protocol = ETH_P_IP; + int group_map; + int flags_map; + int cnt_map; + u64 cnt = 0; + int i, err; - skeletons.xdp_dummy = xdp_dummy__open_and_load(); - if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load")) + xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load")) return; - skeletons.xdp_tx = xdp_tx__open_and_load(); - if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load")) + xdp_redirect_map = xdp_redirect_map__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) + goto destroy_xdp_redirect_multi_kern; + + if (!ASSERT_OK(create_network(&net_config), "create network")) + goto destroy_xdp_redirect_map; + + group_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_all); + if (!ASSERT_OK_FD(group_map, "open map_all")) + goto destroy_xdp_redirect_map; + + flags_map = bpf_map__fd(xdp_redirect_multi_kern->maps.redirect_flags); + if (!ASSERT_OK_FD(group_map, "open map_all")) + goto destroy_xdp_redirect_map; + + err = bpf_map_update_elem(flags_map, &protocol, &redirect_flags, BPF_NOEXIST); + if (!ASSERT_OK(err, "init IP count")) + goto destroy_xdp_redirect_map; + + cnt_map = bpf_map__fd(xdp_redirect_map->maps.rxcnt); + if (!ASSERT_OK_FD(cnt_map, "open rxcnt map")) + goto destroy_xdp_redirect_map; + + bpf_objs[0] = xdp_redirect_multi_kern->obj; + bpf_objs[1] = xdp_redirect_map->obj; + + nstoken = open_netns(net_config.ns0_name); + if (!ASSERT_OK_PTR(nstoken, "open NS0")) + goto destroy_xdp_redirect_map; + + for (i = 0; i < VETH_PAIRS_COUNT; i++) { + int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth); + + if (attach_programs_to_veth_pair(bpf_objs, BROADCAST_REDIRECT_SKEL_NB, + &net_config, prog_cfg, i)) + goto destroy_xdp_redirect_map; + + SYS(destroy_xdp_redirect_map, + "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s", + net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth); + + devmap_val.ifindex = ifindex; + err = bpf_map_update_elem(group_map, &ifindex, &devmap_val, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto destroy_xdp_redirect_map; + + } + + SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ", + net_config.veth_cfg[0].namespace, IP_NEIGH); + + for (i = 0; i < VETH_PAIRS_COUNT; i++) { + err = bpf_map_lookup_elem(cnt_map, &i, &cnt); + if (!ASSERT_OK(err, "get IP cnt")) + goto destroy_xdp_redirect_map; + + if (redirect_flags & BPF_F_EXCLUDE_INGRESS) + /* veth11 shouldn't receive the ICMP requests; + * others should + */ + ASSERT_EQ(cnt, i ? 4 : 0, "compare IP cnt"); + else + /* All remote veth should receive the ICMP requests */ + ASSERT_EQ(cnt, 4, "compare IP cnt"); + } + +destroy_xdp_redirect_map: + close_netns(nstoken); + xdp_redirect_map__destroy(xdp_redirect_map); +destroy_xdp_redirect_multi_kern: + xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern); + + cleanup_network(&net_config); +} + +#define VETH_EGRESS_SKEL_NB 3 +static void xdp_veth_egress(u32 flags) +{ + struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = { + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "xdp_dummy_prog", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "store_mac_1", + .local_flags = flags, + .remote_flags = flags, + }, + { + .local_name = "xdp_redirect_map_all_prog", + .remote_name = "store_mac_2", + .local_flags = flags, + .remote_flags = flags, + } + }; + const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}; + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; + struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB]; + struct xdp_redirect_map *xdp_redirect_map; + struct bpf_devmap_val devmap_val = {}; + struct net_configuration net_config; + int mac_map, egress_map, res_map; + struct nstoken *nstoken = NULL; + struct xdp_dummy *xdp_dummy; + int err; + int i; + + xdp_dummy = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load")) + return; + + xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load")) goto destroy_xdp_dummy; - skeletons.xdp_redirect_maps = xdp_redirect_map__open_and_load(); - if (!ASSERT_OK_PTR(skeletons.xdp_redirect_maps, "xdp_redirect_map__open_and_load")) - goto destroy_xdp_tx; + xdp_redirect_map = xdp_redirect_map__open_and_load(); + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) + goto destroy_xdp_redirect_multi_kern; - if (configure_network(&skeletons)) + if (!ASSERT_OK(create_network(&net_config), "create network")) goto destroy_xdp_redirect_map; - ASSERT_OK(check_ping(&skeletons), "ping"); + mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map); + if (!ASSERT_OK_FD(mac_map, "open mac_map")) + goto destroy_xdp_redirect_map; + + egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress); + if (!ASSERT_OK_FD(egress_map, "open map_egress")) + goto destroy_xdp_redirect_map; + + devmap_val.bpf_prog.fd = bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog); + + bpf_objs[0] = xdp_dummy->obj; + bpf_objs[1] = xdp_redirect_multi_kern->obj; + bpf_objs[2] = xdp_redirect_map->obj; + + nstoken = open_netns(net_config.ns0_name); + if (!ASSERT_OK_PTR(nstoken, "open NS0")) + goto destroy_xdp_redirect_map; + + for (i = 0; i < VETH_PAIRS_COUNT; i++) { + int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth); + + SYS(destroy_xdp_redirect_map, + "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s", + net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth); + + if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB, + &net_config, prog_cfg, i)) + goto destroy_xdp_redirect_map; + + err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto destroy_xdp_redirect_map; + + devmap_val.ifindex = ifindex; + err = bpf_map_update_elem(egress_map, &ifindex, &devmap_val, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto destroy_xdp_redirect_map; + } + + SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ", + net_config.veth_cfg[0].namespace, IP_NEIGH); + + res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac); + if (!ASSERT_OK_FD(res_map, "open rx_map")) + goto destroy_xdp_redirect_map; + + for (i = 0; i < 2; i++) { + u32 key = i; + u64 res; + + err = bpf_map_lookup_elem(res_map, &key, &res); + if (!ASSERT_OK(err, "get MAC res")) + goto destroy_xdp_redirect_map; + + ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac"); + } destroy_xdp_redirect_map: - xdp_redirect_map__destroy(skeletons.xdp_redirect_maps); -destroy_xdp_tx: - xdp_tx__destroy(skeletons.xdp_tx); + close_netns(nstoken); + xdp_redirect_map__destroy(xdp_redirect_map); +destroy_xdp_redirect_multi_kern: + xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern); destroy_xdp_dummy: - xdp_dummy__destroy(skeletons.xdp_dummy); + xdp_dummy__destroy(xdp_dummy); + + cleanup_network(&net_config); +} + +void test_xdp_veth_redirect(void) +{ + if (test__start_subtest("0")) + xdp_veth_redirect(0); + + if (test__start_subtest("DRV_MODE")) + xdp_veth_redirect(XDP_FLAGS_DRV_MODE); + + if (test__start_subtest("SKB_MODE")) + xdp_veth_redirect(XDP_FLAGS_SKB_MODE); +} + +void test_xdp_veth_broadcast_redirect(void) +{ + if (test__start_subtest("0/BROADCAST")) + xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST); + + if (test__start_subtest("0/(BROADCAST | EXCLUDE_INGRESS)")) + xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); + + if (test__start_subtest("DRV_MODE/BROADCAST")) + xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, BPF_F_BROADCAST); + + if (test__start_subtest("DRV_MODE/(BROADCAST | EXCLUDE_INGRESS)")) + xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); + + if (test__start_subtest("SKB_MODE/BROADCAST")) + xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, BPF_F_BROADCAST); + + if (test__start_subtest("SKB_MODE/(BROADCAST | EXCLUDE_INGRESS)")) + xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); +} + +void test_xdp_veth_egress(void) +{ + if (test__start_subtest("0/egress")) + xdp_veth_egress(0); + + if (test__start_subtest("DRV_MODE/egress")) + xdp_veth_egress(XDP_FLAGS_DRV_MODE); - cleanup_network(); + if (test__start_subtest("SKB_MODE/egress")) + xdp_veth_egress(XDP_FLAGS_SKB_MODE); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c new file mode 100644 index 000000000000..7950c504ed28 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c @@ -0,0 +1,2611 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/bpf.h> +#include <errno.h> +#include <linux/bitmap.h> +#include <linux/if_link.h> +#include <linux/mman.h> +#include <linux/netdev.h> +#include <poll.h> +#include <pthread.h> +#include <signal.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <unistd.h> + +#include "network_helpers.h" +#include "test_xsk.h" +#include "xsk_xdp_common.h" +#include "xsk_xdp_progs.skel.h" + +#define DEFAULT_BATCH_SIZE 64 +#define MIN_PKT_SIZE 64 +#define MAX_ETH_JUMBO_SIZE 9000 +#define MAX_INTERFACES 2 +#define MAX_TEARDOWN_ITER 10 +#define MAX_TX_BUDGET_DEFAULT 32 +#define PKT_DUMP_NB_TO_PRINT 16 +/* Just to align the data in the packet */ +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) +#define POLL_TMOUT 1000 +#define THREAD_TMOUT 3 +#define UMEM_HEADROOM_TEST_SIZE 128 +#define XSK_DESC__INVALID_OPTION (0xffff) +#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1) +#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024) +#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024) + +static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00}; + +bool opt_verbose; +pthread_barrier_t barr; +pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; + +int pkts_in_flight; + +/* The payload is a word consisting of a packet sequence number in the upper + * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's + * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0. + */ +static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) +{ + u32 *ptr = (u32 *)dest, i; + + start /= sizeof(*ptr); + size /= sizeof(*ptr); + for (i = 0; i < size; i++) + ptr[i] = htonl(pkt_nb << 16 | (i + start)); +} + +static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr) +{ + memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN); + memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN); + eth_hdr->h_proto = htons(ETH_P_LOOPBACK); +} + +static bool is_umem_valid(struct ifobject *ifobj) +{ + return !!ifobj->umem->umem; +} + +static u32 mode_to_xdp_flags(enum test_mode mode) +{ + return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; +} + +static u64 umem_size(struct xsk_umem_info *umem) +{ + return umem->num_frames * umem->frame_size; +} + +int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, + u64 size) +{ + struct xsk_umem_config cfg = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = umem->frame_size, + .frame_headroom = umem->frame_headroom, + .flags = XSK_UMEM__DEFAULT_FLAGS + }; + int ret; + + if (umem->fill_size) + cfg.fill_size = umem->fill_size; + + if (umem->comp_size) + cfg.comp_size = umem->comp_size; + + if (umem->unaligned_mode) + cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; + + ret = xsk_umem__create(&umem->umem, buffer, size, + &umem->fq, &umem->cq, &cfg); + if (ret) + return ret; + + umem->buffer = buffer; + if (ifobj->shared_umem && ifobj->rx_on) { + umem->base_addr = umem_size(umem); + umem->next_buffer = umem_size(umem); + } + + return 0; +} + +static u64 umem_alloc_buffer(struct xsk_umem_info *umem) +{ + u64 addr; + + addr = umem->next_buffer; + umem->next_buffer += umem->frame_size; + if (umem->next_buffer >= umem->base_addr + umem_size(umem)) + umem->next_buffer = umem->base_addr; + + return addr; +} + +static void umem_reset_alloc(struct xsk_umem_info *umem) +{ + umem->next_buffer = 0; +} + +static int enable_busy_poll(struct xsk_socket_info *xsk) +{ + int sock_opt; + + sock_opt = 1; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + return -errno; + + sock_opt = 20; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + return -errno; + + sock_opt = xsk->batch_size; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + return -errno; + + return 0; +} + +int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, + struct ifobject *ifobject, bool shared) +{ + struct xsk_socket_config cfg = {}; + struct xsk_ring_cons *rxr; + struct xsk_ring_prod *txr; + + xsk->umem = umem; + cfg.rx_size = xsk->rxqsize; + cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg.bind_flags = ifobject->bind_flags; + if (shared) + cfg.bind_flags |= XDP_SHARED_UMEM; + if (ifobject->mtu > MAX_ETH_PKT_SIZE) + cfg.bind_flags |= XDP_USE_SG; + if (umem->comp_size) + cfg.tx_size = umem->comp_size; + if (umem->fill_size) + cfg.rx_size = umem->fill_size; + + txr = ifobject->tx_on ? &xsk->tx : NULL; + rxr = ifobject->rx_on ? &xsk->rx : NULL; + return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg); +} + +static int set_ring_size(struct ifobject *ifobj) +{ + int ret; + u32 ctr = 0; + + while (ctr++ < SOCK_RECONF_CTR) { + ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring); + if (!ret) + break; + + /* Retry if it fails */ + if (ctr >= SOCK_RECONF_CTR || errno != EBUSY) + return -errno; + + usleep(USLEEP_MAX); + } + + return ret; +} + +int hw_ring_size_reset(struct ifobject *ifobj) +{ + ifobj->ring.tx_pending = ifobj->set_ring.default_tx; + ifobj->ring.rx_pending = ifobj->set_ring.default_rx; + return set_ring_size(ifobj); +} + +static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, + struct ifobject *ifobj_rx) +{ + u32 i, j; + + for (i = 0; i < MAX_INTERFACES; i++) { + struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx; + + ifobj->xsk = &ifobj->xsk_arr[0]; + ifobj->use_poll = false; + ifobj->use_fill_ring = true; + ifobj->release_rx = true; + ifobj->validation_func = NULL; + ifobj->use_metadata = false; + + if (i == 0) { + ifobj->rx_on = false; + ifobj->tx_on = true; + } else { + ifobj->rx_on = true; + ifobj->tx_on = false; + } + + memset(ifobj->umem, 0, sizeof(*ifobj->umem)); + ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; + ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + + for (j = 0; j < MAX_SOCKETS; j++) { + memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); + ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE; + if (i == 0) + ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default; + else + ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default; + + memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN); + memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN); + ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0); + ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1); + } + } + + if (ifobj_tx->hw_ring_size_supp) + hw_ring_size_reset(ifobj_tx); + + test->ifobj_tx = ifobj_tx; + test->ifobj_rx = ifobj_rx; + test->current_step = 0; + test->total_steps = 1; + test->nb_sockets = 1; + test->fail = false; + test->set_ring = false; + test->adjust_tail = false; + test->adjust_tail_support = false; + test->mtu = MAX_ETH_PKT_SIZE; + test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog; + test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk; + test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog; + test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk; +} + +void test_init(struct test_spec *test, struct ifobject *ifobj_tx, + struct ifobject *ifobj_rx, enum test_mode mode, + const struct test_spec *test_to_run) +{ + struct pkt_stream *tx_pkt_stream; + struct pkt_stream *rx_pkt_stream; + u32 i; + + tx_pkt_stream = test->tx_pkt_stream_default; + rx_pkt_stream = test->rx_pkt_stream_default; + memset(test, 0, sizeof(*test)); + test->tx_pkt_stream_default = tx_pkt_stream; + test->rx_pkt_stream_default = rx_pkt_stream; + + for (i = 0; i < MAX_INTERFACES; i++) { + struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx; + + ifobj->bind_flags = XDP_USE_NEED_WAKEUP; + if (mode == TEST_MODE_ZC) + ifobj->bind_flags |= XDP_ZEROCOPY; + else + ifobj->bind_flags |= XDP_COPY; + } + + memcpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE); + test->test_func = test_to_run->test_func; + test->mode = mode; + __test_spec_init(test, ifobj_tx, ifobj_rx); +} + +static void test_spec_reset(struct test_spec *test) +{ + __test_spec_init(test, test->ifobj_tx, test->ifobj_rx); +} + +static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx, + struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx, + struct bpf_map *xskmap_tx) +{ + test->xdp_prog_rx = xdp_prog_rx; + test->xdp_prog_tx = xdp_prog_tx; + test->xskmap_rx = xskmap_rx; + test->xskmap_tx = xskmap_tx; +} + +static int test_spec_set_mtu(struct test_spec *test, int mtu) +{ + int err; + + if (test->ifobj_rx->mtu != mtu) { + err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu); + if (err) + return err; + test->ifobj_rx->mtu = mtu; + } + if (test->ifobj_tx->mtu != mtu) { + err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu); + if (err) + return err; + test->ifobj_tx->mtu = mtu; + } + + return 0; +} + +void pkt_stream_reset(struct pkt_stream *pkt_stream) +{ + if (pkt_stream) { + pkt_stream->current_pkt_nb = 0; + pkt_stream->nb_rx_pkts = 0; + } +} + +static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) +{ + if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) + return NULL; + + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; +} + +static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) +{ + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { + (*pkts_sent)++; + if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid) + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; + pkt_stream->current_pkt_nb++; + } + return NULL; +} + +void pkt_stream_delete(struct pkt_stream *pkt_stream) +{ + free(pkt_stream->pkts); + free(pkt_stream); +} + +void pkt_stream_restore_default(struct test_spec *test) +{ + struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream; + struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream; + + if (tx_pkt_stream != test->tx_pkt_stream_default) { + pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream); + test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default; + } + + if (rx_pkt_stream != test->rx_pkt_stream_default) { + pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream); + test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default; + } +} + +static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) +{ + struct pkt_stream *pkt_stream; + + pkt_stream = calloc(1, sizeof(*pkt_stream)); + if (!pkt_stream) + return NULL; + + pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts)); + if (!pkt_stream->pkts) { + free(pkt_stream); + return NULL; + } + + pkt_stream->nb_pkts = nb_pkts; + return pkt_stream; +} + +static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt) +{ + u32 nb_frags = 1, next_frag; + + if (!pkt) + return 1; + + if (!pkt_stream->verbatim) { + if (!pkt->valid || !pkt->len) + return 1; + return ceil_u32(pkt->len, frame_size); + } + + /* Search for the end of the packet in verbatim mode */ + if (!pkt_continues(pkt->options) || !pkt->valid) + return nb_frags; + + next_frag = pkt_stream->current_pkt_nb; + pkt++; + while (next_frag++ < pkt_stream->nb_pkts) { + nb_frags++; + if (!pkt_continues(pkt->options) || !pkt->valid) + break; + pkt++; + } + return nb_frags; +} + +static bool set_pkt_valid(int offset, u32 len) +{ + return len <= MAX_ETH_JUMBO_SIZE; +} + +static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) +{ + pkt->offset = offset; + pkt->len = len; + pkt->valid = set_pkt_valid(offset, len); +} + +static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) +{ + bool prev_pkt_valid = pkt->valid; + + pkt_set(pkt_stream, pkt, offset, len); + pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid; +} + +static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) +{ + return ceil_u32(len, umem->frame_size) * umem->frame_size; +} + +static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off) +{ + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = __pkt_stream_alloc(nb_pkts); + if (!pkt_stream) + return NULL; + + pkt_stream->nb_pkts = nb_pkts; + pkt_stream->max_pkt_len = pkt_len; + for (i = 0; i < nb_pkts; i++) { + struct pkt *pkt = &pkt_stream->pkts[i]; + + pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len); + pkt->pkt_nb = nb_start + i * nb_off; + } + + return pkt_stream; +} + +struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len) +{ + return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1); +} + +static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream) +{ + return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len); +} + +static int pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len) +{ + ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); + + if (!ifobj->xsk->pkt_stream) + return -ENOMEM; + + return 0; +} + +static int pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) +{ + int ret; + + ret = pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len); + if (ret) + return ret; + + return pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len); +} + +static int __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, + int offset) +{ + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream); + if (!pkt_stream) + return -ENOMEM; + + for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2) + pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); + + ifobj->xsk->pkt_stream = pkt_stream; + + return 0; +} + +static int pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) +{ + int ret = __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset); + + if (ret) + return ret; + + return __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset); +} + +static int pkt_stream_receive_half(struct test_spec *test) +{ + struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream; + u32 i; + + if (test->ifobj_rx->xsk->pkt_stream != test->rx_pkt_stream_default) + /* Packet stream has already been replaced so we have to release this one. + * The newly created one will be freed by the restore_default() at the + * end of the test + */ + pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream); + + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts, + pkt_stream->pkts[0].len); + if (!test->ifobj_rx->xsk->pkt_stream) + return -ENOMEM; + + pkt_stream = test->ifobj_rx->xsk->pkt_stream; + for (i = 1; i < pkt_stream->nb_pkts; i += 2) + pkt_stream->pkts[i].valid = false; + + pkt_stream->nb_valid_entries /= 2; + + return 0; +} + +static int pkt_stream_even_odd_sequence(struct test_spec *test) +{ + struct pkt_stream *pkt_stream; + u32 i; + + for (i = 0; i < test->nb_sockets; i++) { + pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream; + pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, + pkt_stream->pkts[0].len, i, 2); + if (!pkt_stream) + return -ENOMEM; + test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream; + + pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream; + pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, + pkt_stream->pkts[0].len, i, 2); + if (!pkt_stream) + return -ENOMEM; + test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream; + } + + return 0; +} + +static void release_even_odd_sequence(struct test_spec *test) +{ + struct pkt_stream *later_free_tx = test->ifobj_tx->xsk->pkt_stream; + struct pkt_stream *later_free_rx = test->ifobj_rx->xsk->pkt_stream; + int i; + + for (i = 0; i < test->nb_sockets; i++) { + /* later_free_{rx/tx} will be freed by restore_default() */ + if (test->ifobj_tx->xsk_arr[i].pkt_stream != later_free_tx) + pkt_stream_delete(test->ifobj_tx->xsk_arr[i].pkt_stream); + if (test->ifobj_rx->xsk_arr[i].pkt_stream != later_free_rx) + pkt_stream_delete(test->ifobj_rx->xsk_arr[i].pkt_stream); + } + +} + +static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) +{ + if (!pkt->valid) + return pkt->offset; + return pkt->offset + umem_alloc_buffer(umem); +} + +static void pkt_stream_cancel(struct pkt_stream *pkt_stream) +{ + pkt_stream->current_pkt_nb--; +} + +static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len, + u32 pkt_nb, u32 bytes_written) +{ + void *data = xsk_umem__get_data(umem->buffer, addr); + + if (len < MIN_PKT_SIZE) + return; + + if (!bytes_written) { + gen_eth_hdr(xsk, data); + + len -= PKT_HDR_SIZE; + data += PKT_HDR_SIZE; + } else { + bytes_written -= PKT_HDR_SIZE; + } + + write_payload(data, pkt_nb, bytes_written, len); +} + +static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames, + u32 nb_frames, bool verbatim) +{ + u32 i, len = 0, pkt_nb = 0, payload = 0; + struct pkt_stream *pkt_stream; + + pkt_stream = __pkt_stream_alloc(nb_frames); + if (!pkt_stream) + return NULL; + + for (i = 0; i < nb_frames; i++) { + struct pkt *pkt = &pkt_stream->pkts[pkt_nb]; + struct pkt *frame = &frames[i]; + + pkt->offset = frame->offset; + if (verbatim) { + *pkt = *frame; + pkt->pkt_nb = payload; + if (!frame->valid || !pkt_continues(frame->options)) + payload++; + } else { + if (frame->valid) + len += frame->len; + if (frame->valid && pkt_continues(frame->options)) + continue; + + pkt->pkt_nb = pkt_nb; + pkt->len = len; + pkt->valid = frame->valid; + pkt->options = 0; + + len = 0; + } + + print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n", + pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb); + + if (pkt->valid && pkt->len > pkt_stream->max_pkt_len) + pkt_stream->max_pkt_len = pkt->len; + + if (pkt->valid) + pkt_stream->nb_valid_entries++; + + pkt_nb++; + } + + pkt_stream->nb_pkts = pkt_nb; + pkt_stream->verbatim = verbatim; + return pkt_stream; +} + +static int pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts) +{ + struct pkt_stream *pkt_stream; + + pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true); + if (!pkt_stream) + return -ENOMEM; + test->ifobj_tx->xsk->pkt_stream = pkt_stream; + + pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false); + if (!pkt_stream) + return -ENOMEM; + test->ifobj_rx->xsk->pkt_stream = pkt_stream; + + return 0; +} + +static void pkt_print_data(u32 *data, u32 cnt) +{ + u32 i; + + for (i = 0; i < cnt; i++) { + u32 seqnum, pkt_nb; + + seqnum = ntohl(*data) & 0xffff; + pkt_nb = ntohl(*data) >> 16; + ksft_print_msg("%u:%u ", pkt_nb, seqnum); + data++; + } +} + +static void pkt_dump(void *pkt, u32 len, bool eth_header) +{ + struct ethhdr *ethhdr = pkt; + u32 i, *data; + + if (eth_header) { + /*extract L2 frame */ + ksft_print_msg("DEBUG>> L2: dst mac: "); + for (i = 0; i < ETH_ALEN; i++) + ksft_print_msg("%02X", ethhdr->h_dest[i]); + + ksft_print_msg("\nDEBUG>> L2: src mac: "); + for (i = 0; i < ETH_ALEN; i++) + ksft_print_msg("%02X", ethhdr->h_source[i]); + + data = pkt + PKT_HDR_SIZE; + } else { + data = pkt; + } + + /*extract L5 frame */ + ksft_print_msg("\nDEBUG>> L5: seqnum: "); + pkt_print_data(data, PKT_DUMP_NB_TO_PRINT); + ksft_print_msg("...."); + if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) { + ksft_print_msg("\n.... "); + pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT, + PKT_DUMP_NB_TO_PRINT); + } + ksft_print_msg("\n---------------------------------------\n"); +} + +static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr) +{ + u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; + u32 offset = addr % umem->frame_size, expected_offset; + int pkt_offset = pkt->valid ? pkt->offset : 0; + + if (!umem->unaligned_mode) + pkt_offset = 0; + + expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; + + if (offset == expected_offset) + return true; + + ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset); + return false; +} + +static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) +{ + void *data = xsk_umem__get_data(buffer, addr); + struct xdp_info *meta = data - sizeof(struct xdp_info); + + if (meta->count != pkt->pkt_nb) { + ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n", + __func__, pkt->pkt_nb, + (unsigned long long)meta->count); + return false; + } + + return true; +} + +static int is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx, bool *supported) +{ + struct bpf_map *data_map; + int adjust_value = 0; + int key = 0; + int ret; + + data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss"); + if (!data_map || !bpf_map__is_internal(data_map)) { + ksft_print_msg("Error: could not find bss section of XDP program\n"); + return -EINVAL; + } + + ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value); + if (ret) { + ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret); + return ret; + } + + /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail + * helper is not supported. Skip the adjust_tail test case in this scenario. + */ + *supported = adjust_value != -EOPNOTSUPP; + + return 0; +} + +static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb, + u32 bytes_processed) +{ + u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum; + void *data = xsk_umem__get_data(umem->buffer, addr); + + addr -= umem->base_addr; + + if (addr >= umem->num_frames * umem->frame_size || + addr + len > umem->num_frames * umem->frame_size) { + ksft_print_msg("Frag invalid addr: %llx len: %u\n", + (unsigned long long)addr, len); + return false; + } + if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { + ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", + (unsigned long long)addr, len); + return false; + } + + pkt_data = data; + if (!bytes_processed) { + pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data); + len -= PKT_HDR_SIZE; + } else { + bytes_processed -= PKT_HDR_SIZE; + } + + expected_seqnum = bytes_processed / sizeof(*pkt_data); + seqnum = ntohl(*pkt_data) & 0xffff; + pkt_nb = ntohl(*pkt_data) >> 16; + + if (expected_pkt_nb != pkt_nb) { + ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n", + __func__, expected_pkt_nb, pkt_nb); + goto error; + } + if (expected_seqnum != seqnum) { + ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n", + __func__, expected_seqnum, seqnum); + goto error; + } + + words_to_end = len / sizeof(*pkt_data) - 1; + pkt_data += words_to_end; + seqnum = ntohl(*pkt_data) & 0xffff; + expected_seqnum += words_to_end; + if (expected_seqnum != seqnum) { + ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n", + __func__, expected_seqnum, seqnum); + goto error; + } + + return true; + +error: + pkt_dump(data, len, !bytes_processed); + return false; +} + +static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) +{ + if (pkt->len != len) { + ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n", + __func__, pkt->len, len); + pkt_dump(xsk_umem__get_data(buffer, addr), len, true); + return false; + } + + return true; +} + +static u32 load_value(u32 *counter) +{ + return __atomic_load_n(counter, __ATOMIC_ACQUIRE); +} + +static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret) +{ + u32 max_budget = MAX_TX_BUDGET_DEFAULT; + u32 cons, ready_to_send; + int delta; + + cons = load_value(xsk->tx.consumer); + ready_to_send = load_value(xsk->tx.producer) - cons; + *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + + delta = load_value(xsk->tx.consumer) - cons; + /* By default, xsk should consume exact @max_budget descs at one + * send in this case where hitting the max budget limit in while + * loop is triggered in __xsk_generic_xmit(). Please make sure that + * the number of descs to be sent is larger than @max_budget, or + * else the tx.consumer will be updated in xskq_cons_peek_desc() + * in time which hides the issue we try to verify. + */ + if (ready_to_send > max_budget && delta != max_budget) + return false; + + return true; +} + +int kick_tx(struct xsk_socket_info *xsk) +{ + int ret; + + if (xsk->check_consumer) { + if (!kick_tx_with_check(xsk, &ret)) + return TEST_FAILURE; + } else { + ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + } + if (ret >= 0) + return TEST_PASS; + if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { + usleep(100); + return TEST_PASS; + } + return TEST_FAILURE; +} + +int kick_rx(struct xsk_socket_info *xsk) +{ + int ret; + + ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); + if (ret < 0) + return TEST_FAILURE; + + return TEST_PASS; +} + +static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) +{ + unsigned int rcvd; + u32 idx; + int ret; + + if (xsk_ring_prod__needs_wakeup(&xsk->tx)) { + ret = kick_tx(xsk); + if (ret) + return TEST_FAILURE; + } + + rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); + if (rcvd) { + if (rcvd > xsk->outstanding_tx) { + u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); + + ksft_print_msg("[%s] Too many packets completed\n", __func__); + ksft_print_msg("Last completion address: %llx\n", + (unsigned long long)addr); + return TEST_FAILURE; + } + + xsk_ring_cons__release(&xsk->umem->cq, rcvd); + xsk->outstanding_tx -= rcvd; + } + + return TEST_PASS; +} + +static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk) +{ + u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; + u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0; + struct pkt_stream *pkt_stream = xsk->pkt_stream; + struct ifobject *ifobj = test->ifobj_rx; + struct xsk_umem_info *umem = xsk->umem; + struct pollfd fds = { }; + struct pkt *pkt; + u64 first_addr = 0; + int ret; + + fds.fd = xsk_socket__fd(xsk->xsk); + fds.events = POLLIN; + + ret = kick_rx(xsk); + if (ret) + return TEST_FAILURE; + + if (ifobj->use_poll) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret < 0) + return TEST_FAILURE; + + if (!ret) { + if (!is_umem_valid(test->ifobj_tx)) + return TEST_PASS; + + ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); + return TEST_CONTINUE; + } + + if (!(fds.revents & POLLIN)) + return TEST_CONTINUE; + } + + rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx); + if (!rcvd) + return TEST_CONTINUE; + + if (ifobj->use_fill_ring) { + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret < 0) + return TEST_FAILURE; + } + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + } + } + + while (frags_processed < rcvd) { + const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); + u64 addr = desc->addr, orig; + + orig = xsk_umem__extract_addr(addr); + addr = xsk_umem__add_offset_to_addr(addr); + + if (!nb_frags) { + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); + if (!pkt) { + ksft_print_msg("[%s] received too many packets addr: %lx len %u\n", + __func__, addr, desc->len); + return TEST_FAILURE; + } + } + + print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n", + addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid); + + if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || + !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata && + !is_metadata_correct(pkt, umem->buffer, addr))) + return TEST_FAILURE; + + if (!nb_frags++) + first_addr = addr; + frags_processed++; + pkt_len += desc->len; + if (ifobj->use_fill_ring) + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; + + if (pkt_continues(desc->options)) + continue; + + /* The complete packet has been received */ + if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || + !is_offset_correct(umem, pkt, addr)) + return TEST_FAILURE; + + pkt_stream->nb_rx_pkts++; + nb_frags = 0; + pkt_len = 0; + } + + if (nb_frags) { + /* In the middle of a packet. Start over from beginning of packet. */ + idx_rx -= nb_frags; + xsk_ring_cons__cancel(&xsk->rx, nb_frags); + if (ifobj->use_fill_ring) { + idx_fq -= nb_frags; + xsk_ring_prod__cancel(&umem->fq, nb_frags); + } + frags_processed -= nb_frags; + pkt_stream_cancel(pkt_stream); + pkts_sent--; + } + + if (ifobj->use_fill_ring) + xsk_ring_prod__submit(&umem->fq, frags_processed); + if (ifobj->release_rx) + xsk_ring_cons__release(&xsk->rx, frags_processed); + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight -= pkts_sent; + pthread_mutex_unlock(&pacing_mutex); + pkts_sent = 0; + + return TEST_CONTINUE; +} + +bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num, + unsigned long *bitmap) +{ + struct pkt_stream *pkt_stream = xsk->pkt_stream; + + if (!pkt_stream) { + __set_bit(sock_num, bitmap); + return false; + } + + if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) { + __set_bit(sock_num, bitmap); + if (bitmap_full(bitmap, test->nb_sockets)) + return true; + } + + return false; +} + +static int receive_pkts(struct test_spec *test) +{ + struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; + DECLARE_BITMAP(bitmap, test->nb_sockets); + struct xsk_socket_info *xsk; + u32 sock_num = 0; + int res, ret; + + bitmap_zero(bitmap, test->nb_sockets); + + ret = gettimeofday(&tv_now, NULL); + if (ret) + return TEST_FAILURE; + + timeradd(&tv_now, &tv_timeout, &tv_end); + + while (1) { + xsk = &test->ifobj_rx->xsk_arr[sock_num]; + + if ((all_packets_received(test, xsk, sock_num, bitmap))) + break; + + res = __receive_pkts(test, xsk); + if (!(res == TEST_PASS || res == TEST_CONTINUE)) + return res; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + return TEST_FAILURE; + + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); + return TEST_FAILURE; + } + sock_num = (sock_num + 1) % test->nb_sockets; + } + + return TEST_PASS; +} + +static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout) +{ + u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len; + struct pkt_stream *pkt_stream = xsk->pkt_stream; + struct xsk_umem_info *umem = ifobject->umem; + bool use_poll = ifobject->use_poll; + struct pollfd fds = { }; + int ret; + + buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len); + /* pkts_in_flight might be negative if many invalid packets are sent */ + if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) / + buffer_len)) { + ret = kick_tx(xsk); + if (ret) + return TEST_FAILURE; + return TEST_CONTINUE; + } + + fds.fd = xsk_socket__fd(xsk->xsk); + fds.events = POLLOUT; + + while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) { + if (use_poll) { + ret = poll(&fds, 1, POLL_TMOUT); + if (timeout) { + if (ret < 0) { + ksft_print_msg("ERROR: [%s] Poll error %d\n", + __func__, errno); + return TEST_FAILURE; + } + if (ret == 0) + return TEST_PASS; + break; + } + if (ret <= 0) { + ksft_print_msg("ERROR: [%s] Poll error %d\n", + __func__, errno); + return TEST_FAILURE; + } + } + + complete_pkts(xsk, xsk->batch_size); + } + + for (i = 0; i < xsk->batch_size; i++) { + struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream); + u32 nb_frags_left, nb_frags, bytes_written = 0; + + if (!pkt) + break; + + nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt); + if (nb_frags > xsk->batch_size - i) { + pkt_stream_cancel(pkt_stream); + xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i); + break; + } + nb_frags_left = nb_frags; + + while (nb_frags_left--) { + struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); + + tx_desc->addr = pkt_get_addr(pkt, ifobject->umem); + if (pkt_stream->verbatim) { + tx_desc->len = pkt->len; + tx_desc->options = pkt->options; + } else if (nb_frags_left) { + tx_desc->len = umem->frame_size; + tx_desc->options = XDP_PKT_CONTD; + } else { + tx_desc->len = pkt->len - bytes_written; + tx_desc->options = 0; + } + if (pkt->valid) + pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb, + bytes_written); + bytes_written += tx_desc->len; + + print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n", + tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb); + + if (nb_frags_left) { + i++; + if (pkt_stream->verbatim) + pkt = pkt_stream_get_next_tx_pkt(pkt_stream); + } + } + + if (pkt && pkt->valid) { + valid_pkts++; + valid_frags += nb_frags; + } + } + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight += valid_pkts; + pthread_mutex_unlock(&pacing_mutex); + + xsk_ring_prod__submit(&xsk->tx, i); + xsk->outstanding_tx += valid_frags; + + if (use_poll) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret <= 0) { + if (ret == 0 && timeout) + return TEST_PASS; + + ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret); + return TEST_FAILURE; + } + } + + if (!timeout) { + if (complete_pkts(xsk, i)) + return TEST_FAILURE; + + usleep(10); + return TEST_PASS; + } + + return TEST_CONTINUE; +} + +static int wait_for_tx_completion(struct xsk_socket_info *xsk) +{ + struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; + int ret; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + return TEST_FAILURE; + timeradd(&tv_now, &tv_timeout, &tv_end); + + while (xsk->outstanding_tx) { + ret = gettimeofday(&tv_now, NULL); + if (ret) + return TEST_FAILURE; + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__); + return TEST_FAILURE; + } + + complete_pkts(xsk, xsk->batch_size); + } + + return TEST_PASS; +} + +bool all_packets_sent(struct test_spec *test, unsigned long *bitmap) +{ + return bitmap_full(bitmap, test->nb_sockets); +} + +static int send_pkts(struct test_spec *test, struct ifobject *ifobject) +{ + bool timeout = !is_umem_valid(test->ifobj_rx); + DECLARE_BITMAP(bitmap, test->nb_sockets); + u32 i, ret; + + bitmap_zero(bitmap, test->nb_sockets); + + while (!(all_packets_sent(test, bitmap))) { + for (i = 0; i < test->nb_sockets; i++) { + struct pkt_stream *pkt_stream; + + pkt_stream = ifobject->xsk_arr[i].pkt_stream; + if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) { + __set_bit(i, bitmap); + continue; + } + ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout); + if (ret == TEST_CONTINUE && !test->fail) + continue; + + if ((ret || test->fail) && !timeout) + return TEST_FAILURE; + + if (ret == TEST_PASS && timeout) + return ret; + + ret = wait_for_tx_completion(&ifobject->xsk_arr[i]); + if (ret) + return TEST_FAILURE; + } + } + + return TEST_PASS; +} + +static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats) +{ + int fd = xsk_socket__fd(xsk), err; + socklen_t optlen, expected_len; + + optlen = sizeof(*stats); + err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen); + if (err) { + ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", + __func__, -err, strerror(-err)); + return TEST_FAILURE; + } + + expected_len = sizeof(struct xdp_statistics); + if (optlen != expected_len) { + ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n", + __func__, expected_len, optlen); + return TEST_FAILURE; + } + + return TEST_PASS; +} + +static int validate_rx_dropped(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + err = kick_rx(ifobject->xsk); + if (err) + return TEST_FAILURE; + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + /* The receiver calls getsockopt after receiving the last (valid) + * packet which is not the final packet sent in this test (valid and + * invalid packets are sent in alternating fashion with the final + * packet being invalid). Since the last packet may or may not have + * been dropped already, both outcomes must be allowed. + */ + if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 || + stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_rx_full(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + usleep(1000); + err = kick_rx(ifobject->xsk); + if (err) + return TEST_FAILURE; + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_ring_full) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_fill_empty(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + usleep(1000); + err = kick_rx(ifobject->xsk); + if (err) + return TEST_FAILURE; + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_fill_ring_empty_descs) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_tx_invalid_descs(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + int fd = xsk_socket__fd(xsk); + struct xdp_statistics stats; + socklen_t optlen; + int err; + + optlen = sizeof(stats); + err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); + if (err) { + ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", + __func__, -err, strerror(-err)); + return TEST_FAILURE; + } + + if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { + ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n", + __func__, + (unsigned long long)stats.tx_invalid_descs, + ifobject->xsk->pkt_stream->nb_pkts); + return TEST_FAILURE; + } + + return TEST_PASS; +} + +static int xsk_configure(struct test_spec *test, struct ifobject *ifobject, + struct xsk_umem_info *umem, bool tx) +{ + int i, ret; + + for (i = 0; i < test->nb_sockets; i++) { + bool shared = (ifobject->shared_umem && tx) ? true : !!i; + u32 ctr = 0; + + while (ctr++ < SOCK_RECONF_CTR) { + ret = xsk_configure_socket(&ifobject->xsk_arr[i], umem, + ifobject, shared); + if (!ret) + break; + + /* Retry if it fails as xsk_socket__create() is asynchronous */ + if (ctr >= SOCK_RECONF_CTR) + return ret; + usleep(USLEEP_MAX); + } + if (ifobject->busy_poll) { + ret = enable_busy_poll(&ifobject->xsk_arr[i]); + if (ret) + return ret; + } + } + + return 0; +} + +static int thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject) +{ + int ret = xsk_configure(test, ifobject, test->ifobj_rx->umem, true); + + if (ret) + return ret; + ifobject->xsk = &ifobject->xsk_arr[0]; + ifobject->xskmap = test->ifobj_rx->xskmap; + memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info)); + ifobject->umem->base_addr = 0; + + return 0; +} + +static int xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, + bool fill_up) +{ + u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM; + u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts; + int ret; + + if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) + buffers_to_fill = umem->num_frames; + else + buffers_to_fill = umem->fill_size; + + ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); + if (ret != buffers_to_fill) + return -ENOSPC; + + while (filled < buffers_to_fill) { + struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts); + u64 addr; + u32 i; + + for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) { + if (!pkt) { + if (!fill_up) + break; + addr = filled * umem->frame_size + umem->base_addr; + } else if (pkt->offset >= 0) { + addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem); + } else { + addr = pkt->offset + umem_alloc_buffer(umem); + } + + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; + if (++filled >= buffers_to_fill) + break; + } + } + xsk_ring_prod__submit(&umem->fq, filled); + xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled); + + pkt_stream_reset(pkt_stream); + umem_reset_alloc(umem); + + return 0; +} + +static int thread_common_ops(struct test_spec *test, struct ifobject *ifobject) +{ + LIBBPF_OPTS(bpf_xdp_query_opts, opts); + int mmap_flags; + u64 umem_sz; + void *bufs; + int ret; + u32 i; + + umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; + mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + + if (ifobject->umem->unaligned_mode) + mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB; + + if (ifobject->shared_umem) + umem_sz *= 2; + + bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (bufs == MAP_FAILED) + return -errno; + + ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz); + if (ret) + return ret; + + ret = xsk_configure(test, ifobject, ifobject->umem, false); + if (ret) + return ret; + + ifobject->xsk = &ifobject->xsk_arr[0]; + + if (!ifobject->rx_on) + return 0; + + ret = xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, + ifobject->use_fill_ring); + if (ret) + return ret; + + for (i = 0; i < test->nb_sockets; i++) { + ifobject->xsk = &ifobject->xsk_arr[i]; + ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i); + if (ret) + return ret; + } + + return 0; +} + +void *worker_testapp_validate_tx(void *arg) +{ + struct test_spec *test = (struct test_spec *)arg; + struct ifobject *ifobject = test->ifobj_tx; + int err; + + if (test->current_step == 1) { + if (!ifobject->shared_umem) { + if (thread_common_ops(test, ifobject)) { + test->fail = true; + pthread_exit(NULL); + } + } else { + if (thread_common_ops_tx(test, ifobject)) { + test->fail = true; + pthread_exit(NULL); + } + } + } + + err = send_pkts(test, ifobject); + + if (!err && ifobject->validation_func) + err = ifobject->validation_func(ifobject); + if (err) + test->fail = true; + + pthread_exit(NULL); +} + +void *worker_testapp_validate_rx(void *arg) +{ + struct test_spec *test = (struct test_spec *)arg; + struct ifobject *ifobject = test->ifobj_rx; + int err; + + if (test->current_step == 1) { + err = thread_common_ops(test, ifobject); + } else { + xsk_clear_xskmap(ifobject->xskmap); + err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0); + if (err) + ksft_print_msg("Error: Failed to update xskmap, error %s\n", + strerror(-err)); + } + + pthread_barrier_wait(&barr); + + /* We leave only now in case of error to avoid getting stuck in the barrier */ + if (err) { + test->fail = true; + pthread_exit(NULL); + } + + err = receive_pkts(test); + + if (!err && ifobject->validation_func) + err = ifobject->validation_func(ifobject); + + if (err) { + if (!test->adjust_tail) { + test->fail = true; + } else { + bool supported; + + if (is_adjust_tail_supported(ifobject->xdp_progs, &supported)) + test->fail = true; + else if (!supported) + test->adjust_tail_support = false; + else + test->fail = true; + } + } + + pthread_exit(NULL); +} + +static void testapp_clean_xsk_umem(struct ifobject *ifobj) +{ + u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size; + + if (ifobj->shared_umem) + umem_sz *= 2; + + umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; + xsk_umem__delete(ifobj->umem->umem); + munmap(ifobj->umem->buffer, umem_sz); +} + +static void handler(int signum) +{ + pthread_exit(NULL); +} + +static bool xdp_prog_changed_rx(struct test_spec *test) +{ + struct ifobject *ifobj = test->ifobj_rx; + + return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode; +} + +static bool xdp_prog_changed_tx(struct test_spec *test) +{ + struct ifobject *ifobj = test->ifobj_tx; + + return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode; +} + +static int xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog, + struct bpf_map *xskmap, enum test_mode mode) +{ + int err; + + xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode)); + err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode)); + if (err) { + ksft_print_msg("Error attaching XDP program\n"); + return err; + } + + if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC)) + if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) { + ksft_print_msg("ERROR: XDP prog not in DRV mode\n"); + return -EINVAL; + } + + ifobj->xdp_prog = xdp_prog; + ifobj->xskmap = xskmap; + ifobj->mode = mode; + + return 0; +} + +static int xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx, + struct ifobject *ifobj_tx) +{ + int err = 0; + + if (xdp_prog_changed_rx(test)) { + err = xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode); + if (err) + return err; + } + + if (!ifobj_tx || ifobj_tx->shared_umem) + return 0; + + if (xdp_prog_changed_tx(test)) + err = xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode); + + return err; +} + +static void clean_sockets(struct test_spec *test, struct ifobject *ifobj) +{ + u32 i; + + if (!ifobj || !test) + return; + + for (i = 0; i < test->nb_sockets; i++) + xsk_socket__delete(ifobj->xsk_arr[i].xsk); +} + +static void clean_umem(struct test_spec *test, struct ifobject *ifobj1, struct ifobject *ifobj2) +{ + if (!ifobj1) + return; + + testapp_clean_xsk_umem(ifobj1); + if (ifobj2 && !ifobj2->shared_umem) + testapp_clean_xsk_umem(ifobj2); +} + +static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1, + struct ifobject *ifobj2) +{ + pthread_t t0, t1; + int err; + + if (test->mtu > MAX_ETH_PKT_SIZE) { + if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp || + (ifobj2 && !ifobj2->multi_buff_zc_supp))) { + ksft_print_msg("Multi buffer for zero-copy not supported.\n"); + return TEST_SKIP; + } + if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp || + (ifobj2 && !ifobj2->multi_buff_supp))) { + ksft_print_msg("Multi buffer not supported.\n"); + return TEST_SKIP; + } + } + err = test_spec_set_mtu(test, test->mtu); + if (err) { + ksft_print_msg("Error, could not set mtu.\n"); + return TEST_FAILURE; + } + + if (ifobj2) { + if (pthread_barrier_init(&barr, NULL, 2)) + return TEST_FAILURE; + pkt_stream_reset(ifobj2->xsk->pkt_stream); + } + + test->current_step++; + pkt_stream_reset(ifobj1->xsk->pkt_stream); + pkts_in_flight = 0; + + signal(SIGUSR1, handler); + /*Spawn RX thread */ + pthread_create(&t0, NULL, ifobj1->func_ptr, test); + + if (ifobj2) { + pthread_barrier_wait(&barr); + if (pthread_barrier_destroy(&barr)) { + pthread_kill(t0, SIGUSR1); + clean_sockets(test, ifobj1); + clean_umem(test, ifobj1, NULL); + return TEST_FAILURE; + } + + /*Spawn TX thread */ + pthread_create(&t1, NULL, ifobj2->func_ptr, test); + + pthread_join(t1, NULL); + } + + if (!ifobj2) + pthread_kill(t0, SIGUSR1); + else + pthread_join(t0, NULL); + + if (test->total_steps == test->current_step || test->fail) { + clean_sockets(test, ifobj1); + clean_sockets(test, ifobj2); + clean_umem(test, ifobj1, ifobj2); + } + + if (test->fail) + return TEST_FAILURE; + + return TEST_PASS; +} + +static int testapp_validate_traffic(struct test_spec *test) +{ + struct ifobject *ifobj_rx = test->ifobj_rx; + struct ifobject *ifobj_tx = test->ifobj_tx; + + if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) || + (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) { + ksft_print_msg("No huge pages present.\n"); + return TEST_SKIP; + } + + if (test->set_ring) { + if (ifobj_tx->hw_ring_size_supp) { + if (set_ring_size(ifobj_tx)) { + ksft_print_msg("Failed to change HW ring size.\n"); + return TEST_FAILURE; + } + } else { + ksft_print_msg("Changing HW ring size not supported.\n"); + return TEST_SKIP; + } + } + + if (xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx)) + return TEST_FAILURE; + return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx); +} + +static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj) +{ + return __testapp_validate_traffic(test, ifobj, NULL); +} + +int testapp_teardown(struct test_spec *test) +{ + int i; + + for (i = 0; i < MAX_TEARDOWN_ITER; i++) { + if (testapp_validate_traffic(test)) + return TEST_FAILURE; + test_spec_reset(test); + } + + return TEST_PASS; +} + +static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) +{ + thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr; + struct ifobject *tmp_ifobj = (*ifobj1); + + (*ifobj1)->func_ptr = (*ifobj2)->func_ptr; + (*ifobj2)->func_ptr = tmp_func_ptr; + + *ifobj1 = *ifobj2; + *ifobj2 = tmp_ifobj; +} + +int testapp_bidirectional(struct test_spec *test) +{ + int res; + + test->ifobj_tx->rx_on = true; + test->ifobj_rx->tx_on = true; + test->total_steps = 2; + if (testapp_validate_traffic(test)) + return TEST_FAILURE; + + print_verbose("Switching Tx/Rx direction\n"); + swap_directions(&test->ifobj_rx, &test->ifobj_tx); + res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); + + swap_directions(&test->ifobj_rx, &test->ifobj_tx); + return res; +} + +static int swap_xsk_resources(struct test_spec *test) +{ + int ret; + + test->ifobj_tx->xsk_arr[0].pkt_stream = NULL; + test->ifobj_rx->xsk_arr[0].pkt_stream = NULL; + test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default; + test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default; + test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1]; + test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1]; + + ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0); + if (ret) + return TEST_FAILURE; + + return TEST_PASS; +} + +int testapp_xdp_prog_cleanup(struct test_spec *test) +{ + test->total_steps = 2; + test->nb_sockets = 2; + if (testapp_validate_traffic(test)) + return TEST_FAILURE; + + if (swap_xsk_resources(test)) { + clean_sockets(test, test->ifobj_rx); + clean_sockets(test, test->ifobj_tx); + clean_umem(test, test->ifobj_rx, test->ifobj_tx); + return TEST_FAILURE; + } + + return testapp_validate_traffic(test); +} + +int testapp_headroom(struct test_spec *test) +{ + test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; + return testapp_validate_traffic(test); +} + +int testapp_stats_rx_dropped(struct test_spec *test) +{ + u32 umem_tr = test->ifobj_tx->umem_tailroom; + + if (test->mode == TEST_MODE_ZC) { + ksft_print_msg("Can not run RX_DROPPED test for ZC mode\n"); + return TEST_SKIP; + } + + if (pkt_stream_replace_half(test, (MIN_PKT_SIZE * 3) + umem_tr, 0)) + return TEST_FAILURE; + test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - + XDP_PACKET_HEADROOM - (MIN_PKT_SIZE * 2) - umem_tr; + if (pkt_stream_receive_half(test)) + return TEST_FAILURE; + test->ifobj_rx->validation_func = validate_rx_dropped; + return testapp_validate_traffic(test); +} + +int testapp_stats_tx_invalid_descs(struct test_spec *test) +{ + if (pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0)) + return TEST_FAILURE; + test->ifobj_tx->validation_func = validate_tx_invalid_descs; + return testapp_validate_traffic(test); +} + +int testapp_stats_rx_full(struct test_spec *test) +{ + struct pkt_stream *tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); + if (!tmp) + return TEST_FAILURE; + test->ifobj_tx->xsk->pkt_stream = tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + if (!tmp) + return TEST_FAILURE; + test->ifobj_rx->xsk->pkt_stream = tmp; + + test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; + test->ifobj_rx->release_rx = false; + test->ifobj_rx->validation_func = validate_rx_full; + return testapp_validate_traffic(test); +} + +int testapp_stats_fill_empty(struct test_spec *test) +{ + struct pkt_stream *tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); + if (!tmp) + return TEST_FAILURE; + test->ifobj_tx->xsk->pkt_stream = tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + if (!tmp) + return TEST_FAILURE; + test->ifobj_rx->xsk->pkt_stream = tmp; + + test->ifobj_rx->use_fill_ring = false; + test->ifobj_rx->validation_func = validate_fill_empty; + return testapp_validate_traffic(test); +} + +int testapp_send_receive_unaligned(struct test_spec *test) +{ + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + /* Let half of the packets straddle a 4K buffer boundary */ + if (pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2)) + return TEST_FAILURE; + + return testapp_validate_traffic(test); +} + +int testapp_send_receive_unaligned_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE)) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_single_pkt(struct test_spec *test) +{ + struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}}; + + if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts))) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_send_receive_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE)) + return TEST_FAILURE; + + return testapp_validate_traffic(test); +} + +int testapp_invalid_desc_mb(struct test_spec *test) +{ + struct xsk_umem_info *umem = test->ifobj_tx->umem; + u64 umem_size = umem->num_frames * umem->frame_size; + struct pkt pkts[] = { + /* Valid packet for synch to start with */ + {0, MIN_PKT_SIZE, 0, true, 0}, + /* Zero frame len is not legal */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, 0, 0, false, 0}, + /* Invalid address in the second frame */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + /* Invalid len in the middle */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + /* Invalid options in the middle */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION}, + /* Transmit 2 frags, receive 3 */ + {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD}, + {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0}, + /* Middle frame crosses chunk boundary with small length */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0}, + /* Valid packet for synch so that something is received */ + {0, MIN_PKT_SIZE, 0, true, 0}}; + + if (umem->unaligned_mode) { + /* Crossing a chunk boundary allowed */ + pkts[12].valid = true; + pkts[13].valid = true; + } + + test->mtu = MAX_ETH_JUMBO_SIZE; + if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts))) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_invalid_desc(struct test_spec *test) +{ + struct xsk_umem_info *umem = test->ifobj_tx->umem; + u64 umem_size = umem->num_frames * umem->frame_size; + struct pkt pkts[] = { + /* Zero packet address allowed */ + {0, MIN_PKT_SIZE, 0, true}, + /* Allowed packet */ + {0, MIN_PKT_SIZE, 0, true}, + /* Straddling the start of umem */ + {-2, MIN_PKT_SIZE, 0, false}, + /* Packet too large */ + {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, + /* Up to end of umem allowed */ + {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true}, + /* After umem ends */ + {umem_size, MIN_PKT_SIZE, 0, false}, + /* Straddle the end of umem */ + {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, + /* Straddle a 4K boundary */ + {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, + /* Straddle a 2K boundary */ + {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true}, + /* Valid packet for synch so that something is received */ + {0, MIN_PKT_SIZE, 0, true}}; + + if (umem->unaligned_mode) { + /* Crossing a page boundary allowed */ + pkts[7].valid = true; + } + if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { + /* Crossing a 2K frame size boundary not allowed */ + pkts[8].valid = false; + } + + if (test->ifobj_tx->shared_umem) { + pkts[4].offset += umem_size; + pkts[5].offset += umem_size; + pkts[6].offset += umem_size; + } + + if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts))) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_xdp_drop(struct test_spec *test) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + if (pkt_stream_receive_half(test)) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_xdp_metadata_copy(struct test_spec *test) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata, + skel_tx->progs.xsk_xdp_populate_metadata, + skel_rx->maps.xsk, skel_tx->maps.xsk); + test->ifobj_rx->use_metadata = true; + + skel_rx->bss->count = 0; + + return testapp_validate_traffic(test); +} + +int testapp_xdp_shared_umem(struct test_spec *test) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + int ret; + + test->total_steps = 1; + test->nb_sockets = 2; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem, + skel_tx->progs.xsk_xdp_shared_umem, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + if (pkt_stream_even_odd_sequence(test)) + return TEST_FAILURE; + + ret = testapp_validate_traffic(test); + + release_even_odd_sequence(test); + + return ret; +} + +int testapp_poll_txq_tmout(struct test_spec *test) +{ + test->ifobj_tx->use_poll = true; + /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ + test->ifobj_tx->umem->frame_size = 2048; + if (pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048)) + return TEST_FAILURE; + return testapp_validate_traffic_single_thread(test, test->ifobj_tx); +} + +int testapp_poll_rxq_tmout(struct test_spec *test) +{ + test->ifobj_rx->use_poll = true; + return testapp_validate_traffic_single_thread(test, test->ifobj_rx); +} + +int testapp_too_many_frags(struct test_spec *test) +{ + struct pkt *pkts; + u32 max_frags, i; + int ret = TEST_FAILURE; + + if (test->mode == TEST_MODE_ZC) { + max_frags = test->ifobj_tx->xdp_zc_max_segs; + } else { + max_frags = test->ifobj_tx->max_skb_frags; + max_frags += 1; + } + + pkts = calloc(2 * max_frags + 2, sizeof(struct pkt)); + if (!pkts) + return TEST_FAILURE; + + test->mtu = MAX_ETH_JUMBO_SIZE; + + /* Valid packet for synch */ + pkts[0].len = MIN_PKT_SIZE; + pkts[0].valid = true; + + /* One valid packet with the max amount of frags */ + for (i = 1; i < max_frags + 1; i++) { + pkts[i].len = MIN_PKT_SIZE; + pkts[i].options = XDP_PKT_CONTD; + pkts[i].valid = true; + } + pkts[max_frags].options = 0; + + /* An invalid packet with the max amount of frags but signals packet + * continues on the last frag + */ + for (i = max_frags + 1; i < 2 * max_frags + 1; i++) { + pkts[i].len = MIN_PKT_SIZE; + pkts[i].options = XDP_PKT_CONTD; + pkts[i].valid = false; + } + + /* Valid packet for synch */ + pkts[2 * max_frags + 1].len = MIN_PKT_SIZE; + pkts[2 * max_frags + 1].valid = true; + + if (pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2)) { + free(pkts); + return TEST_FAILURE; + } + + ret = testapp_validate_traffic(test); + free(pkts); + return ret; +} + +static int xsk_load_xdp_programs(struct ifobject *ifobj) +{ + ifobj->xdp_progs = xsk_xdp_progs__open_and_load(); + if (libbpf_get_error(ifobj->xdp_progs)) + return libbpf_get_error(ifobj->xdp_progs); + + return 0; +} + +/* Simple test */ +static bool hugepages_present(void) +{ + size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE; + void *bufs; + + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB); + if (bufs == MAP_FAILED) + return false; + + mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; + munmap(bufs, mmap_sz); + return true; +} + +int init_iface(struct ifobject *ifobj, thread_func_t func_ptr) +{ + LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); + int err; + + ifobj->func_ptr = func_ptr; + + err = xsk_load_xdp_programs(ifobj); + if (err) { + ksft_print_msg("Error loading XDP program\n"); + return err; + } + + if (hugepages_present()) + ifobj->unaligned_supp = true; + + err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts); + if (err) { + ksft_print_msg("Error querying XDP capabilities\n"); + return err; + } + if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG) + ifobj->multi_buff_supp = true; + if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) { + if (query_opts.xdp_zc_max_segs > 1) { + ifobj->multi_buff_zc_supp = true; + ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs; + } else { + ifobj->xdp_zc_max_segs = 0; + } + } + + return 0; +} + +int testapp_send_receive(struct test_spec *test) +{ + return testapp_validate_traffic(test); +} + +int testapp_send_receive_2k_frame(struct test_spec *test) +{ + test->ifobj_tx->umem->frame_size = 2048; + test->ifobj_rx->umem->frame_size = 2048; + if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE)) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_poll_rx(struct test_spec *test) +{ + test->ifobj_rx->use_poll = true; + return testapp_validate_traffic(test); +} + +int testapp_poll_tx(struct test_spec *test) +{ + test->ifobj_tx->use_poll = true; + return testapp_validate_traffic(test); +} + +int testapp_aligned_inv_desc(struct test_spec *test) +{ + return testapp_invalid_desc(test); +} + +int testapp_aligned_inv_desc_2k_frame(struct test_spec *test) +{ + test->ifobj_tx->umem->frame_size = 2048; + test->ifobj_rx->umem->frame_size = 2048; + return testapp_invalid_desc(test); +} + +int testapp_unaligned_inv_desc(struct test_spec *test) +{ + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + return testapp_invalid_desc(test); +} + +int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test) +{ + u64 page_size, umem_size; + + /* Odd frame size so the UMEM doesn't end near a page boundary. */ + test->ifobj_tx->umem->frame_size = 4001; + test->ifobj_rx->umem->frame_size = 4001; + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + /* This test exists to test descriptors that staddle the end of + * the UMEM but not a page. + */ + page_size = sysconf(_SC_PAGESIZE); + umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; + assert(umem_size % page_size > MIN_PKT_SIZE); + assert(umem_size % page_size < page_size - MIN_PKT_SIZE); + + return testapp_invalid_desc(test); +} + +int testapp_aligned_inv_desc_mb(struct test_spec *test) +{ + return testapp_invalid_desc_mb(test); +} + +int testapp_unaligned_inv_desc_mb(struct test_spec *test) +{ + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + return testapp_invalid_desc_mb(test); +} + +int testapp_xdp_metadata(struct test_spec *test) +{ + return testapp_xdp_metadata_copy(test); +} + +int testapp_xdp_metadata_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + return testapp_xdp_metadata_copy(test); +} + +int testapp_hw_sw_min_ring_size(struct test_spec *test) +{ + int ret; + + test->set_ring = true; + test->total_steps = 2; + test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE; + test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2; + test->ifobj_tx->xsk->batch_size = 1; + test->ifobj_rx->xsk->batch_size = 1; + ret = testapp_validate_traffic(test); + if (ret) + return ret; + + /* Set batch size to hw_ring_size - 1 */ + test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1; + test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1; + return testapp_validate_traffic(test); +} + +int testapp_hw_sw_max_ring_size(struct test_spec *test) +{ + u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4; + int ret; + + test->set_ring = true; + test->total_steps = 2; + test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending; + test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending; + test->ifobj_rx->umem->num_frames = max_descs; + test->ifobj_rx->umem->fill_size = max_descs; + test->ifobj_rx->umem->comp_size = max_descs; + test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + + ret = testapp_validate_traffic(test); + if (ret) + return ret; + + /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when + * updating the Rx HW tail register. + */ + test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8; + test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8; + if (pkt_stream_replace(test, max_descs, MIN_PKT_SIZE)) { + clean_sockets(test, test->ifobj_tx); + clean_sockets(test, test->ifobj_rx); + clean_umem(test, test->ifobj_rx, test->ifobj_tx); + return TEST_FAILURE; + } + + return testapp_validate_traffic(test); +} + +static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail, + skel_tx->progs.xsk_xdp_adjust_tail, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + skel_rx->bss->adjust_value = adjust_value; + + return testapp_validate_traffic(test); +} + +static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len) +{ + int ret; + + test->adjust_tail_support = true; + test->adjust_tail = true; + test->total_steps = 1; + + ret = pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len); + if (ret) + return TEST_FAILURE; + + ret = pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value); + if (ret) + return TEST_FAILURE; + + ret = testapp_xdp_adjust_tail(test, value); + if (ret) + return ret; + + if (!test->adjust_tail_support) { + ksft_print_msg("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n", + mode_string(test), busy_poll_string(test)); + return TEST_SKIP; + } + + return 0; +} + +int testapp_adjust_tail_shrink(struct test_spec *test) +{ + /* Shrink by 4 bytes for testing purpose */ + return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2); +} + +int testapp_adjust_tail_shrink_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + /* Shrink by the frag size */ + return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2); +} + +int testapp_adjust_tail_grow(struct test_spec *test) +{ + if (test->mode == TEST_MODE_SKB) + return TEST_SKIP; + + /* Grow by 4 bytes for testing purpose */ + return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2); +} + +int testapp_adjust_tail_grow_mb(struct test_spec *test) +{ + u32 grow_size; + + if (test->mode == TEST_MODE_SKB) + return TEST_SKIP; + + /* worst case scenario is when underlying setup will work on 3k + * buffers, let us account for it; given that we will use 6k as + * pkt_len, expect that it will be broken down to 2 descs each + * with 3k payload; + * + * 4k is truesize, 3k payload, 256 HR, 320 TR; + */ + grow_size = XSK_UMEM__MAX_FRAME_SIZE - + XSK_UMEM__LARGE_FRAME_SIZE - + XDP_PACKET_HEADROOM - + test->ifobj_tx->umem_tailroom; + test->mtu = MAX_ETH_JUMBO_SIZE; + + return testapp_adjust_tail(test, grow_size, XSK_UMEM__LARGE_FRAME_SIZE * 2); +} + +int testapp_tx_queue_consumer(struct test_spec *test) +{ + int nr_packets; + + if (test->mode == TEST_MODE_ZC) { + ksft_print_msg("Can not run TX_QUEUE_CONSUMER test for ZC mode\n"); + return TEST_SKIP; + } + + nr_packets = MAX_TX_BUDGET_DEFAULT + 1; + if (pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE)) + return TEST_FAILURE; + test->ifobj_tx->xsk->batch_size = nr_packets; + test->ifobj_tx->xsk->check_consumer = true; + + return testapp_validate_traffic(test); +} + +struct ifobject *ifobject_create(void) +{ + struct ifobject *ifobj; + + ifobj = calloc(1, sizeof(struct ifobject)); + if (!ifobj) + return NULL; + + ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr)); + if (!ifobj->xsk_arr) + goto out_xsk_arr; + + ifobj->umem = calloc(1, sizeof(*ifobj->umem)); + if (!ifobj->umem) + goto out_umem; + + return ifobj; + +out_umem: + free(ifobj->xsk_arr); +out_xsk_arr: + free(ifobj); + return NULL; +} + +void ifobject_delete(struct ifobject *ifobj) +{ + free(ifobj->umem); + free(ifobj->xsk_arr); + free(ifobj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.h b/tools/testing/selftests/bpf/prog_tests/test_xsk.h new file mode 100644 index 000000000000..1ab8aee4ce56 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef TEST_XSK_H_ +#define TEST_XSK_H_ + +#include <linux/ethtool.h> +#include <linux/if_xdp.h> + +#include "../kselftest.h" +#include "xsk.h" + +#ifndef SO_PREFER_BUSY_POLL +#define SO_PREFER_BUSY_POLL 69 +#endif + +#ifndef SO_BUSY_POLL_BUDGET +#define SO_BUSY_POLL_BUDGET 70 +#endif + +#define TEST_PASS 0 +#define TEST_FAILURE -1 +#define TEST_CONTINUE 1 +#define TEST_SKIP 2 + +#define DEFAULT_PKT_CNT (4 * 1024) +#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) +#define HUGEPAGE_SIZE (2 * 1024 * 1024) +#define MIN_PKT_SIZE 64 +#define MAX_ETH_PKT_SIZE 1518 +#define MAX_INTERFACE_NAME_CHARS 16 +#define MAX_TEST_NAME_SIZE 48 +#define SOCK_RECONF_CTR 10 +#define USLEEP_MAX 10000 + +#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags" +#define SMP_CACHE_BYTES_PATH "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size" + +extern bool opt_verbose; +#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) + + +static inline u32 ceil_u32(u32 a, u32 b) +{ + return (a + b - 1) / b; +} + +static inline u64 ceil_u64(u64 a, u64 b) +{ + return (a + b - 1) / b; +} + +static inline unsigned int read_procfs_val(const char *path) +{ + unsigned int read_val = 0; + FILE *file; + + file = fopen(path, "r"); + if (!file) { + ksft_print_msg("Error opening %s\n", path); + return 0; + } + + if (fscanf(file, "%u", &read_val) != 1) + ksft_print_msg("Error reading %s\n", path); + + fclose(file); + return read_val; +} + +/* Simple test */ +enum test_mode { + TEST_MODE_SKB, + TEST_MODE_DRV, + TEST_MODE_ZC, + TEST_MODE_ALL +}; + +struct ifobject; +struct test_spec; +typedef int (*validation_func_t)(struct ifobject *ifobj); +typedef void *(*thread_func_t)(void *arg); +typedef int (*test_func_t)(struct test_spec *test); + +struct xsk_socket_info { + struct xsk_ring_cons rx; + struct xsk_ring_prod tx; + struct xsk_umem_info *umem; + struct xsk_socket *xsk; + struct pkt_stream *pkt_stream; + u32 outstanding_tx; + u32 rxqsize; + u32 batch_size; + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + bool check_consumer; +}; + +int kick_rx(struct xsk_socket_info *xsk); +int kick_tx(struct xsk_socket_info *xsk); + +struct xsk_umem_info { + struct xsk_ring_prod fq; + struct xsk_ring_cons cq; + struct xsk_umem *umem; + u64 next_buffer; + u32 num_frames; + u32 frame_headroom; + void *buffer; + u32 frame_size; + u32 base_addr; + u32 fill_size; + u32 comp_size; + bool unaligned_mode; +}; + +struct set_hw_ring { + u32 default_tx; + u32 default_rx; +}; + +int hw_ring_size_reset(struct ifobject *ifobj); + +struct ifobject { + char ifname[MAX_INTERFACE_NAME_CHARS]; + struct xsk_socket_info *xsk; + struct xsk_socket_info *xsk_arr; + struct xsk_umem_info *umem; + thread_func_t func_ptr; + validation_func_t validation_func; + struct xsk_xdp_progs *xdp_progs; + struct bpf_map *xskmap; + struct bpf_program *xdp_prog; + struct ethtool_ringparam ring; + struct set_hw_ring set_ring; + enum test_mode mode; + int ifindex; + int mtu; + u32 bind_flags; + u32 xdp_zc_max_segs; + u32 umem_tailroom; + u32 max_skb_frags; + bool tx_on; + bool rx_on; + bool use_poll; + bool busy_poll; + bool use_fill_ring; + bool release_rx; + bool shared_umem; + bool use_metadata; + bool unaligned_supp; + bool multi_buff_supp; + bool multi_buff_zc_supp; + bool hw_ring_size_supp; +}; +struct ifobject *ifobject_create(void); +void ifobject_delete(struct ifobject *ifobj); +int init_iface(struct ifobject *ifobj, thread_func_t func_ptr); + +int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, u64 size); +int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, + struct ifobject *ifobject, bool shared); + + +struct pkt { + int offset; + u32 len; + u32 pkt_nb; + bool valid; + u16 options; +}; + +struct pkt_stream { + u32 nb_pkts; + u32 current_pkt_nb; + struct pkt *pkts; + u32 max_pkt_len; + u32 nb_rx_pkts; + u32 nb_valid_entries; + bool verbatim; +}; + +static inline bool pkt_continues(u32 options) +{ + return options & XDP_PKT_CONTD; +} + +struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len); +void pkt_stream_delete(struct pkt_stream *pkt_stream); +void pkt_stream_reset(struct pkt_stream *pkt_stream); +void pkt_stream_restore_default(struct test_spec *test); + +struct test_spec { + struct ifobject *ifobj_tx; + struct ifobject *ifobj_rx; + struct pkt_stream *tx_pkt_stream_default; + struct pkt_stream *rx_pkt_stream_default; + struct bpf_program *xdp_prog_rx; + struct bpf_program *xdp_prog_tx; + struct bpf_map *xskmap_rx; + struct bpf_map *xskmap_tx; + test_func_t test_func; + int mtu; + u16 total_steps; + u16 current_step; + u16 nb_sockets; + bool fail; + bool set_ring; + bool adjust_tail; + bool adjust_tail_support; + enum test_mode mode; + char name[MAX_TEST_NAME_SIZE]; +}; + +#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : "" +static inline char *mode_string(struct test_spec *test) +{ + switch (test->mode) { + case TEST_MODE_SKB: + return "SKB"; + case TEST_MODE_DRV: + return "DRV"; + case TEST_MODE_ZC: + return "ZC"; + default: + return "BOGUS"; + } +} + +void test_init(struct test_spec *test, struct ifobject *ifobj_tx, + struct ifobject *ifobj_rx, enum test_mode mode, + const struct test_spec *test_to_run); + +int testapp_adjust_tail_grow(struct test_spec *test); +int testapp_adjust_tail_grow_mb(struct test_spec *test); +int testapp_adjust_tail_shrink(struct test_spec *test); +int testapp_adjust_tail_shrink_mb(struct test_spec *test); +int testapp_aligned_inv_desc(struct test_spec *test); +int testapp_aligned_inv_desc_2k_frame(struct test_spec *test); +int testapp_aligned_inv_desc_mb(struct test_spec *test); +int testapp_bidirectional(struct test_spec *test); +int testapp_headroom(struct test_spec *test); +int testapp_hw_sw_max_ring_size(struct test_spec *test); +int testapp_hw_sw_min_ring_size(struct test_spec *test); +int testapp_poll_rx(struct test_spec *test); +int testapp_poll_rxq_tmout(struct test_spec *test); +int testapp_poll_tx(struct test_spec *test); +int testapp_poll_txq_tmout(struct test_spec *test); +int testapp_send_receive(struct test_spec *test); +int testapp_send_receive_2k_frame(struct test_spec *test); +int testapp_send_receive_mb(struct test_spec *test); +int testapp_send_receive_unaligned(struct test_spec *test); +int testapp_send_receive_unaligned_mb(struct test_spec *test); +int testapp_single_pkt(struct test_spec *test); +int testapp_stats_fill_empty(struct test_spec *test); +int testapp_stats_rx_dropped(struct test_spec *test); +int testapp_stats_tx_invalid_descs(struct test_spec *test); +int testapp_stats_rx_full(struct test_spec *test); +int testapp_teardown(struct test_spec *test); +int testapp_too_many_frags(struct test_spec *test); +int testapp_tx_queue_consumer(struct test_spec *test); +int testapp_unaligned_inv_desc(struct test_spec *test); +int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test); +int testapp_unaligned_inv_desc_mb(struct test_spec *test); +int testapp_xdp_drop(struct test_spec *test); +int testapp_xdp_metadata(struct test_spec *test); +int testapp_xdp_metadata_mb(struct test_spec *test); +int testapp_xdp_prog_cleanup(struct test_spec *test); +int testapp_xdp_shared_umem(struct test_spec *test); + +void *worker_testapp_validate_rx(void *arg); +void *worker_testapp_validate_tx(void *arg); + +static const struct test_spec tests[] = { + {.name = "SEND_RECEIVE", .test_func = testapp_send_receive}, + {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame}, + {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt}, + {.name = "POLL_RX", .test_func = testapp_poll_rx}, + {.name = "POLL_TX", .test_func = testapp_poll_tx}, + {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout}, + {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout}, + {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc}, + {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame}, + {.name = "UMEM_HEADROOM", .test_func = testapp_headroom}, + {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional}, + {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped}, + {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs}, + {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full}, + {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty}, + {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup}, + {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop}, + {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem}, + {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata}, + {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb}, + {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb}, + {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags}, + {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink}, + {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer}, + }; + +static const struct test_spec ci_skip_tests[] = { + /* Flaky tests */ + {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb}, + {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow}, + {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb}, + {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb}, + /* Tests with huge page dependency */ + {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned}, + {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc}, + {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE", + .test_func = testapp_unaligned_inv_desc_4001_frame}, + {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS", + .test_func = testapp_send_receive_unaligned_mb}, + {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb}, + /* Test with HW ring size dependency */ + {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size}, + {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size}, + /* Too long test */ + {.name = "TEARDOWN", .test_func = testapp_teardown}, +}; + + +#endif /* TEST_XSK_H_ */ diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index d66687f1ee6a..09ff21e1ad2f 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -1,11 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ +#include <sched.h> #include <test_progs.h> +#include <linux/perf_event.h> +#include <sys/syscall.h> #include "timer.skel.h" #include "timer_failure.skel.h" +#include "timer_interrupt.skel.h" #define NUM_THR 8 +static int perf_event_open(__u32 type, __u64 config, int pid, int cpu) +{ + struct perf_event_attr attr = { + .type = type, + .config = config, + .size = sizeof(struct perf_event_attr), + .sample_period = 10000, + }; + + return syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0); +} + static void *spin_lock_thread(void *arg) { int i, err, prog_fd = *(int *)arg; @@ -21,13 +37,174 @@ static void *spin_lock_thread(void *arg) pthread_exit(arg); } -static int timer(struct timer *timer_skel) + +static int timer_stress_runner(struct timer *timer_skel, bool async_cancel) { - int i, err, prog_fd; + int i, err = 1, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, topts); pthread_t thread_id[NUM_THR]; void *ret; + timer_skel->bss->async_cancel = async_cancel; + prog_fd = bpf_program__fd(timer_skel->progs.race); + for (i = 0; i < NUM_THR; i++) { + err = pthread_create(&thread_id[i], NULL, + &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + break; + } + + while (i) { + err = pthread_join(thread_id[--i], &ret); + if (ASSERT_OK(err, "pthread_join")) + ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join"); + } + return err; +} + +static int timer_stress(struct timer *timer_skel) +{ + return timer_stress_runner(timer_skel, false); +} + +static int timer_stress_async_cancel(struct timer *timer_skel) +{ + return timer_stress_runner(timer_skel, true); +} + +static void *nmi_cpu_worker(void *arg) +{ + volatile __u64 num = 1; + int i; + + for (i = 0; i < 500000000; ++i) + num *= (i % 7) + 1; + (void)num; + + return NULL; +} + +static int run_nmi_test(struct timer *timer_skel, struct bpf_program *prog) +{ + struct bpf_link *link = NULL; + int pe_fd = -1, pipefd[2] = {-1, -1}, pid = 0, status; + char buf = 0; + int ret = -1; + + if (!ASSERT_OK(pipe(pipefd), "pipe")) + goto cleanup; + + pid = fork(); + if (pid == 0) { + /* Child: spawn multiple threads to consume multiple CPUs */ + pthread_t threads[NUM_THR]; + int i; + + close(pipefd[1]); + read(pipefd[0], &buf, 1); + close(pipefd[0]); + + for (i = 0; i < NUM_THR; i++) + pthread_create(&threads[i], NULL, nmi_cpu_worker, NULL); + for (i = 0; i < NUM_THR; i++) + pthread_join(threads[i], NULL); + exit(0); + } + + if (!ASSERT_GE(pid, 0, "fork")) + goto cleanup; + + /* Open perf event for child process across all CPUs */ + pe_fd = perf_event_open(PERF_TYPE_HARDWARE, + PERF_COUNT_HW_CPU_CYCLES, + pid, /* measure child process */ + -1); /* on any CPU */ + if (pe_fd < 0) { + if (errno == ENOENT || errno == EOPNOTSUPP) { + printf("SKIP:no PERF_COUNT_HW_CPU_CYCLES\n"); + test__skip(); + ret = EOPNOTSUPP; + goto cleanup; + } + ASSERT_GE(pe_fd, 0, "perf_event_open"); + goto cleanup; + } + + link = bpf_program__attach_perf_event(prog, pe_fd); + if (!ASSERT_OK_PTR(link, "attach_perf_event")) + goto cleanup; + pe_fd = -1; /* Ownership transferred to link */ + + /* Signal child to start CPU work */ + close(pipefd[0]); + pipefd[0] = -1; + write(pipefd[1], &buf, 1); + close(pipefd[1]); + pipefd[1] = -1; + + waitpid(pid, &status, 0); + pid = 0; + + /* Verify NMI context was hit */ + ASSERT_GT(timer_skel->bss->test_hits, 0, "test_hits"); + ret = 0; + +cleanup: + bpf_link__destroy(link); + if (pe_fd >= 0) + close(pe_fd); + if (pid > 0) { + write(pipefd[1], &buf, 1); + waitpid(pid, &status, 0); + } + if (pipefd[0] >= 0) + close(pipefd[0]); + if (pipefd[1] >= 0) + close(pipefd[1]); + return ret; +} + +static int timer_stress_nmi_race(struct timer *timer_skel) +{ + int err; + + err = run_nmi_test(timer_skel, timer_skel->progs.nmi_race); + if (err == EOPNOTSUPP) + return 0; + return err; +} + +static int timer_stress_nmi_update(struct timer *timer_skel) +{ + int err; + + err = run_nmi_test(timer_skel, timer_skel->progs.nmi_update); + if (err == EOPNOTSUPP) + return 0; + if (err) + return err; + ASSERT_GT(timer_skel->bss->update_hits, 0, "update_hits"); + return 0; +} + +static int timer_stress_nmi_cancel(struct timer *timer_skel) +{ + int err; + + err = run_nmi_test(timer_skel, timer_skel->progs.nmi_cancel); + if (err == EOPNOTSUPP) + return 0; + if (err) + return err; + ASSERT_GT(timer_skel->bss->cancel_hits, 0, "cancel_hits"); + return 0; +} + +static int timer(struct timer *timer_skel) +{ + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); + err = timer__attach(timer_skel); if (!ASSERT_OK(err, "timer_attach")) return err; @@ -62,36 +239,113 @@ static int timer(struct timer *timer_skel) /* check that code paths completed */ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok"); - prog_fd = bpf_program__fd(timer_skel->progs.race); - for (i = 0; i < NUM_THR; i++) { - err = pthread_create(&thread_id[i], NULL, - &spin_lock_thread, &prog_fd); - if (!ASSERT_OK(err, "pthread_create")) - break; - } + return 0; +} - while (i) { - err = pthread_join(thread_id[--i], &ret); - if (ASSERT_OK(err, "pthread_join")) - ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join"); - } +static int timer_cancel_async(struct timer *timer_skel) +{ + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + prog_fd = bpf_program__fd(timer_skel->progs.test_async_cancel_succeed); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + usleep(500); + /* check that there were no errors in timer execution */ + ASSERT_EQ(timer_skel->bss->err, 0, "err"); + + /* check that code paths completed */ + ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok"); return 0; } -/* TODO: use pid filtering */ -void serial_test_timer(void) +static void test_timer(int (*timer_test_fn)(struct timer *timer_skel)) { struct timer *timer_skel = NULL; int err; timer_skel = timer__open_and_load(); + if (!timer_skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) return; - err = timer(timer_skel); + err = timer_test_fn(timer_skel); ASSERT_OK(err, "timer"); timer__destroy(timer_skel); +} + +void serial_test_timer(void) +{ + test_timer(timer); RUN_TESTS(timer_failure); } + +void serial_test_timer_stress(void) +{ + test_timer(timer_stress); +} + +void serial_test_timer_stress_async_cancel(void) +{ + test_timer(timer_stress_async_cancel); +} + +void serial_test_timer_async_cancel(void) +{ + test_timer(timer_cancel_async); +} + +void serial_test_timer_stress_nmi_race(void) +{ + test_timer(timer_stress_nmi_race); +} + +void serial_test_timer_stress_nmi_update(void) +{ + test_timer(timer_stress_nmi_update); +} + +void serial_test_timer_stress_nmi_cancel(void) +{ + test_timer(timer_stress_nmi_cancel); +} + +void test_timer_interrupt(void) +{ + struct timer_interrupt *skel = NULL; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, opts); + + skel = timer_interrupt__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } + if (!ASSERT_OK_PTR(skel, "timer_interrupt__open_and_load")) + return; + + err = timer_interrupt__attach(skel); + if (!ASSERT_OK(err, "timer_interrupt__attach")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.test_timer_interrupt); + err = bpf_prog_test_run_opts(prog_fd, &opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto out; + + usleep(50); + + ASSERT_EQ(skel->bss->in_interrupt, 0, "in_interrupt"); + if (skel->bss->preempt_count) + ASSERT_NEQ(skel->bss->in_interrupt_cb, 0, "in_interrupt_cb"); + +out: + timer_interrupt__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c index f74b82305da8..b841597c8a3a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_crash.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c @@ -12,6 +12,10 @@ static void test_timer_crash_mode(int mode) struct timer_crash *skel; skel = timer_crash__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load")) return; skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c index 1a2f99596916..eb303fa1e09a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -59,6 +59,10 @@ void test_timer_lockup(void) } skel = timer_lockup__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index 9ff7843909e7..c930c7d7105b 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -65,6 +65,10 @@ void serial_test_timer_mim(void) goto cleanup; timer_skel = timer_mim__open_and_load(); + if (!timer_skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c b/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c new file mode 100644 index 000000000000..9f1f9aec8888 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "timer_start_deadlock.skel.h" + +void test_timer_start_deadlock(void) +{ + struct timer_start_deadlock *skel; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, opts); + + skel = timer_start_deadlock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = timer_start_deadlock__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.start_timer); + + /* + * Run the syscall program that attempts to deadlock. + * If the kernel deadlocks, this call will never return. + */ + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(err, "prog_test_run"); + ASSERT_EQ(opts.retval, 0, "prog_retval"); + + ASSERT_EQ(skel->bss->tp_called, 1, "tp_called"); +cleanup: + timer_start_deadlock__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c b/tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c new file mode 100644 index 000000000000..29a46e96f660 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_start_delete_race.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#define _GNU_SOURCE +#include <sched.h> +#include <pthread.h> +#include <test_progs.h> +#include "timer_start_delete_race.skel.h" + +/* + * Test for race between bpf_timer_start() and map element deletion. + * + * The race scenario: + * - CPU 1: bpf_timer_start() proceeds to bpf_async_process() and is about + * to call hrtimer_start() but hasn't yet + * - CPU 2: map_delete_elem() calls __bpf_async_cancel_and_free(), since + * timer is not scheduled yet hrtimer_try_to_cancel() is a nop, + * then calls bpf_async_refcount_put() dropping refcnt to zero + * and scheduling call_rcu_tasks_trace() + * - CPU 1: continues and calls hrtimer_start() + * - After RCU tasks trace grace period: memory is freed + * - Timer callback fires on freed memory: UAF! + * + * This test stresses this race by having two threads: + * - Thread 1: repeatedly starts timers + * - Thread 2: repeatedly deletes map elements + * + * KASAN should detect use-after-free. + */ + +#define ITERATIONS 1000 + +struct ctx { + struct timer_start_delete_race *skel; + volatile bool start; + volatile bool stop; + int errors; +}; + +static void *start_timer_thread(void *arg) +{ + struct ctx *ctx = arg; + cpu_set_t cpuset; + int fd, i; + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); + + while (!ctx->start && !ctx->stop) + usleep(1); + if (ctx->stop) + return NULL; + + fd = bpf_program__fd(ctx->skel->progs.start_timer); + + for (i = 0; i < ITERATIONS && !ctx->stop; i++) { + LIBBPF_OPTS(bpf_test_run_opts, opts); + int err; + + err = bpf_prog_test_run_opts(fd, &opts); + if (err || opts.retval) { + ctx->errors++; + break; + } + } + + return NULL; +} + +static void *delete_elem_thread(void *arg) +{ + struct ctx *ctx = arg; + cpu_set_t cpuset; + int fd, i; + + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); + + while (!ctx->start && !ctx->stop) + usleep(1); + if (ctx->stop) + return NULL; + + fd = bpf_program__fd(ctx->skel->progs.delete_elem); + + for (i = 0; i < ITERATIONS && !ctx->stop; i++) { + LIBBPF_OPTS(bpf_test_run_opts, opts); + int err; + + err = bpf_prog_test_run_opts(fd, &opts); + if (err || opts.retval) { + ctx->errors++; + break; + } + } + + return NULL; +} + +void test_timer_start_delete_race(void) +{ + struct timer_start_delete_race *skel; + pthread_t threads[2]; + struct ctx ctx = {}; + int err; + + skel = timer_start_delete_race__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + ctx.skel = skel; + + err = pthread_create(&threads[0], NULL, start_timer_thread, &ctx); + if (!ASSERT_OK(err, "create start_timer_thread")) { + ctx.stop = true; + goto cleanup; + } + + err = pthread_create(&threads[1], NULL, delete_elem_thread, &ctx); + if (!ASSERT_OK(err, "create delete_elem_thread")) { + ctx.stop = true; + pthread_join(threads[0], NULL); + goto cleanup; + } + + ctx.start = true; + + pthread_join(threads[0], NULL); + pthread_join(threads[1], NULL); + + ASSERT_EQ(ctx.errors, 0, "thread_errors"); + + /* Either KASAN will catch UAF or kernel will crash or nothing happens */ +cleanup: + timer_start_delete_race__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index c3ab9b6fb069..f2f5d36ae00a 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -1,9 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ #define _GNU_SOURCE -#include <test_progs.h> #include <bpf/btf.h> -#include "cap_helpers.h" #include <fcntl.h> #include <sched.h> #include <signal.h> @@ -15,10 +13,19 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <sys/un.h> + +#include "bpf_util.h" +#include "cap_helpers.h" +#include "sysctl_helpers.h" +#include "test_progs.h" +#include "trace_helpers.h" + #include "priv_map.skel.h" #include "priv_prog.skel.h" #include "dummy_st_ops_success.skel.h" +#include "token_kallsyms.skel.h" #include "token_lsm.skel.h" +#include "priv_freplace_prog.skel.h" static inline int sys_mount(const char *dev_name, const char *dir_name, const char *type, unsigned long flags, @@ -114,7 +121,7 @@ static int create_bpffs_fd(void) static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) { - int mnt_fd, err; + int err; /* set up token delegation mount options */ err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); @@ -135,12 +142,7 @@ static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) if (err < 0) return -errno; - /* create O_PATH fd for detached mount */ - mnt_fd = sys_fsmount(fs_fd, 0, 0); - if (err < 0) - return -errno; - - return mnt_fd; + return 0; } /* send FD over Unix domain (AF_UNIX) socket */ @@ -286,6 +288,7 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba { int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1; struct token_lsm *lsm_skel = NULL; + char one; /* load and attach LSM "policy" before we go into unpriv userns */ lsm_skel = token_lsm__open_and_load(); @@ -332,13 +335,19 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba err = sendfd(sock_fd, fs_fd); if (!ASSERT_OK(err, "send_fs_fd")) goto cleanup; - zclose(fs_fd); + + /* wait that the parent reads the fd, does the fsconfig() calls + * and send us a signal that it is done + */ + err = read(sock_fd, &one, sizeof(one)); + if (!ASSERT_GE(err, 0, "read_one")) + goto cleanup; /* avoid mucking around with mount namespaces and mounting at - * well-known path, just get detach-mounted BPF FS fd back from parent + * well-known path, just create O_PATH fd for detached mount */ - err = recvfd(sock_fd, &mnt_fd); - if (!ASSERT_OK(err, "recv_mnt_fd")) + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (!ASSERT_OK_FD(mnt_fd, "mnt_fd")) goto cleanup; /* try to fspick() BPF FS and try to add some delegation options */ @@ -428,24 +437,24 @@ again: static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) { - int fs_fd = -1, mnt_fd = -1, token_fd = -1, err; + int fs_fd = -1, token_fd = -1, err; + char one = 1; err = recvfd(sock_fd, &fs_fd); if (!ASSERT_OK(err, "recv_bpffs_fd")) goto cleanup; - mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); - if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { + err = materialize_bpffs_fd(fs_fd, bpffs_opts); + if (!ASSERT_GE(err, 0, "materialize_bpffs_fd")) { err = -EINVAL; goto cleanup; } - zclose(fs_fd); - /* pass BPF FS context object to parent */ - err = sendfd(sock_fd, mnt_fd); - if (!ASSERT_OK(err, "send_mnt_fd")) + /* notify the child that we did the fsconfig() calls and it can proceed. */ + err = write(sock_fd, &one, sizeof(one)); + if (!ASSERT_EQ(err, sizeof(one), "send_one")) goto cleanup; - zclose(mnt_fd); + zclose(fs_fd); /* receive BPF token FD back from child for some extra tests */ err = recvfd(sock_fd, &token_fd); @@ -458,7 +467,6 @@ static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) cleanup: zclose(sock_fd); zclose(fs_fd); - zclose(mnt_fd); zclose(token_fd); if (child_pid > 0) @@ -788,6 +796,84 @@ static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel) return 0; } +static int userns_obj_priv_freplace_setup(int mnt_fd, struct priv_freplace_prog **fr_skel, + struct priv_prog **skel, int *tgt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + int err; + char buf[256]; + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + *skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(*skel, "priv_prog__open_opts")) + return -EINVAL; + err = priv_prog__load(*skel); + if (!ASSERT_OK(err, "priv_prog__load")) + return -EINVAL; + + *fr_skel = priv_freplace_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(*skel, "priv_freplace_prog__open_opts")) + return -EINVAL; + + *tgt_fd = bpf_program__fd((*skel)->progs.xdp_prog1); + return 0; +} + +/* Verify that freplace works from user namespace, because bpf token is loaded + * in bpf_object__prepare + */ +static int userns_obj_priv_freplace_prog(int mnt_fd, struct token_lsm *lsm_skel) +{ + struct priv_freplace_prog *fr_skel = NULL; + struct priv_prog *skel = NULL; + int err, tgt_fd; + + err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd); + if (!ASSERT_OK(err, "setup")) + goto out; + + err = bpf_object__prepare(fr_skel->obj); + if (!ASSERT_OK(err, "freplace__prepare")) + goto out; + + err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1"); + if (!ASSERT_OK(err, "set_attach_target")) + goto out; + + err = priv_freplace_prog__load(fr_skel); + ASSERT_OK(err, "priv_freplace_prog__load"); + +out: + priv_freplace_prog__destroy(fr_skel); + priv_prog__destroy(skel); + return err; +} + +/* Verify that replace fails to set attach target from user namespace without bpf token */ +static int userns_obj_priv_freplace_prog_fail(int mnt_fd, struct token_lsm *lsm_skel) +{ + struct priv_freplace_prog *fr_skel = NULL; + struct priv_prog *skel = NULL; + int err, tgt_fd; + + err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd); + if (!ASSERT_OK(err, "setup")) + goto out; + + err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1"); + if (ASSERT_ERR(err, "attach fails")) + err = 0; + else + err = -EINVAL; + +out: + priv_freplace_prog__destroy(fr_skel); + priv_prog__destroy(skel); + return err; +} + /* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, * which should cause struct_ops application to fail, as BTF won't be uploaded * into the kernel, even if STRUCT_OPS programs themselves are allowed @@ -965,9 +1051,96 @@ err_out: return -EINVAL; } +static bool kallsyms_has_bpf_func(struct ksyms *ksyms, const char *func_name) +{ + char name[256]; + int i; + + for (i = 0; i < ksyms->sym_cnt; i++) { + if (sscanf(ksyms->syms[i].name, "bpf_prog_%*[^_]_%255s", name) == 1 && + strcmp(name, func_name) == 0) + return true; + } + return false; +} + +static int userns_obj_priv_prog_kallsyms(int mnt_fd, struct token_lsm *lsm_skel) +{ + const char *func_names[] = { "xdp_main", "token_ksym_subprog" }; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct token_kallsyms *skel; + struct ksyms *ksyms = NULL; + char buf[256]; + int i, err; + + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = token_kallsyms__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "token_kallsyms__open_opts")) + return -EINVAL; + + err = token_kallsyms__load(skel); + if (!ASSERT_OK(err, "token_kallsyms__load")) + goto cleanup; + + ksyms = load_kallsyms_local(); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local")) { + err = -EINVAL; + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(func_names); i++) { + if (!ASSERT_TRUE(kallsyms_has_bpf_func(ksyms, func_names[i]), + func_names[i])) { + err = -EINVAL; + break; + } + } + +cleanup: + free_kallsyms_local(ksyms); + token_kallsyms__destroy(skel); + return err; +} + #define bit(n) (1ULL << (n)) -void test_token(void) +static int userns_bpf_token_info(int mnt_fd, struct token_lsm *lsm_skel) +{ + int err, token_fd = -1; + struct bpf_token_info info; + u32 len = sizeof(struct bpf_token_info); + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + memset(&info, 0, len); + err = bpf_obj_get_info_by_fd(token_fd, &info, &len); + if (!ASSERT_ERR(err, "bpf_obj_get_token_info")) + goto cleanup; + if (!ASSERT_EQ(info.allowed_cmds, bit(BPF_MAP_CREATE), "token_info_cmds_map_create")) { + err = -EINVAL; + goto cleanup; + } + if (!ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_XDP), "token_info_progs_xdp")) { + err = -EINVAL; + goto cleanup; + } + + /* The BPF_PROG_TYPE_EXT is not set in token */ + if (ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_EXT), "token_info_progs_ext")) + err = -EINVAL; + +cleanup: + zclose(token_fd); + return err; +} + +void serial_test_token(void) { if (test__start_subtest("map_token")) { struct bpffs_opts opts = { @@ -1004,12 +1177,28 @@ void test_token(void) if (test__start_subtest("obj_priv_prog")) { struct bpffs_opts opts = { .cmds = bit(BPF_PROG_LOAD), - .progs = bit(BPF_PROG_TYPE_KPROBE), + .progs = bit(BPF_PROG_TYPE_XDP), .attachs = ~0ULL, }; subtest_userns(&opts, userns_obj_priv_prog); } + if (test__start_subtest("obj_priv_freplace_prog")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID), + .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP), + .attachs = ~0ULL, + }; + subtest_userns(&opts, userns_obj_priv_freplace_prog); + } + if (test__start_subtest("obj_priv_freplace_prog_fail")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID), + .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP), + .attachs = ~0ULL, + }; + subtest_userns(&opts, userns_obj_priv_freplace_prog_fail); + } if (test__start_subtest("obj_priv_btf_fail")) { struct bpffs_opts opts = { /* disallow BTF loading */ @@ -1054,4 +1243,35 @@ void test_token(void) subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); } + if (test__start_subtest("bpf_token_info")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_MAP_CREATE), + .progs = bit(BPF_PROG_TYPE_XDP), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_bpf_token_info); + } + if (test__start_subtest("obj_priv_prog_kallsyms")) { + char perf_paranoid_orig[32] = {}; + char kptr_restrict_orig[32] = {}; + struct bpffs_opts opts = { + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD), + .progs = bit(BPF_PROG_TYPE_XDP), + .attachs = ~0ULL, + }; + + if (sysctl_set_or_fail("/proc/sys/kernel/perf_event_paranoid", perf_paranoid_orig, "0")) + goto cleanup; + if (sysctl_set_or_fail("/proc/sys/kernel/kptr_restrict", kptr_restrict_orig, "0")) + goto cleanup; + + subtest_userns(&opts, userns_obj_priv_prog_kallsyms); + +cleanup: + if (perf_paranoid_orig[0]) + sysctl_set_or_fail("/proc/sys/kernel/perf_event_paranoid", NULL, perf_paranoid_orig); + if (kptr_restrict_orig[0]) + sysctl_set_or_fail("/proc/sys/kernel/kptr_restrict", NULL, kptr_restrict_orig); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index e56e88596d64..a5a8104c1ddd 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -6,18 +6,21 @@ #include "trace_printk.lskel.h" #define SEARCHMSG "testing,testing" +#define SEARCHMSG_UTF8 "中文,测试" static void trace_pipe_cb(const char *str, void *data) { if (strstr(str, SEARCHMSG) != NULL) - (*(int *)data)++; + ((int *)data)[0]++; + if (strstr(str, SEARCHMSG_UTF8)) + ((int *)data)[1]++; } void serial_test_trace_printk(void) { struct trace_printk_lskel__bss *bss; struct trace_printk_lskel *skel; - int err = 0, found = 0; + int err = 0, found[2] = {}; skel = trace_printk_lskel__open(); if (!ASSERT_OK_PTR(skel, "trace_printk__open")) @@ -46,11 +49,24 @@ void serial_test_trace_printk(void) if (!ASSERT_GT(bss->trace_printk_ret, 0, "bss->trace_printk_ret")) goto cleanup; - /* verify our search string is in the trace buffer */ - ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000), - "read_trace_pipe_iter"); + if (!ASSERT_GT(bss->trace_printk_utf8_ran, 0, "bss->trace_printk_utf8_ran")) + goto cleanup; + + if (!ASSERT_GT(bss->trace_printk_utf8_ret, 0, "bss->trace_printk_utf8_ret")) + goto cleanup; + + if (!ASSERT_LT(bss->trace_printk_invalid_spec_ret, 0, + "bss->trace_printk_invalid_spec_ret")) + goto cleanup; + + /* verify our search strings are in the trace buffer */ + ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, found, 1000), + "read_trace_pipe_iter"); + + if (!ASSERT_EQ(found[0], bss->trace_printk_ran, "found")) + goto cleanup; - if (!ASSERT_EQ(found, bss->trace_printk_ran, "found")) + if (!ASSERT_EQ(found[1], bss->trace_printk_utf8_ran, "found_utf8")) goto cleanup; cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c index a222df765bc3..f9f9e1cb87bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c @@ -28,10 +28,62 @@ out: tracing_failure__destroy(skel); } +static void test_tracing_fail_prog(const char *prog_name, const char *exp_msg) +{ + struct tracing_failure *skel; + struct bpf_program *prog; + char log_buf[256]; + int err; + + skel = tracing_failure__open(); + if (!ASSERT_OK_PTR(skel, "tracing_failure__open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + + bpf_program__set_autoload(prog, true); + bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf)); + + err = tracing_failure__load(skel); + if (!ASSERT_ERR(err, "tracing_failure__load")) + goto out; + + ASSERT_HAS_SUBSTR(log_buf, exp_msg, "log_buf"); +out: + tracing_failure__destroy(skel); +} + +static void test_tracing_deny(void) +{ + int btf_id; + + /* __rcu_read_lock depends on CONFIG_PREEMPT_RCU */ + btf_id = libbpf_find_vmlinux_btf_id("__rcu_read_lock", BPF_TRACE_FENTRY); + if (btf_id <= 0) { + test__skip(); + return; + } + + test_tracing_fail_prog("tracing_deny", + "Attaching tracing programs to function '__rcu_read_lock' is rejected."); +} + +static void test_fexit_noreturns(void) +{ + test_tracing_fail_prog("fexit_noreturns", + "Attaching fexit/fsession/fmod_ret to __noreturn function 'do_exit' is rejected."); +} + void test_tracing_failure(void) { if (test__start_subtest("bpf_spin_lock")) test_bpf_spin_lock(true); if (test__start_subtest("bpf_spin_unlock")) test_bpf_spin_lock(false); + if (test__start_subtest("tracing_deny")) + test_tracing_deny(); + if (test__start_subtest("fexit_noreturns")) + test_fexit_noreturns(); } diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c index 19e68d4b3532..6f8c0bfb0415 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c @@ -112,10 +112,39 @@ destroy_skel: tracing_struct_many_args__destroy(skel); } +static void test_union_args(void) +{ + struct tracing_struct *skel; + int err; + + skel = tracing_struct__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load")) + return; + + err = tracing_struct__attach(skel); + if (!ASSERT_OK(err, "tracing_struct__attach")) + goto out; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->ut1_a_a, 1, "ut1:a.arg.a"); + ASSERT_EQ(skel->bss->ut1_b, 4, "ut1:b"); + ASSERT_EQ(skel->bss->ut1_c, 5, "ut1:c"); + + ASSERT_EQ(skel->bss->ut2_a, 6, "ut2:a"); + ASSERT_EQ(skel->bss->ut2_b_a, 2, "ut2:b.arg.a"); + ASSERT_EQ(skel->bss->ut2_b_b, 3, "ut2:b.arg.b"); + +out: + tracing_struct__destroy(skel); +} + void test_tracing_struct(void) { if (test__start_subtest("struct_args")) test_struct_args(); if (test__start_subtest("struct_many_args")) test_struct_many_args(); + if (test__start_subtest("union_args")) + test_union_args(); } diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index 6cd7349d4a2b..7321850db75f 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -30,16 +30,14 @@ static struct bpf_program *load_prog(char *file, char *name, struct inst *inst) return prog; } -/* TODO: use different target function to run in concurrent mode */ -void serial_test_trampoline_count(void) +void test_trampoline_count(void) { char *file = "test_trampoline_count.bpf.o"; char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" }; - int bpf_max_tramp_links, err, i, prog_fd; + int bpf_max_tramp_links, i; struct bpf_program *prog; struct bpf_link *link; struct inst *inst; - LIBBPF_OPTS(bpf_test_run_opts, opts); bpf_max_tramp_links = get_bpf_max_tramp_links(); if (!ASSERT_GE(bpf_max_tramp_links, 1, "bpf_max_tramp_links")) @@ -80,16 +78,7 @@ void serial_test_trampoline_count(void) goto cleanup; /* and finally execute the probe */ - prog_fd = bpf_program__fd(prog); - if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) - goto cleanup; - - err = bpf_prog_test_run_opts(prog_fd, &opts); - if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) - goto cleanup; - - ASSERT_EQ(opts.retval & 0xffff, 33, "bpf_modify_return_test.result"); - ASSERT_EQ(opts.retval >> 16, 2, "bpf_modify_return_test.side_effect"); + ASSERT_OK(trigger_module_test_read(256), "trigger_module_test_read"); cleanup: for (; i >= 0; i--) { diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c index 472f4f9fa95f..64404602b9ab 100644 --- a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c +++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c @@ -8,6 +8,7 @@ #include "cap_helpers.h" #include "bpf_util.h" +#include "sysctl_helpers.h" /* Using CAP_LAST_CAP is risky here, since it can get pulled in from * an old /usr/include/linux/capability.h and be < CAP_BPF; as a result @@ -36,26 +37,6 @@ static void process_perfbuf(void *ctx, int cpu, void *data, __u32 len) got_perfbuf_val = *(__u32 *)data; } -static int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val) -{ - int ret = 0; - FILE *fp; - - fp = fopen(sysctl_path, "r+"); - if (!fp) - return -errno; - if (old_val && fscanf(fp, "%s", old_val) <= 0) { - ret = -ENOENT; - } else if (!old_val || strcmp(old_val, new_val) != 0) { - fseek(fp, 0, SEEK_SET); - if (fprintf(fp, "%s", new_val) < 0) - ret = -errno; - } - fclose(fp); - - return ret; -} - static void test_unpriv_bpf_disabled_positive(struct test_unpriv_bpf_disabled *skel, __u32 prog_id, int prog_fd, int perf_fd, char **map_paths, int *map_fds) diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c index cf3e0e7a64fa..86404476c1da 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c @@ -2,6 +2,7 @@ /* Copyright (c) 2023 Hengqi Chen */ #include <test_progs.h> +#include <asm/ptrace.h> #include "test_uprobe.skel.h" static FILE *urand_spawn(int *pid) @@ -33,7 +34,7 @@ static int urand_trigger(FILE **urand_pipe) return exit_code; } -void test_uprobe(void) +static void test_uprobe_attach(void) { LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); struct test_uprobe *skel; @@ -93,3 +94,156 @@ cleanup: pclose(urand_pipe); test_uprobe__destroy(skel); } + +#ifdef __x86_64__ +__naked __maybe_unused unsigned long uprobe_regs_change_trigger(void) +{ + asm volatile ( + "ret\n" + ); +} + +static __naked void uprobe_regs_change(struct pt_regs *before, struct pt_regs *after) +{ + asm volatile ( + "movq %r11, 48(%rdi)\n" + "movq %r10, 56(%rdi)\n" + "movq %r9, 64(%rdi)\n" + "movq %r8, 72(%rdi)\n" + "movq %rax, 80(%rdi)\n" + "movq %rcx, 88(%rdi)\n" + "movq %rdx, 96(%rdi)\n" + "movq %rsi, 104(%rdi)\n" + "movq %rdi, 112(%rdi)\n" + + /* save 2nd argument */ + "pushq %rsi\n" + "call uprobe_regs_change_trigger\n" + + /* save return value and load 2nd argument pointer to rax */ + "pushq %rax\n" + "movq 8(%rsp), %rax\n" + + "movq %r11, 48(%rax)\n" + "movq %r10, 56(%rax)\n" + "movq %r9, 64(%rax)\n" + "movq %r8, 72(%rax)\n" + "movq %rcx, 88(%rax)\n" + "movq %rdx, 96(%rax)\n" + "movq %rsi, 104(%rax)\n" + "movq %rdi, 112(%rax)\n" + + /* restore return value and 2nd argument */ + "pop %rax\n" + "pop %rsi\n" + + "movq %rax, 80(%rsi)\n" + "ret\n" + ); +} + +static void regs_common(void) +{ + struct pt_regs before = {}, after = {}, expected = { + .rax = 0xc0ffe, + .rcx = 0xbad, + .rdx = 0xdead, + .r8 = 0x8, + .r9 = 0x9, + .r10 = 0x10, + .r11 = 0x11, + .rdi = 0x12, + .rsi = 0x13, + }; + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct test_uprobe *skel; + + skel = test_uprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->my_pid = getpid(); + skel->bss->regs = expected; + + uprobe_opts.func_name = "uprobe_regs_change_trigger"; + skel->links.test_regs_change = bpf_program__attach_uprobe_opts(skel->progs.test_regs_change, + -1, + "/proc/self/exe", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.test_regs_change, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + uprobe_regs_change(&before, &after); + + ASSERT_EQ(after.rax, expected.rax, "ax"); + ASSERT_EQ(after.rcx, expected.rcx, "cx"); + ASSERT_EQ(after.rdx, expected.rdx, "dx"); + ASSERT_EQ(after.r8, expected.r8, "r8"); + ASSERT_EQ(after.r9, expected.r9, "r9"); + ASSERT_EQ(after.r10, expected.r10, "r10"); + ASSERT_EQ(after.r11, expected.r11, "r11"); + ASSERT_EQ(after.rdi, expected.rdi, "rdi"); + ASSERT_EQ(after.rsi, expected.rsi, "rsi"); + +cleanup: + test_uprobe__destroy(skel); +} + +static noinline unsigned long uprobe_regs_change_ip_1(void) +{ + return 0xc0ffee; +} + +static noinline unsigned long uprobe_regs_change_ip_2(void) +{ + return 0xdeadbeef; +} + +static void regs_ip(void) +{ + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct test_uprobe *skel; + unsigned long ret; + + skel = test_uprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->my_pid = getpid(); + skel->bss->ip = (unsigned long) uprobe_regs_change_ip_2; + + uprobe_opts.func_name = "uprobe_regs_change_ip_1"; + skel->links.test_regs_change_ip = bpf_program__attach_uprobe_opts( + skel->progs.test_regs_change_ip, + -1, + "/proc/self/exe", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.test_regs_change_ip, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + ret = uprobe_regs_change_ip_1(); + ASSERT_EQ(ret, 0xdeadbeef, "ret"); + +cleanup: + test_uprobe__destroy(skel); +} + +static void test_uprobe_regs_change(void) +{ + if (test__start_subtest("regs_change_common")) + regs_common(); + if (test__start_subtest("regs_change_ip")) + regs_ip(); +} +#else +static void test_uprobe_regs_change(void) { } +#endif + +void test_uprobe(void) +{ + if (test__start_subtest("attach")) + test_uprobe_attach(); + test_uprobe_regs_change(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 2ee17ef1dae2..56cbea280fbd 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -62,8 +62,10 @@ static void release_child(struct child *child) return; close(child->go[1]); close(child->go[0]); - if (child->thread) + if (child->thread) { pthread_join(child->thread, NULL); + child->thread = 0; + } close(child->c2p[0]); close(child->c2p[1]); if (child->pid > 0) @@ -331,6 +333,8 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi { static struct child child; + memset(&child, 0, sizeof(child)); + /* no pid filter */ __test_attach_api(binary, pattern, opts, NULL); diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index c397336fe1ed..955a37751b52 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -8,22 +8,31 @@ #include <asm/ptrace.h> #include <linux/compiler.h> #include <linux/stringify.h> +#include <linux/kernel.h> #include <sys/wait.h> #include <sys/syscall.h> #include <sys/prctl.h> #include <asm/prctl.h> #include "uprobe_syscall.skel.h" #include "uprobe_syscall_executed.skel.h" +#include "bpf/libbpf_internal.h" -__naked unsigned long uretprobe_regs_trigger(void) +#define USDT_NOP .byte 0x0f, 0x1f, 0x44, 0x00, 0x00 +#include "usdt.h" + +#pragma GCC diagnostic ignored "-Wattributes" + +__attribute__((aligned(16))) +__nocf_check __weak __naked unsigned long uprobe_regs_trigger(void) { asm volatile ( + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n" /* nop5 */ "movq $0xdeadbeef, %rax\n" "ret\n" ); } -__naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after) +__naked void uprobe_regs(struct pt_regs *before, struct pt_regs *after) { asm volatile ( "movq %r15, 0(%rdi)\n" @@ -44,15 +53,17 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after) "movq $0, 120(%rdi)\n" /* orig_rax */ "movq $0, 128(%rdi)\n" /* rip */ "movq $0, 136(%rdi)\n" /* cs */ + "pushq %rax\n" "pushf\n" "pop %rax\n" "movq %rax, 144(%rdi)\n" /* eflags */ + "pop %rax\n" "movq %rsp, 152(%rdi)\n" /* rsp */ "movq $0, 160(%rdi)\n" /* ss */ /* save 2nd argument */ "pushq %rsi\n" - "call uretprobe_regs_trigger\n" + "call uprobe_regs_trigger\n" /* save return value and load 2nd argument pointer to rax */ "pushq %rax\n" @@ -92,25 +103,37 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after) ); } -static void test_uretprobe_regs_equal(void) +static void test_uprobe_regs_equal(bool retprobe) { + LIBBPF_OPTS(bpf_uprobe_opts, opts, + .retprobe = retprobe, + ); struct uprobe_syscall *skel = NULL; struct pt_regs before = {}, after = {}; unsigned long *pb = (unsigned long *) &before; unsigned long *pa = (unsigned long *) &after; unsigned long *pp; + unsigned long offset; unsigned int i, cnt; - int err; + + offset = get_uprobe_offset(&uprobe_regs_trigger); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + return; skel = uprobe_syscall__open_and_load(); if (!ASSERT_OK_PTR(skel, "uprobe_syscall__open_and_load")) goto cleanup; - err = uprobe_syscall__attach(skel); - if (!ASSERT_OK(err, "uprobe_syscall__attach")) + skel->links.probe = bpf_program__attach_uprobe_opts(skel->progs.probe, + 0, "/proc/self/exe", offset, &opts); + if (!ASSERT_OK_PTR(skel->links.probe, "bpf_program__attach_uprobe_opts")) goto cleanup; - uretprobe_regs(&before, &after); + /* make sure uprobe gets optimized */ + if (!retprobe) + uprobe_regs_trigger(); + + uprobe_regs(&before, &after); pp = (unsigned long *) &skel->bss->regs; cnt = sizeof(before)/sizeof(*pb); @@ -119,7 +142,7 @@ static void test_uretprobe_regs_equal(void) unsigned int offset = i * sizeof(unsigned long); /* - * Check register before and after uretprobe_regs_trigger call + * Check register before and after uprobe_regs_trigger call * that triggers the uretprobe. */ switch (offset) { @@ -133,7 +156,7 @@ static void test_uretprobe_regs_equal(void) /* * Check register seen from bpf program and register after - * uretprobe_regs_trigger call + * uprobe_regs_trigger call (with rax exception, check below). */ switch (offset) { /* @@ -146,6 +169,15 @@ static void test_uretprobe_regs_equal(void) case offsetof(struct pt_regs, rsp): case offsetof(struct pt_regs, ss): break; + /* + * uprobe does not see return value in rax, it needs to see the + * original (before) rax value + */ + case offsetof(struct pt_regs, rax): + if (!retprobe) { + ASSERT_EQ(pp[i], pb[i], "uprobe rax prog-before value check"); + break; + } default: if (!ASSERT_EQ(pp[i], pa[i], "register prog-after value check")) fprintf(stdout, "failed register offset %u\n", offset); @@ -175,7 +207,7 @@ static int write_bpf_testmod_uprobe(unsigned long offset) return ret != n ? (int) ret : 0; } -static void test_uretprobe_regs_change(void) +static void test_regs_change(void) { struct pt_regs before = {}, after = {}; unsigned long *pb = (unsigned long *) &before; @@ -183,13 +215,16 @@ static void test_uretprobe_regs_change(void) unsigned long cnt = sizeof(before)/sizeof(*pb); unsigned int i, err, offset; - offset = get_uprobe_offset(uretprobe_regs_trigger); + offset = get_uprobe_offset(uprobe_regs_trigger); err = write_bpf_testmod_uprobe(offset); if (!ASSERT_OK(err, "register_uprobe")) return; - uretprobe_regs(&before, &after); + /* make sure uprobe gets optimized */ + uprobe_regs_trigger(); + + uprobe_regs(&before, &after); err = write_bpf_testmod_uprobe(0); if (!ASSERT_OK(err, "unregister_uprobe")) @@ -251,7 +286,8 @@ static void test_uretprobe_syscall_call(void) .retprobe = true, ); struct uprobe_syscall_executed *skel; - int pid, status, err, go[2], c; + int pid, status, err, go[2], c = 0; + struct bpf_link *link; if (!ASSERT_OK(pipe(go), "pipe")) return; @@ -277,11 +313,14 @@ static void test_uretprobe_syscall_call(void) _exit(0); } - skel->links.test = bpf_program__attach_uprobe_multi(skel->progs.test, pid, - "/proc/self/exe", - "uretprobe_syscall_call", &opts); - if (!ASSERT_OK_PTR(skel->links.test, "bpf_program__attach_uprobe_multi")) + skel->bss->pid = pid; + + link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi, + pid, "/proc/self/exe", + "uretprobe_syscall_call", &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) goto cleanup; + skel->links.test_uretprobe_multi = link; /* kick the child */ write(go[1], &c, 1); @@ -301,6 +340,256 @@ cleanup: close(go[0]); } +#define TRAMP "[uprobes-trampoline]" + +__attribute__((aligned(16))) +__nocf_check __weak __naked void uprobe_test(void) +{ + asm volatile (" \n" + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n" + "ret \n" + ); +} + +__attribute__((aligned(16))) +__nocf_check __weak void usdt_test(void) +{ + USDT(optimized_uprobe, usdt); +} + +static int find_uprobes_trampoline(void *tramp_addr) +{ + void *start, *end; + char line[128]; + int ret = -1; + FILE *maps; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + fprintf(stderr, "cannot open maps\n"); + return -1; + } + + while (fgets(line, sizeof(line), maps)) { + int m = -1; + + /* We care only about private r-x mappings. */ + if (sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", &start, &end, &m) != 2) + continue; + if (m < 0) + continue; + if (!strncmp(&line[m], TRAMP, sizeof(TRAMP)-1) && (start == tramp_addr)) { + ret = 0; + break; + } + } + + fclose(maps); + return ret; +} + +static unsigned char nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 }; + +static void *find_nop5(void *fn) +{ + int i; + + for (i = 0; i < 10; i++) { + if (!memcmp(nop5, fn + i, 5)) + return fn + i; + } + return NULL; +} + +typedef void (__attribute__((nocf_check)) *trigger_t)(void); + +static void *check_attach(struct uprobe_syscall_executed *skel, trigger_t trigger, + void *addr, int executed) +{ + struct __arch_relative_insn { + __u8 op; + __s32 raddr; + } __packed *call; + void *tramp = NULL; + + /* Uprobe gets optimized after first trigger, so let's press twice. */ + trigger(); + trigger(); + + /* Make sure bpf program got executed.. */ + ASSERT_EQ(skel->bss->executed, executed, "executed"); + + /* .. and check the trampoline is as expected. */ + call = (struct __arch_relative_insn *) addr; + tramp = (void *) (call + 1) + call->raddr; + ASSERT_EQ(call->op, 0xe8, "call"); + ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline"); + + return tramp; +} + +static void check_detach(void *addr, void *tramp) +{ + /* [uprobes_trampoline] stays after detach */ + ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline"); + ASSERT_OK(memcmp(addr, nop5, 5), "nop5"); +} + +static void check(struct uprobe_syscall_executed *skel, struct bpf_link *link, + trigger_t trigger, void *addr, int executed) +{ + void *tramp; + + tramp = check_attach(skel, trigger, addr, executed); + bpf_link__destroy(link); + check_detach(addr, tramp); +} + +static void test_uprobe_legacy(void) +{ + struct uprobe_syscall_executed *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_opts, opts, + .retprobe = true, + ); + struct bpf_link *link; + unsigned long offset; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + goto cleanup; + + /* uprobe */ + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe, + 0, "/proc/self/exe", offset, NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + + /* uretprobe */ + skel->bss->executed = 0; + + link = bpf_program__attach_uprobe_opts(skel->progs.test_uretprobe, + 0, "/proc/self/exe", offset, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + +static void test_uprobe_multi(void) +{ + struct uprobe_syscall_executed *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + struct bpf_link *link; + unsigned long offset; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + goto cleanup; + + opts.offsets = &offset; + opts.cnt = 1; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + /* uprobe.multi */ + link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_multi, + 0, "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + + /* uretprobe.multi */ + skel->bss->executed = 0; + opts.retprobe = true; + link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi, + 0, "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + +static void test_uprobe_session(void) +{ + struct uprobe_syscall_executed *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, + .session = true, + ); + struct bpf_link *link; + unsigned long offset; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + goto cleanup; + + opts.offsets = &offset; + opts.cnt = 1; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_session, + 0, "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 4); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + +static void test_uprobe_usdt(void) +{ + struct uprobe_syscall_executed *skel; + struct bpf_link *link; + void *addr; + + errno = 0; + addr = find_nop5(usdt_test); + if (!ASSERT_OK_PTR(addr, "find_nop5")) + return; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + link = bpf_program__attach_usdt(skel->progs.test_usdt, + -1 /* all PIDs */, "/proc/self/exe", + "optimized_uprobe", "usdt", NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_usdt")) + goto cleanup; + + check(skel, link, usdt_test, addr, 2); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + /* * Borrowed from tools/testing/selftests/x86/test_shadow_stack.c. * @@ -343,43 +632,172 @@ static void test_uretprobe_shadow_stack(void) return; } - /* Run all of the uretprobe tests. */ - test_uretprobe_regs_equal(); - test_uretprobe_regs_change(); + /* Run all the tests with shadow stack in place. */ + + test_uprobe_regs_equal(false); + test_uprobe_regs_equal(true); test_uretprobe_syscall_call(); + test_uprobe_legacy(); + test_uprobe_multi(); + test_uprobe_session(); + test_uprobe_usdt(); + + test_regs_change(); + ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK); } -#else -static void test_uretprobe_regs_equal(void) + +static volatile bool race_stop; + +static USDT_DEFINE_SEMA(race); + +static void *worker_trigger(void *arg) { - test__skip(); + unsigned long rounds = 0; + + while (!race_stop) { + uprobe_test(); + rounds++; + } + + printf("tid %ld trigger rounds: %lu\n", sys_gettid(), rounds); + return NULL; } -static void test_uretprobe_regs_change(void) +static void *worker_attach(void *arg) { - test__skip(); + LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct uprobe_syscall_executed *skel; + unsigned long rounds = 0, offset; + const char *sema[2] = { + __stringify(USDT_SEMA(race)), + NULL, + }; + unsigned long *ref; + int err; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + return NULL; + + err = elf_resolve_syms_offsets("/proc/self/exe", 1, (const char **) &sema, &ref, STT_OBJECT); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_sema")) + return NULL; + + opts.ref_ctr_offset = *ref; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return NULL; + + skel->bss->pid = getpid(); + + while (!race_stop) { + skel->links.test_uprobe = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe, + 0, "/proc/self/exe", offset, &opts); + if (!ASSERT_OK_PTR(skel->links.test_uprobe, "bpf_program__attach_uprobe_opts")) + break; + + bpf_link__destroy(skel->links.test_uprobe); + skel->links.test_uprobe = NULL; + rounds++; + } + + printf("tid %ld attach rounds: %lu hits: %d\n", sys_gettid(), rounds, skel->bss->executed); + uprobe_syscall_executed__destroy(skel); + free(ref); + return NULL; } -static void test_uretprobe_syscall_call(void) +static useconds_t race_msec(void) { - test__skip(); + char *env; + + env = getenv("BPF_SELFTESTS_UPROBE_SYSCALL_RACE_MSEC"); + if (env) + return atoi(env); + + /* default duration is 500ms */ + return 500; } -static void test_uretprobe_shadow_stack(void) +static void test_uprobe_race(void) { - test__skip(); + int err, i, nr_threads; + pthread_t *threads; + + nr_threads = libbpf_num_possible_cpus(); + if (!ASSERT_GT(nr_threads, 0, "libbpf_num_possible_cpus")) + return; + nr_threads = max(2, nr_threads); + + threads = alloca(sizeof(*threads) * nr_threads); + if (!ASSERT_OK_PTR(threads, "malloc")) + return; + + for (i = 0; i < nr_threads; i++) { + err = pthread_create(&threads[i], NULL, i % 2 ? worker_trigger : worker_attach, + NULL); + if (!ASSERT_OK(err, "pthread_create")) + goto cleanup; + } + + usleep(race_msec() * 1000); + +cleanup: + race_stop = true; + for (nr_threads = i, i = 0; i < nr_threads; i++) + pthread_join(threads[i], NULL); + + ASSERT_FALSE(USDT_SEMA_IS_ACTIVE(race), "race_semaphore"); } + +#ifndef __NR_uprobe +#define __NR_uprobe 336 #endif -void test_uprobe_syscall(void) +static void test_uprobe_error(void) +{ + long err = syscall(__NR_uprobe); + + ASSERT_EQ(err, -1, "error"); + ASSERT_EQ(errno, ENXIO, "errno"); +} + +static void __test_uprobe_syscall(void) { if (test__start_subtest("uretprobe_regs_equal")) - test_uretprobe_regs_equal(); - if (test__start_subtest("uretprobe_regs_change")) - test_uretprobe_regs_change(); + test_uprobe_regs_equal(true); if (test__start_subtest("uretprobe_syscall_call")) test_uretprobe_syscall_call(); if (test__start_subtest("uretprobe_shadow_stack")) test_uretprobe_shadow_stack(); + if (test__start_subtest("uprobe_legacy")) + test_uprobe_legacy(); + if (test__start_subtest("uprobe_multi")) + test_uprobe_multi(); + if (test__start_subtest("uprobe_session")) + test_uprobe_session(); + if (test__start_subtest("uprobe_usdt")) + test_uprobe_usdt(); + if (test__start_subtest("uprobe_race")) + test_uprobe_race(); + if (test__start_subtest("uprobe_error")) + test_uprobe_error(); + if (test__start_subtest("uprobe_regs_equal")) + test_uprobe_regs_equal(false); + if (test__start_subtest("regs_change")) + test_regs_change(); +} +#else +static void __test_uprobe_syscall(void) +{ + test__skip(); +} +#endif + +void test_uprobe_syscall(void) +{ + __test_uprobe_syscall(); } diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 56ed1eb9b527..69759b27794d 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -40,12 +40,79 @@ static void __always_inline trigger_func(int x) { } } -static void subtest_basic_usdt(void) +#if defined(__x86_64__) || defined(__i386__) +/* + * SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)" + * - 'size' is the size in bytes of the array element, and its sign indicates + * whether the type is signed (negative) or unsigned (positive). + * - 'base_reg' is the register holding the base address, normally rdx or edx + * - 'index_reg' is the register holding the index, normally rax or eax + * - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the + * size of the element type. + * + * For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be: + * - size: -2 (negative because 'short' is signed) + * - scale: 2 (since sizeof(short) == 2) + * + * The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386 + */ +static volatile short array[] = {-1, -2, -3, -4}; + +#if defined(__x86_64__) +#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2) +#else +#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2) +#endif + +unsigned short test_usdt_sib_semaphore SEC(".probes"); + +static void trigger_sib_spec(void) +{ + /* + * Force SIB addressing with inline assembly. + * + * You must compile with -std=gnu99 or -std=c99 to use the + * STAP_PROBE_ASM macro. + * + * The STAP_PROBE_ASM macro generates a quoted string that gets + * inserted between the surrounding assembly instructions. In this + * case, USDT_SIB_ARG_SPEC is embedded directly into the instruction + * stream, creating a probe point between the asm statement boundaries. + * It works fine with gcc/clang. + * + * Register constraints: + * - "d"(array): Binds the 'array' variable to %rdx or %edx register + * - "a"(0): Binds the constant 0 to %rax or %eax register + * These ensure that when USDT_SIB_ARG_SPEC references %%rdx(%edx) and + * %%rax(%eax), they contain the expected values for SIB addressing. + * + * The "memory" clobber prevents the compiler from reordering memory + * accesses around the probe point, ensuring that the probe behavior + * is predictable and consistent. + */ + asm volatile( + STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC) + : + : "d"(array), "a"(0) + : "memory" + ); +} +#endif + +static void subtest_basic_usdt(bool optimized) { LIBBPF_OPTS(bpf_usdt_opts, opts); struct test_usdt *skel; struct test_usdt__bss *bss; - int err; + int err, i, called; + const __u64 expected_cookie = 0xcafedeadbeeffeed; + +#define TRIGGER(x) ({ \ + trigger_func(x); \ + if (optimized) \ + trigger_func(x); \ + optimized ? 2 : 1; \ + }) skel = test_usdt__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -59,22 +126,32 @@ static void subtest_basic_usdt(void) goto cleanup; /* usdt0 won't be auto-attached */ - opts.usdt_cookie = 0xcafedeadbeeffeed; + opts.usdt_cookie = expected_cookie; skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, 0 /*self*/, "/proc/self/exe", "test", "usdt0", &opts); if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link")) goto cleanup; - trigger_func(1); +#if defined(__x86_64__) || defined(__i386__) + opts.usdt_cookie = expected_cookie; + skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib, + 0 /*self*/, "/proc/self/exe", + "test", "usdt_sib", &opts); + if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link")) + goto cleanup; +#endif + + called = TRIGGER(1); - ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called"); - ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called"); - ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called"); + ASSERT_EQ(bss->usdt0_called, called, "usdt0_called"); + ASSERT_EQ(bss->usdt3_called, called, "usdt3_called"); + ASSERT_EQ(bss->usdt12_called, called, "usdt12_called"); - ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie"); + ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie"); ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt"); ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret"); + ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size"); /* auto-attached usdt3 gets default zero cookie value */ ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie"); @@ -86,6 +163,9 @@ static void subtest_basic_usdt(void) ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1"); ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3"); + ASSERT_EQ(bss->usdt3_arg_sizes[0], 4, "usdt3_arg1_size"); + ASSERT_EQ(bss->usdt3_arg_sizes[1], 8, "usdt3_arg2_size"); + ASSERT_EQ(bss->usdt3_arg_sizes[2], 8, "usdt3_arg3_size"); /* auto-attached usdt12 gets default zero cookie value */ ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie"); @@ -104,17 +184,22 @@ static void subtest_basic_usdt(void) ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11"); ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12"); + int usdt12_expected_arg_sizes[12] = { 4, 4, 8, 8, 4, 8, 8, 8, 4, 2, 2, 1 }; + + for (i = 0; i < 12; i++) + ASSERT_EQ(bss->usdt12_arg_sizes[i], usdt12_expected_arg_sizes[i], "usdt12_arg_size"); + /* trigger_func() is marked __always_inline, so USDT invocations will be * inlined in two different places, meaning that each USDT will have * at least 2 different places to be attached to. This verifies that * bpf_program__attach_usdt() handles this properly and attaches to * all possible places of USDT invocation. */ - trigger_func(2); + called += TRIGGER(2); - ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called"); - ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called"); - ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called"); + ASSERT_EQ(bss->usdt0_called, called, "usdt0_called"); + ASSERT_EQ(bss->usdt3_called, called, "usdt3_called"); + ASSERT_EQ(bss->usdt12_called, called, "usdt12_called"); /* only check values that depend on trigger_func()'s input value */ ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1"); @@ -133,9 +218,9 @@ static void subtest_basic_usdt(void) if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach")) goto cleanup; - trigger_func(3); + called += TRIGGER(3); - ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called"); + ASSERT_EQ(bss->usdt3_called, called, "usdt3_called"); /* this time usdt3 has custom cookie */ ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie"); ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt"); @@ -147,10 +232,111 @@ static void subtest_basic_usdt(void) ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3"); +#if defined(__x86_64__) || defined(__i386__) + trigger_sib_spec(); + ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called"); + ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie"); + ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt"); + ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg"); + ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret"); + ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size"); +#endif + cleanup: test_usdt__destroy(skel); +#undef TRIGGER } +#ifdef __x86_64__ +extern void usdt_1(void); +extern void usdt_2(void); + +static unsigned char nop1[1] = { 0x90 }; +static unsigned char nop1_nop5_combo[6] = { 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 }; + +static void *find_instr(void *fn, unsigned char *instr, size_t cnt) +{ + int i; + + for (i = 0; i < 10; i++) { + if (!memcmp(instr, fn + i, cnt)) + return fn + i; + } + return NULL; +} + +static void subtest_optimized_attach(void) +{ + struct test_usdt *skel; + __u8 *addr_1, *addr_2; + + /* usdt_1 USDT probe has single nop instruction */ + addr_1 = find_instr(usdt_1, nop1_nop5_combo, 6); + if (!ASSERT_NULL(addr_1, "usdt_1_find_nop1_nop5_combo")) + return; + + addr_1 = find_instr(usdt_1, nop1, 1); + if (!ASSERT_OK_PTR(addr_1, "usdt_1_find_nop1")) + return; + + /* usdt_2 USDT probe has nop,nop5 instructions combo */ + addr_2 = find_instr(usdt_2, nop1_nop5_combo, 6); + if (!ASSERT_OK_PTR(addr_2, "usdt_2_find_nop1_nop5_combo")) + return; + + skel = test_usdt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_usdt__open_and_load")) + return; + + skel->bss->expected_ip = (unsigned long) addr_1; + + /* + * Attach program on top of usdt_1 which is single nop probe, + * so the probe won't get optimized. + */ + skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed, + 0 /*self*/, "/proc/self/exe", + "optimized_attach", "usdt_1", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt_executed, "bpf_program__attach_usdt")) + goto cleanup; + + usdt_1(); + usdt_1(); + + /* int3 is on addr_1 address */ + ASSERT_EQ(*addr_1, 0xcc, "int3"); + ASSERT_EQ(skel->bss->executed, 2, "executed"); + + bpf_link__destroy(skel->links.usdt_executed); + + /* we expect the nop5 ip */ + skel->bss->expected_ip = (unsigned long) addr_2 + 1; + + /* + * Attach program on top of usdt_2 which is probe defined on top + * of nop1,nop5 combo, so the probe gets optimized on top of nop5. + */ + skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed, + 0 /*self*/, "/proc/self/exe", + "optimized_attach", "usdt_2", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt_executed, "bpf_program__attach_usdt")) + goto cleanup; + + usdt_2(); + usdt_2(); + + /* nop stays on addr_2 address */ + ASSERT_EQ(*addr_2, 0x90, "nop"); + + /* call is on addr_2 + 1 address */ + ASSERT_EQ(*(addr_2 + 1), 0xe8, "call"); + ASSERT_EQ(skel->bss->executed, 4, "executed"); + +cleanup: + test_usdt__destroy(skel); +} +#endif + unsigned short test_usdt_100_semaphore SEC(".probes"); unsigned short test_usdt_300_semaphore SEC(".probes"); unsigned short test_usdt_400_semaphore SEC(".probes"); @@ -261,8 +447,16 @@ static void subtest_multispec_usdt(void) */ trigger_300_usdts(); - /* we'll reuse usdt_100 BPF program for usdt_300 test */ bpf_link__destroy(skel->links.usdt_100); + + bss->usdt_100_called = 0; + bss->usdt_100_sum = 0; + + /* If built with arm64/clang, there will be much less number of specs + * for usdt_300 call sites. + */ +#if !defined(__aarch64__) || !defined(__clang__) + /* we'll reuse usdt_100 BPF program for usdt_300 test */ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe", "test", "usdt_300", NULL); err = -errno; @@ -273,13 +467,11 @@ static void subtest_multispec_usdt(void) /* let's check that there are no "dangling" BPF programs attached due * to partial success of the above test:usdt_300 attachment */ - bss->usdt_100_called = 0; - bss->usdt_100_sum = 0; - f300(777); /* this is 301st instance of usdt_300 */ ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called"); ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum"); +#endif /* This time we have USDT with 400 inlined invocations, but arg specs * should be the same across all sites, so libbpf will only need to @@ -410,7 +602,13 @@ cleanup: void test_usdt(void) { if (test__start_subtest("basic")) - subtest_basic_usdt(); + subtest_basic_usdt(false); +#ifdef __x86_64__ + if (test__start_subtest("basic_optimized")) + subtest_basic_usdt(true); + if (test__start_subtest("optimized_attach")) + subtest_optimized_attach(); +#endif if (test__start_subtest("multispec")) subtest_multispec_usdt(); if (test__start_subtest("urand_auto_attach")) diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index d424e7ecbd12..9fd3ae987321 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -21,8 +21,7 @@ #include "../progs/test_user_ringbuf.h" static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ; -static const long c_ringbuf_size = 1 << 12; /* 1 small page */ -static const long c_max_entries = c_ringbuf_size / c_sample_size; +static long c_ringbuf_size, c_max_entries; static void drain_current_samples(void) { @@ -424,7 +423,9 @@ static void test_user_ringbuf_loop(void) uint32_t remaining_samples = total_samples; int err; - BUILD_BUG_ON(total_samples <= c_max_entries); + if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries")) + return; + err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (err) return; @@ -686,6 +687,9 @@ void test_user_ringbuf(void) { int i; + c_ringbuf_size = getpagesize(); /* 1 page */ + c_max_entries = c_ringbuf_size / c_sample_size; + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { if (!test__start_subtest(success_tests[i].test_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 8a0e1ff8a2dc..06cd24e37b3f 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -3,10 +3,14 @@ #include <test_progs.h> #include "cap_helpers.h" +#include "verifier_align.skel.h" #include "verifier_and.skel.h" #include "verifier_arena.skel.h" #include "verifier_arena_large.skel.h" +#include "verifier_arena_globals1.skel.h" +#include "verifier_arena_globals2.skel.h" #include "verifier_array_access.skel.h" +#include "verifier_async_cb_context.skel.h" #include "verifier_basic_stack.skel.h" #include "verifier_bitfield_write.skel.h" #include "verifier_bounds.skel.h" @@ -14,9 +18,11 @@ #include "verifier_bounds_deduction_non_const.skel.h" #include "verifier_bounds_mix_sign_unsign.skel.h" #include "verifier_bpf_get_stack.skel.h" +#include "verifier_bpf_trap.skel.h" #include "verifier_bswap.skel.h" #include "verifier_btf_ctx_access.skel.h" #include "verifier_btf_unreliable_prog.skel.h" +#include "verifier_call_large_imm.skel.h" #include "verifier_cfg.skel.h" #include "verifier_cgroup_inv_retcode.skel.h" #include "verifier_cgroup_skb.skel.h" @@ -26,13 +32,16 @@ #include "verifier_ctx.skel.h" #include "verifier_ctx_sk_msg.skel.h" #include "verifier_d_path.skel.h" +#include "verifier_default_trusted_ptr.skel.h" #include "verifier_direct_packet_access.skel.h" #include "verifier_direct_stack_access_wraparound.skel.h" #include "verifier_div0.skel.h" +#include "verifier_div_mod_bounds.skel.h" #include "verifier_div_overflow.skel.h" #include "verifier_global_subprogs.skel.h" #include "verifier_global_ptr_args.skel.h" #include "verifier_gotol.skel.h" +#include "verifier_gotox.skel.h" #include "verifier_helper_access_var_len.skel.h" #include "verifier_helper_packet_access.skel.h" #include "verifier_helper_restricted.skel.h" @@ -45,6 +54,9 @@ #include "verifier_ldsx.skel.h" #include "verifier_leak_ptr.skel.h" #include "verifier_linked_scalars.skel.h" +#include "verifier_live_stack.skel.h" +#include "verifier_liveness_exp.skel.h" +#include "verifier_load_acquire.skel.h" #include "verifier_loops1.skel.h" #include "verifier_lwt.skel.h" #include "verifier_map_in_map.skel.h" @@ -57,6 +69,7 @@ #include "verifier_meta_access.skel.h" #include "verifier_movsx.skel.h" #include "verifier_mtu.skel.h" +#include "verifier_mul.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" #include "verifier_bpf_fastcall.skel.h" @@ -80,8 +93,11 @@ #include "verifier_spill_fill.skel.h" #include "verifier_spin_lock.skel.h" #include "verifier_stack_ptr.skel.h" +#include "verifier_store_release.skel.h" #include "verifier_subprog_precision.skel.h" +#include "verifier_subprog_topo.skel.h" #include "verifier_subreg.skel.h" +#include "verifier_tailcall.skel.h" #include "verifier_tailcall_jit.skel.h" #include "verifier_typedef.skel.h" #include "verifier_uninit.skel.h" @@ -100,7 +116,9 @@ #include "verifier_xdp_direct_packet_access.skel.h" #include "verifier_bits_iter.skel.h" #include "verifier_lsm.skel.h" +#include "verifier_jit_inline.skel.h" #include "irq.skel.h" +#include "verifier_ctx_ptr_param.skel.h" #define MAX_ENTRIES 11 @@ -121,7 +139,7 @@ static void run_tests_aux(const char *skel_name, /* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */ err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps); if (err) { - PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err)); return; } @@ -131,14 +149,17 @@ static void run_tests_aux(const char *skel_name, err = cap_enable_effective(old_caps, NULL); if (err) - PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err)); } #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL) +void test_verifier_align(void) { RUN(verifier_align); } void test_verifier_and(void) { RUN(verifier_and); } void test_verifier_arena(void) { RUN(verifier_arena); } void test_verifier_arena_large(void) { RUN(verifier_arena_large); } +void test_verifier_arena_globals1(void) { RUN(verifier_arena_globals1); } +void test_verifier_arena_globals2(void) { RUN(verifier_arena_globals2); } void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); } void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); } void test_verifier_bounds(void) { RUN(verifier_bounds); } @@ -146,25 +167,30 @@ void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); } void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); } void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); } +void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); } void test_verifier_bswap(void) { RUN(verifier_bswap); } void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); } void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); } +void test_verifier_call_large_imm(void) { RUN(verifier_call_large_imm); } void test_verifier_cfg(void) { RUN(verifier_cfg); } void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); } void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); } void test_verifier_cgroup_storage(void) { RUN(verifier_cgroup_storage); } void test_verifier_const(void) { RUN(verifier_const); } void test_verifier_const_or(void) { RUN(verifier_const_or); } -void test_verifier_ctx(void) { RUN(verifier_ctx); } +void test_verifier_ctx(void) { RUN_TESTS(verifier_ctx); } void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); } void test_verifier_d_path(void) { RUN(verifier_d_path); } +void test_verifier_default_trusted_ptr(void) { RUN_TESTS(verifier_default_trusted_ptr); } void test_verifier_direct_packet_access(void) { RUN(verifier_direct_packet_access); } void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); } void test_verifier_div0(void) { RUN(verifier_div0); } +void test_verifier_div_mod_bounds(void) { RUN(verifier_div_mod_bounds); } void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); } void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); } void test_verifier_gotol(void) { RUN(verifier_gotol); } +void test_verifier_gotox(void) { RUN(verifier_gotox); } void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); } void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } @@ -173,10 +199,13 @@ void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); } void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); } +void test_verifier_load_acquire(void) { RUN(verifier_load_acquire); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); } +void test_verifier_live_stack(void) { RUN(verifier_live_stack); } +void test_verifier_liveness_exp(void) { RUN(verifier_liveness_exp); } void test_verifier_loops1(void) { RUN(verifier_loops1); } void test_verifier_lwt(void) { RUN(verifier_lwt); } void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); } @@ -188,6 +217,7 @@ void test_verifier_may_goto_1(void) { RUN(verifier_may_goto_1); } void test_verifier_may_goto_2(void) { RUN(verifier_may_goto_2); } void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_movsx(void) { RUN(verifier_movsx); } +void test_verifier_mul(void) { RUN(verifier_mul); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); } void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); } @@ -211,8 +241,11 @@ void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); } void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } +void test_verifier_store_release(void) { RUN(verifier_store_release); } void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); } +void test_verifier_subprog_topo(void) { RUN(verifier_subprog_topo); } void test_verifier_subreg(void) { RUN(verifier_subreg); } +void test_verifier_tailcall(void) { RUN(verifier_tailcall); } void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); } void test_verifier_typedef(void) { RUN(verifier_typedef); } void test_verifier_uninit(void) { RUN(verifier_uninit); } @@ -232,6 +265,8 @@ void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } void test_verifier_lsm(void) { RUN(verifier_lsm); } void test_irq(void) { RUN(irq); } void test_verifier_mtu(void) { RUN(verifier_mtu); } +void test_verifier_jit_inline(void) { RUN(verifier_jit_inline); } +void test_verifier_ctx_ptr_param(void) { RUN(verifier_ctx_ptr_param); } static int init_test_val_map(struct bpf_object *obj, char *map_name) { @@ -268,6 +303,7 @@ void test_verifier_array_access(void) verifier_array_access__elf_bytes, init_array_access_maps); } +void test_verifier_async_cb_context(void) { RUN(verifier_async_cb_context); } static int init_value_ptr_arith_maps(struct bpf_object *obj) { diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c index 8337c6bc5b95..c01c0114af1b 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier_log.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c @@ -25,10 +25,10 @@ static bool check_prog_load(int prog_fd, bool expect_err, const char *tag) static struct { /* strategically placed before others to avoid accidental modification by kernel */ - char filler[1024]; - char buf[1024]; + char filler[16384]; + char buf[16384]; /* strategically placed after buf[] to catch more accidental corruptions */ - char reference[1024]; + char reference[16384]; } logs; static const struct bpf_insn *insns; static size_t insn_cnt; @@ -47,7 +47,7 @@ static int load_prog(struct bpf_prog_load_opts *opts, bool expect_load_error) static void verif_log_subtest(const char *name, bool expect_load_error, int log_level) { LIBBPF_OPTS(bpf_prog_load_opts, opts); - char *exp_log, prog_name[16], op_name[32]; + char *exp_log, prog_name[24], op_name[32]; struct test_log_buf *skel; struct bpf_program *prog; size_t fixed_log_sz; diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index ab0f02faa80c..f327feb8e38c 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -18,6 +18,7 @@ #include <linux/keyctl.h> #include <sys/xattr.h> #include <linux/fsverity.h> +#include <linux/module_signature.h> #include <test_progs.h> #include "test_verify_pkcs7_sig.skel.h" @@ -33,29 +34,6 @@ #define SHA256_DIGEST_SIZE 32 #endif -/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */ -#define MODULE_SIG_STRING "~Module signature appended~\n" - -/* - * Module signature information block. - * - * The constituents of the signature section are, in order: - * - * - Signer's name - * - Key identifier - * - Signature data - * - Information block - */ -struct module_signature { - __u8 algo; /* Public-key crypto algorithm [0] */ - __u8 hash; /* Digest algorithm [0] */ - __u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */ - __u8 signer_len; /* Length of signer's name [0] */ - __u8 key_id_len; /* Length of key identifier [0] */ - __u8 __pad[3]; - __be32 sig_len; /* Length of signature data */ -}; - struct data { __u8 data[MAX_DATA_SIZE]; __u32 data_len; @@ -215,7 +193,7 @@ static int populate_data_item_mod(struct data *data_item) return 0; modlen = st.st_size; - marker_len = sizeof(MODULE_SIG_STRING) - 1; + marker_len = sizeof(MODULE_SIGNATURE_MARKER) - 1; fd = open(mod_path, O_RDONLY); if (fd == -1) @@ -228,7 +206,7 @@ static int populate_data_item_mod(struct data *data_item) if (mod == MAP_FAILED) return -errno; - if (strncmp(mod + modlen - marker_len, MODULE_SIG_STRING, marker_len)) { + if (strncmp(mod + modlen - marker_len, MODULE_SIGNATURE_MARKER, marker_len)) { ret = -EINVAL; goto out; } @@ -268,7 +246,7 @@ static void test_verify_pkcs7_sig_from_map(void) char *tmp_dir; struct test_verify_pkcs7_sig *skel = NULL; struct bpf_map *map; - struct data data; + struct data data = {}; int ret, zero = 0; /* Trigger creation of session keyring. */ diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c index 99e438fe12ac..84831eecc935 100644 --- a/tools/testing/selftests/bpf/prog_tests/wq.c +++ b/tools/testing/selftests/bpf/prog_tests/wq.c @@ -16,12 +16,12 @@ void serial_test_wq(void) /* re-run the success test to check if the timer was actually executed */ wq_skel = wq__open_and_load(); - if (!ASSERT_OK_PTR(wq_skel, "wq_skel_load")) + if (!ASSERT_OK_PTR(wq_skel, "wq__open_and_load")) return; err = wq__attach(wq_skel); if (!ASSERT_OK(err, "wq_attach")) - return; + goto clean_up; prog_fd = bpf_program__fd(wq_skel->progs.test_syscall_array_sleepable); err = bpf_prog_test_run_opts(prog_fd, &topts); @@ -31,6 +31,7 @@ void serial_test_wq(void) usleep(50); /* 10 usecs should be enough, but give it extra */ ASSERT_EQ(wq_skel->bss->ok_sleepable, (1 << 1), "ok_sleepable"); +clean_up: wq__destroy(wq_skel); } @@ -38,3 +39,59 @@ void serial_test_failures_wq(void) { RUN_TESTS(wq_failures); } + +static void test_failure_map_no_btf(void) +{ + struct wq *skel = NULL; + char log[8192]; + const struct bpf_insn *insns; + size_t insn_cnt; + int ret, err, map_fd; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_size = sizeof(log), .log_buf = log, + .log_level = 2); + + skel = wq__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + err = bpf_object__prepare(skel->obj); + if (!ASSERT_OK(err, "skel__prepare")) + goto out; + + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "map_no_btf", sizeof(__u32), sizeof(__u64), 100, + NULL); + if (!ASSERT_GT(map_fd, -1, "map create")) + goto out; + + err = bpf_map__reuse_fd(skel->maps.array, map_fd); + if (!ASSERT_OK(err, "map reuse fd")) { + close(map_fd); + goto out; + } + + insns = bpf_program__insns(skel->progs.test_map_no_btf); + if (!ASSERT_OK_PTR(insns, "insns ptr")) + goto out; + + insn_cnt = bpf_program__insn_cnt(skel->progs.test_map_no_btf); + if (!ASSERT_GT(insn_cnt, 0u, "insn cnt")) + goto out; + + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + if (!ASSERT_LT(ret, 0, "prog load failed")) { + if (ret > 0) + close(ret); + goto out; + } + + ASSERT_HAS_SUBSTR(log, "map 'map_no_btf' has to have BTF in order to use bpf_wq", + "log complains no map BTF"); +out: + wq__destroy(skel); +} + +void test_wq_custom(void) +{ + if (test__start_subtest("test_failure_map_no_btf")) + test_failure_map_no_btf(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index b2b2d85dbb1b..43264347e7d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -37,21 +37,26 @@ static void test_xdp_adjust_tail_shrink(void) bpf_object__close(obj); } -static void test_xdp_adjust_tail_grow(void) +static void test_xdp_adjust_tail_grow(bool is_64k_pagesize) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; struct bpf_object *obj; - char buf[4096]; /* avoid segfault: large buf to hold grow results */ + char buf[8192]; /* avoid segfault: large buf to hold grow results */ __u32 expect_sz; int err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), .data_out = buf, .data_size_out = sizeof(buf), .repeat = 1, ); + /* topts.data_size_in as a special signal to bpf prog */ + if (is_64k_pagesize) + topts.data_size_in = sizeof(pkt_v4) - 1; + else + topts.data_size_in = sizeof(pkt_v4); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; @@ -208,7 +213,7 @@ out: bpf_object__close(obj); } -static void test_xdp_adjust_frags_tail_grow(void) +static void test_xdp_adjust_frags_tail_grow_4k(void) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; __u32 exp_size; @@ -246,14 +251,20 @@ static void test_xdp_adjust_frags_tail_grow(void) ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval"); ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size"); - for (i = 0; i < 9000; i++) - ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + for (i = 0; i < 9000; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + } - for (i = 9000; i < 9010; i++) - ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + for (i = 9000; i < 9010; i++) { + if (buf[i] != 0) + ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + } - for (i = 9010; i < 16384; i++) - ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + for (i = 9010; i < 16384; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + } /* Test a too large grow */ memset(buf, 1, 16384); @@ -273,16 +284,93 @@ out: bpf_object__close(obj); } +static void test_xdp_adjust_frags_tail_grow_64k(void) +{ + const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; + __u32 exp_size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, i, prog_fd; + __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + goto out; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(262144); + if (!ASSERT_OK_PTR(buf, "alloc buf 256Kb")) + goto out; + + /* Test case add 10 bytes to last frag */ + memset(buf, 1, 262144); + exp_size = 90000 + 10; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 90000; + topts.data_size_out = 262144; + err = bpf_prog_test_run_opts(prog_fd, &topts); + + ASSERT_OK(err, "90Kb+10b"); + ASSERT_EQ(topts.retval, XDP_TX, "90Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size"); + + for (i = 0; i < 90000; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "90Kb+10b-old"); + } + + for (i = 90000; i < 90010; i++) { + if (buf[i] != 0) + ASSERT_EQ(buf[i], 0, "90Kb+10b-new"); + } + + for (i = 90010; i < 262144; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "90Kb+10b-untouched"); + } + + /* Test a too large grow */ + memset(buf, 1, 262144); + exp_size = 90001; + + topts.data_in = topts.data_out = buf; + topts.data_size_in = 90001; + topts.data_size_out = 262144; + err = bpf_prog_test_run_opts(prog_fd, &topts); + + ASSERT_OK(err, "90Kb+10b"); + ASSERT_EQ(topts.retval, XDP_DROP, "90Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size"); + + free(buf); +out: + bpf_object__close(obj); +} + void test_xdp_adjust_tail(void) { + int page_size = getpagesize(); + if (test__start_subtest("xdp_adjust_tail_shrink")) test_xdp_adjust_tail_shrink(); if (test__start_subtest("xdp_adjust_tail_grow")) - test_xdp_adjust_tail_grow(); + test_xdp_adjust_tail_grow(page_size == 65536); if (test__start_subtest("xdp_adjust_tail_grow2")) test_xdp_adjust_tail_grow2(); if (test__start_subtest("xdp_adjust_frags_tail_shrink")) test_xdp_adjust_frags_tail_shrink(); - if (test__start_subtest("xdp_adjust_frags_tail_grow")) - test_xdp_adjust_frags_tail_grow(); + if (test__start_subtest("xdp_adjust_frags_tail_grow")) { + if (page_size == 65536) + test_xdp_adjust_frags_tail_grow_64k(); + else + test_xdp_adjust_frags_tail_grow_4k(); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index fb952703653e..c42488e445c2 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -191,13 +191,18 @@ fail: return -1; } -static void bonding_cleanup(struct skeletons *skeletons) +static void link_cleanup(struct skeletons *skeletons) { - restore_root_netns(); while (skeletons->nlinks) { skeletons->nlinks--; bpf_link__destroy(skeletons->links[skeletons->nlinks]); } +} + +static void bonding_cleanup(struct skeletons *skeletons) +{ + restore_root_netns(); + link_cleanup(skeletons); ASSERT_OK(system("ip link delete bond1"), "delete bond1"); ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1"); ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2"); @@ -493,6 +498,90 @@ out: system("ip link del bond_nest2"); } +/* + * Test that XDP redirect via xdp_master_redirect() does not crash when + * the bond master device is not up. When bond is in round-robin mode but + * never opened, rr_tx_counter is NULL. + */ +static void test_xdp_bonding_redirect_no_up(struct skeletons *skeletons) +{ + struct nstoken *nstoken = NULL; + int xdp_pass_fd; + int veth1_ifindex; + int err; + char pkt[ETH_HLEN + 1]; + struct xdp_md ctx_in = {}; + + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt, + .data_size_in = sizeof(pkt), + .ctx_in = &ctx_in, + .ctx_size_in = sizeof(ctx_in), + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, + .repeat = 1, + .batch_size = 1, + ); + + /* We can't use bonding_setup() because bond will be active */ + SYS(out, "ip netns add ns_rr_no_up"); + nstoken = open_netns("ns_rr_no_up"); + if (!ASSERT_OK_PTR(nstoken, "open ns_rr_no_up")) + goto out; + + /* bond0: active-backup, UP with slave veth0. + * Attaching native XDP to bond0 enables bpf_master_redirect_enabled_key + * globally. + */ + SYS(out, "ip link add bond0 type bond mode active-backup"); + SYS(out, "ip link add veth0 type veth peer name veth0p"); + SYS(out, "ip link set veth0 master bond0"); + SYS(out, "ip link set bond0 up"); + SYS(out, "ip link set veth0p up"); + + /* bond1: round-robin, never UP -> rr_tx_counter stays NULL */ + SYS(out, "ip link add bond1 type bond mode balance-rr"); + SYS(out, "ip link add veth1 type veth peer name veth1p"); + SYS(out, "ip link set veth1 master bond1"); + + veth1_ifindex = if_nametoindex("veth1"); + if (!ASSERT_GT(veth1_ifindex, 0, "veth1_ifindex")) + goto out; + + /* Attach native XDP to bond0 -> enables global redirect key */ + if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond0")) + goto out; + + /* Attach generic XDP (XDP_TX) to veth1. + * When packets arrive at veth1 via netif_receive_skb, do_xdp_generic() + * runs this program. XDP_TX + bond slave triggers xdp_master_redirect(). + */ + err = bpf_xdp_attach(veth1_ifindex, + bpf_program__fd(skeletons->xdp_tx->progs.xdp_tx), + XDP_FLAGS_SKB_MODE, NULL); + if (!ASSERT_OK(err, "attach generic XDP to veth1")) + goto out; + + /* Run BPF_PROG_TEST_RUN with XDP_PASS live frames on veth1. + * XDP_PASS frames become SKBs with skb->dev = veth1, entering + * netif_receive_skb -> do_xdp_generic -> xdp_master_redirect. + * Without the fix, bond_rr_gen_slave_id() dereferences NULL + * rr_tx_counter and crashes. + */ + xdp_pass_fd = bpf_program__fd(skeletons->xdp_dummy->progs.xdp_dummy_prog); + + memset(pkt, 0, sizeof(pkt)); + ctx_in.data_end = sizeof(pkt); + ctx_in.ingress_ifindex = veth1_ifindex; + + err = bpf_prog_test_run_opts(xdp_pass_fd, &opts); + ASSERT_OK(err, "xdp_pass test_run should not crash"); + +out: + link_cleanup(skeletons); + close_netns(nstoken); + SYS_NOFAIL("ip netns del ns_rr_no_up"); +} + static void test_xdp_bonding_features(struct skeletons *skeletons) { LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); @@ -610,6 +699,61 @@ out: system("ip link del bond"); } +/* + * Test that changing xmit_hash_policy to vlan+srcmac is rejected when a + * native XDP program is loaded on a bond in 802.3ad or balance-xor mode. + * These modes support XDP only when xmit_hash_policy != vlan+srcmac; freely + * changing the policy creates an inconsistency that triggers a WARNING in + * dev_xdp_uninstall() during device teardown. + */ +static void test_xdp_bonding_xmit_policy_compat(struct skeletons *skeletons) +{ + struct nstoken *nstoken = NULL; + int bond_ifindex = -1; + int xdp_fd, err; + + SYS(out, "ip netns add ns_xmit_policy"); + nstoken = open_netns("ns_xmit_policy"); + if (!ASSERT_OK_PTR(nstoken, "open ns_xmit_policy")) + goto out; + + /* 802.3ad with layer2+3 policy: native XDP is supported */ + SYS(out, "ip link add bond0 type bond mode 802.3ad xmit_hash_policy layer2+3"); + SYS(out, "ip link add veth0 type veth peer name veth0p"); + SYS(out, "ip link set veth0 master bond0"); + SYS(out, "ip link set bond0 up"); + + bond_ifindex = if_nametoindex("bond0"); + if (!ASSERT_GT(bond_ifindex, 0, "bond0 ifindex")) + goto out; + + xdp_fd = bpf_program__fd(skeletons->xdp_dummy->progs.xdp_dummy_prog); + if (!ASSERT_GE(xdp_fd, 0, "xdp_dummy fd")) + goto out; + + err = bpf_xdp_attach(bond_ifindex, xdp_fd, XDP_FLAGS_DRV_MODE, NULL); + if (!ASSERT_OK(err, "attach XDP to bond0")) + goto out; + + /* With XDP loaded, switching to vlan+srcmac must be rejected */ + err = system("ip link set bond0 type bond xmit_hash_policy vlan+srcmac 2>/dev/null"); + ASSERT_NEQ(err, 0, "vlan+srcmac change with XDP loaded should fail"); + + /* Detach XDP first, then the same change must succeed */ + ASSERT_OK(bpf_xdp_detach(bond_ifindex, XDP_FLAGS_DRV_MODE, NULL), + "detach XDP from bond0"); + + bond_ifindex = -1; + err = system("ip link set bond0 type bond xmit_hash_policy vlan+srcmac 2>/dev/null"); + ASSERT_OK(err, "vlan+srcmac change without XDP should succeed"); + +out: + if (bond_ifindex > 0) + bpf_xdp_detach(bond_ifindex, XDP_FLAGS_DRV_MODE, NULL); + close_netns(nstoken); + SYS_NOFAIL("ip netns del ns_xmit_policy"); +} + static int libbpf_debug_print(enum libbpf_print_level level, const char *format, va_list args) { @@ -677,9 +821,15 @@ void serial_test_xdp_bonding(void) test_case->xmit_policy); } + if (test__start_subtest("xdp_bonding_xmit_policy_compat")) + test_xdp_bonding_xmit_policy_compat(&skeletons); + if (test__start_subtest("xdp_bonding_redirect_multi")) test_xdp_bonding_redirect_multi(&skeletons); + if (test__start_subtest("xdp_bonding_redirect_no_up")) + test_xdp_bonding_redirect_no_up(&skeletons); + out: xdp_dummy__destroy(skeletons.xdp_dummy); xdp_tx__destroy(skeletons.xdp_tx); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 937da9b7532a..26159e0499c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -4,12 +4,21 @@ #include "test_xdp_context_test_run.skel.h" #include "test_xdp_meta.skel.h" -#define TX_ADDR "10.0.0.1" -#define RX_ADDR "10.0.0.2" #define RX_NAME "veth0" #define TX_NAME "veth1" #define TX_NETNS "xdp_context_tx" #define RX_NETNS "xdp_context_rx" +#define TAP_NAME "tap0" +#define DUMMY_NAME "dum0" +#define TAP_NETNS "xdp_context_tuntap" + +#define TEST_PAYLOAD_LEN 32 +static const __u8 test_payload[TEST_PAYLOAD_LEN] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, +}; void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, __u32 data_meta, __u32 data, __u32 data_end, @@ -38,6 +47,7 @@ void test_xdp_context_test_run(void) struct test_xdp_context_test_run *skel = NULL; char data[sizeof(pkt_v4) + sizeof(__u32)]; char bad_ctx[sizeof(struct xdp_md) + 1]; + char large_data[256]; struct xdp_md ctx_in, ctx_out; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, @@ -85,12 +95,7 @@ void test_xdp_context_test_run(void) test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data), 0, 0, 0); - /* Meta data must be 255 bytes or smaller */ - test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0); - - /* Total size of data must match data_end - data_meta */ - test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), - sizeof(data) - 1, 0, 0, 0); + /* Total size of data must be data_end - data_meta or larger */ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data) + 1, 0, 0, 0); @@ -109,10 +114,95 @@ void test_xdp_context_test_run(void) test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data), 0, 0, 1); + /* Meta data must be 216 bytes or smaller (256 - sizeof(struct + * xdp_frame)). Test both nearest invalid size and nearest invalid + * 4-byte-aligned size, and make sure data_in is large enough that we + * actually hit the check on metadata length + */ + opts.data_in = large_data; + opts.data_size_in = sizeof(large_data); + test_xdp_context_error(prog_fd, opts, 0, 217, sizeof(large_data), 0, 0, 0); + test_xdp_context_error(prog_fd, opts, 0, 220, sizeof(large_data), 0, 0, 0); + test_xdp_context_test_run__destroy(skel); } -void test_xdp_context_functional(void) +static int send_test_packet(int ifindex) +{ + int n, sock = -1; + __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + + /* We use the Ethernet header only to identify the test packet */ + struct ethhdr eth = { + .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF }, + }; + + memcpy(packet, ð, sizeof(eth)); + memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); + + sock = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (!ASSERT_GE(sock, 0, "socket")) + goto err; + + struct sockaddr_ll saddr = { + .sll_family = PF_PACKET, + .sll_ifindex = ifindex, + .sll_halen = ETH_ALEN + }; + n = sendto(sock, packet, sizeof(packet), 0, (struct sockaddr *)&saddr, + sizeof(saddr)); + if (!ASSERT_EQ(n, sizeof(packet), "sendto")) + goto err; + + close(sock); + return 0; + +err: + if (sock >= 0) + close(sock); + return -1; +} + +static int write_test_packet(int tap_fd) +{ + __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + int n; + + /* The Ethernet header is mostly not relevant. We use it to identify the + * test packet and some BPF helpers we exercise expect to operate on + * Ethernet frames carrying IP packets. Pretend that's the case. + */ + struct ethhdr eth = { + .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF }, + .h_proto = htons(ETH_P_IP), + }; + + memcpy(packet, ð, sizeof(eth)); + memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN); + + n = write(tap_fd, packet, sizeof(packet)); + if (!ASSERT_EQ(n, sizeof(packet), "write packet")) + return -1; + + return 0; +} + +static void dump_err_stream(const struct bpf_program *prog) +{ + char buf[512]; + int ret; + + ret = 0; + do { + ret = bpf_prog_stream_read(bpf_program__fd(prog), + BPF_STREAM_STDERR, buf, sizeof(buf), + NULL); + if (ret > 0) + fwrite(buf, sizeof(buf[0]), ret, stderr); + } while (ret > 0); +} + +void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); @@ -120,7 +210,7 @@ void test_xdp_context_functional(void) struct bpf_program *tc_prog, *xdp_prog; struct test_xdp_meta *skel = NULL; struct nstoken *nstoken = NULL; - int rx_ifindex; + int rx_ifindex, tx_ifindex; int ret; tx_ns = netns_new(TX_NETNS, false); @@ -138,7 +228,6 @@ void test_xdp_context_functional(void) if (!ASSERT_OK_PTR(nstoken, "setns rx_ns")) goto close; - SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME); SYS(close, "ip link set dev " RX_NAME " up"); skel = test_xdp_meta__open_and_load(); @@ -179,9 +268,20 @@ void test_xdp_context_functional(void) if (!ASSERT_OK_PTR(nstoken, "setns tx_ns")) goto close; - SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME); SYS(close, "ip link set dev " TX_NAME " up"); - ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping"); + + tx_ifindex = if_nametoindex(TX_NAME); + if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx")) + goto close; + + skel->bss->test_pass = false; + + ret = send_test_packet(tx_ifindex); + if (!ASSERT_OK(ret, "send_test_packet")) + goto close; + + if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass")) + dump_err_stream(tc_prog); close: close_netns(nstoken); @@ -190,3 +290,231 @@ close: netns_free(tx_ns); } +static void test_tuntap(struct bpf_program *xdp_prog, + struct bpf_program *tc_prio_1_prog, + struct bpf_program *tc_prio_2_prog, + bool *test_pass) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *ns = NULL; + int tap_fd = -1; + int tap_ifindex; + int ret; + + *test_pass = false; + + ns = netns_new(TAP_NETNS, true); + if (!ASSERT_OK_PTR(ns, "create and open ns")) + return; + + tap_fd = open_tuntap(TAP_NAME, true); + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) + goto close; + + SYS(close, "ip link set dev " TAP_NAME " up"); + + tap_ifindex = if_nametoindex(TAP_NAME); + if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) + goto close; + + tc_hook.ifindex = tap_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + if (tc_prio_2_prog) { + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2, + .prog_fd = bpf_program__fd(tc_prio_2_prog)); + + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + } + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), + 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + ret = write_test_packet(tap_fd); + if (!ASSERT_OK(ret, "write_test_packet")) + goto close; + + if (!ASSERT_TRUE(*test_pass, "test_pass")) + dump_err_stream(tc_prio_2_prog ? : tc_prio_1_prog); + +close: + if (tap_fd >= 0) + close(tap_fd); + netns_free(ns); +} + +/* Write a packet to a tap dev and copy it to ingress of a dummy dev */ +static void test_tuntap_mirred(struct bpf_program *xdp_prog, + struct bpf_program *tc_prog, + bool *test_pass) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *ns = NULL; + int dummy_ifindex; + int tap_fd = -1; + int tap_ifindex; + int ret; + + *test_pass = false; + + ns = netns_new(TAP_NETNS, true); + if (!ASSERT_OK_PTR(ns, "netns_new")) + return; + + /* Setup dummy interface */ + SYS(close, "ip link add name " DUMMY_NAME " type dummy"); + SYS(close, "ip link set dev " DUMMY_NAME " up"); + + dummy_ifindex = if_nametoindex(DUMMY_NAME); + if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex")) + goto close; + + tc_hook.ifindex = dummy_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(tc_prog); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + /* Setup TAP interface */ + tap_fd = open_tuntap(TAP_NAME, true); + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) + goto close; + + SYS(close, "ip link set dev " TAP_NAME " up"); + + tap_ifindex = if_nametoindex(TAP_NAME); + if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) + goto close; + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + /* Copy all packets received from TAP to dummy ingress */ + SYS(close, "tc qdisc add dev " TAP_NAME " clsact"); + SYS(close, "tc filter add dev " TAP_NAME " ingress " + "protocol all matchall " + "action mirred ingress mirror dev " DUMMY_NAME); + + /* Receive a packet on TAP */ + ret = write_test_packet(tap_fd); + if (!ASSERT_OK(ret, "write_test_packet")) + goto close; + + if (!ASSERT_TRUE(*test_pass, "test_pass")) + dump_err_stream(tc_prog); + +close: + if (tap_fd >= 0) + close(tap_fd); + netns_free(ns); +} + +void test_xdp_context_tuntap(void) +{ + struct test_xdp_meta *skel = NULL; + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + return; + + if (test__start_subtest("data_meta")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("dynptr_read")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_read, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("dynptr_slice")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_slice, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("dynptr_write")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_write, + skel->progs.ing_cls_dynptr_read, + &skel->bss->test_pass); + if (test__start_subtest("dynptr_slice_rdwr")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_slice_rdwr, + skel->progs.ing_cls_dynptr_slice, + &skel->bss->test_pass); + if (test__start_subtest("dynptr_offset")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_offset_wr, + skel->progs.ing_cls_dynptr_offset_rd, + &skel->bss->test_pass); + if (test__start_subtest("dynptr_offset_oob")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_offset_oob, + skel->progs.ing_cls, + &skel->bss->test_pass); + if (test__start_subtest("clone_data_meta_survives_data_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_data_meta_survives_data_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_data_meta_survives_meta_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_data_meta_survives_meta_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_meta_dynptr_survives_data_slice_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_meta_dynptr_survives_meta_slice_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write, + &skel->bss->test_pass); + /* Tests for BPF helpers which touch headroom */ + if (test__start_subtest("helper_skb_vlan_push_pop")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_vlan_push_pop, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("helper_skb_adjust_room")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_adjust_room, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("helper_skb_change_head_tail")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_change_head_tail, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("helper_skb_change_proto")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_change_proto, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + + test_xdp_meta__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index df27535995af..ad56e4370ce3 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -18,7 +18,7 @@ static void test_xdp_with_cpumap_helpers(void) struct bpf_cpumap_val val = { .qsize = 192, }; - int err, prog_fd, prog_redir_fd, map_fd; + int err, prog_fd, prog_redir_fd, map_fd, bad_fd; struct nstoken *nstoken = NULL; __u32 idx = 0; @@ -79,7 +79,22 @@ static void test_xdp_with_cpumap_helpers(void) val.qsize = 192; val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); err = bpf_map_update_elem(map_fd, &idx, &val, 0); - ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry"); + ASSERT_EQ(err, -EINVAL, "Add non-BPF_XDP_CPUMAP program to cpumap entry"); + + /* Try to attach non-BPF file descriptor */ + bad_fd = open("/dev/null", O_RDONLY); + ASSERT_GE(bad_fd, 0, "Open /dev/null for non-BPF fd"); + + val.bpf_prog.fd = bad_fd; + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_EQ(err, -EINVAL, "Add non-BPF fd to cpumap entry"); + + /* Try to attach nonexistent file descriptor */ + err = close(bad_fd); + ASSERT_EQ(err, 0, "Close non-BPF fd for nonexistent fd"); + + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_EQ(err, -EBADF, "Add nonexistent fd to cpumap entry"); /* Try to attach BPF_XDP program with frags to cpumap when we have * already loaded a BPF_XDP program on the map diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 461ab18705d5..a8ab05216c38 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -7,6 +7,7 @@ #include <test_progs.h> #include "test_xdp_devmap_helpers.skel.h" +#include "test_xdp_devmap_tailcall.skel.h" #include "test_xdp_with_devmap_frags_helpers.skel.h" #include "test_xdp_with_devmap_helpers.skel.h" @@ -107,6 +108,29 @@ static void test_neg_xdp_devmap_helpers(void) } } +static void test_xdp_devmap_tailcall(enum bpf_attach_type prog_dev, + enum bpf_attach_type prog_tail, + bool expect_reject) +{ + struct test_xdp_devmap_tailcall *skel; + int err; + + skel = test_xdp_devmap_tailcall__open(); + if (!ASSERT_OK_PTR(skel, "test_xdp_devmap_tailcall__open")) + return; + + bpf_program__set_expected_attach_type(skel->progs.xdp_devmap, prog_dev); + bpf_program__set_expected_attach_type(skel->progs.xdp_entry, prog_tail); + + err = test_xdp_devmap_tailcall__load(skel); + if (expect_reject) + ASSERT_ERR(err, "test_xdp_devmap_tailcall__load"); + else + ASSERT_OK(err, "test_xdp_devmap_tailcall__load"); + + test_xdp_devmap_tailcall__destroy(skel); +} + static void test_xdp_with_devmap_frags_helpers(void) { struct test_xdp_with_devmap_frags_helpers *skel; @@ -238,8 +262,13 @@ void serial_test_xdp_devmap_attach(void) if (test__start_subtest("DEVMAP with frags programs in entries")) test_xdp_with_devmap_frags_helpers(); - if (test__start_subtest("Verifier check of DEVMAP programs")) + if (test__start_subtest("Verifier check of DEVMAP programs")) { test_neg_xdp_devmap_helpers(); + test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, BPF_XDP_DEVMAP, false); + test_xdp_devmap_tailcall(0, 0, true); + test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, 0, true); + test_xdp_devmap_tailcall(0, BPF_XDP_DEVMAP, true); + } if (test__start_subtest("DEVMAP with programs in entries on veth")) test_xdp_with_devmap_helpers_veth(); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 7dac044664ac..dd34b0cc4b4e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -66,16 +66,25 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd) #else #define MAX_PKT_SIZE 3408 #endif + +#define PAGE_SIZE_4K 4096 +#define PAGE_SIZE_64K 65536 + static void test_max_pkt_size(int fd) { - char data[MAX_PKT_SIZE + 1] = {}; + char data[PAGE_SIZE_64K + 1] = {}; int err; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = MAX_PKT_SIZE, .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); + + if (getpagesize() == PAGE_SIZE_64K) + opts.data_size_in = MAX_PKT_SIZE + PAGE_SIZE_64K - PAGE_SIZE_4K; + else + opts.data_size_in = MAX_PKT_SIZE; + err = bpf_prog_test_run_opts(fd, &opts); ASSERT_OK(err, "prog_run_max_size"); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c index 3f9146d83d79..325e0b64dc35 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c @@ -67,7 +67,7 @@ void test_xdp_flowtable(void) struct nstoken *tok = NULL; int iifindex, stats_fd; __u32 value, key = 0; - struct bpf_link *link; + struct bpf_link *link = NULL; if (SYS_NOFAIL("nft -v")) { fprintf(stdout, "Missing required nft tool\n"); @@ -160,6 +160,7 @@ void test_xdp_flowtable(void) ASSERT_GE(value, N_PACKETS - 2, "bpf_xdp_flow_lookup failed"); out: + bpf_link__destroy(link); xdp_flowtable__destroy(skel); if (tok) close_netns(tok); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 3d47878ef6bf..5c31054ad4a4 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -126,10 +126,10 @@ static int open_xsk(int ifindex, struct xsk *xsk) static void close_xsk(struct xsk *xsk) { - if (xsk->umem) - xsk_umem__delete(xsk->umem); if (xsk->socket) xsk_socket__delete(xsk->socket); + if (xsk->umem) + xsk_umem__delete(xsk->umem); munmap(xsk->umem_area, UMEM_SIZE); } @@ -351,9 +351,10 @@ void test_xdp_metadata(void) struct xdp_metadata2 *bpf_obj2 = NULL; struct xdp_metadata *bpf_obj = NULL; struct bpf_program *new_prog, *prog; + struct bpf_devmap_val devmap_e = {}; + struct bpf_map *prog_arr, *devmap; struct nstoken *tok = NULL; __u32 queue_id = QUEUE_ID; - struct bpf_map *prog_arr; struct xsk tx_xsk = {}; struct xsk rx_xsk = {}; __u32 val, key = 0; @@ -409,6 +410,13 @@ void test_xdp_metadata(void) bpf_program__set_ifindex(prog, rx_ifindex); bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); + /* Make sure we can load a dev-bound program that performs + * XDP_REDIRECT into a devmap. + */ + new_prog = bpf_object__find_program_by_name(bpf_obj->obj, "redirect"); + bpf_program__set_ifindex(new_prog, rx_ifindex); + bpf_program__set_flags(new_prog, BPF_F_XDP_DEV_BOUND_ONLY); + if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton")) goto out; @@ -423,6 +431,18 @@ void test_xdp_metadata(void) "update prog_arr")) goto out; + /* Make sure we can't add dev-bound programs to devmaps. */ + devmap = bpf_object__find_map_by_name(bpf_obj->obj, "dev_map"); + if (!ASSERT_OK_PTR(devmap, "no dev_map found")) + goto out; + + devmap_e.bpf_prog.fd = val; + if (!ASSERT_ERR(bpf_map__update_elem(devmap, &key, sizeof(key), + &devmap_e, sizeof(devmap_e), + BPF_ANY), + "update dev_map")) + goto out; + /* Attach BPF program to RX interface. */ ret = bpf_xdp_attach(rx_ifindex, diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c new file mode 100644 index 000000000000..910dabe95afd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include <network_helpers.h> +#include "test_xdp_pull_data.skel.h" + +#define PULL_MAX (1 << 31) +#define PULL_PLUS_ONE (1 << 30) + +#define XDP_PACKET_HEADROOM 256 + +/* Find headroom and tailroom occupied by struct xdp_frame and struct + * skb_shared_info so that we can calculate the maximum pull lengths for + * test cases. They might not be the real size of the structures due to + * cache alignment. + */ +static int find_xdp_sizes(struct test_xdp_pull_data *skel, int frame_sz) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct xdp_md ctx = {}; + int prog_fd, err; + __u8 *buf; + + buf = calloc(frame_sz, sizeof(__u8)); + if (!ASSERT_OK_PTR(buf, "calloc buf")) + return -ENOMEM; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = frame_sz; + topts.data_size_out = frame_sz; + /* Pass a data_end larger than the linear space available to make sure + * bpf_prog_test_run_xdp() will fill the linear data area so that + * xdp_find_sizes can infer the size of struct skb_shared_info + */ + ctx.data_end = frame_sz; + topts.ctx_in = &ctx; + topts.ctx_out = &ctx; + topts.ctx_size_in = sizeof(ctx); + topts.ctx_size_out = sizeof(ctx); + + prog_fd = bpf_program__fd(skel->progs.xdp_find_sizes); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + free(buf); + + return err; +} + +/* xdp_pull_data_prog will directly read a marker 0xbb stored at buf[1024] + * so caller expecting XDP_PASS should always pass pull_len no less than 1024 + */ +static void run_test(struct test_xdp_pull_data *skel, int retval, + int frame_sz, int buff_len, int meta_len, int data_len, + int pull_len) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct xdp_md ctx = {}; + int prog_fd, err; + __u8 *buf; + + buf = calloc(buff_len, sizeof(__u8)); + if (!ASSERT_OK_PTR(buf, "calloc buf")) + return; + + buf[meta_len + 1023] = 0xaa; + buf[meta_len + 1024] = 0xbb; + buf[meta_len + 1025] = 0xcc; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = buff_len; + topts.data_size_out = buff_len; + ctx.data = meta_len; + ctx.data_end = meta_len + data_len; + topts.ctx_in = &ctx; + topts.ctx_out = &ctx; + topts.ctx_size_in = sizeof(ctx); + topts.ctx_size_out = sizeof(ctx); + + skel->bss->data_len = data_len; + if (pull_len & PULL_MAX) { + int headroom = XDP_PACKET_HEADROOM - meta_len - skel->bss->xdpf_sz; + int tailroom = frame_sz - XDP_PACKET_HEADROOM - + data_len - skel->bss->sinfo_sz; + + pull_len = pull_len & PULL_PLUS_ONE ? 1 : 0; + pull_len += headroom + tailroom + data_len; + } + skel->bss->pull_len = pull_len; + + prog_fd = bpf_program__fd(skel->progs.xdp_pull_data_prog); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_EQ(topts.retval, retval, "xdp_pull_data_prog retval"); + + if (retval == XDP_DROP) + goto out; + + ASSERT_EQ(ctx.data_end, meta_len + pull_len, "linear data size"); + ASSERT_EQ(topts.data_size_out, buff_len, "linear + non-linear data size"); + /* Make sure data around xdp->data_end was not messed up by + * bpf_xdp_pull_data() + */ + ASSERT_EQ(buf[meta_len + 1023], 0xaa, "data[1023]"); + ASSERT_EQ(buf[meta_len + 1024], 0xbb, "data[1024]"); + ASSERT_EQ(buf[meta_len + 1025], 0xcc, "data[1025]"); +out: + free(buf); +} + +static void test_xdp_pull_data_basic(void) +{ + u32 pg_sz, max_meta_len, max_data_len; + struct test_xdp_pull_data *skel; + int buff_len; + + skel = test_xdp_pull_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load")) + return; + + pg_sz = sysconf(_SC_PAGE_SIZE); + buff_len = pg_sz + pg_sz / 2; + + if (find_xdp_sizes(skel, pg_sz)) + goto out; + + max_meta_len = XDP_PACKET_HEADROOM - skel->bss->xdpf_sz; + max_data_len = pg_sz - XDP_PACKET_HEADROOM - skel->bss->sinfo_sz; + + /* linear xdp pkt, pull 0 byte */ + run_test(skel, XDP_PASS, pg_sz, 2048, 0, 2048, 2048); + + /* multi-buf pkt, pull results in linear xdp pkt */ + run_test(skel, XDP_PASS, pg_sz, 2048, 0, 1024, 2048); + + /* multi-buf pkt, pull 1 byte to linear data area */ + run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1024, 1025); + + /* multi-buf pkt, pull 0 byte to linear data area */ + run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025); + + /* multi-buf pkt, empty linear data area, pull requires memmove */ + run_test(skel, XDP_PASS, pg_sz, buff_len, 0, 0, PULL_MAX); + + /* multi-buf pkt, no headroom */ + run_test(skel, XDP_PASS, pg_sz, buff_len, max_meta_len, 1024, PULL_MAX); + + /* multi-buf pkt, no tailroom, pull requires memmove */ + run_test(skel, XDP_PASS, pg_sz, buff_len, 0, max_data_len, PULL_MAX); + + /* Test cases with invalid pull length */ + + /* linear xdp pkt, pull more than total data len */ + run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049); + + /* multi-buf pkt with no space left in linear data area */ + run_test(skel, XDP_DROP, pg_sz, buff_len, max_meta_len, max_data_len, + PULL_MAX | PULL_PLUS_ONE); + + /* multi-buf pkt, empty linear data area */ + run_test(skel, XDP_DROP, pg_sz, buff_len, 0, 0, PULL_MAX | PULL_PLUS_ONE); + + /* multi-buf pkt, no headroom */ + run_test(skel, XDP_DROP, pg_sz, buff_len, max_meta_len, 1024, + PULL_MAX | PULL_PLUS_ONE); + + /* multi-buf pkt, no tailroom */ + run_test(skel, XDP_DROP, pg_sz, buff_len, 0, max_data_len, + PULL_MAX | PULL_PLUS_ONE); + +out: + test_xdp_pull_data__destroy(skel); +} + +void test_xdp_pull_data(void) +{ + if (test__start_subtest("xdp_pull_data")) + test_xdp_pull_data_basic(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c new file mode 100644 index 000000000000..18dd25344de7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Network topology: + * ----------- ----------- + * | NS1 | | NS2 | + * | veth0 -|--------|- veth0 | + * ----------- ----------- + * + */ + +#define _GNU_SOURCE +#include <net/if.h> +#include <uapi/linux/if_link.h> + +#include "network_helpers.h" +#include "test_progs.h" +#include "test_xdp_vlan.skel.h" + + +#define VETH_NAME "veth0" +#define NS_MAX_SIZE 32 +#define NS1_NAME "ns-xdp-vlan-1-" +#define NS2_NAME "ns-xdp-vlan-2-" +#define NS1_IP_ADDR "100.64.10.1" +#define NS2_IP_ADDR "100.64.10.2" +#define VLAN_ID 4011 + +static int setup_network(char *ns1, char *ns2) +{ + if (!ASSERT_OK(append_tid(ns1, NS_MAX_SIZE), "create ns1 name")) + goto fail; + if (!ASSERT_OK(append_tid(ns2, NS_MAX_SIZE), "create ns2 name")) + goto fail; + + SYS(fail, "ip netns add %s", ns1); + SYS(fail, "ip netns add %s", ns2); + SYS(fail, "ip -n %s link add %s type veth peer name %s netns %s", + ns1, VETH_NAME, VETH_NAME, ns2); + + /* NOTICE: XDP require VLAN header inside packet payload + * - Thus, disable VLAN offloading driver features + */ + SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns1, VETH_NAME); + SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns2, VETH_NAME); + + /* NS1 configuration */ + SYS(fail, "ip -n %s addr add %s/24 dev %s", ns1, NS1_IP_ADDR, VETH_NAME); + SYS(fail, "ip -n %s link set %s up", ns1, VETH_NAME); + + /* NS2 configuration */ + SYS(fail, "ip -n %s link add link %s name %s.%d type vlan id %d", + ns2, VETH_NAME, VETH_NAME, VLAN_ID, VLAN_ID); + SYS(fail, "ip -n %s addr add %s/24 dev %s.%d", ns2, NS2_IP_ADDR, VETH_NAME, VLAN_ID); + SYS(fail, "ip -n %s link set %s up", ns2, VETH_NAME); + SYS(fail, "ip -n %s link set %s.%d up", ns2, VETH_NAME, VLAN_ID); + + /* At this point ping should fail because VLAN tags are only used by NS2 */ + return !SYS_NOFAIL("ip netns exec %s ping -W 1 -c1 %s", ns2, NS1_IP_ADDR); + +fail: + return -1; +} + +static void cleanup_network(const char *ns1, const char *ns2) +{ + SYS_NOFAIL("ip netns del %s", ns1); + SYS_NOFAIL("ip netns del %s", ns2); +} + +static void xdp_vlan(struct bpf_program *xdp, struct bpf_program *tc, u32 flags) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_EGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + char ns1[NS_MAX_SIZE] = NS1_NAME; + char ns2[NS_MAX_SIZE] = NS2_NAME; + struct nstoken *nstoken = NULL; + int interface; + int ret; + + if (!ASSERT_OK(setup_network(ns1, ns2), "setup network")) + goto cleanup; + + nstoken = open_netns(ns1); + if (!ASSERT_OK_PTR(nstoken, "open NS1")) + goto cleanup; + + interface = if_nametoindex(VETH_NAME); + if (!ASSERT_NEQ(interface, 0, "get interface index")) + goto cleanup; + + ret = bpf_xdp_attach(interface, bpf_program__fd(xdp), flags, NULL); + if (!ASSERT_OK(ret, "attach xdp_vlan_change")) + goto cleanup; + + tc_hook.ifindex = interface; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto detach_xdp; + + /* Now we'll use BPF programs to pop/push the VLAN tags */ + tc_opts.prog_fd = bpf_program__fd(tc); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto detach_xdp; + + close_netns(nstoken); + nstoken = NULL; + + /* Now the namespaces can reach each-other, test with pings */ + SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns1, NS2_IP_ADDR); + SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns2, NS1_IP_ADDR); + + +detach_tc: + bpf_tc_detach(&tc_hook, &tc_opts); +detach_xdp: + bpf_xdp_detach(interface, flags, NULL); +cleanup: + close_netns(nstoken); + cleanup_network(ns1, ns2); +} + +/* First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" + * egress use TC to add back VLAN tag 4011 + */ +void test_xdp_vlan_change(void) +{ + struct test_xdp_vlan *skel; + + skel = test_xdp_vlan__open_and_load(); + if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load")) + return; + + if (test__start_subtest("0")) + xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, 0); + + if (test__start_subtest("DRV_MODE")) + xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, + XDP_FLAGS_DRV_MODE); + + if (test__start_subtest("SKB_MODE")) + xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, + XDP_FLAGS_SKB_MODE); + + test_xdp_vlan__destroy(skel); +} + +/* Second test: XDP prog fully remove vlan header + * + * Catch kernel bug for generic-XDP, that doesn't allow us to + * remove a VLAN header, because skb->protocol still contain VLAN + * ETH_P_8021Q indication, and this cause overwriting of our changes. + */ +void test_xdp_vlan_remove(void) +{ + struct test_xdp_vlan *skel; + + skel = test_xdp_vlan__open_and_load(); + if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load")) + return; + + if (test__start_subtest("0")) + xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, 0); + + if (test__start_subtest("DRV_MODE")) + xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, + XDP_FLAGS_DRV_MODE); + + if (test__start_subtest("SKB_MODE")) + xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, + XDP_FLAGS_SKB_MODE); + + test_xdp_vlan__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xsk.c b/tools/testing/selftests/bpf/prog_tests/xsk.c new file mode 100644 index 000000000000..6e2f63ee2a6c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xsk.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <net/if.h> +#include <stdarg.h> + +#include "network_helpers.h" +#include "test_progs.h" +#include "test_xsk.h" +#include "xsk_xdp_progs.skel.h" + +#define VETH_RX "veth0" +#define VETH_TX "veth1" +#define MTU 1500 + +int setup_veth(bool busy_poll) +{ + SYS(fail, + "ip link add %s numtxqueues 4 numrxqueues 4 type veth peer name %s numtxqueues 4 numrxqueues 4", + VETH_RX, VETH_TX); + SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_RX); + SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_TX); + + if (busy_poll) { + SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_RX); + SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_RX); + SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_TX); + SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_TX); + } + + SYS(fail, "ip link set %s mtu %d", VETH_RX, MTU); + SYS(fail, "ip link set %s mtu %d", VETH_TX, MTU); + SYS(fail, "ip link set %s up", VETH_RX); + SYS(fail, "ip link set %s up", VETH_TX); + + return 0; + +fail: + return -1; +} + +void delete_veth(void) +{ + SYS_NOFAIL("ip link del %s", VETH_RX); + SYS_NOFAIL("ip link del %s", VETH_TX); +} + +int configure_ifobj(struct ifobject *tx, struct ifobject *rx) +{ + rx->ifindex = if_nametoindex(VETH_RX); + if (!ASSERT_OK_FD(rx->ifindex, "get RX ifindex")) + return -1; + + tx->ifindex = if_nametoindex(VETH_TX); + if (!ASSERT_OK_FD(tx->ifindex, "get TX ifindex")) + return -1; + + tx->shared_umem = false; + rx->shared_umem = false; + + + return 0; +} + +static void test_xsk(const struct test_spec *test_to_run, enum test_mode mode) +{ + u32 max_frags, umem_tailroom, cache_line_size; + struct ifobject *ifobj_tx, *ifobj_rx; + struct test_spec test; + int ret; + + ifobj_tx = ifobject_create(); + if (!ASSERT_OK_PTR(ifobj_tx, "create ifobj_tx")) + return; + + ifobj_rx = ifobject_create(); + if (!ASSERT_OK_PTR(ifobj_rx, "create ifobj_rx")) + goto delete_tx; + + if (!ASSERT_OK(configure_ifobj(ifobj_tx, ifobj_rx), "conigure ifobj")) + goto delete_rx; + + ret = get_hw_ring_size(ifobj_tx->ifname, &ifobj_tx->ring); + if (!ret) { + ifobj_tx->hw_ring_size_supp = true; + ifobj_tx->set_ring.default_tx = ifobj_tx->ring.tx_pending; + ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending; + } + + cache_line_size = read_procfs_val(SMP_CACHE_BYTES_PATH); + if (!cache_line_size) + cache_line_size = 64; + + max_frags = read_procfs_val(MAX_SKB_FRAGS_PATH); + if (!max_frags) + max_frags = 17; + + ifobj_tx->max_skb_frags = max_frags; + ifobj_rx->max_skb_frags = max_frags; + + /* 48 bytes is a part of skb_shared_info w/o frags array; + * 16 bytes is sizeof(skb_frag_t) + */ + umem_tailroom = ALIGN(48 + (max_frags * 16), cache_line_size); + ifobj_tx->umem_tailroom = umem_tailroom; + ifobj_rx->umem_tailroom = umem_tailroom; + + if (!ASSERT_OK(init_iface(ifobj_rx, worker_testapp_validate_rx), "init RX")) + goto delete_rx; + if (!ASSERT_OK(init_iface(ifobj_tx, worker_testapp_validate_tx), "init TX")) + goto delete_rx; + + test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]); + + test.tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE); + if (!ASSERT_OK_PTR(test.tx_pkt_stream_default, "TX pkt generation")) + goto delete_rx; + test.rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE); + if (!ASSERT_OK_PTR(test.rx_pkt_stream_default, "RX pkt generation")) + goto delete_rx; + + + test_init(&test, ifobj_tx, ifobj_rx, mode, test_to_run); + ret = test.test_func(&test); + if (ret != TEST_SKIP) + ASSERT_OK(ret, "Run test"); + pkt_stream_restore_default(&test); + + if (ifobj_tx->hw_ring_size_supp) + hw_ring_size_reset(ifobj_tx); + + pkt_stream_delete(test.tx_pkt_stream_default); + pkt_stream_delete(test.rx_pkt_stream_default); + xsk_xdp_progs__destroy(ifobj_tx->xdp_progs); + xsk_xdp_progs__destroy(ifobj_rx->xdp_progs); + +delete_rx: + ifobject_delete(ifobj_rx); +delete_tx: + ifobject_delete(ifobj_tx); +} + +void test_ns_xsk_skb(void) +{ + int i; + + if (!ASSERT_OK(setup_veth(false), "setup veth")) + return; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (test__start_subtest(tests[i].name)) + test_xsk(&tests[i], TEST_MODE_SKB); + } + + delete_veth(); +} + +void test_ns_xsk_drv(void) +{ + int i; + + if (!ASSERT_OK(setup_veth(false), "setup veth")) + return; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (test__start_subtest(tests[i].name)) + test_xsk(&tests[i], TEST_MODE_DRV); + } + + delete_veth(); +} + |
