From 48e5d98a0eb1e4cec308ed63444a505a7e7dd9e3 Mon Sep 17 00:00:00 2001
From: Adrian Ratiu <adrian.ratiu@collabora.com>
Date: Wed, 20 Mar 2019 12:10:54 +0200
Subject: selftests/bpf: Add arm target register definitions

eBPF "restricted C" code can be compiled with LLVM/clang using target
triplets like armv7l-unknown-linux-gnueabihf and loaded/run with small
cross-compiled gobpf/elf [1] programs without requiring a full BCC
port which is also undesirable on small embedded systems due to its
size footprint. The only missing pieces are these helper macros which
otherwise have to be redefined by each eBPF arm program.

[1] https://github.com/iovisor/gobpf/tree/master/elf

Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/bpf_helpers.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index c81fc350f7ad..41f8a4b676a4 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -274,6 +274,9 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
 #elif defined(__TARGET_ARCH_s930x)
 	#define bpf_target_s930x
 	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm)
+	#define bpf_target_arm
+	#define bpf_target_defined
 #elif defined(__TARGET_ARCH_arm64)
 	#define bpf_target_arm64
 	#define bpf_target_defined
@@ -296,6 +299,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
 	#define bpf_target_x86
 #elif defined(__s390x__)
 	#define bpf_target_s930x
+#elif defined(__arm__)
+	#define bpf_target_arm
 #elif defined(__aarch64__)
 	#define bpf_target_arm64
 #elif defined(__mips__)
@@ -333,6 +338,19 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
 #define PT_REGS_SP(x) ((x)->gprs[15])
 #define PT_REGS_IP(x) ((x)->psw.addr)
 
+#elif defined(bpf_target_arm)
+
+#define PT_REGS_PARM1(x) ((x)->uregs[0])
+#define PT_REGS_PARM2(x) ((x)->uregs[1])
+#define PT_REGS_PARM3(x) ((x)->uregs[2])
+#define PT_REGS_PARM4(x) ((x)->uregs[3])
+#define PT_REGS_PARM5(x) ((x)->uregs[4])
+#define PT_REGS_RET(x) ((x)->uregs[14])
+#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->uregs[0])
+#define PT_REGS_SP(x) ((x)->uregs[13])
+#define PT_REGS_IP(x) ((x)->uregs[12])
+
 #elif defined(bpf_target_arm64)
 
 #define PT_REGS_PARM1(x) ((x)->regs[0])
-- 
cgit v1.2.3


From 253c8dde3cf60879d37b5fa47bde4a62c295c72b Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 22 Mar 2019 09:54:03 +0800
Subject: tools: update include/uapi/linux/bpf.h

Pull definitions for bpf_skc_lookup_tcp and bpf_sk_check_syncookie.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 929c8e537a14..3c04410137d9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2431,6 +2431,38 @@ union bpf_attr {
  *	Return
  *		A **struct bpf_sock** pointer on success, or **NULL** in
  *		case of failure.
+ *
+ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ *	Description
+ *		Look for TCP socket matching *tuple*, optionally in a child
+ *		network namespace *netns*. The return value must be checked,
+ *		and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ *		This function is identical to bpf_sk_lookup_tcp, except that it
+ *		also returns timewait or request sockets. Use bpf_sk_fullsock
+ *		or bpf_tcp_socket to access the full structure.
+ *
+ *		This helper is available only if the kernel was compiled with
+ *		**CONFIG_NET** configuration option.
+ *	Return
+ *		Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ *		For sockets with reuseport option, the **struct bpf_sock**
+ *		result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * 	Description
+ * 		Check whether iph and th contain a valid SYN cookie ACK for
+ * 		the listening socket in sk.
+ *
+ * 		iph points to the start of the IPv4 or IPv6 header, while
+ * 		iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr).
+ *
+ * 		th points to the start of the TCP header, while th_len contains
+ * 		sizeof(struct tcphdr).
+ *
+ * 	Return
+ * 		0 if iph and th are a valid SYN cookie ACK, or a negative error
+ * 		otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2531,7 +2563,9 @@ union bpf_attr {
 	FN(sk_fullsock),		\
 	FN(tcp_sock),			\
 	FN(skb_ecn_set_ce),		\
-	FN(get_listener_sock),
+	FN(get_listener_sock),		\
+	FN(skc_lookup_tcp),		\
+	FN(tcp_check_syncookie),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From dbaf2877e9ad0ac77c463d1bf87b2eb7efc46160 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 22 Mar 2019 09:54:04 +0800
Subject: selftests/bpf: allow specifying helper for BPF_SK_LOOKUP

Make the BPF_SK_LOOKUP macro take a helper function, to ease
writing tests for new helpers.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c        |  6 +-
 .../testing/selftests/bpf/verifier/ref_tracking.c  | 78 +++++++++++-----------
 tools/testing/selftests/bpf/verifier/unpriv.c      |  8 +--
 3 files changed, 46 insertions(+), 46 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 477a9dcf9fff..19b5d03acc2a 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -198,7 +198,7 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 }
 
 /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
-#define BPF_SK_LOOKUP							\
+#define BPF_SK_LOOKUP(func)						\
 	/* struct bpf_sock_tuple tuple = {} */				\
 	BPF_MOV64_IMM(BPF_REG_2, 0),					\
 	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),			\
@@ -207,13 +207,13 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32),		\
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40),		\
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48),		\
-	/* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */	\
+	/* sk = func(ctx, &tuple, sizeof tuple, 0, 0) */		\
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),				\
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),				\
 	BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),	\
 	BPF_MOV64_IMM(BPF_REG_4, 0),					\
 	BPF_MOV64_IMM(BPF_REG_5, 0),					\
-	BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp)
+	BPF_EMIT_CALL(BPF_FUNC_ ## func)
 
 /* BPF_DIRECT_PKT_R2 contains 7 instructions, it initializes default return
  * value into 0 and does necessary preparation for direct packet access
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 923f2110072d..a6905e5017dc 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -1,7 +1,7 @@
 {
 	"reference tracking: leak potential reference",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
 	BPF_EXIT_INSN(),
 	},
@@ -12,7 +12,7 @@
 {
 	"reference tracking: leak potential reference on stack",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
 	BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
@@ -26,7 +26,7 @@
 {
 	"reference tracking: leak potential reference on stack 2",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
 	BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
@@ -41,7 +41,7 @@
 {
 	"reference tracking: zero potential reference",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
 	BPF_EXIT_INSN(),
 	},
@@ -52,7 +52,7 @@
 {
 	"reference tracking: copy and zero potential references",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */
@@ -65,7 +65,7 @@
 {
 	"reference tracking: release reference without check",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	/* reference in r0 may be NULL */
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, 0),
@@ -79,7 +79,7 @@
 {
 	"reference tracking: release reference",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
 	BPF_EMIT_CALL(BPF_FUNC_sk_release),
@@ -91,7 +91,7 @@
 {
 	"reference tracking: release reference 2",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 	BPF_EXIT_INSN(),
@@ -104,7 +104,7 @@
 {
 	"reference tracking: release reference twice",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
@@ -120,7 +120,7 @@
 {
 	"reference tracking: release reference twice inside branch",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */
@@ -147,7 +147,7 @@
 	BPF_EXIT_INSN(),
 	BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
 		    offsetof(struct __sk_buff, mark)),
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */
 	/* Leak reference in R0 */
 	BPF_EXIT_INSN(),
@@ -175,7 +175,7 @@
 	BPF_EXIT_INSN(),
 	BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
 		    offsetof(struct __sk_buff, mark)),
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
@@ -193,7 +193,7 @@
 {
 	"reference tracking in call: free reference in subprog",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -211,7 +211,7 @@
 {
 	"reference tracking in call: free reference in subprog and outside",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
@@ -241,7 +241,7 @@
 
 	/* subprog 1 */
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_4),
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	/* spill unchecked sk_ptr into stack of caller */
 	BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
@@ -262,7 +262,7 @@
 	BPF_EXIT_INSN(),
 
 	/* subprog 1 */
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_EXIT_INSN(), /* return sk */
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -291,7 +291,7 @@
 	BPF_EXIT_INSN(),
 
 	/* subprog 2 */
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -324,7 +324,7 @@
 	BPF_EXIT_INSN(),
 
 	/* subprog 2 */
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -334,7 +334,7 @@
 	"reference tracking: allow LD_ABS",
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
 	BPF_EMIT_CALL(BPF_FUNC_sk_release),
@@ -350,7 +350,7 @@
 	"reference tracking: forbid LD_ABS while holding reference",
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_LD_ABS(BPF_B, 0),
 	BPF_LD_ABS(BPF_H, 0),
 	BPF_LD_ABS(BPF_W, 0),
@@ -367,7 +367,7 @@
 	"reference tracking: allow LD_IND",
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
 	BPF_EMIT_CALL(BPF_FUNC_sk_release),
@@ -384,7 +384,7 @@
 	"reference tracking: forbid LD_IND while holding reference",
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_7, 1),
 	BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
@@ -402,7 +402,7 @@
 	"reference tracking: check reference or tail call",
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	/* if (sk) bpf_sk_release() */
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
@@ -424,7 +424,7 @@
 	"reference tracking: release reference then tail call",
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	/* if (sk) bpf_sk_release() */
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
@@ -446,7 +446,7 @@
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
 	/* Look up socket and store in REG_6 */
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	/* bpf_tail_call() */
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_3, 2),
@@ -470,7 +470,7 @@
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
 	/* Look up socket and store in REG_6 */
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 	/* if (!sk) goto end */
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
@@ -492,7 +492,7 @@
 {
 	"reference tracking: mangle and release sock_or_null",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
@@ -506,7 +506,7 @@
 {
 	"reference tracking: mangle and release sock",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
@@ -520,7 +520,7 @@
 {
 	"reference tracking: access member",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
 	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
@@ -534,7 +534,7 @@
 {
 	"reference tracking: write to member",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
@@ -553,7 +553,7 @@
 {
 	"reference tracking: invalid 64-bit access of member",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
@@ -568,7 +568,7 @@
 {
 	"reference tracking: access after release",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
 	BPF_EMIT_CALL(BPF_FUNC_sk_release),
@@ -608,7 +608,7 @@
 {
 	"reference tracking: use ptr from bpf_tcp_sock() after release",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -631,7 +631,7 @@
 {
 	"reference tracking: use ptr from bpf_sk_fullsock() after release",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -654,7 +654,7 @@
 {
 	"reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -681,7 +681,7 @@
 {
 	"reference tracking: use sk after bpf_sk_release(tp)",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -703,7 +703,7 @@
 {
 	"reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -725,7 +725,7 @@
 {
 	"reference tracking: bpf_sk_release(listen_sk)",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
@@ -750,7 +750,7 @@
 	/* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
 	"reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)",
 	.insns = {
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index dbaf5be947b2..91bb77c24a2e 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -242,7 +242,7 @@
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
 	/* struct bpf_sock *sock = bpf_sock_lookup(...); */
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
 	/* u64 foo; */
 	/* void *target = &foo; */
@@ -276,7 +276,7 @@
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
 	/* struct bpf_sock *sock = bpf_sock_lookup(...); */
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
 	/* u64 foo; */
 	/* void *target = &foo; */
@@ -307,7 +307,7 @@
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
 	/* struct bpf_sock *sock = bpf_sock_lookup(...); */
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
 	/* u64 foo; */
 	/* void *target = &foo; */
@@ -339,7 +339,7 @@
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
 	/* struct bpf_sock *sock = bpf_sock_lookup(...); */
-	BPF_SK_LOOKUP,
+	BPF_SK_LOOKUP(sk_lookup_tcp),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
 	/* u64 foo; */
 	/* void *target = &foo; */
-- 
cgit v1.2.3


From 5792d52df1e77110abf0d11b1131992a8c0c8d17 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 22 Mar 2019 09:54:05 +0800
Subject: selftests/bpf: test references to sock_common

Make sure that returning a struct sock_common * reference invokes
the reference tracking machinery in the verifier.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/verifier/ref_tracking.c  | 48 ++++++++++++++++++++++
 1 file changed, 48 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index a6905e5017dc..ebcbf154c460 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -9,6 +9,17 @@
 	.errstr = "Unreleased reference",
 	.result = REJECT,
 },
+{
+	"reference tracking: leak potential reference to sock_common",
+	.insns = {
+	BPF_SK_LOOKUP(skc_lookup_tcp),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.errstr = "Unreleased reference",
+	.result = REJECT,
+},
 {
 	"reference tracking: leak potential reference on stack",
 	.insns = {
@@ -49,6 +60,17 @@
 	.errstr = "Unreleased reference",
 	.result = REJECT,
 },
+{
+	"reference tracking: zero potential reference to sock_common",
+	.insns = {
+	BPF_SK_LOOKUP(skc_lookup_tcp),
+	BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.errstr = "Unreleased reference",
+	.result = REJECT,
+},
 {
 	"reference tracking: copy and zero potential references",
 	.insns = {
@@ -76,6 +98,20 @@
 	.errstr = "type=sock_or_null expected=sock",
 	.result = REJECT,
 },
+{
+	"reference tracking: release reference to sock_common without check",
+	.insns = {
+	BPF_SK_LOOKUP(skc_lookup_tcp),
+	/* reference in r0 may be NULL */
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.errstr = "type=sock_common_or_null expected=sock",
+	.result = REJECT,
+},
 {
 	"reference tracking: release reference",
 	.insns = {
@@ -88,6 +124,18 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = ACCEPT,
 },
+{
+	"reference tracking: release reference to sock_common",
+	.insns = {
+	BPF_SK_LOOKUP(skc_lookup_tcp),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+},
 {
 	"reference tracking: release reference 2",
 	.insns = {
-- 
cgit v1.2.3


From bafc0ba8261e36e36b0b1e851749fd3712a2a6f4 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 22 Mar 2019 09:54:06 +0800
Subject: selftests/bpf: add tests for bpf_tcp_check_syncookie and
 bpf_skc_lookup_tcp

Add tests which verify that the new helpers work for both IPv4 and
IPv6, by forcing SYN cookies to always on. Use a new network namespace
to avoid clobbering the global SYN cookie settings.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/.gitignore             |   1 +
 tools/testing/selftests/bpf/Makefile               |   5 +-
 tools/testing/selftests/bpf/bpf_helpers.h          |   8 +
 .../bpf/progs/test_tcp_check_syncookie_kern.c      | 129 +++++++++++++
 .../selftests/bpf/test_tcp_check_syncookie.sh      |  81 ++++++++
 .../selftests/bpf/test_tcp_check_syncookie_user.c  | 212 +++++++++++++++++++++
 6 files changed, 434 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
 create mode 100755 tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
 create mode 100644 tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 3b74d23fffab..41e8a689aa77 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -30,4 +30,5 @@ test_netcnt
 test_section_names
 test_tcpnotify_user
 test_libbpf
+test_tcp_check_syncookie_user
 alu32
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 2aed37ea61a4..25d3939eb840 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -51,7 +51,8 @@ TEST_PROGS := test_kmod.sh \
 	test_skb_cgroup_id.sh \
 	test_flow_dissector.sh \
 	test_xdp_vlan.sh \
-	test_lwt_ip_encap.sh
+	test_lwt_ip_encap.sh \
+	test_tcp_check_syncookie.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
@@ -60,7 +61,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
-	flow_dissector_load test_flow_dissector
+	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 41f8a4b676a4..97d140961438 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -159,6 +159,11 @@ static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
 					     int size, unsigned long long netns_id,
 					     unsigned long long flags) =
 	(void *) BPF_FUNC_sk_lookup_tcp;
+static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx,
+					     struct bpf_sock_tuple *tuple,
+					     int size, unsigned long long netns_id,
+					     unsigned long long flags) =
+	(void *) BPF_FUNC_skc_lookup_tcp;
 static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
 					     struct bpf_sock_tuple *tuple,
 					     int size, unsigned long long netns_id,
@@ -184,6 +189,9 @@ static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
 	(void *) BPF_FUNC_get_listener_sock;
 static int (*bpf_skb_ecn_set_ce)(void *ctx) =
 	(void *) BPF_FUNC_skb_ecn_set_ce;
+static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk,
+	    void *ip, int ip_len, void *tcp, int tcp_len) =
+	(void *) BPF_FUNC_tcp_check_syncookie;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
new file mode 100644
index 000000000000..1ab095bcacd8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+// Copyright (c) 2019 Cloudflare
+
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <sys/socket.h>
+#include <linux/tcp.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+struct bpf_map_def SEC("maps") results = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 1,
+};
+
+static __always_inline void check_syncookie(void *ctx, void *data,
+					    void *data_end)
+{
+	struct bpf_sock_tuple tup;
+	struct bpf_sock *sk;
+	struct ethhdr *ethh;
+	struct iphdr *ipv4h;
+	struct ipv6hdr *ipv6h;
+	struct tcphdr *tcph;
+	int ret;
+	__u32 key = 0;
+	__u64 value = 1;
+
+	ethh = data;
+	if (ethh + 1 > data_end)
+		return;
+
+	switch (bpf_ntohs(ethh->h_proto)) {
+	case ETH_P_IP:
+		ipv4h = data + sizeof(struct ethhdr);
+		if (ipv4h + 1 > data_end)
+			return;
+
+		if (ipv4h->ihl != 5)
+			return;
+
+		tcph = data + sizeof(struct ethhdr) + sizeof(struct iphdr);
+		if (tcph + 1 > data_end)
+			return;
+
+		tup.ipv4.saddr = ipv4h->saddr;
+		tup.ipv4.daddr = ipv4h->daddr;
+		tup.ipv4.sport = tcph->source;
+		tup.ipv4.dport = tcph->dest;
+
+		sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv4),
+					BPF_F_CURRENT_NETNS, 0);
+		if (!sk)
+			return;
+
+		if (sk->state != BPF_TCP_LISTEN)
+			goto release;
+
+		ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h),
+					      tcph, sizeof(*tcph));
+		break;
+
+	case ETH_P_IPV6:
+		ipv6h = data + sizeof(struct ethhdr);
+		if (ipv6h + 1 > data_end)
+			return;
+
+		if (ipv6h->nexthdr != IPPROTO_TCP)
+			return;
+
+		tcph = data + sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+		if (tcph + 1 > data_end)
+			return;
+
+		memcpy(tup.ipv6.saddr, &ipv6h->saddr, sizeof(tup.ipv6.saddr));
+		memcpy(tup.ipv6.daddr, &ipv6h->daddr, sizeof(tup.ipv6.daddr));
+		tup.ipv6.sport = tcph->source;
+		tup.ipv6.dport = tcph->dest;
+
+		sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv6),
+					BPF_F_CURRENT_NETNS, 0);
+		if (!sk)
+			return;
+
+		if (sk->state != BPF_TCP_LISTEN)
+			goto release;
+
+		ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h),
+					      tcph, sizeof(*tcph));
+		break;
+
+	default:
+		return;
+	}
+
+	if (ret == 0)
+		bpf_map_update_elem(&results, &key, &value, 0);
+
+release:
+	bpf_sk_release(sk);
+}
+
+SEC("clsact/check_syncookie")
+int check_syncookie_clsact(struct __sk_buff *skb)
+{
+	check_syncookie(skb, (void *)(long)skb->data,
+			(void *)(long)skb->data_end);
+	return TC_ACT_OK;
+}
+
+SEC("xdp/check_syncookie")
+int check_syncookie_xdp(struct xdp_md *ctx)
+{
+	check_syncookie(ctx, (void *)(long)ctx->data,
+			(void *)(long)ctx->data_end);
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
new file mode 100755
index 000000000000..d48e51716d19
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Facebook
+# Copyright (c) 2019 Cloudflare
+
+set -eu
+
+wait_for_ip()
+{
+	local _i
+	printf "Wait for IP %s to become available " "$1"
+	for _i in $(seq ${MAX_PING_TRIES}); do
+		printf "."
+		if ns1_exec ping -c 1 -W 1 "$1" >/dev/null 2>&1; then
+			echo " OK"
+			return
+		fi
+		sleep 1
+	done
+	echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+	exit 1
+}
+
+get_prog_id()
+{
+	awk '/ id / {sub(/.* id /, "", $0); print($1)}'
+}
+
+ns1_exec()
+{
+	ip netns exec ns1 "$@"
+}
+
+setup()
+{
+	ip netns add ns1
+	ns1_exec ip link set lo up
+
+	ns1_exec sysctl -w net.ipv4.tcp_syncookies=2
+
+	wait_for_ip 127.0.0.1
+	wait_for_ip ::1
+}
+
+cleanup()
+{
+	ip netns del ns1 2>/dev/null || :
+}
+
+main()
+{
+	trap cleanup EXIT 2 3 6 15
+	setup
+
+	printf "Testing clsact..."
+	ns1_exec tc qdisc add dev "${TEST_IF}" clsact
+	ns1_exec tc filter add dev "${TEST_IF}" ingress \
+		bpf obj "${BPF_PROG_OBJ}" sec "${CLSACT_SECTION}" da
+
+	BPF_PROG_ID=$(ns1_exec tc filter show dev "${TEST_IF}" ingress | \
+		      get_prog_id)
+	ns1_exec "${PROG}" "${BPF_PROG_ID}"
+	ns1_exec tc qdisc del dev "${TEST_IF}" clsact
+
+	printf "Testing XDP..."
+	ns1_exec ip link set "${TEST_IF}" xdp \
+		object "${BPF_PROG_OBJ}" section "${XDP_SECTION}"
+	BPF_PROG_ID=$(ns1_exec ip link show "${TEST_IF}" | get_prog_id)
+	ns1_exec "${PROG}" "${BPF_PROG_ID}"
+}
+
+DIR=$(dirname $0)
+TEST_IF=lo
+MAX_PING_TRIES=5
+BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
+CLSACT_SECTION="clsact/check_syncookie"
+XDP_SECTION="xdp/check_syncookie"
+BPF_PROG_ID=0
+PROG="${DIR}/test_tcp_check_syncookie_user"
+
+main
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
new file mode 100644
index 000000000000..87829c86c746
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+// Copyright (c) 2019 Cloudflare
+
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+static int start_server(const struct sockaddr *addr, socklen_t len)
+{
+	int fd;
+
+	fd = socket(addr->sa_family, SOCK_STREAM, 0);
+	if (fd == -1) {
+		log_err("Failed to create server socket");
+		goto out;
+	}
+
+	if (bind(fd, addr, len) == -1) {
+		log_err("Failed to bind server socket");
+		goto close_out;
+	}
+
+	if (listen(fd, 128) == -1) {
+		log_err("Failed to listen on server socket");
+		goto close_out;
+	}
+
+	goto out;
+
+close_out:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static int connect_to_server(int server_fd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd = -1;
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		goto out;
+	}
+
+	fd = socket(addr.ss_family, SOCK_STREAM, 0);
+	if (fd == -1) {
+		log_err("Failed to create client socket");
+		goto out;
+	}
+
+	if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+		log_err("Fail to connect to server");
+		goto close_out;
+	}
+
+	goto out;
+
+close_out:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static int get_map_fd_by_prog_id(int prog_id)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	__u32 map_ids[1];
+	int prog_fd = -1;
+	int map_fd = -1;
+
+	prog_fd = bpf_prog_get_fd_by_id(prog_id);
+	if (prog_fd < 0) {
+		log_err("Failed to get fd by prog id %d", prog_id);
+		goto err;
+	}
+
+	info.nr_map_ids = 1;
+	info.map_ids = (__u64)(unsigned long)map_ids;
+
+	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+		log_err("Failed to get info by prog fd %d", prog_fd);
+		goto err;
+	}
+
+	if (!info.nr_map_ids) {
+		log_err("No maps found for prog fd %d", prog_fd);
+		goto err;
+	}
+
+	map_fd = bpf_map_get_fd_by_id(map_ids[0]);
+	if (map_fd < 0)
+		log_err("Failed to get fd by map id %d", map_ids[0]);
+err:
+	if (prog_fd >= 0)
+		close(prog_fd);
+	return map_fd;
+}
+
+static int run_test(int server_fd, int results_fd)
+{
+	int client = -1, srv_client = -1;
+	int ret = 0;
+	__u32 key = 0;
+	__u64 value = 0;
+
+	if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) {
+		log_err("Can't clear results");
+		goto err;
+	}
+
+	client = connect_to_server(server_fd);
+	if (client == -1)
+		goto err;
+
+	srv_client = accept(server_fd, NULL, 0);
+	if (srv_client == -1) {
+		log_err("Can't accept connection");
+		goto err;
+	}
+
+	if (bpf_map_lookup_elem(results_fd, &key, &value) < 0) {
+		log_err("Can't lookup result");
+		goto err;
+	}
+
+	if (value != 1) {
+		log_err("Didn't match syncookie: %llu", value);
+		goto err;
+	}
+
+	goto out;
+
+err:
+	ret = 1;
+out:
+	close(client);
+	close(srv_client);
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	struct sockaddr_in addr4;
+	struct sockaddr_in6 addr6;
+	int server = -1;
+	int server_v6 = -1;
+	int results = -1;
+	int err = 0;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s prog_id\n", argv[0]);
+		exit(1);
+	}
+
+	results = get_map_fd_by_prog_id(atoi(argv[1]));
+	if (results < 0) {
+		log_err("Can't get map");
+		goto err;
+	}
+
+	memset(&addr4, 0, sizeof(addr4));
+	addr4.sin_family = AF_INET;
+	addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	addr4.sin_port = 0;
+
+	memset(&addr6, 0, sizeof(addr6));
+	addr6.sin6_family = AF_INET6;
+	addr6.sin6_addr = in6addr_loopback;
+	addr6.sin6_port = 0;
+
+	server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
+	if (server == -1)
+		goto err;
+
+	server_v6 = start_server((const struct sockaddr *)&addr6,
+				 sizeof(addr6));
+	if (server_v6 == -1)
+		goto err;
+
+	if (run_test(server, results))
+		goto err;
+
+	if (run_test(server_v6, results))
+		goto err;
+
+	printf("ok\n");
+	goto out;
+err:
+	err = 1;
+out:
+	close(server);
+	close(server_v6);
+	close(results);
+	return err;
+}
-- 
cgit v1.2.3


From f6827526279d75f0b1c1605b1bf560024bd7696f Mon Sep 17 00:00:00 2001
From: Ivan Vecera <ivecera@redhat.com>
Date: Fri, 15 Mar 2019 21:04:14 +0100
Subject: selftests: bpf: modify urandom_read and link it non-statically

After some experiences I found that urandom_read does not need to be
linked statically. When the 'read' syscall call is moved to separate
non-inlined function then bpf_get_stackid() is able to find
the executable in stack trace and extract its build_id from it.

Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile       |  2 +-
 tools/testing/selftests/bpf/urandom_read.c | 15 +++++++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 25d3939eb840..edd59707cb1f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -70,7 +70,7 @@ TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
 all: $(TEST_CUSTOM_PROGS)
 
 $(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c
-	$(CC) -o $@ -static $< -Wl,--build-id
+	$(CC) -o $@ $< -Wl,--build-id
 
 BPFOBJ := $(OUTPUT)/libbpf.a
 
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index 9de8b7cb4e6d..db781052758d 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -7,11 +7,19 @@
 
 #define BUF_SIZE 256
 
+static __attribute__((noinline))
+void urandom_read(int fd, int count)
+{
+       char buf[BUF_SIZE];
+       int i;
+
+       for (i = 0; i < count; ++i)
+               read(fd, buf, BUF_SIZE);
+}
+
 int main(int argc, char *argv[])
 {
 	int fd = open("/dev/urandom", O_RDONLY);
-	int i;
-	char buf[BUF_SIZE];
 	int count = 4;
 
 	if (fd < 0)
@@ -20,8 +28,7 @@ int main(int argc, char *argv[])
 	if (argc == 2)
 		count = atoi(argv[1]);
 
-	for (i = 0; i < count; ++i)
-		read(fd, buf, BUF_SIZE);
+	urandom_read(fd, count);
 
 	close(fd);
 	return 0;
-- 
cgit v1.2.3


From 98cdabcd0798bd9991821493120b928ed0dfab73 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Mar 2019 14:32:49 -0400
Subject: selftests/bpf: bpf tunnel encap test

Validate basic tunnel encapsulation using ipip.

Set up two namespaces connected by veth. Connect a client and server.
Do this with and without bpf encap.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile               |  3 +-
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 83 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_tc_tunnel.sh      | 75 +++++++++++++++++++
 3 files changed, 160 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_tc_tunnel.c
 create mode 100755 tools/testing/selftests/bpf/test_tc_tunnel.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index edd59707cb1f..cdcc54ddf4b9 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -52,7 +52,8 @@ TEST_PROGS := test_kmod.sh \
 	test_flow_dissector.sh \
 	test_xdp_vlan.sh \
 	test_lwt_ip_encap.sh \
-	test_tcp_check_syncookie.sh
+	test_tcp_check_syncookie.sh \
+	test_tc_tunnel.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
new file mode 100644
index 000000000000..8223e4347be8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* In-place tunneling */
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <linux/types.h>
+
+#include "bpf_endian.h"
+#include "bpf_helpers.h"
+
+static const int cfg_port = 8000;
+
+static __always_inline void set_ipv4_csum(struct iphdr *iph)
+{
+	__u16 *iph16 = (__u16 *)iph;
+	__u32 csum;
+	int i;
+
+	iph->check = 0;
+
+#pragma clang loop unroll(full)
+	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
+		csum += *iph16++;
+
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+}
+
+SEC("encap")
+int encap_f(struct __sk_buff *skb)
+{
+	struct iphdr iph_outer, iph_inner;
+	struct tcphdr tcph;
+
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+			       sizeof(iph_inner)) < 0)
+		return TC_ACT_OK;
+
+	/* filter only packets we want */
+	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
+		return TC_ACT_OK;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+			       &tcph, sizeof(tcph)) < 0)
+		return TC_ACT_OK;
+
+	if (tcph.dest != __bpf_constant_htons(cfg_port))
+		return TC_ACT_OK;
+
+	/* add room between mac and network header */
+	if (bpf_skb_adjust_room(skb, sizeof(iph_outer), BPF_ADJ_ROOM_NET, 0))
+		return TC_ACT_SHOT;
+
+	/* prepare new outer network header */
+	iph_outer = iph_inner;
+	iph_outer.protocol = IPPROTO_IPIP;
+	iph_outer.tot_len = bpf_htons(sizeof(iph_outer) +
+				      bpf_htons(iph_outer.tot_len));
+	set_ipv4_csum(&iph_outer);
+
+	/* store new outer network header */
+	if (bpf_skb_store_bytes(skb, ETH_HLEN, &iph_outer, sizeof(iph_outer),
+				BPF_F_INVALIDATE_HASH) < 0)
+		return TC_ACT_SHOT;
+
+	/* bpf_skb_adjust_room has moved header to start of room: restore */
+	if (bpf_skb_store_bytes(skb, ETH_HLEN + sizeof(iph_outer),
+				&iph_inner, sizeof(iph_inner),
+				BPF_F_INVALIDATE_HASH) < 0)
+		return TC_ACT_SHOT;
+
+	return TC_ACT_OK;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
new file mode 100755
index 000000000000..6ebb288a3afc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# In-place tunneling
+
+# must match the port that the bpf program filters on
+readonly port=8000
+
+readonly ns_prefix="ns-$$-"
+readonly ns1="${ns_prefix}1"
+readonly ns2="${ns_prefix}2"
+
+readonly ns1_v4=192.168.1.1
+readonly ns2_v4=192.168.1.2
+
+setup() {
+	ip netns add "${ns1}"
+	ip netns add "${ns2}"
+
+	ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \
+	      peer name veth2 mtu 1500 netns "${ns2}"
+
+	ip -netns "${ns1}" link set veth1 up
+	ip -netns "${ns2}" link set veth2 up
+
+	ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1
+	ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2
+
+	sleep 1
+}
+
+cleanup() {
+	ip netns del "${ns2}"
+	ip netns del "${ns1}"
+}
+
+server_listen() {
+	ip netns exec "${ns2}" nc -l -p "${port}" &
+	sleep 0.2
+}
+
+client_connect() {
+	ip netns exec "${ns1}" nc -z -w 1 "${ns2_v4}" "${port}"
+	echo $?
+}
+
+set -e
+trap cleanup EXIT
+
+setup
+
+# basic communication works
+echo "test basic connectivity"
+server_listen
+client_connect
+
+# clientside, insert bpf program to encap all TCP to port ${port}
+# client can no longer connect
+ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
+ip netns exec "${ns1}" tc filter add dev veth1 egress \
+	bpf direct-action object-file ./test_tc_tunnel.o section encap
+echo "test bpf encap without decap (expect failure)"
+server_listen
+! client_connect
+
+# serverside, insert decap module
+# server is still running
+# client can connect again
+ip netns exec "${ns2}" ip link add dev testtun0 type ipip \
+	remote "${ns1_v4}" local "${ns2_v4}"
+ip netns exec "${ns2}" ip link set dev testtun0 up
+echo "test bpf encap with tunnel device decap"
+client_connect
+
+echo OK
-- 
cgit v1.2.3


From ccd34cd3577dd6e244269bb8ccfab228360aa53d Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Mar 2019 14:32:50 -0400
Subject: selftests/bpf: expand bpf tunnel test with decap

The bpf tunnel test encapsulates using bpf, then decapsulates using
a standard tunnel device to verify correctness.

Once encap is verified, also test decap, by replacing the tunnel
device on decap with another bpf program.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 31 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_tc_tunnel.sh      |  9 +++++++
 2 files changed, 40 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 8223e4347be8..25db148635ab 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -80,4 +80,35 @@ int encap_f(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
+SEC("decap")
+int decap_f(struct __sk_buff *skb)
+{
+	struct iphdr iph_outer, iph_inner;
+
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
+			       sizeof(iph_outer)) < 0)
+		return TC_ACT_OK;
+
+	if (iph_outer.ihl != 5 || iph_outer.protocol != IPPROTO_IPIP)
+		return TC_ACT_OK;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_outer),
+			       &iph_inner, sizeof(iph_inner)) < 0)
+		return TC_ACT_OK;
+
+	if (bpf_skb_adjust_room(skb, -(int)sizeof(iph_outer),
+				BPF_ADJ_ROOM_NET, 0))
+		return TC_ACT_SHOT;
+
+	/* bpf_skb_adjust_room has moved outer over inner header: restore */
+	if (bpf_skb_store_bytes(skb, ETH_HLEN, &iph_inner, sizeof(iph_inner),
+				BPF_F_INVALIDATE_HASH) < 0)
+		return TC_ACT_SHOT;
+
+	return TC_ACT_OK;
+}
+
 char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 6ebb288a3afc..91151d91e5a1 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -72,4 +72,13 @@ ip netns exec "${ns2}" ip link set dev testtun0 up
 echo "test bpf encap with tunnel device decap"
 client_connect
 
+# serverside, use BPF for decap
+ip netns exec "${ns2}" ip link del dev testtun0
+ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
+ip netns exec "${ns2}" tc filter add dev veth2 ingress \
+	bpf direct-action object-file ./test_tc_tunnel.o section decap
+server_listen
+echo "test bpf encap with bpf decap"
+client_connect
+
 echo OK
-- 
cgit v1.2.3


From ef81bd054942e2bd8289c91a3528e6fc0ca26c1c Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Mar 2019 14:32:51 -0400
Subject: selftests/bpf: expand bpf tunnel test to ipv6

The test only uses ipv4 so far, expand to ipv6.
This is mostly a boilerplate near copy of the ipv4 path.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/config                 |   2 +
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 116 +++++++++++++++++----
 tools/testing/selftests/bpf/test_tc_tunnel.sh      |  53 +++++++++-
 3 files changed, 149 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 37f947ec44ed..a42f4fc4dc11 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -23,3 +23,5 @@ CONFIG_LWTUNNEL=y
 CONFIG_BPF_STREAM_PARSER=y
 CONFIG_XDP_SOCKETS=y
 CONFIG_FTRACE_SYSCALLS=y
+CONFIG_IPV6_TUNNEL=y
+CONFIG_IPV6_GRE=y
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 25db148635ab..591f540ce513 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -7,6 +7,7 @@
 #include <linux/if_ether.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <linux/tcp.h>
 #include <linux/pkt_cls.h>
 #include <linux/types.h>
@@ -31,15 +32,11 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
 	iph->check = ~((csum & 0xffff) + (csum >> 16));
 }
 
-SEC("encap")
-int encap_f(struct __sk_buff *skb)
+static int encap_ipv4(struct __sk_buff *skb)
 {
 	struct iphdr iph_outer, iph_inner;
 	struct tcphdr tcph;
 
-	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
-		return TC_ACT_OK;
-
 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
 			       sizeof(iph_inner)) < 0)
 		return TC_ACT_OK;
@@ -80,35 +77,118 @@ int encap_f(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("decap")
-int decap_f(struct __sk_buff *skb)
+static int encap_ipv6(struct __sk_buff *skb)
 {
-	struct iphdr iph_outer, iph_inner;
+	struct ipv6hdr iph_outer, iph_inner;
+	struct tcphdr tcph;
 
-	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+			       sizeof(iph_inner)) < 0)
 		return TC_ACT_OK;
 
-	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
-			       sizeof(iph_outer)) < 0)
+	/* filter only packets we want */
+	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+			       &tcph, sizeof(tcph)) < 0)
 		return TC_ACT_OK;
 
-	if (iph_outer.ihl != 5 || iph_outer.protocol != IPPROTO_IPIP)
+	if (tcph.dest != __bpf_constant_htons(cfg_port))
+		return TC_ACT_OK;
+
+	/* add room between mac and network header */
+	if (bpf_skb_adjust_room(skb, sizeof(iph_outer), BPF_ADJ_ROOM_NET, 0))
+		return TC_ACT_SHOT;
+
+	/* prepare new outer network header */
+	iph_outer = iph_inner;
+	iph_outer.nexthdr = IPPROTO_IPV6;
+	iph_outer.payload_len = bpf_htons(sizeof(iph_outer) +
+					  bpf_ntohs(iph_outer.payload_len));
+
+	/* store new outer network header */
+	if (bpf_skb_store_bytes(skb, ETH_HLEN, &iph_outer, sizeof(iph_outer),
+				BPF_F_INVALIDATE_HASH) < 0)
+		return TC_ACT_SHOT;
+
+	/* bpf_skb_adjust_room has moved header to start of room: restore */
+	if (bpf_skb_store_bytes(skb, ETH_HLEN + sizeof(iph_outer),
+				&iph_inner, sizeof(iph_inner),
+				BPF_F_INVALIDATE_HASH) < 0)
+		return TC_ACT_SHOT;
+
+	return TC_ACT_OK;
+}
+
+SEC("encap")
+int encap_f(struct __sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case __bpf_constant_htons(ETH_P_IP):
+		return encap_ipv4(skb);
+	case __bpf_constant_htons(ETH_P_IPV6):
+		return encap_ipv6(skb);
+	default:
+		/* does not match, ignore */
 		return TC_ACT_OK;
+	}
+}
 
-	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_outer),
-			       &iph_inner, sizeof(iph_inner)) < 0)
+static int decap_internal(struct __sk_buff *skb, int off, int len)
+{
+	char buf[sizeof(struct ipv6hdr)];
+
+	if (bpf_skb_load_bytes(skb, off + len, &buf, len) < 0)
 		return TC_ACT_OK;
 
-	if (bpf_skb_adjust_room(skb, -(int)sizeof(iph_outer),
-				BPF_ADJ_ROOM_NET, 0))
+	if (bpf_skb_adjust_room(skb, -len, BPF_ADJ_ROOM_NET, 0))
 		return TC_ACT_SHOT;
 
 	/* bpf_skb_adjust_room has moved outer over inner header: restore */
-	if (bpf_skb_store_bytes(skb, ETH_HLEN, &iph_inner, sizeof(iph_inner),
-				BPF_F_INVALIDATE_HASH) < 0)
+	if (bpf_skb_store_bytes(skb, off, buf, len, BPF_F_INVALIDATE_HASH) < 0)
 		return TC_ACT_SHOT;
 
 	return TC_ACT_OK;
 }
 
+static int decap_ipv4(struct __sk_buff *skb)
+{
+	struct iphdr iph_outer;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
+			       sizeof(iph_outer)) < 0)
+		return TC_ACT_OK;
+
+	if (iph_outer.ihl != 5 || iph_outer.protocol != IPPROTO_IPIP)
+		return TC_ACT_OK;
+
+	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer));
+}
+
+static int decap_ipv6(struct __sk_buff *skb)
+{
+	struct ipv6hdr iph_outer;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
+			       sizeof(iph_outer)) < 0)
+		return TC_ACT_OK;
+
+	if (iph_outer.nexthdr != IPPROTO_IPV6)
+		return TC_ACT_OK;
+
+	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer));
+}
+
+SEC("decap")
+int decap_f(struct __sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case __bpf_constant_htons(ETH_P_IP):
+		return decap_ipv4(skb);
+	case __bpf_constant_htons(ETH_P_IPV6):
+		return decap_ipv6(skb);
+	default:
+		/* does not match, ignore */
+		return TC_ACT_OK;
+	}
+}
+
 char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 91151d91e5a1..7b1758f3006b 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -12,6 +12,9 @@ readonly ns2="${ns_prefix}2"
 
 readonly ns1_v4=192.168.1.1
 readonly ns2_v4=192.168.1.2
+readonly ns1_v6=fd::1
+readonly ns2_v6=fd::2
+
 
 setup() {
 	ip netns add "${ns1}"
@@ -25,6 +28,8 @@ setup() {
 
 	ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1
 	ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2
+	ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
+	ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
 
 	sleep 1
 }
@@ -35,16 +40,56 @@ cleanup() {
 }
 
 server_listen() {
-	ip netns exec "${ns2}" nc -l -p "${port}" &
+	ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" &
 	sleep 0.2
 }
 
 client_connect() {
-	ip netns exec "${ns1}" nc -z -w 1 "${ns2_v4}" "${port}"
+	ip netns exec "${ns1}" nc "${netcat_opt}" -z -w 1 "${addr2}" "${port}"
 	echo $?
 }
 
 set -e
+
+# no arguments: automated test, run all
+if [[ "$#" -eq "0" ]]; then
+	echo "ipip"
+	$0 ipv4
+
+	echo "ip6ip6"
+	$0 ipv6
+
+	echo "OK. All tests passed"
+	exit 0
+fi
+
+if [[ "$#" -ne "1" ]]; then
+	echo "Usage: $0"
+	echo "   or: $0 <ipv4|ipv6>"
+	exit 1
+fi
+
+case "$1" in
+"ipv4")
+	readonly tuntype=ipip
+	readonly addr1="${ns1_v4}"
+	readonly addr2="${ns2_v4}"
+	readonly netcat_opt=-4
+	;;
+"ipv6")
+	readonly tuntype=ip6tnl
+	readonly addr1="${ns1_v6}"
+	readonly addr2="${ns2_v6}"
+	readonly netcat_opt=-6
+	;;
+*)
+	echo "unknown arg: $1"
+	exit 1
+	;;
+esac
+
+echo "encap ${addr1} to ${addr2}, type ${tuntype}"
+
 trap cleanup EXIT
 
 setup
@@ -66,8 +111,8 @@ server_listen
 # serverside, insert decap module
 # server is still running
 # client can connect again
-ip netns exec "${ns2}" ip link add dev testtun0 type ipip \
-	remote "${ns1_v4}" local "${ns2_v4}"
+ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
+	remote "${addr1}" local "${addr2}"
 ip netns exec "${ns2}" ip link set dev testtun0 up
 echo "test bpf encap with tunnel device decap"
 client_connect
-- 
cgit v1.2.3


From 7255fade7b93e7e84e12f27ae5e8af9cf8b93745 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Mar 2019 14:32:52 -0400
Subject: selftests/bpf: extend bpf tunnel test with gre

GRE is a commonly used protocol. Add GRE cases for both IPv4 and IPv6.

It also inserts different sized headers, which can expose some
unexpected edge cases.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 148 +++++++++++++++------
 tools/testing/selftests/bpf/test_tc_tunnel.sh      |  21 ++-
 2 files changed, 123 insertions(+), 46 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 591f540ce513..900c5653105f 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -2,6 +2,9 @@
 
 /* In-place tunneling */
 
+#include <stdbool.h>
+#include <string.h>
+
 #include <linux/stddef.h>
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
@@ -17,6 +20,18 @@
 
 static const int cfg_port = 8000;
 
+struct grev4hdr {
+	struct iphdr ip;
+	__be16 flags;
+	__be16 protocol;
+} __attribute__((packed));
+
+struct grev6hdr {
+	struct ipv6hdr ip;
+	__be16 flags;
+	__be16 protocol;
+} __attribute__((packed));
+
 static __always_inline void set_ipv4_csum(struct iphdr *iph)
 {
 	__u16 *iph16 = (__u16 *)iph;
@@ -32,10 +47,12 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
 	iph->check = ~((csum & 0xffff) + (csum >> 16));
 }
 
-static int encap_ipv4(struct __sk_buff *skb)
+static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
 {
-	struct iphdr iph_outer, iph_inner;
+	struct grev4hdr h_outer;
+	struct iphdr iph_inner;
 	struct tcphdr tcph;
+	int olen;
 
 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
 			       sizeof(iph_inner)) < 0)
@@ -52,24 +69,33 @@ static int encap_ipv4(struct __sk_buff *skb)
 	if (tcph.dest != __bpf_constant_htons(cfg_port))
 		return TC_ACT_OK;
 
+	olen = with_gre ? sizeof(h_outer) : sizeof(h_outer.ip);
+
 	/* add room between mac and network header */
-	if (bpf_skb_adjust_room(skb, sizeof(iph_outer), BPF_ADJ_ROOM_NET, 0))
+	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_NET, 0))
 		return TC_ACT_SHOT;
 
 	/* prepare new outer network header */
-	iph_outer = iph_inner;
-	iph_outer.protocol = IPPROTO_IPIP;
-	iph_outer.tot_len = bpf_htons(sizeof(iph_outer) +
-				      bpf_htons(iph_outer.tot_len));
-	set_ipv4_csum(&iph_outer);
+	h_outer.ip = iph_inner;
+	h_outer.ip.tot_len = bpf_htons(olen +
+				      bpf_htons(h_outer.ip.tot_len));
+	if (with_gre) {
+		h_outer.ip.protocol = IPPROTO_GRE;
+		h_outer.protocol = bpf_htons(ETH_P_IP);
+		h_outer.flags = 0;
+	} else {
+		h_outer.ip.protocol = IPPROTO_IPIP;
+	}
+
+	set_ipv4_csum((void *)&h_outer.ip);
 
 	/* store new outer network header */
-	if (bpf_skb_store_bytes(skb, ETH_HLEN, &iph_outer, sizeof(iph_outer),
+	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
 				BPF_F_INVALIDATE_HASH) < 0)
 		return TC_ACT_SHOT;
 
 	/* bpf_skb_adjust_room has moved header to start of room: restore */
-	if (bpf_skb_store_bytes(skb, ETH_HLEN + sizeof(iph_outer),
+	if (bpf_skb_store_bytes(skb, ETH_HLEN + olen,
 				&iph_inner, sizeof(iph_inner),
 				BPF_F_INVALIDATE_HASH) < 0)
 		return TC_ACT_SHOT;
@@ -77,10 +103,12 @@ static int encap_ipv4(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-static int encap_ipv6(struct __sk_buff *skb)
+static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
 {
-	struct ipv6hdr iph_outer, iph_inner;
+	struct ipv6hdr iph_inner;
+	struct grev6hdr h_outer;
 	struct tcphdr tcph;
+	int olen;
 
 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
 			       sizeof(iph_inner)) < 0)
@@ -94,23 +122,31 @@ static int encap_ipv6(struct __sk_buff *skb)
 	if (tcph.dest != __bpf_constant_htons(cfg_port))
 		return TC_ACT_OK;
 
+	olen = with_gre ? sizeof(h_outer) : sizeof(h_outer.ip);
+
 	/* add room between mac and network header */
-	if (bpf_skb_adjust_room(skb, sizeof(iph_outer), BPF_ADJ_ROOM_NET, 0))
+	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_NET, 0))
 		return TC_ACT_SHOT;
 
 	/* prepare new outer network header */
-	iph_outer = iph_inner;
-	iph_outer.nexthdr = IPPROTO_IPV6;
-	iph_outer.payload_len = bpf_htons(sizeof(iph_outer) +
-					  bpf_ntohs(iph_outer.payload_len));
+	h_outer.ip = iph_inner;
+	h_outer.ip.payload_len = bpf_htons(olen +
+					   bpf_ntohs(h_outer.ip.payload_len));
+	if (with_gre) {
+		h_outer.ip.nexthdr = IPPROTO_GRE;
+		h_outer.protocol = bpf_htons(ETH_P_IPV6);
+		h_outer.flags = 0;
+	} else {
+		h_outer.ip.nexthdr = IPPROTO_IPV6;
+	}
 
 	/* store new outer network header */
-	if (bpf_skb_store_bytes(skb, ETH_HLEN, &iph_outer, sizeof(iph_outer),
+	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
 				BPF_F_INVALIDATE_HASH) < 0)
 		return TC_ACT_SHOT;
 
 	/* bpf_skb_adjust_room has moved header to start of room: restore */
-	if (bpf_skb_store_bytes(skb, ETH_HLEN + sizeof(iph_outer),
+	if (bpf_skb_store_bytes(skb, ETH_HLEN + olen,
 				&iph_inner, sizeof(iph_inner),
 				BPF_F_INVALIDATE_HASH) < 0)
 		return TC_ACT_SHOT;
@@ -118,28 +154,63 @@ static int encap_ipv6(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("encap")
-int encap_f(struct __sk_buff *skb)
+SEC("encap_ipip")
+int __encap_ipip(struct __sk_buff *skb)
 {
-	switch (skb->protocol) {
-	case __bpf_constant_htons(ETH_P_IP):
-		return encap_ipv4(skb);
-	case __bpf_constant_htons(ETH_P_IPV6):
-		return encap_ipv6(skb);
-	default:
-		/* does not match, ignore */
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv4(skb, false);
+	else
 		return TC_ACT_OK;
-	}
 }
 
-static int decap_internal(struct __sk_buff *skb, int off, int len)
+SEC("encap_gre")
+int __encap_gre(struct __sk_buff *skb)
 {
-	char buf[sizeof(struct ipv6hdr)];
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv4(skb, true);
+	else
+		return TC_ACT_OK;
+}
 
-	if (bpf_skb_load_bytes(skb, off + len, &buf, len) < 0)
+SEC("encap_ip6tnl")
+int __encap_ip6tnl(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+		return encap_ipv6(skb, false);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_ip6gre")
+int __encap_ip6gre(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+		return encap_ipv6(skb, true);
+	else
 		return TC_ACT_OK;
+}
 
-	if (bpf_skb_adjust_room(skb, -len, BPF_ADJ_ROOM_NET, 0))
+static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
+{
+	char buf[sizeof(struct grev6hdr)];
+	int olen;
+
+	switch (proto) {
+	case IPPROTO_IPIP:
+	case IPPROTO_IPV6:
+		olen = len;
+		break;
+	case IPPROTO_GRE:
+		olen = len + 4 /* gre hdr */;
+		break;
+	default:
+		return TC_ACT_OK;
+	}
+
+	if (bpf_skb_load_bytes(skb, off + olen, &buf, olen) < 0)
+		return TC_ACT_OK;
+
+	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_NET, 0))
 		return TC_ACT_SHOT;
 
 	/* bpf_skb_adjust_room has moved outer over inner header: restore */
@@ -157,10 +228,11 @@ static int decap_ipv4(struct __sk_buff *skb)
 			       sizeof(iph_outer)) < 0)
 		return TC_ACT_OK;
 
-	if (iph_outer.ihl != 5 || iph_outer.protocol != IPPROTO_IPIP)
+	if (iph_outer.ihl != 5)
 		return TC_ACT_OK;
 
-	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer));
+	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
+			      iph_outer.protocol);
 }
 
 static int decap_ipv6(struct __sk_buff *skb)
@@ -171,10 +243,8 @@ static int decap_ipv6(struct __sk_buff *skb)
 			       sizeof(iph_outer)) < 0)
 		return TC_ACT_OK;
 
-	if (iph_outer.nexthdr != IPPROTO_IPV6)
-		return TC_ACT_OK;
-
-	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer));
+	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
+			      iph_outer.nexthdr);
 }
 
 SEC("decap")
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 7b1758f3006b..c78922048610 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -54,30 +54,36 @@ set -e
 # no arguments: automated test, run all
 if [[ "$#" -eq "0" ]]; then
 	echo "ipip"
-	$0 ipv4
+	$0 ipv4 ipip
 
 	echo "ip6ip6"
-	$0 ipv6
+	$0 ipv6 ip6tnl
+
+	echo "ip gre"
+	$0 ipv4 gre
+
+	echo "ip6 gre"
+	$0 ipv6 ip6gre
 
 	echo "OK. All tests passed"
 	exit 0
 fi
 
-if [[ "$#" -ne "1" ]]; then
+if [[ "$#" -ne "2" ]]; then
 	echo "Usage: $0"
-	echo "   or: $0 <ipv4|ipv6>"
+	echo "   or: $0 <ipv4|ipv6> <tuntype>"
 	exit 1
 fi
 
 case "$1" in
 "ipv4")
-	readonly tuntype=ipip
+	readonly tuntype=$2
 	readonly addr1="${ns1_v4}"
 	readonly addr2="${ns2_v4}"
 	readonly netcat_opt=-4
 	;;
 "ipv6")
-	readonly tuntype=ip6tnl
+	readonly tuntype=$2
 	readonly addr1="${ns1_v6}"
 	readonly addr2="${ns2_v6}"
 	readonly netcat_opt=-6
@@ -103,7 +109,8 @@ client_connect
 # client can no longer connect
 ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
 ip netns exec "${ns1}" tc filter add dev veth1 egress \
-	bpf direct-action object-file ./test_tc_tunnel.o section encap
+	bpf direct-action object-file ./test_tc_tunnel.o \
+	section "encap_${tuntype}"
 echo "test bpf encap without decap (expect failure)"
 server_listen
 ! client_connect
-- 
cgit v1.2.3


From 8142958954d17a31e0ac9e3a9c91103a1c171179 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Mar 2019 14:32:53 -0400
Subject: selftests/bpf: extend bpf tunnel test with tso

Segmentation offload takes a longer path. Verify that the feature
works with large packets.

The test succeeds if not setting dodgy in bpf_skb_adjust_room, as veth
TSO is permissive.

If not setting SKB_GSO_DODGY, this enables tunneled TSO offload on
supporting NICs.

The feature sets SKB_GSO_DODGY because the caller is untrusted. As a
result the packets traverse through the gso stack at least up to TCP.
And fail the gso_type validation, such as the skb->encapsulation check
in gre_gso_segment and the gso_type checks introduced in commit
418e897e0716 ("gso: validate gso_type on ipip style tunnel").

This will be addressed in a follow-on feature patch. In the meantime,
disable the new gso tests.

Changes v1->v2:
  - not all netcat versions support flag '-q', use timeout instead

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_tc_tunnel.sh | 60 ++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index c78922048610..9e18754f2354 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -15,6 +15,8 @@ readonly ns2_v4=192.168.1.2
 readonly ns1_v6=fd::1
 readonly ns2_v6=fd::2
 
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
 
 setup() {
 	ip netns add "${ns1}"
@@ -23,6 +25,8 @@ setup() {
 	ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \
 	      peer name veth2 mtu 1500 netns "${ns2}"
 
+	ip netns exec "${ns1}" ethtool -K veth1 tso off
+
 	ip -netns "${ns1}" link set veth1 up
 	ip -netns "${ns2}" link set veth2 up
 
@@ -32,58 +36,86 @@ setup() {
 	ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
 
 	sleep 1
+
+	dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
 }
 
 cleanup() {
 	ip netns del "${ns2}"
 	ip netns del "${ns1}"
+
+	if [[ -f "${outfile}" ]]; then
+		rm "${outfile}"
+	fi
+	if [[ -f "${infile}" ]]; then
+		rm "${infile}"
+	fi
 }
 
 server_listen() {
-	ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" &
+	ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+	server_pid=$!
 	sleep 0.2
 }
 
 client_connect() {
-	ip netns exec "${ns1}" nc "${netcat_opt}" -z -w 1 "${addr2}" "${port}"
+	ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}"
 	echo $?
 }
 
+verify_data() {
+	wait "${server_pid}"
+	# sha1sum returns two fields [sha1] [filepath]
+	# convert to bash array and access first elem
+	insum=($(sha1sum ${infile}))
+	outsum=($(sha1sum ${outfile}))
+	if [[ "${insum[0]}" != "${outsum[0]}" ]]; then
+		echo "data mismatch"
+		exit 1
+	fi
+}
+
 set -e
 
 # no arguments: automated test, run all
 if [[ "$#" -eq "0" ]]; then
 	echo "ipip"
-	$0 ipv4 ipip
+	$0 ipv4 ipip 100
 
 	echo "ip6ip6"
-	$0 ipv6 ip6tnl
+	$0 ipv6 ip6tnl 100
 
 	echo "ip gre"
-	$0 ipv4 gre
+	$0 ipv4 gre 100
 
 	echo "ip6 gre"
-	$0 ipv6 ip6gre
+	$0 ipv6 ip6gre 100
+
+	# disabled until passes SKB_GSO_DODGY checks
+	# echo "ip gre gso"
+	# $0 ipv4 gre 2000
+
+	# disabled until passes SKB_GSO_DODGY checks
+	# echo "ip6 gre gso"
+	# $0 ipv6 ip6gre 2000
 
 	echo "OK. All tests passed"
 	exit 0
 fi
 
-if [[ "$#" -ne "2" ]]; then
+if [[ "$#" -ne "3" ]]; then
 	echo "Usage: $0"
-	echo "   or: $0 <ipv4|ipv6> <tuntype>"
+	echo "   or: $0 <ipv4|ipv6> <tuntype> <data_len>"
 	exit 1
 fi
 
 case "$1" in
 "ipv4")
-	readonly tuntype=$2
 	readonly addr1="${ns1_v4}"
 	readonly addr2="${ns2_v4}"
 	readonly netcat_opt=-4
 	;;
 "ipv6")
-	readonly tuntype=$2
 	readonly addr1="${ns1_v6}"
 	readonly addr2="${ns2_v6}"
 	readonly netcat_opt=-6
@@ -94,7 +126,10 @@ case "$1" in
 	;;
 esac
 
-echo "encap ${addr1} to ${addr2}, type ${tuntype}"
+readonly tuntype=$2
+readonly datalen=$3
+
+echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}"
 
 trap cleanup EXIT
 
@@ -104,6 +139,7 @@ setup
 echo "test basic connectivity"
 server_listen
 client_connect
+verify_data
 
 # clientside, insert bpf program to encap all TCP to port ${port}
 # client can no longer connect
@@ -123,6 +159,7 @@ ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
 ip netns exec "${ns2}" ip link set dev testtun0 up
 echo "test bpf encap with tunnel device decap"
 client_connect
+verify_data
 
 # serverside, use BPF for decap
 ip netns exec "${ns2}" ip link del dev testtun0
@@ -132,5 +169,6 @@ ip netns exec "${ns2}" tc filter add dev veth2 ingress \
 server_listen
 echo "test bpf encap with bpf decap"
 client_connect
+verify_data
 
 echo OK
-- 
cgit v1.2.3


From 6c408decbdc8a12a12a93c4d763cbc2a264f9332 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Mar 2019 14:32:57 -0400
Subject: bpf: Sync bpf.h to tools

Sync include/uapi/linux/bpf.h with tools/

Changes
  v1->v2:
  - BPF_F_ADJ_ROOM_MASK moved, no longer in this commit
  v2->v3:
  - BPF_F_ADJ_ROOM_ENCAP_L3_MASK moved, no longer in this commit

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3c04410137d9..837024512baf 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1478,13 +1478,27 @@ union bpf_attr {
  * 		Grow or shrink the room for data in the packet associated to
  * 		*skb* by *len_diff*, and according to the selected *mode*.
  *
- * 		There is a single supported mode at this time:
+ *		There are two supported modes at this time:
+ *
+ *		* **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ *		  (room space is added or removed below the layer 2 header).
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
  * 		  (room space is added or removed below the layer 3 header).
  *
- * 		All values for *flags* are reserved for future usage, and must
- * 		be left at zero.
+ *		The following flags are supported at this time:
+ *
+ *		* **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ *		  Adjusting mss in this way is not allowed for datagrams.
+ *
+ *		* **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **:
+ *		* **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **:
+ *		  Any new space is reserved to hold a tunnel header.
+ *		  Configure skb offsets and other fields accordingly.
+ *
+ *		* **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **:
+ *		* **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
+ *		  Use with ENCAP_L3 flags to further specify the tunnel type.
  *
  * 		A call to this helper is susceptible to change the underlaying
  * 		packet buffer. Therefore, at load time, all checks on pointers
@@ -2624,9 +2638,18 @@ enum bpf_func_id {
 /* Current network namespace */
 #define BPF_F_CURRENT_NETNS		(-1L)
 
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO	(1ULL << 0)
+
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	(1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	(1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE	(1ULL << 3)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP	(1ULL << 4)
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
+	BPF_ADJ_ROOM_MAC,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
-- 
cgit v1.2.3


From 005edd16562b78f416e2f576a64789c90d96882f Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Mar 2019 14:32:58 -0400
Subject: selftests/bpf: convert bpf tunnel test to BPF_ADJ_ROOM_MAC

Avoid moving the network layer header when prefixing tunnel headers.

This avoids an explicit call to bpf_skb_store_bytes and an implicit
move of the network header bytes in bpf_skb_adjust_room.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 25 +++-------------------
 1 file changed, 3 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 900c5653105f..f6a16fd23dbd 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -72,7 +72,7 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
 	olen = with_gre ? sizeof(h_outer) : sizeof(h_outer.ip);
 
 	/* add room between mac and network header */
-	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_NET, 0))
+	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, 0))
 		return TC_ACT_SHOT;
 
 	/* prepare new outer network header */
@@ -94,12 +94,6 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
 				BPF_F_INVALIDATE_HASH) < 0)
 		return TC_ACT_SHOT;
 
-	/* bpf_skb_adjust_room has moved header to start of room: restore */
-	if (bpf_skb_store_bytes(skb, ETH_HLEN + olen,
-				&iph_inner, sizeof(iph_inner),
-				BPF_F_INVALIDATE_HASH) < 0)
-		return TC_ACT_SHOT;
-
 	return TC_ACT_OK;
 }
 
@@ -125,7 +119,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
 	olen = with_gre ? sizeof(h_outer) : sizeof(h_outer.ip);
 
 	/* add room between mac and network header */
-	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_NET, 0))
+	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, 0))
 		return TC_ACT_SHOT;
 
 	/* prepare new outer network header */
@@ -145,12 +139,6 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
 				BPF_F_INVALIDATE_HASH) < 0)
 		return TC_ACT_SHOT;
 
-	/* bpf_skb_adjust_room has moved header to start of room: restore */
-	if (bpf_skb_store_bytes(skb, ETH_HLEN + olen,
-				&iph_inner, sizeof(iph_inner),
-				BPF_F_INVALIDATE_HASH) < 0)
-		return TC_ACT_SHOT;
-
 	return TC_ACT_OK;
 }
 
@@ -207,14 +195,7 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 		return TC_ACT_OK;
 	}
 
-	if (bpf_skb_load_bytes(skb, off + olen, &buf, olen) < 0)
-		return TC_ACT_OK;
-
-	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_NET, 0))
-		return TC_ACT_SHOT;
-
-	/* bpf_skb_adjust_room has moved outer over inner header: restore */
-	if (bpf_skb_store_bytes(skb, off, buf, len, BPF_F_INVALIDATE_HASH) < 0)
+	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, 0))
 		return TC_ACT_SHOT;
 
 	return TC_ACT_OK;
-- 
cgit v1.2.3


From 94f16813e1b297d31f8fe6217cd9be19e080f998 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Mar 2019 14:32:59 -0400
Subject: selftests/bpf: convert bpf tunnel test to BPF_F_ADJ_ROOM_FIXED_GSO

Lower route MTU to ensure packets fit in device MTU after encap, then
skip the gso_size changes.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 11 ++++++++---
 tools/testing/selftests/bpf/test_tc_tunnel.sh      |  6 ++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index f6a16fd23dbd..3b79dffb8103 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -52,6 +52,7 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
 	struct grev4hdr h_outer;
 	struct iphdr iph_inner;
 	struct tcphdr tcph;
+	__u64 flags;
 	int olen;
 
 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
@@ -69,10 +70,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
 	if (tcph.dest != __bpf_constant_htons(cfg_port))
 		return TC_ACT_OK;
 
+	flags = BPF_F_ADJ_ROOM_FIXED_GSO;
 	olen = with_gre ? sizeof(h_outer) : sizeof(h_outer.ip);
 
 	/* add room between mac and network header */
-	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, 0))
+	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
 		return TC_ACT_SHOT;
 
 	/* prepare new outer network header */
@@ -102,6 +104,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
 	struct ipv6hdr iph_inner;
 	struct grev6hdr h_outer;
 	struct tcphdr tcph;
+	__u64 flags;
 	int olen;
 
 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
@@ -116,10 +119,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
 	if (tcph.dest != __bpf_constant_htons(cfg_port))
 		return TC_ACT_OK;
 
+	flags = BPF_F_ADJ_ROOM_FIXED_GSO;
 	olen = with_gre ? sizeof(h_outer) : sizeof(h_outer.ip);
 
 	/* add room between mac and network header */
-	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, 0))
+	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
 		return TC_ACT_SHOT;
 
 	/* prepare new outer network header */
@@ -195,7 +199,8 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 		return TC_ACT_OK;
 	}
 
-	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, 0))
+	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
+				BPF_F_ADJ_ROOM_FIXED_GSO))
 		return TC_ACT_SHOT;
 
 	return TC_ACT_OK;
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 9e18754f2354..cda5317790d2 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -35,6 +35,12 @@ setup() {
 	ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
 	ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
 
+	# clamp route to reserve room for tunnel headers
+	ip -netns "${ns1}" -4 route flush table main
+	ip -netns "${ns1}" -6 route flush table main
+	ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1476 dev veth1
+	ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1456 dev veth1
+
 	sleep 1
 
 	dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
-- 
cgit v1.2.3


From 75a1a9fa2e20de6319a19161ce4e2e1817d70e28 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Mar 2019 14:33:00 -0400
Subject: selftests/bpf: convert bpf tunnel test to encap modes

Make the tests correctly annotate skbs with tunnel metadata.

This makes the gso tests succeed. Enable them.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 19 +++++++++++++++----
 tools/testing/selftests/bpf/test_tc_tunnel.sh      | 10 ++++------
 2 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 3b79dffb8103..f541c2de947d 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -70,8 +70,13 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
 	if (tcph.dest != __bpf_constant_htons(cfg_port))
 		return TC_ACT_OK;
 
-	flags = BPF_F_ADJ_ROOM_FIXED_GSO;
-	olen = with_gre ? sizeof(h_outer) : sizeof(h_outer.ip);
+	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
+	if (with_gre) {
+		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
+		olen = sizeof(h_outer);
+	} else {
+		olen = sizeof(h_outer.ip);
+	}
 
 	/* add room between mac and network header */
 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
@@ -119,8 +124,14 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
 	if (tcph.dest != __bpf_constant_htons(cfg_port))
 		return TC_ACT_OK;
 
-	flags = BPF_F_ADJ_ROOM_FIXED_GSO;
-	olen = with_gre ? sizeof(h_outer) : sizeof(h_outer.ip);
+	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
+	if (with_gre) {
+		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
+		olen = sizeof(h_outer);
+	} else {
+		olen = sizeof(h_outer.ip);
+	}
+
 
 	/* add room between mac and network header */
 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index cda5317790d2..dcf320626931 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -97,13 +97,11 @@ if [[ "$#" -eq "0" ]]; then
 	echo "ip6 gre"
 	$0 ipv6 ip6gre 100
 
-	# disabled until passes SKB_GSO_DODGY checks
-	# echo "ip gre gso"
-	# $0 ipv4 gre 2000
+	echo "ip gre gso"
+	$0 ipv4 gre 2000
 
-	# disabled until passes SKB_GSO_DODGY checks
-	# echo "ip6 gre gso"
-	# $0 ipv6 ip6gre 2000
+	echo "ip6 gre gso"
+	$0 ipv6 ip6gre 2000
 
 	echo "OK. All tests passed"
 	exit 0
-- 
cgit v1.2.3


From 7df5e3db8f6354125540dfa50affd2c02b7d2832 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Fri, 22 Mar 2019 16:40:19 -0700
Subject: selftests: bpf: tc-bpf flow shaping with EDT

Add a small test that shows how to shape a TCP flow in tc-bpf
with EDT and ECN.

Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile            |   3 +-
 tools/testing/selftests/bpf/progs/test_tc_edt.c | 109 ++++++++++++++++++++++++
 tools/testing/selftests/bpf/test_tc_edt.sh      |  99 +++++++++++++++++++++
 3 files changed, 210 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_tc_edt.c
 create mode 100755 tools/testing/selftests/bpf/test_tc_edt.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index cdcc54ddf4b9..77b73b892136 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -53,7 +53,8 @@ TEST_PROGS := test_kmod.sh \
 	test_xdp_vlan.sh \
 	test_lwt_ip_encap.sh \
 	test_tcp_check_syncookie.sh \
-	test_tc_tunnel.sh
+	test_tc_tunnel.sh \
+	test_tc_edt.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c
new file mode 100644
index 000000000000..3af64c470d64
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* the maximum delay we are willing to add (drop packets beyond that) */
+#define TIME_HORIZON_NS (2000 * 1000 * 1000)
+#define NS_PER_SEC 1000000000
+#define ECN_HORIZON_NS 5000000
+#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
+
+/* flow_key => last_tstamp timestamp used */
+struct bpf_map_def SEC("maps") flow_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(uint32_t),
+	.value_size = sizeof(uint64_t),
+	.max_entries = 1,
+};
+
+static inline int throttle_flow(struct __sk_buff *skb)
+{
+	int key = 0;
+	uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key);
+	uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC /
+			THROTTLE_RATE_BPS;
+	uint64_t now = bpf_ktime_get_ns();
+	uint64_t tstamp, next_tstamp = 0;
+
+	if (last_tstamp)
+		next_tstamp = *last_tstamp + delay_ns;
+
+	tstamp = skb->tstamp;
+	if (tstamp < now)
+		tstamp = now;
+
+	/* should we throttle? */
+	if (next_tstamp <= tstamp) {
+		if (bpf_map_update_elem(&flow_map, &key, &tstamp, BPF_ANY))
+			return TC_ACT_SHOT;
+		return TC_ACT_OK;
+	}
+
+	/* do not queue past the time horizon */
+	if (next_tstamp - now >= TIME_HORIZON_NS)
+		return TC_ACT_SHOT;
+
+	/* set ecn bit, if needed */
+	if (next_tstamp - now >= ECN_HORIZON_NS)
+		bpf_skb_ecn_set_ce(skb);
+
+	if (bpf_map_update_elem(&flow_map, &key, &next_tstamp, BPF_EXIST))
+		return TC_ACT_SHOT;
+	skb->tstamp = next_tstamp;
+
+	return TC_ACT_OK;
+}
+
+static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp)
+{
+	void *data_end = (void *)(long)skb->data_end;
+
+	/* drop malformed packets */
+	if ((void *)(tcp + 1) > data_end)
+		return TC_ACT_SHOT;
+
+	if (tcp->dest == bpf_htons(9000))
+		return throttle_flow(skb);
+
+	return TC_ACT_OK;
+}
+
+static inline int handle_ipv4(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph;
+	uint32_t ihl;
+
+	/* drop malformed packets */
+	if (data + sizeof(struct ethhdr) > data_end)
+		return TC_ACT_SHOT;
+	iph = (struct iphdr *)(data + sizeof(struct ethhdr));
+	if ((void *)(iph + 1) > data_end)
+		return TC_ACT_SHOT;
+	ihl = iph->ihl * 4;
+	if (((void *)iph) + ihl > data_end)
+		return TC_ACT_SHOT;
+
+	if (iph->protocol == IPPROTO_TCP)
+		return handle_tcp(skb, (struct tcphdr *)(((void *)iph) + ihl));
+
+	return TC_ACT_OK;
+}
+
+SEC("cls_test") int tc_prog(struct __sk_buff *skb)
+{
+	if (skb->protocol == bpf_htons(ETH_P_IP))
+		return handle_ipv4(skb);
+
+	return TC_ACT_OK;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh
new file mode 100755
index 000000000000..f38567ef694b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tc_edt.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test installs a TC bpf program that throttles a TCP flow
+# with dst port = 9000 down to 5MBps. Then it measures actual
+# throughput of the flow.
+
+if [[ $EUID -ne 0 ]]; then
+	echo "This script must be run as root"
+	echo "FAIL"
+	exit 1
+fi
+
+# check that nc, dd, and timeout are present
+command -v nc >/dev/null 2>&1 || \
+	{ echo >&2 "nc is not available"; exit 1; }
+command -v dd >/dev/null 2>&1 || \
+	{ echo >&2 "nc is not available"; exit 1; }
+command -v timeout >/dev/null 2>&1 || \
+	{ echo >&2 "timeout is not available"; exit 1; }
+
+readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
+readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
+
+readonly IP_SRC="172.16.1.100"
+readonly IP_DST="172.16.2.100"
+
+cleanup()
+{
+	ip netns del ${NS_SRC}
+	ip netns del ${NS_DST}
+}
+
+trap cleanup EXIT
+
+set -e  # exit on error
+
+ip netns add "${NS_SRC}"
+ip netns add "${NS_DST}"
+ip link add veth_src type veth peer name veth_dst
+ip link set veth_src netns ${NS_SRC}
+ip link set veth_dst netns ${NS_DST}
+
+ip -netns ${NS_SRC} addr add ${IP_SRC}/24  dev veth_src
+ip -netns ${NS_DST} addr add ${IP_DST}/24  dev veth_dst
+
+ip -netns ${NS_SRC} link set dev veth_src up
+ip -netns ${NS_DST} link set dev veth_dst up
+
+ip -netns ${NS_SRC} route add ${IP_DST}/32  dev veth_src
+ip -netns ${NS_DST} route add ${IP_SRC}/32  dev veth_dst
+
+# set up TC on TX
+ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
+ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
+ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
+	bpf da obj test_tc_edt.o sec cls_test
+
+
+# start the listener
+ip netns exec ${NS_DST} bash -c \
+	"nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
+declare -i NC_PID=$!
+sleep 1
+
+declare -ir TIMEOUT=20
+declare -ir EXPECTED_BPS=5000000
+
+# run the load, capture RX bytes on DST
+declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \
+	cat /sys/class/net/veth_dst/statistics/rx_bytes )
+
+set +e
+ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \
+	bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
+set -e
+
+declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \
+	cat /sys/class/net/veth_dst/statistics/rx_bytes )
+
+declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT ))
+
+echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \
+	awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
+		$1, ($2-$3)*100.0/$3}'
+
+# Pass the test if the actual bps is within 1% of the expected bps.
+# The difference is usually about 0.1% on a 20-sec test, and ==> zero
+# the longer the test runs.
+declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \
+	 awk 'function abs(x){return ((x < 0.0) ? -x : x)}
+	      {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" }
+		else { print "0"} }' )
+if [ "${RES}" == "0" ] ; then
+	echo "PASS"
+else
+	echo "FAIL"
+	exit 1
+fi
-- 
cgit v1.2.3


From 0c4ea7f87abbdb56df616678bc23f10e51a0b4f8 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Mon, 25 Mar 2019 09:36:37 +0000
Subject: bpf: test_tc_tunnel.sh needs reverse path filtering disabled

test_tc_tunnel.sh sets up a pair of namespaces connected by a
veth pair to verify encap/decap using bpf_skb_adjust_room.  In
testing this, it uses tunnel links as the peer of the bpf-based
encap/decap.  However because the same IP header is used for inner
and outer IP, when packets arrive at the tunnel interface they will
be dropped by reverse path filtering as those packets are expected
on the veth interface (where the destination IP of the decapped
packet is configured).

To avoid this, ensure reverse path filtering is disabled for the
namespace using tunneling.

Fixes: 98cdabcd0798 ("selftests/bpf: bpf tunnel encap test")
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_tc_tunnel.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index dcf320626931..c805adb88f3a 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -160,6 +160,14 @@ server_listen
 # client can connect again
 ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
 	remote "${addr1}" local "${addr2}"
+# Because packets are decapped by the tunnel they arrive on testtun0 from
+# the IP stack perspective.  Ensure reverse path filtering is disabled
+# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
+# expected veth2 (veth2 is where 192.168.1.2 is configured).
+ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+# rp needs to be disabled for both all and testtun0 as the rp value is
+# selected as the max of the "all" and device-specific values.
+ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
 ip netns exec "${ns2}" ip link set dev testtun0 up
 echo "test bpf encap with tunnel device decap"
 client_connect
-- 
cgit v1.2.3


From b4b6aa83433ea4675d4ba1be56774623db81f14f Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 19 Mar 2019 14:53:24 -0700
Subject: selftests: bpf: don't depend on hardcoded perf sample_freq

When running stacktrace_build_id_nmi, try to query
kernel.perf_event_max_sample_rate sysctl and use it as a sample_freq.
If there was an error reading sysctl, fallback to 5000.

kernel.perf_event_max_sample_rate sysctl can drift and/or can be
adjusted by the perf tool, so assuming a fixed number might be
problematic on a long running machine.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/stacktrace_build_id_nmi.c   | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 8a114bb1c379..1c1a2f75f3d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -1,13 +1,25 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 
+static __u64 read_perf_max_sample_freq(void)
+{
+	__u64 sample_freq = 5000; /* fallback to 5000 on error */
+	FILE *f;
+
+	f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
+	if (f == NULL)
+		return sample_freq;
+	fscanf(f, "%llu", &sample_freq);
+	fclose(f);
+	return sample_freq;
+}
+
 void test_stacktrace_build_id_nmi(void)
 {
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
 	const char *file = "./test_stacktrace_build_id.o";
 	int err, pmu_fd, prog_fd;
 	struct perf_event_attr attr = {
-		.sample_freq = 5000,
 		.freq = 1,
 		.type = PERF_TYPE_HARDWARE,
 		.config = PERF_COUNT_HW_CPU_CYCLES,
@@ -20,6 +32,8 @@ void test_stacktrace_build_id_nmi(void)
 	int build_id_matches = 0;
 	int retry = 1;
 
+	attr.sample_freq = read_perf_max_sample_freq();
+
 retry:
 	err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
-- 
cgit v1.2.3


From dd399ac9e343c7573c47d6820e4a23013c54749d Mon Sep 17 00:00:00 2001
From: Luca Boccassi <bluca@debian.org>
Date: Thu, 28 Mar 2019 11:33:53 +0000
Subject: tools/bpf: generate pkg-config file for libbpf

Generate a libbpf.pc file at build time so that users can rely
on pkg-config to find the library, its CFLAGS and LDFLAGS.

Signed-off-by: Luca Boccassi <bluca@debian.org>
Acked-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/.gitignore         |  1 +
 tools/lib/bpf/Makefile           | 18 +++++++++++++++---
 tools/lib/bpf/libbpf.pc.template | 12 ++++++++++++
 3 files changed, 28 insertions(+), 3 deletions(-)
 create mode 100644 tools/lib/bpf/libbpf.pc.template

(limited to 'tools')

diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
index 4db74758c674..7d9e182a1f51 100644
--- a/tools/lib/bpf/.gitignore
+++ b/tools/lib/bpf/.gitignore
@@ -1,3 +1,4 @@
 libbpf_version.h
+libbpf.pc
 FEATURE-DUMP.libbpf
 test_libbpf
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 5bf8e52c41fc..2a578bfc0bca 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -90,6 +90,7 @@ LIBBPF_VERSION	= $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
 
 LIB_TARGET	= libbpf.a libbpf.so.$(LIBBPF_VERSION)
 LIB_FILE	= libbpf.a libbpf.so*
+PC_FILE		= libbpf.pc
 
 # Set compile option CFLAGS
 ifdef EXTRA_CFLAGS
@@ -134,13 +135,14 @@ VERSION_SCRIPT	:= libbpf.map
 
 LIB_TARGET	:= $(addprefix $(OUTPUT),$(LIB_TARGET))
 LIB_FILE	:= $(addprefix $(OUTPUT),$(LIB_FILE))
+PC_FILE		:= $(addprefix $(OUTPUT),$(PC_FILE))
 
 GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
 			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}')
 VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
 			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
-CMD_TARGETS = $(LIB_TARGET)
+CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
 
 CXX_TEST_TARGET = $(OUTPUT)test_libbpf
 
@@ -187,6 +189,12 @@ $(OUTPUT)libbpf.a: $(BPF_IN)
 $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
 	$(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@
 
+$(OUTPUT)libbpf.pc:
+	$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
+		-e "s|@LIBDIR@|$(libdir_SQ)|" \
+		-e "s|@VERSION@|$(LIBBPF_VERSION)|" \
+		< libbpf.pc.template > $@
+
 check: check_abi
 
 check_abi: $(OUTPUT)libbpf.so
@@ -223,7 +231,11 @@ install_headers:
 		$(call do_install,libbpf.h,$(prefix)/include/bpf,644);
 		$(call do_install,btf.h,$(prefix)/include/bpf,644);
 
-install: install_lib
+install_pkgconfig: $(PC_FILE)
+	$(call QUIET_INSTALL, $(PC_FILE)) \
+		$(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644)
+
+install: install_lib install_pkgconfig
 
 ### Cleaning rules
 
@@ -233,7 +245,7 @@ config-clean:
 
 clean:
 	$(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
-		*.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd LIBBPF-CFLAGS
+		*.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd *.pc LIBBPF-CFLAGS
 	$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
 
 
diff --git a/tools/lib/bpf/libbpf.pc.template b/tools/lib/bpf/libbpf.pc.template
new file mode 100644
index 000000000000..ac17fcef2108
--- /dev/null
+++ b/tools/lib/bpf/libbpf.pc.template
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libbpf
+Description: BPF library
+Version: @VERSION@
+Libs: -L${libdir} -lbpf
+Requires.private: libelf
+Cflags: -I${includedir}
-- 
cgit v1.2.3


From ca059af85283ba33d816b833a2654cb9a4b697de Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 28 Mar 2019 12:12:19 +0000
Subject: selftests: forwarding: Add reverse path forwarding (RPF) test cases

In case a packet is routed using a multicast route whose specified
ingress interface does not match the interface from which the packet was
received, the packet is dropped.

Add IPv4 and IPv6 test cases for above mentioned scenario.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/router_multicast.sh   | 107 ++++++++++++++++++++-
 1 file changed, 106 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 109e6d785169..57e90c873a2c 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -28,7 +28,7 @@
 # +------------------+       +------------------+
 #
 
-ALL_TESTS="mcast_v4 mcast_v6"
+ALL_TESTS="mcast_v4 mcast_v6 rpf_v4 rpf_v6"
 NUM_NETIFS=6
 source lib.sh
 source tc_common.sh
@@ -46,10 +46,14 @@ h1_create()
 
 	ip route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::1
 	ip route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::1
+
+	tc qdisc add dev $h1 ingress
 }
 
 h1_destroy()
 {
+	tc qdisc del dev $h1 ingress
+
 	ip route del 2001:db8:3::/64 vrf v$h1
 	ip route del 2001:db8:2::/64 vrf v$h1
 
@@ -124,10 +128,14 @@ router_create()
 	ip address add 2001:db8:1::1/64 dev $rp1
 	ip address add 2001:db8:2::1/64 dev $rp2
 	ip address add 2001:db8:3::1/64 dev $rp3
+
+	tc qdisc add dev $rp3 ingress
 }
 
 router_destroy()
 {
+	tc qdisc del dev $rp3 ingress
+
 	ip address del 2001:db8:3::1/64 dev $rp3
 	ip address del 2001:db8:2::1/64 dev $rp2
 	ip address del 2001:db8:1::1/64 dev $rp1
@@ -301,6 +309,103 @@ mcast_v6()
 	log_test "mcast IPv6"
 }
 
+rpf_v4()
+{
+	# Add a multicast route from first router port to the other two. Send
+	# matching packets and test that both hosts receive them. Then, send
+	# the same packets via the third router port and test that they do not
+	# reach any host due to RPF check. A filter with 'skip_hw' is added to
+	# test that devices capable of multicast routing offload trap those
+	# packets. The filter is essentialy a NOP in other scenarios.
+
+	RET=0
+
+	tc filter add dev $h1 ingress protocol ip pref 1 handle 1 flower \
+		dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 1 flower \
+		dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop
+	tc filter add dev $h3 ingress protocol ip pref 1 handle 1 flower \
+		dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop
+	tc filter add dev $rp3 ingress protocol ip pref 1 handle 1 flower \
+		skip_hw dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action pass
+
+	create_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3
+
+	$MZ $h1 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+		-a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \
+		-A 198.51.100.2 -B 225.1.2.3 -q
+
+	tc_check_packets "dev $h2 ingress" 1 5
+	check_err $? "Multicast not received on first host"
+	tc_check_packets "dev $h3 ingress" 1 5
+	check_err $? "Multicast not received on second host"
+
+	$MZ $h3 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+		-a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \
+		-A 198.51.100.2 -B 225.1.2.3 -q
+
+	tc_check_packets "dev $h1 ingress" 1 0
+	check_err $? "Multicast received on first host when should not"
+	tc_check_packets "dev $h2 ingress" 1 5
+	check_err $? "Multicast received on second host when should not"
+	tc_check_packets "dev $rp3 ingress" 1 5
+	check_err $? "Packets not trapped due to RPF check"
+
+	delete_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3
+
+	tc filter del dev $rp3 ingress protocol ip pref 1 handle 1 flower
+	tc filter del dev $h3 ingress protocol ip pref 1 handle 1 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 1 flower
+	tc filter del dev $h1 ingress protocol ip pref 1 handle 1 flower
+
+	log_test "RPF IPv4"
+}
+
+rpf_v6()
+{
+	RET=0
+
+	tc filter add dev $h1 ingress protocol ipv6 pref 1 handle 1 flower \
+		dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop
+	tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 1 flower \
+		dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop
+	tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 1 flower \
+		dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop
+	tc filter add dev $rp3 ingress protocol ipv6 pref 1 handle 1 flower \
+		skip_hw dst_ip ff0e::3 ip_proto udp dst_port 12345 action pass
+
+	create_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3
+
+	$MZ $h1 -6 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+		-a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \
+		-A 2001:db8:1::2 -B ff0e::3 -q
+
+	tc_check_packets "dev $h2 ingress" 1 5
+	check_err $? "Multicast not received on first host"
+	tc_check_packets "dev $h3 ingress" 1 5
+	check_err $? "Multicast not received on second host"
+
+	$MZ $h3 -6 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+		-a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \
+		-A 2001:db8:1::2 -B ff0e::3 -q
+
+	tc_check_packets "dev $h1 ingress" 1 0
+	check_err $? "Multicast received on first host when should not"
+	tc_check_packets "dev $h2 ingress" 1 5
+	check_err $? "Multicast received on second host when should not"
+	tc_check_packets "dev $rp3 ingress" 1 5
+	check_err $? "Packets not trapped due to RPF check"
+
+	delete_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3
+
+	tc filter del dev $rp3 ingress protocol ipv6 pref 1 handle 1 flower
+	tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 1 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 1 flower
+	tc filter del dev $h1 ingress protocol ipv6 pref 1 handle 1 flower
+
+	log_test "RPF IPv6"
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 0637e1f878b5b670485ee0afbd9e22c9cea2ffe0 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amitc@mellanox.com>
Date: Thu, 28 Mar 2019 12:12:20 +0000
Subject: selftests: forwarding: Add PCP match and VLAN match tests

Send packets with VLAN and PCP set and check that TC flower filters can
match on these keys.

Signed-off-by: Amit Cohen <amitc@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/forwarding/tc_flower.sh  | 59 +++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 20d1077e5a3d..29bcfa84aec7 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
-	match_src_ip_test match_ip_flags_test"
+	match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -219,6 +219,63 @@ match_ip_flags_test()
 	log_test "ip_flags match ($tcflags)"
 }
 
+match_pcp_test()
+{
+	RET=0
+
+	vlan_create $h2 85 v$h2 192.0.2.11/24
+
+	tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 101 \
+		flower vlan_prio 6 $tcflags dst_mac $h2mac action drop
+	tc filter add dev $h2 ingress protocol 802.1q pref 2 handle 102 \
+		flower vlan_prio 7 $tcflags dst_mac $h2mac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -B 192.0.2.11 -Q 7:85 -t ip -q
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -B 192.0.2.11 -Q 0:85 -t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 0
+	check_err $? "Matched on specified PCP when should not"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on specified PCP"
+
+	tc filter del dev $h2 ingress protocol 802.1q pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 101 flower
+
+	vlan_destroy $h2 85
+
+	log_test "PCP match ($tcflags)"
+}
+
+match_vlan_test()
+{
+	RET=0
+
+	vlan_create $h2 85 v$h2 192.0.2.11/24
+	vlan_create $h2 75 v$h2 192.0.2.10/24
+
+	tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 101 \
+		flower vlan_id 75 $tcflags action drop
+	tc filter add dev $h2 ingress protocol 802.1q pref 2 handle 102 \
+		flower vlan_id 85 $tcflags action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -B 192.0.2.11 -Q 0:85 -t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 0
+	check_err $? "Matched on specified VLAN when should not"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on specified VLAN"
+
+	tc filter del dev $h2 ingress protocol 802.1q pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 101 flower
+
+	vlan_destroy $h2 75
+	vlan_destroy $h2 85
+
+	log_test "VLAN match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
-- 
cgit v1.2.3


From 2fcbc0b15e39019e84863a69c822b3c3103cfd56 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@mellanox.com>
Date: Thu, 28 Mar 2019 12:12:21 +0000
Subject: selftests: forwarding: Test action VLAN modify

Construct a basic topology consisting of two hosts connected using a
VLAN-aware bridge. Put each port in a different VLAN and test that ping
fails.

Add ingress and egress filters with a VLAN modify action and test that
ping passes.

Signed-off-by: Danielle Ratson <danieller@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/tc_vlan_modify.sh     | 164 +++++++++++++++++++++
 1 file changed, 164 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/tc_vlan_modify.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/tc_vlan_modify.sh b/tools/testing/selftests/net/forwarding/tc_vlan_modify.sh
new file mode 100755
index 000000000000..45378905cb97
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_vlan_modify.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	vlan_modify_ingress
+	vlan_modify_egress
+"
+
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	vlan_create $h1 85 v$h1 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 85
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+	vlan_create $h2 65 v$h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 65
+	simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	bridge vlan add dev $swp1 vid 85
+	bridge vlan add dev $swp2 vid 65
+
+	bridge vlan add dev $swp2 vid 85
+	bridge vlan add dev $swp1 vid 65
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	bridge vlan del vid 65 dev $swp1
+	bridge vlan del vid 85 dev $swp2
+
+	bridge vlan del vid 65 dev $swp2
+	bridge vlan del vid 85 dev $swp1
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+vlan_modify_ingress()
+{
+	RET=0
+
+	ping_do $h1.85 192.0.2.18
+	check_fail $? "ping between two different vlans passed when should not"
+
+	ping6_do $h1.85 2001:db8:2::2
+	check_fail $? "ping6 between two different vlans passed when should not"
+
+	tc filter add dev $swp1 ingress protocol all pref 1 handle 1 \
+		flower action vlan modify id 65
+	tc filter add dev $swp2 ingress protocol all pref 1 handle 1 \
+		flower action vlan modify id 85
+
+	ping_do $h1.85 192.0.2.18
+	check_err $? "ping between two different vlans failed when should not"
+
+	ping6_do $h1.85 2001:db8:2::2
+	check_err $? "ping6 between two different vlans failed when should not"
+
+	log_test "VLAN modify at ingress"
+
+	tc filter del dev $swp2 ingress protocol all pref 1 handle 1 flower
+	tc filter del dev $swp1 ingress protocol all pref 1 handle 1 flower
+}
+
+vlan_modify_egress()
+{
+	RET=0
+
+	ping_do $h1.85 192.0.2.18
+	check_fail $? "ping between two different vlans passed when should not"
+
+	ping6_do $h1.85 2001:db8:2::2
+	check_fail $? "ping6 between two different vlans passed when should not"
+
+	tc filter add dev $swp1 egress protocol all pref 1 handle 1 \
+		flower action vlan modify id 85
+	tc filter add dev $swp2 egress protocol all pref 1 handle 1 \
+		flower action vlan modify id 65
+
+	ping_do $h1.85 192.0.2.18
+	check_err $? "ping between two different vlans failed when should not"
+
+	ping6_do $h1.85 2001:db8:2::2
+	check_err $? "ping6 between two different vlans failed when should not"
+
+	log_test "VLAN modify at egress"
+
+	tc filter del dev $swp2 egress protocol all pref 1 handle 1 flower
+	tc filter del dev $swp1 egress protocol all pref 1 handle 1 flower
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 2cca8751af36d5f84eaa05d63632afa25a523b69 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Thu, 28 Mar 2019 12:12:23 +0000
Subject: selftests: forwarding: devlink_lib: Avoid double sourcing of lib.sh

Don't source lib.sh twice and make the script work with ifnames passed
on the command line.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh        |  1 +
 .../selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh  |  3 +++
 .../selftests/drivers/net/mlxsw/spectrum/resource_scale.sh     |  5 ++++-
 tools/testing/selftests/net/forwarding/devlink_lib.sh          | 10 ----------
 4 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
index a372b2f60874..fb850e0ec837 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -12,6 +12,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
 	delta_two_masks_one_key_test delta_simple_rehash_test \
 	bloom_simple_test bloom_complex_test bloom_delta_test"
 NUM_NETIFS=2
+source $lib_dir/lib.sh
 source $lib_dir/tc_common.sh
 source $lib_dir/devlink_lib.sh
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
index b1fe960e398a..6f2683cbc7d5 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
@@ -1,7 +1,10 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
 NUM_NETIFS=1
+source $lib_dir/lib.sh
 source devlink_lib_spectrum.sh
 
 setup_prepare()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index e7ffc79561b7..43ba1b438f6d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -1,8 +1,11 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
 NUM_NETIFS=6
-source ../../../../net/forwarding/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
 source devlink_lib_spectrum.sh
 
 current_test=""
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 57cf8914910d..981b897d418d 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -1,16 +1,6 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-##############################################################################
-# Source library
-
-relative_path="${BASH_SOURCE%/*}"
-if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
-	relative_path="."
-fi
-
-source "$relative_path/lib.sh"
-
 ##############################################################################
 # Defines
 
-- 
cgit v1.2.3


From 8e46aee69722054e85ae4b6029d0aed678016950 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Thu, 28 Mar 2019 12:12:23 +0000
Subject: selftests: forwarding: devlink_lib: Simplify deduction of DEVLINK_DEV

Use devlink -j and jq for more accurate querying. Use cut -f-2 instead
of rev-cut-rev combo.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/devlink_lib.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 981b897d418d..61d3579a62af 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -4,9 +4,8 @@
 ##############################################################################
 # Defines
 
-DEVLINK_DEV=$(devlink port show | grep "${NETIFS[p1]}" | \
-	      grep -v "${NETIFS[p1]}[0-9]" | cut -d" " -f1 | \
-	      rev | cut -d"/" -f2- | rev)
+DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
+		     | jq -r '.port | keys[]' | cut -d/ -f-2)
 if [ -z "$DEVLINK_DEV" ]; then
 	echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
 	exit 1
-- 
cgit v1.2.3


From d04cc726c8da45732c815549f2841a646332cc94 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Thu, 28 Mar 2019 12:12:24 +0000
Subject: selftests: forwarding: devlink_lib: Add shared buffer helpers

Add helpers to obtain, set, and restore a pool size, and a port-pool and
tc-pool threshold.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/devlink_lib.sh        | 95 ++++++++++++++++++++++
 1 file changed, 95 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 61d3579a62af..8553a67a2322 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -95,3 +95,98 @@ devlink_reload()
 			grep -c "size_new")
 	check_err $still_pending "Failed reload - There are still unset sizes"
 }
+
+declare -A DEVLINK_ORIG
+
+devlink_port_pool_threshold()
+{
+	local port=$1; shift
+	local pool=$1; shift
+
+	devlink sb port pool show $port pool $pool -j \
+		| jq '.port_pool."'"$port"'"[].threshold'
+}
+
+devlink_port_pool_th_set()
+{
+	local port=$1; shift
+	local pool=$1; shift
+	local th=$1; shift
+	local key="port_pool($port,$pool).threshold"
+
+	DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool)
+	devlink sb port pool set $port pool $pool th $th
+}
+
+devlink_port_pool_th_restore()
+{
+	local port=$1; shift
+	local pool=$1; shift
+	local key="port_pool($port,$pool).threshold"
+
+	devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]}
+}
+
+devlink_pool_size_thtype()
+{
+	local pool=$1; shift
+
+	devlink sb pool show "$DEVLINK_DEV" pool $pool -j \
+	    | jq -r '.pool[][] | (.size, .thtype)'
+}
+
+devlink_pool_size_thtype_set()
+{
+	local pool=$1; shift
+	local thtype=$1; shift
+	local size=$1; shift
+	local key="pool($pool).size_thtype"
+
+	DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
+	devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype
+}
+
+devlink_pool_size_thtype_restore()
+{
+	local pool=$1; shift
+	local key="pool($pool).size_thtype"
+	local -a orig=(${DEVLINK_ORIG[$key]})
+
+	devlink sb pool set "$DEVLINK_DEV" pool $pool \
+		size ${orig[0]} thtype ${orig[1]}
+}
+
+devlink_tc_bind_pool_th()
+{
+	local port=$1; shift
+	local tc=$1; shift
+	local dir=$1; shift
+
+	devlink sb tc bind show $port tc $tc type $dir -j \
+	    | jq -r '.tc_bind[][] | (.pool, .threshold)'
+}
+
+devlink_tc_bind_pool_th_set()
+{
+	local port=$1; shift
+	local tc=$1; shift
+	local dir=$1; shift
+	local pool=$1; shift
+	local th=$1; shift
+	local key="tc_bind($port,$dir,$tc).pool_th"
+
+	DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
+	devlink sb tc bind set $port tc $tc type $dir pool $pool th $th
+}
+
+devlink_tc_bind_pool_th_restore()
+{
+	local port=$1; shift
+	local tc=$1; shift
+	local dir=$1; shift
+	local key="tc_bind($port,$dir,$tc).pool_th"
+	local -a orig=(${DEVLINK_ORIG[$key]})
+
+	devlink sb tc bind set $port tc $tc type $dir \
+		pool ${orig[0]} th ${orig[1]}
+}
-- 
cgit v1.2.3


From 5dde21b3a7f648342d873ec964e0c486c329b91c Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Thu, 28 Mar 2019 12:12:25 +0000
Subject: selftests: mlxsw: qos_mc_aware: Configure shared buffers

This test runs two streams of traffic from two independent ports to
create congestion on one egress port. It is necessary to configure the
shared buffer thresholds correctly, to make sure that there is traffic
from both streams in the shared buffer. Only then can the test actually
test prioritization among these streams.

Without this configuration, it is possible, that one of the streams
takes all of port-pool quota, and the other stream is not even admitted,
thus invalidating the result.

On Spectrum-1, this is not a problem, because MC traffic uses a separate
pool. But for Spectrum-2, MC and UC share the same pool, and the correct
configuration is important.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/qos_mc_aware.sh       | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 117f6f35d72f..2e17fe3b4872 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -67,6 +67,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding
 
 NUM_NETIFS=6
 source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
 
 h1_create()
 {
@@ -140,10 +141,28 @@ switch_create()
 	ip link set dev br111 up
 	ip link set dev $swp2.111 master br111
 	ip link set dev $swp3.111 master br111
+
+	# Make sure that ingress quotas are smaller than egress so that there is
+	# room for both streams of traffic to be admitted to shared buffer.
+	devlink_port_pool_th_set $swp1 0 5
+	devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
+
+	devlink_port_pool_th_set $swp2 0 5
+	devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
+
+	devlink_port_pool_th_set $swp3 4 12
 }
 
 switch_destroy()
 {
+	devlink_port_pool_th_restore $swp3 4
+
+	devlink_tc_bind_pool_th_restore $swp2 1 ingress
+	devlink_port_pool_th_restore $swp2 0
+
+	devlink_tc_bind_pool_th_restore $swp1 0 ingress
+	devlink_port_pool_th_restore $swp1 0
+
 	ip link del dev br111
 	ip link del dev br1
 
-- 
cgit v1.2.3


From 573363a68f27c75f92b029345eed0026a2bac0c0 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Thu, 28 Mar 2019 12:12:26 +0000
Subject: selftests: mlxsw: Add qos_lib.sh

Extract reusable code from qos_mc_aware.sh and put into a new library.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/drivers/net/mlxsw/qos_lib.sh |  98 ++++++++++++++++++++
 .../selftests/drivers/net/mlxsw/qos_mc_aware.sh    | 103 +++------------------
 2 files changed, 109 insertions(+), 92 deletions(-)
 create mode 100644 tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
new file mode 100644
index 000000000000..e80be65799ad
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0
+
+humanize()
+{
+	local speed=$1; shift
+
+	for unit in bps Kbps Mbps Gbps; do
+		if (($(echo "$speed < 1024" | bc))); then
+			break
+		fi
+
+		speed=$(echo "scale=1; $speed / 1024" | bc)
+	done
+
+	echo "$speed${unit}"
+}
+
+rate()
+{
+	local t0=$1; shift
+	local t1=$1; shift
+	local interval=$1; shift
+
+	echo $((8 * (t1 - t0) / interval))
+}
+
+start_traffic()
+{
+	local h_in=$1; shift    # Where the traffic egresses the host
+	local sip=$1; shift
+	local dip=$1; shift
+	local dmac=$1; shift
+
+	$MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
+		-a own -b $dmac -t udp -q &
+	sleep 1
+}
+
+stop_traffic()
+{
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait %%; } 2>/dev/null
+}
+
+check_rate()
+{
+	local rate=$1; shift
+	local min=$1; shift
+	local what=$1; shift
+
+	if ((rate > min)); then
+		return 0
+	fi
+
+	echo "$what $(humanize $ir) < $(humanize $min)" > /dev/stderr
+	return 1
+}
+
+measure_rate()
+{
+	local sw_in=$1; shift   # Where the traffic ingresses the switch
+	local host_in=$1; shift # Where it ingresses another host
+	local counter=$1; shift # Counter to use for measurement
+	local what=$1; shift
+
+	local interval=10
+	local i
+	local ret=0
+
+	# Dips in performance might cause momentary ingress rate to drop below
+	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
+	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
+	# average ingress rate to somewhat mitigate this.
+	local min_ingress=2147483648
+
+	for i in {5..0}; do
+		local t0=$(ethtool_stats_get $host_in $counter)
+		local u0=$(ethtool_stats_get $sw_in $counter)
+		sleep $interval
+		local t1=$(ethtool_stats_get $host_in $counter)
+		local u1=$(ethtool_stats_get $sw_in $counter)
+
+		local ir=$(rate $u0 $u1 $interval)
+		local er=$(rate $t0 $t1 $interval)
+
+		if check_rate $ir $min_ingress "$what ingress rate"; then
+			break
+		fi
+
+		# Fail the test if we can't get the throughput.
+		if ((i == 0)); then
+			ret=1
+		fi
+	done
+
+	echo $ir $er
+	return $ret
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 2e17fe3b4872..71231ad2dbfb 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -68,6 +68,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding
 NUM_NETIFS=6
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
+source qos_lib.sh
 
 h1_create()
 {
@@ -220,107 +221,28 @@ ping_ipv4()
 	ping_test $h2 192.0.2.130
 }
 
-humanize()
-{
-	local speed=$1; shift
-
-	for unit in bps Kbps Mbps Gbps; do
-		if (($(echo "$speed < 1024" | bc))); then
-			break
-		fi
-
-		speed=$(echo "scale=1; $speed / 1024" | bc)
-	done
-
-	echo "$speed${unit}"
-}
-
-rate()
-{
-	local t0=$1; shift
-	local t1=$1; shift
-	local interval=$1; shift
-
-	echo $((8 * (t1 - t0) / interval))
-}
-
-check_rate()
-{
-	local rate=$1; shift
-	local min=$1; shift
-	local what=$1; shift
-
-	if ((rate > min)); then
-		return 0
-	fi
-
-	echo "$what $(humanize $ir) < $(humanize $min_ingress)" > /dev/stderr
-	return 1
-}
-
-measure_uc_rate()
-{
-	local what=$1; shift
-
-	local interval=10
-	local i
-	local ret=0
-
-	# Dips in performance might cause momentary ingress rate to drop below
-	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
-	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
-	# average ingress rate to somewhat mitigate this.
-	local min_ingress=2147483648
-
-	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
-		-a own -b $h3mac -t udp -q &
-	sleep 1
-
-	for i in {5..0}; do
-		local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
-		local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
-		sleep $interval
-		local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
-		local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
-
-		local ir=$(rate $u0 $u1 $interval)
-		local er=$(rate $t0 $t1 $interval)
-
-		if check_rate $ir $min_ingress "$what ingress rate"; then
-			break
-		fi
-
-		# Fail the test if we can't get the throughput.
-		if ((i == 0)); then
-			ret=1
-		fi
-	done
-
-	# Suppress noise from killing mausezahn.
-	{ kill %% && wait; } 2>/dev/null
-
-	echo $ir $er
-	exit $ret
-}
-
 test_mc_aware()
 {
 	RET=0
 
 	local -a uc_rate
-	uc_rate=($(measure_uc_rate "UC-only"))
+	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
+	uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only"))
 	check_err $? "Could not get high enough UC-only ingress rate"
+	stop_traffic
 	local ucth1=${uc_rate[1]}
 
-	$MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
+	start_traffic $h1 own bc bc
 
 	local d0=$(date +%s)
 	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
 	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)
 
 	local -a uc_rate_2
-	uc_rate_2=($(measure_uc_rate "UC+MC"))
+	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
+	uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC"))
 	check_err $? "Could not get high enough UC+MC ingress rate"
+	stop_traffic
 	local ucth2=${uc_rate_2[1]}
 
 	local d1=$(date +%s)
@@ -338,8 +260,7 @@ test_mc_aware()
 	local mc_ir=$(rate $u0 $u1 $interval)
 	local mc_er=$(rate $t0 $t1 $interval)
 
-	# Suppress noise from killing mausezahn.
-	{ kill %% && wait; } 2>/dev/null
+	stop_traffic
 
 	log_test "UC performace under MC overload"
 
@@ -363,8 +284,7 @@ test_uc_aware()
 {
 	RET=0
 
-	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
-		-a own -b $h3mac -t udp -q &
+	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
 
 	local d0=$(date +%s)
 	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
@@ -394,8 +314,7 @@ test_uc_aware()
 	((attempts == passes))
 	check_err $?
 
-	# Suppress noise from killing mausezahn.
-	{ kill %% && wait; } 2>/dev/null
+	stop_traffic
 
 	log_test "MC performace under UC overload"
 	echo "    ingress UC throughput $(humanize ${uc_ir})"
-- 
cgit v1.2.3


From 30905dc63badb17e5960fa73fa49ed1459790494 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Thu, 28 Mar 2019 12:12:27 +0000
Subject: selftests: mlxsw: Add a new test for strict priority

Test that when strict priority is configured on a system, the
higher-priority traffic does actually win all the available bandwidth.
The test uses a similar approach to qos_mc_aware.sh to run and account
the traffic.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/qos_ets_strict.sh  | 311 +++++++++++++++++++++
 1 file changed, 311 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
new file mode 100755
index 000000000000..6d1790b5de7a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -0,0 +1,311 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A test for strict prioritization of traffic in the switch. Run two streams of
+# traffic, each through a different ingress port, one tagged with PCP of 1, the
+# other with PCP of 2. Both streams converge at one egress port, where they are
+# assigned TC of, respectively, 1 and 2, with strict priority configured between
+# them. In H3, we expect to see (almost) exclusively the high-priority traffic.
+#
+# Please see qos_mc_aware.sh for an explanation of why we use mausezahn and
+# counters instead of just running iperf3.
+#
+# +---------------------------+                 +-----------------------------+
+# | H1                        |                 |                          H2 |
+# |         $h1.111 +         |                 |         + $h2.222           |
+# |   192.0.2.33/28 |         |                 |         | 192.0.2.65/28     |
+# |   e-qos-map 0:1 |         |                 |         | e-qos-map 0:2     |
+# |                 |         |                 |         |                   |
+# |             $h1 +         |                 |         + $h2               |
+# +-----------------|---------+                 +---------|-------------------+
+#                   |                                     |
+# +-----------------|-------------------------------------|-------------------+
+# |           $swp1 +                                     + $swp2             |
+# |          >1Gbps |                                     | >1Gbps            |
+# | +---------------|-----------+              +----------|----------------+  |
+# | |     $swp1.111 +           |              |          + $swp2.222      |  |
+# | |                     BR111 |       SW     | BR222                     |  |
+# | |     $swp3.111 +           |              |          + $swp3.222      |  |
+# | +---------------|-----------+              +----------|----------------+  |
+# |                 \_____________________________________/                   |
+# |                                    |                                      |
+# |                                    + $swp3                                |
+# |                                    | 1Gbps bottleneck                     |
+# |                                    | ETS: (up n->tc n for n in 0..7)      |
+# |                                    |      strict priority                 |
+# +------------------------------------|--------------------------------------+
+#                                      |
+#                 +--------------------|--------------------+
+#                 |                    + $h3             H3 |
+#                 |                   / \                   |
+#                 |                  /   \                  |
+#                 |         $h3.111 +     + $h3.222         |
+#                 |  192.0.2.34/28          192.0.2.66/28   |
+#                 +-----------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_ets_strict
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	mtu_set $h1 10000
+
+	vlan_create $h1 111 v$h1 192.0.2.33/28
+	ip link set dev $h1.111 type vlan egress-qos-map 0:1
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 111
+
+	mtu_restore $h1
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+
+	vlan_create $h2 222 v$h2 192.0.2.65/28
+	ip link set dev $h2.222 type vlan egress-qos-map 0:2
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 222
+
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3
+	mtu_set $h3 10000
+
+	vlan_create $h3 111 v$h3 192.0.2.34/28
+	vlan_create $h3 222 v$h3 192.0.2.66/28
+}
+
+h3_destroy()
+{
+	vlan_destroy $h3 222
+	vlan_destroy $h3 111
+
+	mtu_restore $h3
+	simple_if_fini $h3
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+
+	# prio n -> TC n, strict scheduling
+	lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7
+	lldptool -T -i $swp3 -V ETS-CFG tsa=$(
+			)"0:strict,"$(
+			)"1:strict,"$(
+			)"2:strict,"$(
+			)"3:strict,"$(
+			)"4:strict,"$(
+			)"5:strict,"$(
+			)"6:strict,"$(
+			)"7:strict"
+	sleep 1
+
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+	ethtool -s $swp3 speed 1000 autoneg off
+
+	vlan_create $swp1 111
+	vlan_create $swp2 222
+	vlan_create $swp3 111
+	vlan_create $swp3 222
+
+	ip link add name br111 up type bridge vlan_filtering 0
+	ip link set dev $swp1.111 master br111
+	ip link set dev $swp3.111 master br111
+
+	ip link add name br222 up type bridge vlan_filtering 0
+	ip link set dev $swp2.222 master br222
+	ip link set dev $swp3.222 master br222
+
+	# Make sure that ingress quotas are smaller than egress so that there is
+	# room for both streams of traffic to be admitted to shared buffer.
+	devlink_pool_size_thtype_set 0 dynamic 10000000
+	devlink_pool_size_thtype_set 4 dynamic 10000000
+
+	devlink_port_pool_th_set $swp1 0 6
+	devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6
+
+	devlink_port_pool_th_set $swp2 0 6
+	devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6
+
+	devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
+	devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
+	devlink_port_pool_th_set $swp3 4 7
+}
+
+switch_destroy()
+{
+	devlink_port_pool_th_restore $swp3 4
+	devlink_tc_bind_pool_th_restore $swp3 2 egress
+	devlink_tc_bind_pool_th_restore $swp3 1 egress
+
+	devlink_tc_bind_pool_th_restore $swp2 2 ingress
+	devlink_port_pool_th_restore $swp2 0
+
+	devlink_tc_bind_pool_th_restore $swp1 1 ingress
+	devlink_port_pool_th_restore $swp1 0
+
+	devlink_pool_size_thtype_restore 4
+	devlink_pool_size_thtype_restore 0
+
+	ip link del dev br222
+	ip link del dev br111
+
+	vlan_destroy $swp3 222
+	vlan_destroy $swp3 111
+	vlan_destroy $swp2 222
+	vlan_destroy $swp1 111
+
+	ethtool -s $swp3 autoneg on
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+	lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0
+
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	h3mac=$(mac_get $h3)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.34 " from H1"
+	ping_test $h2 192.0.2.66 " from H2"
+}
+
+rel()
+{
+	local old=$1; shift
+	local new=$1; shift
+
+	bc <<< "
+	    scale=2
+	    ret = 100 * $new / $old
+	    if (ret > 0) { ret } else { 0 }
+	"
+}
+
+test_ets_strict()
+{
+	RET=0
+
+	# Run high-prio traffic on its own.
+	start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac
+	local -a rate_2
+	rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2"))
+	check_err $? "Could not get high enough prio-2 ingress rate"
+	local rate_2_in=${rate_2[0]}
+	local rate_2_eg=${rate_2[1]}
+	stop_traffic # $h2.222
+
+	# Start low-prio stream.
+	start_traffic $h1.111 192.0.2.33 192.0.2.34 $h3mac
+
+	local -a rate_1
+	rate_1=($(measure_rate $swp1 $h3 rx_octets_prio_1 "prio 1"))
+	check_err $? "Could not get high enough prio-1 ingress rate"
+	local rate_1_in=${rate_1[0]}
+	local rate_1_eg=${rate_1[1]}
+
+	# High-prio and low-prio on their own should have about the same
+	# throughput.
+	local rel21=$(rel $rate_1_eg $rate_2_eg)
+	check_err $(bc <<< "$rel21 < 95")
+	check_err $(bc <<< "$rel21 > 105")
+
+	# Start the high-prio stream--now both streams run.
+	start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac
+	rate_3=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2 w/ 1"))
+	check_err $? "Could not get high enough prio-2 ingress rate with prio-1"
+	local rate_3_in=${rate_3[0]}
+	local rate_3_eg=${rate_3[1]}
+	stop_traffic # $h2.222
+
+	stop_traffic # $h1.111
+
+	# High-prio should have about the same throughput whether or not
+	# low-prio is in the system.
+	local rel32=$(rel $rate_2_eg $rate_3_eg)
+	check_err $(bc <<< "$rel32 < 95")
+
+	log_test "strict priority"
+	echo "Ingress to switch:"
+	echo "  p1 in rate            $(humanize $rate_1_in)"
+	echo "  p2 in rate            $(humanize $rate_2_in)"
+	echo "  p2 in rate w/ p1      $(humanize $rate_3_in)"
+	echo "Egress from switch:"
+	echo "  p1 eg rate            $(humanize $rate_1_eg)"
+	echo "  p2 eg rate            $(humanize $rate_2_eg) ($rel21% of p1)"
+	echo "  p2 eg rate w/ p1      $(humanize $rate_3_eg) ($rel32% of p2)"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 49b1b4a19ca70fa6964a7394121726b7093c2303 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dmitrolin@mellanox.com>
Date: Thu, 28 Mar 2019 16:09:31 +0000
Subject: selftests: tc-testing: Add pedit tests

Add 36 pedit action tests to check pedit options described in tc-pedit(8)
man page. Test cases can be specified by categories: actions, pedit,
raw_op, layered_op. RAW_OP cases check offset option for u8, u16 and u32
offset size. LAYERED_OP cases check fields option for eth, ip, ip6,
tcp and udp headers.

Include following tests:
377e - Add pedit action with RAW_OP offset u32
a0ca - Add pedit action with RAW_OP offset u32 (INVALID)
dd8a - Add pedit action with RAW_OP offset u16 u16
53db - Add pedit action with RAW_OP offset u16 (INVALID)
5c7e - Add pedit action with RAW_OP offset u8 add value
2893 - Add pedit action with RAW_OP offset u8 quad
3a07 - Add pedit action with RAW_OP offset u8-u16-u8
ab0f - Add pedit action with RAW_OP offset u16-u8-u8
9d12 - Add pedit action with RAW_OP offset u32 set u16 clear u8 invert
ebfa - Add pedit action with RAW_OP offset overflow u32 (INVALID)
f512 - Add pedit action with RAW_OP offset u16 at offmask shift set
c2cb - Add pedit action with RAW_OP offset u32 retain value
86d4 - Add pedit action with LAYERED_OP eth set src & dst
c715 - Add pedit action with LAYERED_OP eth set src (INVALID)
ba22 - Add pedit action with LAYERED_OP eth type set/clear sequence
5810 - Add pedit action with LAYERED_OP ip set src & dst
1092 - Add pedit action with LAYERED_OP ip set ihl & dsfield
02d8 - Add pedit action with LAYERED_OP ip set ttl & protocol
3e2d - Add pedit action with LAYERED_OP ip set ttl (INVALID)
31ae - Add pedit action with LAYERED_OP ip ttl clear/set
486f - Add pedit action with LAYERED_OP ip set duplicate fields
e790 - Add pedit action with LAYERED_OP ip set ce, df, mf, firstfrag,
nofrag fields
6829 - Add pedit action with LAYERED_OP beyond ip set dport & sport
afd8 - Add pedit action with LAYERED_OP beyond ip set icmp_type &
icmp_code
3143 - Add pedit action with LAYERED_OP beyond ip set dport (INVALID)
fc1f - Add pedit action with LAYERED_OP ip6 set src & dst
6d34 - Add pedit action with LAYERED_OP ip6 dst retain value (INVALID)
6f5e - Add pedit action with LAYERED_OP ip6 flow_lbl
6795 - Add pedit action with LAYERED_OP ip6 set payload_len, nexthdr,
hoplimit
1442 - Add pedit action with LAYERED_OP tcp set dport & sport
b7ac - Add pedit action with LAYERED_OP tcp sport set (INVALID)
cfcc - Add pedit action with LAYERED_OP tcp flags set
3bc4 - Add pedit action with LAYERED_OP tcp set dport, sport & flags
fields
f1c8 - Add pedit action with LAYERED_OP udp set dport & sport
d784 - Add pedit action with mixed RAW/LAYERED_OP #1
70ca - Add pedit action with mixed RAW/LAYERED_OP #2

Signed-off-by: Dmytro Linkin <dmitrolin@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/pedit.json         | 903 +++++++++++++++++++++
 1 file changed, 903 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
index b73ceb9e28b1..0d319f1d01db 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
@@ -47,5 +47,908 @@
         "teardown": [
             "$TC actions flush action pedit"
         ]
+    },
+    {
+        "id": "377e",
+        "name": "Add pedit action with RAW_OP offset u32",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 12 u32 set 0x90abcdef",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "12: val 90abcdef mask 00000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "a0ca",
+        "name": "Add pedit action with RAW_OP offset u32 (INVALID)",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 2 u32 set 0x12345678",
+        "expExitCode": "255",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "dd8a",
+        "name": "Add pedit action with RAW_OP offset u16 u16",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 12 u16 set 0x1234 munge offset 14 u16 set 0x5678",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "val 12340000 mask 0000ffff.*val 00005678 mask ffff0000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "53db",
+        "name": "Add pedit action with RAW_OP offset u16 (INVALID)",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 15 u16 set 0x1234",
+        "expExitCode": "255",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "5c7e",
+        "name": "Add pedit action with RAW_OP offset u8 add value",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge offset 16 u8 add 0xf",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 16: add 0f000000 mask 00ffffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "2893",
+        "name": "Add pedit action with RAW_OP offset u8 quad",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 12 u8 set 0x12 munge offset 13 u8 set 0x34 munge offset 14 u8 set 0x56 munge offset 15 u8 set 0x78",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "val 12000000 mask 00ffffff.*val 00340000 mask ff00ffff.*val 00005600 mask ffff00ff.*val 00000078 mask ffffff00",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "3a07",
+        "name": "Add pedit action with RAW_OP offset u8-u16-u8",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 0 u8 set 0x12 munge offset 1 u16 set 0x3456 munge offset 3 u8 set 0x78",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "val 12000000 mask 00ffffff.*val 00345600 mask ff0000ff.*val 00000078 mask ffffff00",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "ab0f",
+        "name": "Add pedit action with RAW_OP offset u16-u8-u8",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 0 u16 set 0x1234 munge offset 2 u8 set 0x56 munge offset 3 u8 set 0x78",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "val 12340000 mask 0000ffff.*val 00005600 mask ffff00ff.*val 00000078 mask ffffff00",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "9d12",
+        "name": "Add pedit action with RAW_OP offset u32 set u16 clear u8 invert",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 0 u32 set 0x12345678 munge offset 1 u16 clear munge offset 2 u8 invert",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "val 12345678 mask 00000000.*val 00000000 mask ff0000ff.*val 0000ff00 mask ffffffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "ebfa",
+        "name": "Add pedit action with RAW_OP offset overflow u32 (INVALID)",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 0xffffffffffffffffffffffffffffffffffffffffff u32 set 0x1",
+        "expExitCode": "255",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "f512",
+        "name": "Add pedit action with RAW_OP offset u16 at offmask shift set",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 12 u16 at 12 ffff 1 set 0xaaaa",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 12: val aaaa0000 mask 0000ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "c2cb",
+        "name": "Add pedit action with RAW_OP offset u32 retain value",
+        "category": [
+            "actions",
+            "pedit",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge offset 12 u32 set 0x12345678 retain 0xff00",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 12: val 00005600 mask ffff00ff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "86d4",
+        "name": "Add pedit action with LAYERED_OP eth set src & dst",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth src set 11:22:33:44:55:66 munge eth dst set ff:ee:dd:cc:bb:aa",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "eth\\+4: val 00001122 mask ffff0000.*eth\\+8: val 33445566 mask 00000000.*eth\\+0: val ffeeddcc mask 00000000.*eth\\+4: val bbaa0000 mask 0000ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "c715",
+        "name": "Add pedit action with LAYERED_OP eth set src (INVALID)",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth src set %e:11:m2:33:x4:-5",
+        "expExitCode": "255",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "ba22",
+        "name": "Add pedit action with LAYERED_OP eth type set/clear sequence",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth type set 0x1 munge eth type clear munge eth type set 0x1 munge eth type clear munge eth type set 0x1 munge eth type clear munge eth type set 0x1 munge eth type clear munge eth type set 0x1 munge eth type clear munge eth type set 0x1 munge eth type clear",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff.*eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff.*eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff.*eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff.*eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff.*eth\\+12: val 00010000 mask 0000ffff.*eth\\+12: val 00000000 mask 0000ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "5810",
+        "name": "Add pedit action with LAYERED_OP ip set src & dst",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip src set 18.52.86.120 munge ip dst set 18.52.86.120",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 12: val 12345678 mask 00000000.* 16: val 12345678 mask 00000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "1092",
+        "name": "Add pedit action with LAYERED_OP ip set ihl & dsfield",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip ihl set 0xff munge ip dsfield set 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 0: val 0f000000 mask f0ffffff.* 0: val 00ff0000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "02d8",
+        "name": "Add pedit action with LAYERED_OP ip set ttl & protocol",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip ttl set 0x1 munge ip protocol set 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 8: val 01000000 mask 00ffffff.* 8: val 00ff0000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "3e2d",
+        "name": "Add pedit action with LAYERED_OP ip set ttl (INVALID)",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip ttl set 300",
+        "expExitCode": "255",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "31ae",
+        "name": "Add pedit action with LAYERED_OP ip ttl clear/set",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip ttl clear munge ip ttl set 0x1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 8: val 00000000 mask 00ffffff.* 8: val 01000000 mask 00ffffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "486f",
+        "name": "Add pedit action with LAYERED_OP ip set duplicate fields",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip ttl set 0x1 munge ip ttl set 0x1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 8: val 01000000 mask 00ffffff.* 8: val 01000000 mask 00ffffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "e790",
+        "name": "Add pedit action with LAYERED_OP ip set ce, df, mf, firstfrag, nofrag fields",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip ce set 0xff munge ip df set 0xff munge ip mf set 0xff munge ip firstfrag set 0xff munge ip nofrag set 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 4: val 00008000 mask ffff7fff.* 4: val 00004000 mask ffffbfff.* 4: val 00002000 mask ffffdfff.* 4: val 00001f00 mask ffffe0ff.* 4: val 00003f00 mask ffffc0ff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "6829",
+        "name": "Add pedit action with LAYERED_OP beyond ip set dport & sport",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip dport set 0x1234 munge ip sport set 0x5678",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 20: val 00001234 mask ffff0000.* 20: val 56780000 mask 0000ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "afd8",
+        "name": "Add pedit action with LAYERED_OP beyond ip set icmp_type & icmp_code",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit munge ip icmp_type set 0xff munge ip icmp_code set 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": " 20: val ff000000 mask 00ffffff.* 20: val ff000000 mask 00ffffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "3143",
+        "name": "Add pedit action with LAYERED_OP beyond ip set dport (INVALID)",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip dport set 0x1234",
+        "expExitCode": "255",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "fc1f",
+        "name": "Add pedit action with LAYERED_OP ip6 set src & dst",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip6 src set 2001:0db8:0:f101::1 munge ip6 dst set 2001:0db8:0:f101::1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "ipv6\\+8: val 20010db8 mask 00000000.*ipv6\\+12: val 0000f101 mask 00000000.*ipv6\\+16: val 00000000 mask 00000000.*ipv6\\+20: val 00000001 mask 00000000.*ipv6\\+24: val 20010db8 mask 00000000.*ipv6\\+28: val 0000f101 mask 00000000.*ipv6\\+32: val 00000000 mask 00000000.*ipv6\\+36: val 00000001 mask 00000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "6d34",
+        "name": "Add pedit action with LAYERED_OP ip6 dst retain value (INVALID)",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip6 dst set 2001:0db8:0:f101::1 retain 0xff0000",
+        "expExitCode": "255",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "6f5e",
+        "name": "Add pedit action with LAYERED_OP ip6 flow_lbl",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip6 flow_lbl set 0xfffff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "ipv6\\+0: val 0007ffff mask fff80000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "6795",
+        "name": "Add pedit action with LAYERED_OP ip6 set payload_len, nexthdr, hoplimit",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip6 payload_len set 0xffff munge ip6 nexthdr set 0xff munge ip6 hoplimit set 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "ipv6\\+4: val ffff0000 mask 0000ffff.*ipv6\\+4: val 0000ff00 mask ffff00ff.*ipv6\\+4: val 000000ff mask ffffff00",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "1442",
+        "name": "Add pedit action with LAYERED_OP tcp set dport & sport",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge tcp dport set 4789 munge tcp sport set 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "tcp\\+0: val 000012b5 mask ffff0000.*tcp\\+0: val 00010000 mask 0000ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "b7ac",
+        "name": "Add pedit action with LAYERED_OP tcp sport set (INVALID)",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge tcp sport set -200",
+        "expExitCode": "255",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "cfcc",
+        "name": "Add pedit action with LAYERED_OP tcp flags set",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge tcp flags set 0x16",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "tcp\\+12: val 00160000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "3bc4",
+        "name": "Add pedit action with LAYERED_OP tcp set dport, sport & flags fields",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge tcp dport set 4789 munge tcp sport set 1 munge tcp flags set 0x1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "tcp\\+0: val 000012b5 mask ffff0000.*tcp\\+0: val 00010000 mask 0000ffff.*tcp\\+12: val 00010000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "f1c8",
+        "name": "Add pedit action with LAYERED_OP udp set dport & sport",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge udp dport set 4789 munge udp sport set 4789",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "udp\\+0: val 000012b5 mask ffff0000.*udp\\+0: val 12b50000 mask 0000ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "d784",
+        "name": "Add pedit action with mixed RAW/LAYERED_OP #1",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth src set 11:22:33:44:55:66 munge ip ttl set 0xff munge tcp flags clear munge offset 15 u8 add 40 retain 0xf0 munge udp dport add 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "eth\\+4: val 00001122 mask ffff0000.*eth\\+8: val 33445566 mask 00000000.*ipv4\\+8: val ff000000 mask 00ffffff.*tcp\\+12: val 00000000 mask ff00ffff.* 12: add 00000020 mask ffffff0f.*udp\\+0: add 00000001 mask ffff0000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
+        "id": "70ca",
+        "name": "Add pedit action with mixed RAW/LAYERED_OP #2",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op",
+            "raw_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge eth src set 11:22:33:44:55:66 munge eth dst set ff:ee:dd:cc:bb:aa munge ip6 payload_len set 0xffff munge ip6 nexthdr set 0xff munge ip6 hoplimit preserve munge offset 0 u8 set 0x12 munge offset 1 u16 set 0x3456 munge offset 3 u8 set 0x78 munge ip ttl set 0xaa munge ip protocol set 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit | grep 'key '",
+        "matchPattern": "eth\\+4: val 00001122 mask ffff0000.*eth\\+8: val 33445566 mask 00000000.*eth\\+0: val ffeeddcc mask 00000000.*eth\\+4: val bbaa0000 mask 0000ffff.*ipv6\\+4: val ffff0000 mask 0000ffff.*ipv6\\+4: val 0000ff00 mask ffff00ff.*ipv6\\+4: val 00000000 mask ffffffff.* 0: val 12000000 mask 00ffffff.* 0: val 00345600 mask ff0000ff.* 0: val 00000078 mask ffffff00.*ipv4\\+8: val aa000000 mask 00ffffff.*ipv4\\+8: val 00ff0000 mask ff00ffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
     }
+
 ]
-- 
cgit v1.2.3


From 8ff80e96e3ccea5ff0a890d4f18997e0344dbec2 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Thu, 28 Mar 2019 18:01:58 -0700
Subject: selftests/bpf: Test variable offset stack access

Test different scenarios of indirect variable-offset stack access: out of
bound access (>0), min_off below initialized part of the stack,
max_off+size above initialized part of the stack, initialized stack.

Example of output:
  ...
  #856/p indirect variable-offset stack access, out of bound OK
  #857/p indirect variable-offset stack access, max_off+size > max_initialized OK
  #858/p indirect variable-offset stack access, min_off < min_initialized OK
  #859/p indirect variable-offset stack access, ok OK
  ...

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/var_off.c | 79 +++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
index 1e536ff121a5..c4ebd0bb0781 100644
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ b/tools/testing/selftests/bpf/verifier/var_off.c
@@ -40,7 +40,7 @@
 	.prog_type = BPF_PROG_TYPE_LWT_IN,
 },
 {
-	"indirect variable-offset stack access",
+	"indirect variable-offset stack access, out of bound",
 	.insns = {
 	/* Fill the top 8 bytes of the stack */
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@ -60,7 +60,82 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 5 },
-	.errstr = "variable stack read R2",
+	.errstr = "invalid stack type R2 var_off",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_LWT_IN,
 },
+{
+	"indirect variable-offset stack access, max_off+size > max_initialized",
+	.insns = {
+	/* Fill only the second from top 8 bytes of the stack. */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+	/* Get an unknown value. */
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+	/* Make it small and 4-byte aligned. */
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+	BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+	/* Add it to fp.  We now have either fp-12 or fp-16, but we don't know
+	 * which. fp-12 size 8 is partially uninitialized stack.
+	 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+	/* Dereference it indirectly. */
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 5 },
+	.errstr = "invalid indirect read from stack var_off",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_LWT_IN,
+},
+{
+	"indirect variable-offset stack access, min_off < min_initialized",
+	.insns = {
+	/* Fill only the top 8 bytes of the stack. */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	/* Get an unknown value */
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+	/* Make it small and 4-byte aligned. */
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+	BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+	/* Add it to fp.  We now have either fp-12 or fp-16, but we don't know
+	 * which. fp-16 size 8 is partially uninitialized stack.
+	 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+	/* Dereference it indirectly. */
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 5 },
+	.errstr = "invalid indirect read from stack var_off",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_LWT_IN,
+},
+{
+	"indirect variable-offset stack access, ok",
+	.insns = {
+	/* Fill the top 16 bytes of the stack. */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	/* Get an unknown value. */
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+	/* Make it small and 4-byte aligned. */
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+	BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+	/* Add it to fp.  We now have either fp-12 or fp-16, we don't know
+	 * which, but either way it points to initialized stack.
+	 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+	/* Dereference it indirectly. */
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 6 },
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_LWT_IN,
+},
-- 
cgit v1.2.3


From 9de2640b06ecf0e6ef4b24b07a5573a2804d77d0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 28 Mar 2019 22:30:53 -0700
Subject: bpf: add bpffs multi-dimensional array tests in test_btf

For multiple dimensional arrays like below,
  int a[2][3]
both llvm and pahole generated one BTF_KIND_ARRAY type like
  . element_type: int
  . index_type: unsigned int
  . number of elements: 6

Such a collapsed BTF_KIND_ARRAY type will cause the divergence
in BTF vs. the user code. In the compile-once-run-everywhere
project, the header file is generated from BTF and used for bpf
program, and the definition in the header file will be different
from what user expects.

But the kernel actually supports chained multi-dimensional array
types properly. The above "int a[2][3]" can be represented as
  Type #n:
    . element_type: int
    . index_type: unsigned int
    . number of elements: 3
  Type #(n+1):
    . element_type: type #n
    . index_type: unsigned int
    . number of elements: 2

The following llvm commit
  https://reviews.llvm.org/rL357215
also enables llvm to generated proper chained multi-dimensional arrays.

The test_btf already has a raw test ("struct test #1") for chained
multi-dimensional arrays. This patch added amended bpffs test for
chained multi-dimensional arrays.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_btf.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 23e3b314ca60..5afc3f7dff81 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -3677,6 +3677,7 @@ struct pprint_mapv {
 	} aenum;
 	uint32_t ui32b;
 	uint32_t bits2c:2;
+	uint8_t si8_4[2][2];
 };
 
 #ifdef __SIZEOF_INT128__
@@ -3729,7 +3730,7 @@ static struct btf_raw_test pprint_test_template[] = {
 		BTF_ENUM_ENC(NAME_TBD, 2),
 		BTF_ENUM_ENC(NAME_TBD, 3),
 		/* struct pprint_mapv */		/* [16] */
-		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 10), 40),
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 11), 40),
 		BTF_MEMBER_ENC(NAME_TBD, 11, 0),	/* uint32_t ui32 */
 		BTF_MEMBER_ENC(NAME_TBD, 10, 32),	/* uint16_t ui16 */
 		BTF_MEMBER_ENC(NAME_TBD, 12, 64),	/* int32_t si32 */
@@ -3740,9 +3741,12 @@ static struct btf_raw_test pprint_test_template[] = {
 		BTF_MEMBER_ENC(NAME_TBD, 15, 192),	/* aenum */
 		BTF_MEMBER_ENC(NAME_TBD, 11, 224),	/* uint32_t ui32b */
 		BTF_MEMBER_ENC(NAME_TBD, 6, 256),	/* bits2c */
+		BTF_MEMBER_ENC(NAME_TBD, 17, 264),	/* si8_4 */
+		BTF_TYPE_ARRAY_ENC(18, 1, 2),		/* [17] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 2),		/* [18] */
 		BTF_END_RAW,
 	},
-	BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"),
+	BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"),
 	.key_size = sizeof(unsigned int),
 	.value_size = sizeof(struct pprint_mapv),
 	.key_type_id = 3,	/* unsigned int */
@@ -3791,7 +3795,7 @@ static struct btf_raw_test pprint_test_template[] = {
 		BTF_ENUM_ENC(NAME_TBD, 2),
 		BTF_ENUM_ENC(NAME_TBD, 3),
 		/* struct pprint_mapv */		/* [16] */
-		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40),
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40),
 		BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)),	/* uint32_t ui32 */
 		BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)),	/* uint16_t ui16 */
 		BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)),	/* int32_t si32 */
@@ -3802,9 +3806,12 @@ static struct btf_raw_test pprint_test_template[] = {
 		BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)),	/* aenum */
 		BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)),	/* uint32_t ui32b */
 		BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 256)),	/* bits2c */
+		BTF_MEMBER_ENC(NAME_TBD, 17, 264),	/* si8_4 */
+		BTF_TYPE_ARRAY_ENC(18, 1, 2),		/* [17] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 2),		/* [18] */
 		BTF_END_RAW,
 	},
-	BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"),
+	BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"),
 	.key_size = sizeof(unsigned int),
 	.value_size = sizeof(struct pprint_mapv),
 	.key_type_id = 3,	/* unsigned int */
@@ -3855,7 +3862,7 @@ static struct btf_raw_test pprint_test_template[] = {
 		BTF_ENUM_ENC(NAME_TBD, 2),
 		BTF_ENUM_ENC(NAME_TBD, 3),
 		/* struct pprint_mapv */		/* [16] */
-		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40),
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40),
 		BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)),	/* uint32_t ui32 */
 		BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)),	/* uint16_t ui16 */
 		BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)),	/* int32_t si32 */
@@ -3866,13 +3873,16 @@ static struct btf_raw_test pprint_test_template[] = {
 		BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)),	/* aenum */
 		BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)),	/* uint32_t ui32b */
 		BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 256)),	/* bits2c */
+		BTF_MEMBER_ENC(NAME_TBD, 20, BTF_MEMBER_OFFSET(0, 264)),	/* si8_4 */
 		/* typedef unsigned int ___int */	/* [17] */
 		BTF_TYPEDEF_ENC(NAME_TBD, 18),
 		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 6),	/* [18] */
 		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 15),	/* [19] */
+		BTF_TYPE_ARRAY_ENC(21, 1, 2),					/* [20] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 2),					/* [21] */
 		BTF_END_RAW,
 	},
-	BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int"),
+	BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int\0si8_4"),
 	.key_size = sizeof(unsigned int),
 	.value_size = sizeof(struct pprint_mapv),
 	.key_type_id = 3,	/* unsigned int */
@@ -4007,6 +4017,10 @@ static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind,
 			v->aenum = i & 0x03;
 			v->ui32b = 4;
 			v->bits2c = 1;
+			v->si8_4[0][0] = (cpu + i) & 0xff;
+			v->si8_4[0][1] = (cpu + i + 1) & 0xff;
+			v->si8_4[1][0] = (cpu + i + 2) & 0xff;
+			v->si8_4[1][1] = (cpu + i + 3) & 0xff;
 			v = (void *)v + rounded_value_size;
 		}
 	}
@@ -4040,7 +4054,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
 		nexpected_line = snprintf(expected_line, line_size,
 					  "%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
 					  "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
-					  "%u,0x%x}\n",
+					  "%u,0x%x,[[%d,%d],[%d,%d]]}\n",
 					  percpu_map ? "\tcpu" : "",
 					  percpu_map ? cpu : next_key,
 					  v->ui32, v->si32,
@@ -4054,7 +4068,9 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
 					  v->ui8a[6], v->ui8a[7],
 					  pprint_enum_str[v->aenum],
 					  v->ui32b,
-					  v->bits2c);
+					  v->bits2c,
+					  v->si8_4[0][0], v->si8_4[0][1],
+					  v->si8_4[1][0], v->si8_4[1][1]);
 	}
 
 #ifdef __SIZEOF_INT128__
-- 
cgit v1.2.3


From 6b7b6995c43e17f994c51c107740daedd948255a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 2 Apr 2019 10:08:32 -0700
Subject: selftests: bpf: tests.h should depend on .c files, not the output

This makes sure we don't put headers as input files when doing
compilation, because clang complains about the following:

clang-9: error: cannot specify -o when generating multiple output files
../lib.mk:152: recipe for target 'xxx/tools/testing/selftests/bpf/test_verifier' failed
make: *** [xxx/tools/testing/selftests/bpf/test_verifier] Error 1
make: *** Waiting for unfinished jobs....
clang-9: error: cannot specify -o when generating multiple output files
../lib.mk:152: recipe for target 'xxx/tools/testing/selftests/bpf/test_progs' failed

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 77b73b892136..078283d073b0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -209,7 +209,7 @@ ifeq ($(DWARF2BTF),y)
 endif
 
 PROG_TESTS_H := $(OUTPUT)/prog_tests/tests.h
-$(OUTPUT)/test_progs: $(PROG_TESTS_H)
+test_progs.c: $(PROG_TESTS_H)
 $(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS)
 $(OUTPUT)/test_progs: prog_tests/*.c
 
@@ -232,7 +232,7 @@ $(PROG_TESTS_H): $(PROG_TESTS_DIR) $(PROG_TESTS_FILES)
 		 ) > $(PROG_TESTS_H))
 
 VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h
-$(OUTPUT)/test_verifier: $(VERIFIER_TESTS_H)
+test_verifier.c: $(VERIFIER_TESTS_H)
 $(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS)
 
 VERIFIER_TESTS_DIR = $(OUTPUT)/verifier
-- 
cgit v1.2.3


From 94e8f3c7125a36c6cedf37c8838cb77c8a8d8cf9 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 2 Apr 2019 10:08:33 -0700
Subject: selftests: bpf: fix -Wformat-security warning for
 flow_dissector_load.c

flow_dissector_load.c:55:19: warning: format string is not a string literal (potentially insecure)
      [-Wformat-security]
                error(1, errno, command);
                                ^~~~~~~
flow_dissector_load.c:55:19: note: treat the string as an argument to avoid this
                error(1, errno, command);
                                ^
                                "%s",
1 warning generated.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/flow_dissector_load.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 77cafa66d048..7136ab9ffa73 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -52,7 +52,7 @@ static void detach_program(void)
 	sprintf(command, "rm -r %s", cfg_pin_path);
 	ret = system(command);
 	if (ret)
-		error(1, errno, command);
+		error(1, errno, "%s", command);
 }
 
 static void parse_opts(int argc, char **argv)
-- 
cgit v1.2.3


From a918b03e8c956d159b13fdde8608847393d54ef9 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 2 Apr 2019 10:08:34 -0700
Subject: selftests: bpf: fix -Wformat-invalid-specifier for bpf_obj_id.c

Use standard C99 %zu for sizeof, not GCC's custom %Zu:
bpf_obj_id.c:76:48: warning: invalid conversion specifier 'Z'

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index a64f7a02139c..cb827383db4d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -73,7 +73,7 @@ void test_bpf_obj_id(void)
 			  info_len != sizeof(struct bpf_map_info) ||
 			  strcmp((char *)map_infos[i].name, expected_map_name),
 			  "get-map-info(fd)",
-			  "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+			  "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
 			  err, errno,
 			  map_infos[i].type, BPF_MAP_TYPE_ARRAY,
 			  info_len, sizeof(struct bpf_map_info),
@@ -117,7 +117,7 @@ void test_bpf_obj_id(void)
 			  *(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
 			  strcmp((char *)prog_infos[i].name, expected_prog_name),
 			  "get-prog-info(fd)",
-			  "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+			  "err %d errno %d i %d type %d(%d) info_len %u(%zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
 			  err, errno, i,
 			  prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
 			  info_len, sizeof(struct bpf_prog_info),
@@ -185,7 +185,7 @@ void test_bpf_obj_id(void)
 		      memcmp(&prog_info, &prog_infos[i], info_len) ||
 		      *(int *)(long)prog_info.map_ids != saved_map_id,
 		      "get-prog-info(next_id->fd)",
-		      "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
+		      "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n",
 		      err, errno, info_len, sizeof(struct bpf_prog_info),
 		      memcmp(&prog_info, &prog_infos[i], info_len),
 		      *(int *)(long)prog_info.map_ids, saved_map_id);
@@ -231,7 +231,7 @@ void test_bpf_obj_id(void)
 		      memcmp(&map_info, &map_infos[i], info_len) ||
 		      array_value != array_magic_value,
 		      "check get-map-info(next_id->fd)",
-		      "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
+		      "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n",
 		      err, errno, info_len, sizeof(struct bpf_map_info),
 		      memcmp(&map_info, &map_infos[i], info_len),
 		      array_value, array_magic_value);
-- 
cgit v1.2.3


From 7596aa3ea8a0b478a8a5c6207e69cc7fcc035d45 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 2 Apr 2019 10:08:35 -0700
Subject: selftests: bpf: remove duplicate .flags initialization in ctx_skb.c

verifier/ctx_skb.c:708:11: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
        .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/verifier/ctx_skb.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index c660deb582f1..b0fda2877119 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -705,7 +705,6 @@
 	.errstr = "invalid bpf_context access",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"check cb access: half, wrong type",
-- 
cgit v1.2.3


From da11b417583ece875f862d84578259a2ab27ad86 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 1 Apr 2019 21:27:47 -0700
Subject: libbpf: teach libbpf about log_level bit 2

Allow bpf_prog_load_xattr() to specify log_level for program loading.

Teach libbpf to accept log_level with bit 2 set.

Increase default BPF_LOG_BUF_SIZE from 256k to 16M.
There is no downside to increase it to a maximum allowed by old kernels.
Existing 256k limit caused ENOSPC errors and users were not able to see
verifier error which is printed at the end of the verifier log.

If ENOSPC is hit, double the verifier log and try again to capture
the verifier error.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/bpf.c    |  2 +-
 tools/lib/bpf/bpf.h    |  2 +-
 tools/lib/bpf/libbpf.c | 16 ++++++++++++++--
 tools/lib/bpf/libbpf.h |  1 +
 4 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9cd015574e83..a1db869a6fda 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -223,7 +223,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 		return -EINVAL;
 
 	log_level = load_attr->log_level;
-	if (log_level > 2 || (log_level && !log_buf))
+	if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
 		return -EINVAL;
 
 	name_len = load_attr->name ? strlen(load_attr->name) : 0;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6ffdd79bea89..e2c0df7b831f 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -92,7 +92,7 @@ struct bpf_load_program_attr {
 #define MAPS_RELAX_COMPAT	0x01
 
 /* Recommend log buffer size */
-#define BPF_LOG_BUF_SIZE (256 * 1024)
+#define BPF_LOG_BUF_SIZE (16 * 1024 * 1024) /* verifier maximum in kernels <= 5.1 */
 LIBBPF_API int
 bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 		       char *log_buf, size_t log_buf_sz);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 11c25d9ea431..e1e4d35cf08e 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -152,6 +152,7 @@ struct bpf_program {
 		};
 	} *reloc_desc;
 	int nr_reloc;
+	int log_level;
 
 	struct {
 		int nr;
@@ -1494,6 +1495,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 {
 	struct bpf_load_program_attr load_attr;
 	char *cp, errmsg[STRERR_BUFSIZE];
+	int log_buf_size = BPF_LOG_BUF_SIZE;
 	char *log_buf;
 	int ret;
 
@@ -1514,21 +1516,30 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.line_info = prog->line_info;
 	load_attr.line_info_rec_size = prog->line_info_rec_size;
 	load_attr.line_info_cnt = prog->line_info_cnt;
+	load_attr.log_level = prog->log_level;
 	if (!load_attr.insns || !load_attr.insns_cnt)
 		return -EINVAL;
 
-	log_buf = malloc(BPF_LOG_BUF_SIZE);
+retry_load:
+	log_buf = malloc(log_buf_size);
 	if (!log_buf)
 		pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
 
-	ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
+	ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
 
 	if (ret >= 0) {
+		if (load_attr.log_level)
+			pr_debug("verifier log:\n%s", log_buf);
 		*pfd = ret;
 		ret = 0;
 		goto out;
 	}
 
+	if (errno == ENOSPC) {
+		log_buf_size <<= 1;
+		free(log_buf);
+		goto retry_load;
+	}
 	ret = -LIBBPF_ERRNO__LOAD;
 	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 	pr_warning("load bpf program failed: %s\n", cp);
@@ -2938,6 +2949,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 		bpf_program__set_expected_attach_type(prog,
 						      expected_attach_type);
 
+		prog->log_level = attr->log_level;
 		if (!first_prog)
 			first_prog = prog;
 	}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c70785cc8ef5..531323391d07 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -314,6 +314,7 @@ struct bpf_prog_load_attr {
 	enum bpf_prog_type prog_type;
 	enum bpf_attach_type expected_attach_type;
 	int ifindex;
+	int log_level;
 };
 
 LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
-- 
cgit v1.2.3


From e5e7a8f2d858a91b79c4afc51a3f15edcbf9cb60 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 1 Apr 2019 21:27:48 -0700
Subject: selftests/bpf: add few verifier scale tests

Add 3 basic tests that stress verifier scalability.

test_verif_scale1.c calls non-inlined jhash() function 90 times on
different position in the packet.
This test simulates network packet parsing.
jhash function is ~140 instructions and main program is ~1200 insns.

test_verif_scale2.c force inlines jhash() function 90 times.
This program is ~15k instructions long.

test_verif_scale3.c calls non-inlined jhash() function 90 times on
But this time jhash has to process 32-bytes from the packet
instead of 14-bytes in tests 1 and 2.
jhash function is ~230 insns and main program is ~1200 insns.

$ test_progs -s
can be used to see verifier stats.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/prog_tests/bpf_verif_scale.c     | 49 +++++++++++++++
 tools/testing/selftests/bpf/progs/test_jhash.h     | 70 ++++++++++++++++++++++
 .../selftests/bpf/progs/test_verif_scale1.c        | 30 ++++++++++
 .../selftests/bpf/progs/test_verif_scale2.c        | 30 ++++++++++
 .../selftests/bpf/progs/test_verif_scale3.c        | 30 ++++++++++
 tools/testing/selftests/bpf/test_progs.c           |  6 +-
 tools/testing/selftests/bpf/test_progs.h           |  1 +
 7 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_jhash.h
 create mode 100644 tools/testing/selftests/bpf/progs/test_verif_scale1.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_verif_scale2.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_verif_scale3.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
new file mode 100644
index 000000000000..23b159d95c3f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <test_progs.h>
+static int libbpf_debug_print(enum libbpf_print_level level,
+			      const char *format, va_list args)
+{
+	if (level != LIBBPF_DEBUG)
+		return 0;
+
+	if (!strstr(format, "verifier log"))
+		return 0;
+	return vfprintf(stderr, "%s", args);
+}
+
+static int check_load(const char *file)
+{
+	struct bpf_prog_load_attr attr;
+	struct bpf_object *obj;
+	int err, prog_fd;
+
+	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+	attr.file = file;
+	attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+	attr.log_level = 4;
+	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+	bpf_object__close(obj);
+	if (err)
+		error_cnt++;
+	return err;
+}
+
+void test_bpf_verif_scale(void)
+{
+	const char *file1 = "./test_verif_scale1.o";
+	const char *file2 = "./test_verif_scale2.o";
+	const char *file3 = "./test_verif_scale3.o";
+	int err;
+
+	if (verifier_stats)
+		libbpf_set_print(libbpf_debug_print);
+
+	err = check_load(file1);
+	err |= check_load(file2);
+	err |= check_load(file3);
+	if (!err)
+		printf("test_verif_scale:OK\n");
+	else
+		printf("test_verif_scale:FAIL\n");
+}
diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h
new file mode 100644
index 000000000000..3d12c11a8d47
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_jhash.h
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+typedef unsigned int u32;
+
+static __attribute__((always_inline)) u32 rol32(u32 word, unsigned int shift)
+{
+	return (word << shift) | (word >> ((-shift) & 31));
+}
+
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
+
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+}
+
+#define JHASH_INITVAL		0xdeadbeef
+
+static ATTR
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const unsigned char *k = key;
+
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	while (length > 12) {
+		a += *(volatile u32 *)(k);
+		b += *(volatile u32 *)(k + 4);
+		c += *(volatile u32 *)(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 c ^= a;
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
new file mode 100644
index 000000000000..f3236ce35f31
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#define ATTR __attribute__((noinline))
+#include "test_jhash.h"
+
+SEC("scale90_noinline")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	void *ptr;
+	int ret = 0, nh_off, i = 0;
+
+	nh_off = 14;
+
+	/* pragma unroll doesn't work on large loops */
+
+#define C do { \
+	ptr = data + i; \
+	if (ptr + nh_off > data_end) \
+		break; \
+	ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+	} while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+	C30;C30;C30; /* 90 calls */
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
new file mode 100644
index 000000000000..77830693eccb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#define ATTR __attribute__((always_inline))
+#include "test_jhash.h"
+
+SEC("scale90_inline")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	void *ptr;
+	int ret = 0, nh_off, i = 0;
+
+	nh_off = 14;
+
+	/* pragma unroll doesn't work on large loops */
+
+#define C do { \
+	ptr = data + i; \
+	if (ptr + nh_off > data_end) \
+		break; \
+	ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+	} while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+	C30;C30;C30; /* 90 calls */
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
new file mode 100644
index 000000000000..1848da04ea41
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#define ATTR __attribute__((noinline))
+#include "test_jhash.h"
+
+SEC("scale90_noinline32")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	void *ptr;
+	int ret = 0, nh_off, i = 0;
+
+	nh_off = 32;
+
+	/* pragma unroll doesn't work on large loops */
+
+#define C do { \
+	ptr = data + i; \
+	if (ptr + nh_off > data_end) \
+		break; \
+	ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+	} while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+	C30;C30;C30; /* 90 calls */
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 5d10aee9e277..bf5c90998916 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -9,6 +9,7 @@
 
 int error_cnt, pass_cnt;
 bool jit_enabled;
+bool verifier_stats = false;
 
 struct ipv4_packet pkt_v4 = {
 	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
@@ -162,12 +163,15 @@ void *spin_lock_thread(void *arg)
 #include <prog_tests/tests.h>
 #undef DECLARE
 
-int main(void)
+int main(int ac, char **av)
 {
 	srand(time(NULL));
 
 	jit_enabled = is_jit_enabled();
 
+	if (ac == 2 && strcmp(av[1], "-s") == 0)
+		verifier_stats = true;
+
 #define CALL
 #include <prog_tests/tests.h>
 #undef CALL
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 51a07367cd43..f095e1d4c657 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -40,6 +40,7 @@ typedef __u16 __sum16;
 
 extern int error_cnt, pass_cnt;
 extern bool jit_enabled;
+extern bool verifier_stats;
 
 #define MAGIC_BYTES 123
 
-- 
cgit v1.2.3


From 8aa2d4b4b92cd534d53353b0c2fb079572b97fdf Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 1 Apr 2019 21:27:49 -0700
Subject: selftests/bpf: synthetic tests to push verifier limits

Add a test to generate 1m ld_imm64 insns to stress the verifier.

Bump the size of fill_ld_abs_vlan_push_pop test from 4k to 29k
and jump_around_ld_abs from 4k to 5.5k.
Larger sizes are not possible due to 16-bit offset encoding
in jump instructions.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c  | 35 +++++++++++++++++++++-------
 tools/testing/selftests/bpf/verifier/ld_dw.c |  9 +++++++
 2 files changed, 35 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 19b5d03acc2a..75ef63b42f2c 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,6 +50,7 @@
 #include "../../../include/linux/filter.h"
 
 #define MAX_INSNS	BPF_MAXINSNS
+#define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
 #define MAX_NR_MAPS	14
 #define MAX_TEST_RUNS	8
@@ -66,6 +67,7 @@ static int skips;
 struct bpf_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
+	struct bpf_insn	*fill_insns;
 	int fixup_map_hash_8b[MAX_FIXUPS];
 	int fixup_map_hash_48b[MAX_FIXUPS];
 	int fixup_map_hash_16b[MAX_FIXUPS];
@@ -83,6 +85,7 @@ struct bpf_test {
 	const char *errstr;
 	const char *errstr_unpriv;
 	uint32_t retval, retval_unpriv, insn_processed;
+	int prog_len;
 	enum {
 		UNDEF,
 		ACCEPT,
@@ -119,10 +122,11 @@ struct other_val {
 
 static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
 {
-	/* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
+	/* test: {skb->data[0], vlan_push} x 51 + {skb->data[0], vlan_pop} x 51 */
 #define PUSH_CNT 51
-	unsigned int len = BPF_MAXINSNS;
-	struct bpf_insn *insn = self->insns;
+	/* jump range is limited to 16 bit. PUSH_CNT of ld_abs needs room */
+	unsigned int len = (1 << 15) - PUSH_CNT * 2 * 5 * 6;
+	struct bpf_insn *insn = self->fill_insns;
 	int i = 0, j, k = 0;
 
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
@@ -156,12 +160,14 @@ loop:
 	for (; i < len - 1; i++)
 		insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
 	insn[len - 1] = BPF_EXIT_INSN();
+	self->prog_len = len;
 }
 
 static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
 {
-	struct bpf_insn *insn = self->insns;
-	unsigned int len = BPF_MAXINSNS;
+	struct bpf_insn *insn = self->fill_insns;
+	/* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns */
+	unsigned int len = (1 << 15) / 6;
 	int i = 0;
 
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
@@ -171,11 +177,12 @@ static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
 	while (i < len - 1)
 		insn[i++] = BPF_LD_ABS(BPF_B, 1);
 	insn[i] = BPF_EXIT_INSN();
+	self->prog_len = i + 1;
 }
 
 static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 {
-	struct bpf_insn *insn = self->insns;
+	struct bpf_insn *insn = self->fill_insns;
 	uint64_t res = 0;
 	int i = 0;
 
@@ -193,6 +200,7 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 	insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
 	insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
 	insn[i] = BPF_EXIT_INSN();
+	self->prog_len = i + 1;
 	res ^= (res >> 32);
 	self->retval = (uint32_t)res;
 }
@@ -520,8 +528,10 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;
 	int *fixup_map_spin_lock = test->fixup_map_spin_lock;
 
-	if (test->fill_helper)
+	if (test->fill_helper) {
+		test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
 		test->fill_helper(test);
+	}
 
 	/* Allocating HTs with 1 elem is fine here, since we only test
 	 * for verifier and not do a runtime lookup, so the only thing
@@ -718,12 +728,17 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 	fixup_skips = skips;
 	do_test_fixup(test, prog_type, prog, map_fds);
+	if (test->fill_insns) {
+		prog = test->fill_insns;
+		prog_len = test->prog_len;
+	} else {
+		prog_len = probe_filter_length(prog);
+	}
 	/* If there were some map skips during fixup due to missing bpf
 	 * features, skip this test.
 	 */
 	if (fixup_skips != skips)
 		return;
-	prog_len = probe_filter_length(prog);
 
 	pflags = 0;
 	if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
@@ -731,7 +746,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
 		pflags |= BPF_F_ANY_ALIGNMENT;
 	fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
-				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
+				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 4);
 	if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
 		printf("SKIP (unsupported program type %d)\n", prog_type);
 		skips++;
@@ -830,6 +845,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		goto fail_log;
 	}
 close_fds:
+	if (test->fill_insns)
+		free(test->fill_insns);
 	close(fd_prog);
 	for (i = 0; i < MAX_NR_MAPS; i++)
 		close(map_fds[i]);
diff --git a/tools/testing/selftests/bpf/verifier/ld_dw.c b/tools/testing/selftests/bpf/verifier/ld_dw.c
index d2c75b889598..0f18e62f0099 100644
--- a/tools/testing/selftests/bpf/verifier/ld_dw.c
+++ b/tools/testing/selftests/bpf/verifier/ld_dw.c
@@ -34,3 +34,12 @@
 	.result = ACCEPT,
 	.retval = 5,
 },
+{
+	"ld_dw: xor semi-random 64 bit imms, test 5",
+	.insns = { },
+	.data = { },
+	.fill_helper = bpf_fill_rand_ld_dw,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.retval = 1000000 - 6,
+},
-- 
cgit v1.2.3


From 0979ff7992fb6f4eb837995b12f4071dcafebd2d Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Thu, 4 Apr 2019 07:17:55 +0900
Subject: selftests/bpf: ksym_search won't check symbols exists

Currently, ksym_search located at trace_helpers won't check symbols are
existing or not.

In ksym_search, when symbol is not found, it will return &syms[0](_stext).
But when the kernel symbols are not loaded, it will return NULL, which is
not a desired action.

This commit will add verification logic whether symbols are loaded prior
to the symbol search.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/trace_helpers.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 4cdb63bf0521..9a9fc6c9b70b 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -52,6 +52,10 @@ struct ksym *ksym_search(long key)
 	int start = 0, end = sym_cnt;
 	int result;
 
+	/* kallsyms not loaded. return NULL */
+	if (sym_cnt <= 0)
+		return NULL;
+
 	while (start < end) {
 		size_t mid = start + (end - start) / 2;
 
-- 
cgit v1.2.3


From e67b2c715415b121339049b630f0b4e1ede888dc Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Thu, 4 Apr 2019 07:17:56 +0900
Subject: samples, selftests/bpf: add NULL check for ksym_search

Since, ksym_search added with verification logic for symbols existence,
it could return NULL when the kernel symbols are not loaded.

This commit will add NULL check logic after ksym_search.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/offwaketime_user.c                            | 5 +++++
 samples/bpf/sampleip_user.c                               | 5 +++++
 samples/bpf/spintest_user.c                               | 7 ++++++-
 samples/bpf/trace_event_user.c                            | 5 +++++
 tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c | 4 ++--
 5 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index f06063af9fcb..bb315ce1b866 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -28,6 +28,11 @@ static void print_ksym(__u64 addr)
 	if (!addr)
 		return;
 	sym = ksym_search(addr);
+	if (!sym) {
+		printf("ksym not found. Is kallsyms loaded?\n");
+		return;
+	}
+
 	if (PRINT_RAW_ADDR)
 		printf("%s/%llx;", sym->name, addr);
 	else
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index 216c7ecbbbe9..23b90a45c802 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
@@ -109,6 +109,11 @@ static void print_ip_map(int fd)
 	for (i = 0; i < max; i++) {
 		if (counts[i].ip > PAGE_OFFSET) {
 			sym = ksym_search(counts[i].ip);
+			if (!sym) {
+				printf("ksym not found. Is kallsyms loaded?\n");
+				continue;
+			}
+
 			printf("0x%-17llx %-32s %u\n", counts[i].ip, sym->name,
 			       counts[i].count);
 		} else {
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index 8d3e9cfa1909..2556af2d9b3e 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -37,8 +37,13 @@ int main(int ac, char **argv)
 			bpf_map_lookup_elem(map_fd[0], &next_key, &value);
 			assert(next_key == value);
 			sym = ksym_search(value);
-			printf(" %s", sym->name);
 			key = next_key;
+			if (!sym) {
+				printf("ksym not found. Is kallsyms loaded?\n");
+				continue;
+			}
+
+			printf(" %s", sym->name);
 		}
 		if (key)
 			printf("\n");
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index d08046ab81f0..d4178f60e075 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -34,6 +34,11 @@ static void print_ksym(__u64 addr)
 	if (!addr)
 		return;
 	sym = ksym_search(addr);
+	if (!sym) {
+		printf("ksym not found. Is kallsyms loaded?\n");
+		return;
+	}
+
 	printf("%s;", sym->name);
 	if (!strcmp(sym->name, "sys_read"))
 		sys_read_seen = true;
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index d7bb5beb1c57..c2a0a9d5591b 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -39,7 +39,7 @@ static int get_stack_print_output(void *data, int size)
 		} else {
 			for (i = 0; i < num_stack; i++) {
 				ks = ksym_search(raw_data[i]);
-				if (strcmp(ks->name, nonjit_func) == 0) {
+				if (ks && (strcmp(ks->name, nonjit_func) == 0)) {
 					found = true;
 					break;
 				}
@@ -56,7 +56,7 @@ static int get_stack_print_output(void *data, int size)
 		} else {
 			for (i = 0; i < num_stack; i++) {
 				ks = ksym_search(e->kern_stack[i]);
-				if (strcmp(ks->name, nonjit_func) == 0) {
+				if (ks && (strcmp(ks->name, nonjit_func) == 0)) {
 					good_kern_stack = true;
 					break;
 				}
-- 
cgit v1.2.3


From f68a5b44647bce6c34b10d5560c5b2c0aff31afc Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Wed, 3 Apr 2019 23:22:38 -0700
Subject: selftests/bpf: Test indirect var_off stack access in raw mode

Test that verifier rejects indirect access to uninitialized stack with
variable offset.

Example of output:
  # ./test_verifier
  ...
  #859/p indirect variable-offset stack access, uninitialized OK

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/verifier/var_off.c | 27 ++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
index c4ebd0bb0781..3840bd16e173 100644
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ b/tools/testing/selftests/bpf/verifier/var_off.c
@@ -114,6 +114,33 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_LWT_IN,
 },
+{
+	"indirect variable-offset stack access, uninitialized",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_2, 6),
+	BPF_MOV64_IMM(BPF_REG_3, 28),
+	/* Fill the top 16 bytes of the stack. */
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	/* Get an unknown value. */
+	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0),
+	/* Make it small and 4-byte aligned. */
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4),
+	BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
+	/* Add it to fp.  We now have either fp-12 or fp-16, we don't know
+	 * which, but either way it points to initialized stack.
+	 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
+	BPF_MOV64_IMM(BPF_REG_5, 8),
+	/* Dereference it indirectly. */
+	BPF_EMIT_CALL(BPF_FUNC_getsockopt),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid indirect read from stack var_off",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+},
 {
 	"indirect variable-offset stack access, ok",
 	.insns = {
-- 
cgit v1.2.3


From 2c6927dbdc3fbd41207e671212f53a98bbebf6ba Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Wed, 3 Apr 2019 23:22:40 -0700
Subject: selftests/bpf: Test indirect var_off stack access in unpriv mode

Test that verifier rejects indirect stack access with variable offset in
unprivileged mode and accepts same code in privileged mode.

Since pointer arithmetics is prohibited in unprivileged mode verifier
should reject the program even before it gets to helper call that uses
variable offset, at the time when that variable offset is trying to be
constructed.

Example of output:
  # ./test_verifier
  ...
  #859/u indirect variable-offset stack access, priv vs unpriv OK
  #859/p indirect variable-offset stack access, priv vs unpriv OK

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/verifier/var_off.c | 27 ++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
index 3840bd16e173..f5d5ff18ef22 100644
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ b/tools/testing/selftests/bpf/verifier/var_off.c
@@ -114,6 +114,33 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_LWT_IN,
 },
+{
+	"indirect variable-offset stack access, priv vs unpriv",
+	.insns = {
+	/* Fill the top 16 bytes of the stack. */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	/* Get an unknown value. */
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+	/* Make it small and 4-byte aligned. */
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+	BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+	/* Add it to fp.  We now have either fp-12 or fp-16, we don't know
+	 * which, but either way it points to initialized stack.
+	 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+	/* Dereference it indirectly. */
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 6 },
+	.errstr_unpriv = "R2 stack pointer arithmetic goes out of range, prohibited for !root",
+	.result_unpriv = REJECT,
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
 {
 	"indirect variable-offset stack access, uninitialized",
 	.insns = {
-- 
cgit v1.2.3


From 07f9196241f8bceef975dd15f894f8ed51425d55 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Wed, 3 Apr 2019 23:22:42 -0700
Subject: selftests/bpf: Test unbounded var_off stack access

Test the case when reg->smax_value is too small/big and can overflow,
and separately min and max values outside of stack bounds.

Example of output:
  # ./test_verifier
  #856/p indirect variable-offset stack access, unbounded OK
  #857/p indirect variable-offset stack access, max out of bound OK
  #858/p indirect variable-offset stack access, min out of bound OK

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/verifier/var_off.c | 57 +++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
index f5d5ff18ef22..8504ac937809 100644
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ b/tools/testing/selftests/bpf/verifier/var_off.c
@@ -40,7 +40,35 @@
 	.prog_type = BPF_PROG_TYPE_LWT_IN,
 },
 {
-	"indirect variable-offset stack access, out of bound",
+	"indirect variable-offset stack access, unbounded",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_2, 6),
+	BPF_MOV64_IMM(BPF_REG_3, 28),
+	/* Fill the top 16 bytes of the stack. */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	/* Get an unknown value. */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops,
+							   bytes_received)),
+	/* Check the lower bound but don't check the upper one. */
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_4, 0, 4),
+	/* Point the lower bound to initialized stack. Offset is now in range
+	 * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded.
+	 */
+	BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
+	BPF_MOV64_IMM(BPF_REG_5, 8),
+	/* Dereference it indirectly. */
+	BPF_EMIT_CALL(BPF_FUNC_getsockopt),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.errstr = "R4 unbounded indirect variable offset stack access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+},
+{
+	"indirect variable-offset stack access, max out of bound",
 	.insns = {
 	/* Fill the top 8 bytes of the stack */
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@ -60,7 +88,32 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 5 },
-	.errstr = "invalid stack type R2 var_off",
+	.errstr = "R2 max value is outside of stack bound",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_LWT_IN,
+},
+{
+	"indirect variable-offset stack access, min out of bound",
+	.insns = {
+	/* Fill the top 8 bytes of the stack */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	/* Get an unknown value */
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+	/* Make it small and 4-byte aligned */
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+	BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 516),
+	/* add it to fp.  We now have either fp-516 or fp-512, but
+	 * we don't know which
+	 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+	/* dereference it indirectly */
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 5 },
+	.errstr = "R2 min value is outside of stack bound",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_LWT_IN,
 },
-- 
cgit v1.2.3


From f1054c65bca637c220fe8e32648156459361bb99 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 5 Apr 2019 18:40:47 +0300
Subject: selftests: forwarding: test for bridge mcast traffic after report and
 leave

This test is split in two, the first part checks if a report creates a
corresponding mdb entry and if traffic is properly forwarded to it, and
the second part checks if the mdb entry is deleted after a leave and
if traffic is *not* forwarded to it. Since the mcast querier is enabled
we should see standard mcast snooping bridge behaviour.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 152 +++++++++++++++++++++
 1 file changed, 152 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/bridge_igmp.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
new file mode 100755
index 000000000000..88d2472ba151
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="reportleave_test"
+NUM_NETIFS=4
+CHECK_TC="yes"
+TEST_GROUP="239.10.10.10"
+TEST_GROUP_MAC="01:00:5e:0a:0a:0a"
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	# Always cleanup the mcast group
+	ip address del dev $h2 $TEST_GROUP/32 2>&1 1>/dev/null
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+# return 0 if the packet wasn't seen on host2_if or 1 if it was
+mcast_packet_test()
+{
+	local mac=$1
+	local ip=$2
+	local host1_if=$3
+	local host2_if=$4
+	local seen=0
+
+	# Add an ACL on `host2_if` which will tell us whether the packet
+	# was received by it or not.
+	tc qdisc add dev $host2_if ingress
+	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+		flower dst_mac $mac action drop
+
+	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q
+	sleep 1
+
+	tc -j -s filter show dev $host2_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	if [[ $? -eq 0 ]]; then
+		seen=1
+	fi
+
+	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+	tc qdisc del dev $host2_if ingress
+
+	return $seen
+}
+
+reportleave_test()
+{
+	RET=0
+	ip address add dev $h2 $TEST_GROUP/32 autojoin
+	check_err $? "Could not join $TEST_GROUP"
+
+	sleep 5
+	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
+	check_err $? "Report didn't create mdb entry for $TEST_GROUP"
+
+	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+	check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
+
+	log_test "IGMP report $TEST_GROUP"
+
+	RET=0
+	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
+	check_err $? "mdb entry for $TEST_GROUP is missing"
+
+	ip address del dev $h2 $TEST_GROUP/32
+	check_err $? "Could not leave $TEST_GROUP"
+
+	sleep 5
+	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
+	check_fail $? "Leave didn't delete mdb entry for $TEST_GROUP"
+
+	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+	check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry"
+
+	log_test "IGMP leave $TEST_GROUP"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From ff466b58055f2d28d8ddc1388af312e87a693efe Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Sat, 6 Apr 2019 22:37:34 -0700
Subject: libbpf: Ignore -Wformat-nonliteral warning

vsprintf() in __base_pr() uses nonliteral format string and it breaks
compilation for those who provide corresponding extra CFLAGS, e.g.:
https://github.com/libbpf/libbpf/issues/27

If libbpf is built with the flags from PR:

  libbpf.c:68:26: error: format string is not a string literal
  [-Werror,-Wformat-nonliteral]
          return vfprintf(stderr, format, args);
                                  ^~~~~~
  1 error generated.

Ignore this warning since the use case in libbpf.c is legit.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e1e4d35cf08e..1ebfe7943d53 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -52,6 +52,11 @@
 #define BPF_FS_MAGIC		0xcafe4a11
 #endif
 
+/* vsprintf() in __base_pr() uses nonliteral format string. It may break
+ * compilation if user enables corresponding warning. Disable it explicitly.
+ */
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+
 #define __printf(a, b)	__attribute__((format(printf, a, b)))
 
 static int __base_pr(enum libbpf_print_level level, const char *format,
-- 
cgit v1.2.3


From 4c145dce26013763490df88f2473714f5bc7857d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Mar 2019 21:16:31 +0100
Subject: xfrm: make xfrm modes builtin

after previous changes, xfrm_mode contains no function pointers anymore
and all modules defining such struct contain no code except an init/exit
functions to register the xfrm_mode struct with the xfrm core.

Just place the xfrm modes core and remove the modules,
the run-time xfrm_mode register/unregister functionality is removed.

Before:

    text    data     bss      dec filename
    7523     200    2364    10087 net/xfrm/xfrm_input.o
   40003     628     440    41071 net/xfrm/xfrm_state.o
15730338 6937080 4046908 26714326 vmlinux

    7389     200    2364    9953  net/xfrm/xfrm_input.o
   40574     656     440   41670  net/xfrm/xfrm_state.o
15730084 6937068 4046908 26714060 vmlinux

The xfrm*_mode_{transport,tunnel,beet} modules are gone.

v2: replace CONFIG_INET6_XFRM_MODE_* IS_ENABLED guards with CONFIG_IPV6
    ones rather than removing them.

Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h                 |  13 +--
 net/ipv4/Kconfig                   |  29 +------
 net/ipv4/Makefile                  |   3 -
 net/ipv4/ip_vti.c                  |   2 +-
 net/ipv4/xfrm4_mode_beet.c         |  41 ----------
 net/ipv4/xfrm4_mode_transport.c    |  36 ---------
 net/ipv4/xfrm4_mode_tunnel.c       |  38 ---------
 net/ipv6/Kconfig                   |  35 +-------
 net/ipv6/Makefile                  |   4 -
 net/ipv6/ip6_vti.c                 |   2 +-
 net/ipv6/xfrm6_mode_beet.c         |  42 ----------
 net/ipv6/xfrm6_mode_ro.c           |  55 -------------
 net/ipv6/xfrm6_mode_transport.c    |  37 ---------
 net/ipv6/xfrm6_mode_tunnel.c       |  45 -----------
 net/xfrm/xfrm_input.c              |  13 ++-
 net/xfrm/xfrm_interface.c          |   2 +-
 net/xfrm/xfrm_output.c             |  15 ++--
 net/xfrm/xfrm_policy.c             |   2 +-
 net/xfrm/xfrm_state.c              | 158 ++++++++++++++-----------------------
 tools/testing/selftests/net/config |   2 -
 20 files changed, 81 insertions(+), 493 deletions(-)
 delete mode 100644 net/ipv4/xfrm4_mode_beet.c
 delete mode 100644 net/ipv4/xfrm4_mode_transport.c
 delete mode 100644 net/ipv4/xfrm4_mode_tunnel.c
 delete mode 100644 net/ipv6/xfrm6_mode_beet.c
 delete mode 100644 net/ipv6/xfrm6_mode_ro.c
 delete mode 100644 net/ipv6/xfrm6_mode_transport.c
 delete mode 100644 net/ipv6/xfrm6_mode_tunnel.c

(limited to 'tools')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 8d1c9506bcf6..4ca79cdc3460 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -234,9 +234,9 @@ struct xfrm_state {
 	/* Reference to data common to all the instances of this
 	 * transformer. */
 	const struct xfrm_type	*type;
-	struct xfrm_mode	*inner_mode;
-	struct xfrm_mode	*inner_mode_iaf;
-	struct xfrm_mode	*outer_mode;
+	const struct xfrm_mode	*inner_mode;
+	const struct xfrm_mode	*inner_mode_iaf;
+	const struct xfrm_mode	*outer_mode;
 
 	const struct xfrm_type_offload	*type_offload;
 
@@ -347,7 +347,6 @@ struct xfrm_state_afinfo {
 	struct module			*owner;
 	const struct xfrm_type		*type_map[IPPROTO_MAX];
 	const struct xfrm_type_offload	*type_offload_map[IPPROTO_MAX];
-	struct xfrm_mode		*mode_map[XFRM_MODE_MAX];
 
 	int			(*init_flags)(struct xfrm_state *x);
 	void			(*init_tempsel)(struct xfrm_selector *sel,
@@ -423,7 +422,6 @@ int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned sh
 int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
 
 struct xfrm_mode {
-	struct module *owner;
 	u8 encap;
 	u8 family;
 	u8 flags;
@@ -434,9 +432,6 @@ enum {
 	XFRM_MODE_FLAG_TUNNEL = 1,
 };
 
-int xfrm_register_mode(struct xfrm_mode *mode);
-void xfrm_unregister_mode(struct xfrm_mode *mode);
-
 static inline int xfrm_af2proto(unsigned int family)
 {
 	switch(family) {
@@ -449,7 +444,7 @@ static inline int xfrm_af2proto(unsigned int family)
 	}
 }
 
-static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto)
+static inline const struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto)
 {
 	if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) ||
 	    (ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6))
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 32cae39cdff6..8108e97d4285 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -304,7 +304,7 @@ config NET_IPVTI
 	tristate "Virtual (secure) IP: tunneling"
 	select INET_TUNNEL
 	select NET_IP_TUNNEL
-	depends on INET_XFRM_MODE_TUNNEL
+	select XFRM
 	---help---
 	  Tunneling means encapsulating data of one protocol type within
 	  another protocol and sending it over a channel that understands the
@@ -396,33 +396,6 @@ config INET_TUNNEL
 	tristate
 	default n
 
-config INET_XFRM_MODE_TRANSPORT
-	tristate "IP: IPsec transport mode"
-	default y
-	select XFRM
-	---help---
-	  Support for IPsec transport mode.
-
-	  If unsure, say Y.
-
-config INET_XFRM_MODE_TUNNEL
-	tristate "IP: IPsec tunnel mode"
-	default y
-	select XFRM
-	---help---
-	  Support for IPsec tunnel mode.
-
-	  If unsure, say Y.
-
-config INET_XFRM_MODE_BEET
-	tristate "IP: IPsec BEET mode"
-	default y
-	select XFRM
-	---help---
-	  Support for IPsec BEET mode.
-
-	  If unsure, say Y.
-
 config INET_DIAG
 	tristate "INET: socket monitoring interface"
 	default y
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 58629314eae9..000a61994c8f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -37,10 +37,7 @@ obj-$(CONFIG_INET_ESP) += esp4.o
 obj-$(CONFIG_INET_ESP_OFFLOAD) += esp4_offload.o
 obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
 obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
-obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
 obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
-obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
-obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
 obj-$(CONFIG_IP_PNP) += ipconfig.o
 obj-$(CONFIG_NETFILTER)	+= netfilter.o netfilter/
 obj-$(CONFIG_INET_DIAG) += inet_diag.o
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 3f3f6d6be318..91926c9a3bc9 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -107,7 +107,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
 	struct net_device *dev;
 	struct pcpu_sw_netstats *tstats;
 	struct xfrm_state *x;
-	struct xfrm_mode *inner_mode;
+	const struct xfrm_mode *inner_mode;
 	struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
 	u32 orig_mark = skb->mark;
 	int ret;
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
deleted file mode 100644
index ba84b278e627..000000000000
--- a/net/ipv4/xfrm4_mode_beet.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4.
- *
- * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
- *                    Miika Komu     <miika@iki.fi>
- *                    Herbert Xu     <herbert@gondor.apana.org.au>
- *                    Abhinav Pathak <abhinav.pathak@hiit.fi>
- *                    Jeff Ahrenholz <ahrenholz@gmail.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/ip.h>
-#include <net/xfrm.h>
-
-
-static struct xfrm_mode xfrm4_beet_mode = {
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_BEET,
-	.flags = XFRM_MODE_FLAG_TUNNEL,
-	.family = AF_INET,
-};
-
-static int __init xfrm4_beet_init(void)
-{
-	return xfrm_register_mode(&xfrm4_beet_mode);
-}
-
-static void __exit xfrm4_beet_exit(void)
-{
-	xfrm_unregister_mode(&xfrm4_beet_mode);
-}
-
-module_init(xfrm4_beet_init);
-module_exit(xfrm4_beet_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET);
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
deleted file mode 100644
index 397863ea762b..000000000000
--- a/net/ipv4/xfrm4_mode_transport.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * xfrm4_mode_transport.c - Transport mode encapsulation for IPv4.
- *
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/ip.h>
-#include <net/xfrm.h>
-#include <net/protocol.h>
-
-static struct xfrm_mode xfrm4_transport_mode = {
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_TRANSPORT,
-	.family = AF_INET,
-};
-
-static int __init xfrm4_transport_init(void)
-{
-	return xfrm_register_mode(&xfrm4_transport_mode);
-}
-
-static void __exit xfrm4_transport_exit(void)
-{
-	xfrm_unregister_mode(&xfrm4_transport_mode);
-}
-
-module_init(xfrm4_transport_init);
-module_exit(xfrm4_transport_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TRANSPORT);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
deleted file mode 100644
index b2b132c800fc..000000000000
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * xfrm4_mode_tunnel.c - Tunnel mode encapsulation for IPv4.
- *
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ip.h>
-#include <net/xfrm.h>
-
-static struct xfrm_mode xfrm4_tunnel_mode = {
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_TUNNEL,
-	.flags = XFRM_MODE_FLAG_TUNNEL,
-	.family = AF_INET,
-};
-
-static int __init xfrm4_mode_tunnel_init(void)
-{
-	return xfrm_register_mode(&xfrm4_tunnel_mode);
-}
-
-static void __exit xfrm4_mode_tunnel_exit(void)
-{
-	xfrm_unregister_mode(&xfrm4_tunnel_mode);
-}
-
-module_init(xfrm4_mode_tunnel_init);
-module_exit(xfrm4_mode_tunnel_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 613282c65a10..cd915e332c98 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -135,44 +135,11 @@ config INET6_TUNNEL
 	tristate
 	default n
 
-config INET6_XFRM_MODE_TRANSPORT
-	tristate "IPv6: IPsec transport mode"
-	default IPV6
-	select XFRM
-	---help---
-	  Support for IPsec transport mode.
-
-	  If unsure, say Y.
-
-config INET6_XFRM_MODE_TUNNEL
-	tristate "IPv6: IPsec tunnel mode"
-	default IPV6
-	select XFRM
-	---help---
-	  Support for IPsec tunnel mode.
-
-	  If unsure, say Y.
-
-config INET6_XFRM_MODE_BEET
-	tristate "IPv6: IPsec BEET mode"
-	default IPV6
-	select XFRM
-	---help---
-	  Support for IPsec BEET mode.
-
-	  If unsure, say Y.
-
-config INET6_XFRM_MODE_ROUTEOPTIMIZATION
-	tristate "IPv6: MIPv6 route optimization mode"
-	select XFRM
-	---help---
-	  Support for MIPv6 route optimization mode.
-
 config IPV6_VTI
 tristate "Virtual (secure) IPv6: tunneling"
 	select IPV6_TUNNEL
 	select NET_IP_TUNNEL
-	depends on INET6_XFRM_MODE_TUNNEL
+	select XFRM
 	---help---
 	Tunneling means encapsulating data of one protocol type within
 	another protocol and sending it over a channel that understands the
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index e0026fa1261b..8ccf35514015 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -35,10 +35,6 @@ obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o
 obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
 obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
 obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
-obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
-obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
-obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
-obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
 obj-$(CONFIG_IPV6_MIP6) += mip6.o
 obj-$(CONFIG_IPV6_ILA) += ila/
 obj-$(CONFIG_NETFILTER)	+= netfilter/
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 369803c581b7..71ec5e60cf8f 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -342,7 +342,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
 	struct net_device *dev;
 	struct pcpu_sw_netstats *tstats;
 	struct xfrm_state *x;
-	struct xfrm_mode *inner_mode;
+	const struct xfrm_mode *inner_mode;
 	struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
 	u32 orig_mark = skb->mark;
 	int ret;
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
deleted file mode 100644
index 1c4a76bdd889..000000000000
--- a/net/ipv6/xfrm6_mode_beet.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6.
- *
- * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
- *                    Miika Komu     <miika@iki.fi>
- *                    Herbert Xu     <herbert@gondor.apana.org.au>
- *                    Abhinav Pathak <abhinav.pathak@hiit.fi>
- *                    Jeff Ahrenholz <ahrenholz@gmail.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dsfield.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-static struct xfrm_mode xfrm6_beet_mode = {
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_BEET,
-	.flags = XFRM_MODE_FLAG_TUNNEL,
-	.family = AF_INET6,
-};
-
-static int __init xfrm6_beet_init(void)
-{
-	return xfrm_register_mode(&xfrm6_beet_mode);
-}
-
-static void __exit xfrm6_beet_exit(void)
-{
-	xfrm_unregister_mode(&xfrm6_beet_mode);
-}
-
-module_init(xfrm6_beet_init);
-module_exit(xfrm6_beet_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
deleted file mode 100644
index d0a6a4dbd689..000000000000
--- a/net/ipv6/xfrm6_mode_ro.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * xfrm6_mode_ro.c - Route optimization mode for IPv6.
- *
- * Copyright (C)2003-2006 Helsinki University of Technology
- * Copyright (C)2003-2006 USAGI/WIDE Project
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/*
- * Authors:
- *	Noriaki TAKAMIYA @USAGI
- *	Masahide NAKAMURA @USAGI
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/stringify.h>
-#include <linux/time.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-static struct xfrm_mode xfrm6_ro_mode = {
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_ROUTEOPTIMIZATION,
-	.family = AF_INET6,
-};
-
-static int __init xfrm6_ro_init(void)
-{
-	return xfrm_register_mode(&xfrm6_ro_mode);
-}
-
-static void __exit xfrm6_ro_exit(void)
-{
-	xfrm_unregister_mode(&xfrm6_ro_mode);
-}
-
-module_init(xfrm6_ro_init);
-module_exit(xfrm6_ro_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
deleted file mode 100644
index d90c934c2f1a..000000000000
--- a/net/ipv6/xfrm6_mode_transport.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * xfrm6_mode_transport.c - Transport mode encapsulation for IPv6.
- *
- * Copyright (C) 2002 USAGI/WIDE Project
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-#include <net/protocol.h>
-
-static struct xfrm_mode xfrm6_transport_mode = {
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_TRANSPORT,
-	.family = AF_INET6,
-};
-
-static int __init xfrm6_transport_init(void)
-{
-	return xfrm_register_mode(&xfrm6_transport_mode);
-}
-
-static void __exit xfrm6_transport_exit(void)
-{
-	xfrm_unregister_mode(&xfrm6_transport_mode);
-}
-
-module_init(xfrm6_transport_init);
-module_exit(xfrm6_transport_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TRANSPORT);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
deleted file mode 100644
index e5c928dd70e3..000000000000
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * xfrm6_mode_tunnel.c - Tunnel mode encapsulation for IPv6.
- *
- * Copyright (C) 2002 USAGI/WIDE Project
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dsfield.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ip6_route.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per RFC 2401.
- */
-static struct xfrm_mode xfrm6_tunnel_mode = {
-	.owner = THIS_MODULE,
-	.encap = XFRM_MODE_TUNNEL,
-	.flags = XFRM_MODE_FLAG_TUNNEL,
-	.family = AF_INET6,
-};
-
-static int __init xfrm6_mode_tunnel_init(void)
-{
-	return xfrm_register_mode(&xfrm6_tunnel_mode);
-}
-
-static void __exit xfrm6_mode_tunnel_exit(void)
-{
-	xfrm_unregister_mode(&xfrm6_tunnel_mode);
-}
-
-module_init(xfrm6_mode_tunnel_init);
-module_exit(xfrm6_mode_tunnel_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 74b53c13279b..b5a31c8e2088 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -351,7 +351,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
 
 static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct xfrm_mode *inner_mode = x->inner_mode;
+	const struct xfrm_mode *inner_mode = x->inner_mode;
 	const struct xfrm_state_afinfo *afinfo;
 	int err = -EAFNOSUPPORT;
 
@@ -394,7 +394,6 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
  */
 static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-#if IS_ENABLED(CONFIG_INET_XFRM_MODE_TRANSPORT)
 	int ihl = skb->data - skb_transport_header(skb);
 
 	if (skb->transport_header != skb->network_header) {
@@ -405,14 +404,11 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 	ip_hdr(skb)->tot_len = htons(skb->len + ihl);
 	skb_reset_transport_header(skb);
 	return 0;
-#else
-	return -EOPNOTSUPP;
-#endif
 }
 
 static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-#if IS_ENABLED(CONFIG_INET6_XFRM_MODE_TRANSPORT)
+#if IS_ENABLED(CONFIG_IPV6)
 	int ihl = skb->data - skb_transport_header(skb);
 
 	if (skb->transport_header != skb->network_header) {
@@ -425,7 +421,8 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb_reset_transport_header(skb);
 	return 0;
 #else
-	return -EOPNOTSUPP;
+	WARN_ON_ONCE(1);
+	return -EAFNOSUPPORT;
 #endif
 }
 
@@ -458,12 +455,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 {
 	const struct xfrm_state_afinfo *afinfo;
 	struct net *net = dev_net(skb->dev);
+	const struct xfrm_mode *inner_mode;
 	int err;
 	__be32 seq;
 	__be32 seq_hi;
 	struct xfrm_state *x = NULL;
 	xfrm_address_t *daddr;
-	struct xfrm_mode *inner_mode;
 	u32 mark = skb->mark;
 	unsigned int family = AF_UNSPEC;
 	int decaps = 0;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 93efb0965e7d..4fc49dbf3edf 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -244,8 +244,8 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
 
 static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
 {
+	const struct xfrm_mode *inner_mode;
 	struct pcpu_sw_netstats *tstats;
-	struct xfrm_mode *inner_mode;
 	struct net_device *dev;
 	struct xfrm_state *x;
 	struct xfrm_if *xi;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 17c4f58d28ea..3cb2a328a8ab 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -61,7 +61,6 @@ static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
  */
 static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-#if IS_ENABLED(CONFIG_INET_XFRM_MODE_TRANSPORT)
 	struct iphdr *iph = ip_hdr(skb);
 	int ihl = iph->ihl * 4;
 
@@ -74,10 +73,6 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	__skb_pull(skb, ihl);
 	memmove(skb_network_header(skb), iph, ihl);
 	return 0;
-#else
-	WARN_ON_ONCE(1);
-	return -EOPNOTSUPP;
-#endif
 }
 
 /* Add encapsulation header.
@@ -87,7 +82,7 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
  */
 static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-#if IS_ENABLED(CONFIG_INET6_XFRM_MODE_TRANSPORT)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct ipv6hdr *iph;
 	u8 *prevhdr;
 	int hdr_len;
@@ -107,7 +102,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 #else
 	WARN_ON_ONCE(1);
-	return -EOPNOTSUPP;
+	return -EAFNOSUPPORT;
 #endif
 }
 
@@ -118,7 +113,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
  */
 static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-#if IS_ENABLED(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct ipv6hdr *iph;
 	u8 *prevhdr;
 	int hdr_len;
@@ -140,7 +135,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 #else
 	WARN_ON_ONCE(1);
-	return -EOPNOTSUPP;
+	return -EAFNOSUPPORT;
 #endif
 }
 
@@ -624,7 +619,7 @@ EXPORT_SYMBOL_GPL(xfrm_output);
 static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	const struct xfrm_state_afinfo *afinfo;
-	struct xfrm_mode *inner_mode;
+	const struct xfrm_mode *inner_mode;
 	int err = -EAFNOSUPPORT;
 
 	if (x->sel.family == AF_UNSPEC)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 67122beb116c..1a5fd2296556 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2546,10 +2546,10 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 					    struct dst_entry *dst)
 {
 	const struct xfrm_state_afinfo *afinfo;
+	const struct xfrm_mode *inner_mode;
 	struct net *net = xp_net(policy);
 	unsigned long now = jiffies;
 	struct net_device *dev;
-	struct xfrm_mode *inner_mode;
 	struct xfrm_dst *xdst_prev = NULL;
 	struct xfrm_dst *xdst0 = NULL;
 	int i = 0;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 358b09f0d018..ace26f6dc790 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -330,92 +330,67 @@ static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
 	module_put(type->owner);
 }
 
-static DEFINE_SPINLOCK(xfrm_mode_lock);
-int xfrm_register_mode(struct xfrm_mode *mode)
-{
-	struct xfrm_state_afinfo *afinfo;
-	struct xfrm_mode **modemap;
-	int err;
-
-	if (unlikely(mode->encap >= XFRM_MODE_MAX))
-		return -EINVAL;
-
-	afinfo = xfrm_state_get_afinfo(mode->family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-
-	err = -EEXIST;
-	modemap = afinfo->mode_map;
-	spin_lock_bh(&xfrm_mode_lock);
-	if (modemap[mode->encap])
-		goto out;
-
-	err = -ENOENT;
-	if (!try_module_get(afinfo->owner))
-		goto out;
-
-	modemap[mode->encap] = mode;
-	err = 0;
-
-out:
-	spin_unlock_bh(&xfrm_mode_lock);
-	rcu_read_unlock();
-	return err;
-}
-EXPORT_SYMBOL(xfrm_register_mode);
-
-void xfrm_unregister_mode(struct xfrm_mode *mode)
-{
-	struct xfrm_state_afinfo *afinfo;
-	struct xfrm_mode **modemap;
-
-	afinfo = xfrm_state_get_afinfo(mode->family);
-	if (WARN_ON_ONCE(!afinfo))
-		return;
-
-	modemap = afinfo->mode_map;
-	spin_lock_bh(&xfrm_mode_lock);
-	if (likely(modemap[mode->encap] == mode)) {
-		modemap[mode->encap] = NULL;
-		module_put(afinfo->owner);
-	}
-
-	spin_unlock_bh(&xfrm_mode_lock);
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL(xfrm_unregister_mode);
-
-static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
-{
-	struct xfrm_state_afinfo *afinfo;
-	struct xfrm_mode *mode;
-	int modload_attempted = 0;
+static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = {
+	[XFRM_MODE_BEET] = {
+		.encap = XFRM_MODE_BEET,
+		.flags = XFRM_MODE_FLAG_TUNNEL,
+		.family = AF_INET,
+	},
+	[XFRM_MODE_TRANSPORT] = {
+		.encap = XFRM_MODE_TRANSPORT,
+		.family = AF_INET,
+	},
+	[XFRM_MODE_TUNNEL] = {
+		.encap = XFRM_MODE_TUNNEL,
+		.flags = XFRM_MODE_FLAG_TUNNEL,
+		.family = AF_INET,
+	},
+};
+
+static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
+	[XFRM_MODE_BEET] = {
+		.encap = XFRM_MODE_BEET,
+		.flags = XFRM_MODE_FLAG_TUNNEL,
+		.family = AF_INET6,
+	},
+	[XFRM_MODE_ROUTEOPTIMIZATION] = {
+		.encap = XFRM_MODE_ROUTEOPTIMIZATION,
+		.family = AF_INET6,
+	},
+	[XFRM_MODE_TRANSPORT] = {
+		.encap = XFRM_MODE_TRANSPORT,
+		.family = AF_INET6,
+	},
+	[XFRM_MODE_TUNNEL] = {
+		.encap = XFRM_MODE_TUNNEL,
+		.flags = XFRM_MODE_FLAG_TUNNEL,
+		.family = AF_INET6,
+	},
+};
+
+static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
+{
+	const struct xfrm_mode *mode;
 
 	if (unlikely(encap >= XFRM_MODE_MAX))
 		return NULL;
 
-retry:
-	afinfo = xfrm_state_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return NULL;
-
-	mode = READ_ONCE(afinfo->mode_map[encap]);
-	if (unlikely(mode && !try_module_get(mode->owner)))
-		mode = NULL;
-
-	rcu_read_unlock();
-	if (!mode && !modload_attempted) {
-		request_module("xfrm-mode-%d-%d", family, encap);
-		modload_attempted = 1;
-		goto retry;
+	switch (family) {
+	case AF_INET:
+		mode = &xfrm4_mode_map[encap];
+		if (mode->family == family)
+			return mode;
+		break;
+	case AF_INET6:
+		mode = &xfrm6_mode_map[encap];
+		if (mode->family == family)
+			return mode;
+		break;
+	default:
+		break;
 	}
 
-	return mode;
-}
-
-static void xfrm_put_mode(struct xfrm_mode *mode)
-{
-	module_put(mode->owner);
+	return NULL;
 }
 
 void xfrm_state_free(struct xfrm_state *x)
@@ -436,12 +411,6 @@ static void ___xfrm_state_destroy(struct xfrm_state *x)
 	kfree(x->coaddr);
 	kfree(x->replay_esn);
 	kfree(x->preplay_esn);
-	if (x->inner_mode)
-		xfrm_put_mode(x->inner_mode);
-	if (x->inner_mode_iaf)
-		xfrm_put_mode(x->inner_mode_iaf);
-	if (x->outer_mode)
-		xfrm_put_mode(x->outer_mode);
 	if (x->type_offload)
 		xfrm_put_type_offload(x->type_offload);
 	if (x->type) {
@@ -2235,8 +2204,8 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 
 int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 {
-	struct xfrm_state_afinfo *afinfo;
-	struct xfrm_mode *inner_mode;
+	const struct xfrm_mode *inner_mode;
+	const struct xfrm_state_afinfo *afinfo;
 	int family = x->props.family;
 	int err;
 
@@ -2262,24 +2231,21 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 			goto error;
 
 		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
-		    family != x->sel.family) {
-			xfrm_put_mode(inner_mode);
+		    family != x->sel.family)
 			goto error;
-		}
 
 		x->inner_mode = inner_mode;
 	} else {
-		struct xfrm_mode *inner_mode_iaf;
+		const struct xfrm_mode *inner_mode_iaf;
 		int iafamily = AF_INET;
 
 		inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
 		if (inner_mode == NULL)
 			goto error;
 
-		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
-			xfrm_put_mode(inner_mode);
+		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL))
 			goto error;
-		}
+
 		x->inner_mode = inner_mode;
 
 		if (x->props.family == AF_INET)
@@ -2289,8 +2255,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 		if (inner_mode_iaf) {
 			if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
 				x->inner_mode_iaf = inner_mode_iaf;
-			else
-				xfrm_put_mode(inner_mode_iaf);
 		}
 	}
 
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index e9c860d00416..474040448601 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -7,9 +7,7 @@ CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_IPV6=y
 CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_VETH=y
-CONFIG_INET_XFRM_MODE_TUNNEL=y
 CONFIG_NET_IPVTI=y
-CONFIG_INET6_XFRM_MODE_TUNNEL=y
 CONFIG_IPV6_VTI=y
 CONFIG_DUMMY=y
 CONFIG_BRIDGE=y
-- 
cgit v1.2.3


From 6978cdb129da13f46bcc4362639ba5ee8fc82921 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 27 Mar 2019 09:22:29 +0100
Subject: kselftests: extend nft_nat with inet family based nat hooks

With older nft versions, this will cause:
[..]
PASS: ipv6 ping to ns1 was ip6 NATted to ns2
/dev/stdin:4:30-31: Error: syntax error, unexpected to, expecting newline or semicolon
                ip daddr 10.0.1.99 dnat ip to 10.0.2.99
                                           ^^
SKIP: inet nat tests
PASS: ip IP masquerade for ns2
[..]

as there is currently no way to detect if nft will be able to parse
the inet format.

redirect and masquerade tests need to be skipped in this case for inet
too because nft userspace has overzealous family check and rejects their
use in the inet family.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/netfilter/nft_nat.sh | 130 +++++++++++++++++++--------
 1 file changed, 94 insertions(+), 36 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index 8ec76681605c..248905130d5d 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -6,6 +6,7 @@
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 ret=0
+test_inet_nat=true
 
 nft --version > /dev/null 2>&1
 if [ $? -ne 0 ];then
@@ -141,17 +142,24 @@ reset_counters()
 
 test_local_dnat6()
 {
+	local family=$1
 	local lret=0
+	local IPF=""
+
+	if [ $family = "inet" ];then
+		IPF="ip6"
+	fi
+
 ip netns exec ns0 nft -f - <<EOF
-table ip6 nat {
+table $family nat {
 	chain output {
 		type nat hook output priority 0; policy accept;
-		ip6 daddr dead:1::99 dnat to dead:2::99
+		ip6 daddr dead:1::99 dnat $IPF to dead:2::99
 	}
 }
 EOF
 	if [ $? -ne 0 ]; then
-		echo "SKIP: Could not add add ip6 dnat hook"
+		echo "SKIP: Could not add add $family dnat hook"
 		return $ksft_skip
 	fi
 
@@ -201,7 +209,7 @@ EOF
 		fi
 	done
 
-	test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2"
+	test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was $family NATted to ns2"
 	ip netns exec ns0 nft flush chain ip6 nat output
 
 	return $lret
@@ -209,15 +217,32 @@ EOF
 
 test_local_dnat()
 {
+	local family=$1
 	local lret=0
-ip netns exec ns0 nft -f - <<EOF
-table ip nat {
+	local IPF=""
+
+	if [ $family = "inet" ];then
+		IPF="ip"
+	fi
+
+ip netns exec ns0 nft -f - <<EOF 2>/dev/null
+table $family nat {
 	chain output {
 		type nat hook output priority 0; policy accept;
-		ip daddr 10.0.1.99 dnat to 10.0.2.99
+		ip daddr 10.0.1.99 dnat $IPF to 10.0.2.99
 	}
 }
 EOF
+	if [ $? -ne 0 ]; then
+		if [ $family = "inet" ];then
+			echo "SKIP: inet nat tests"
+			test_inet_nat=false
+			return $ksft_skip
+		fi
+		echo "SKIP: Could not add add $family dnat hook"
+		return $ksft_skip
+	fi
+
 	# ping netns1, expect rewrite to netns2
 	ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
 	if [ $? -ne 0 ]; then
@@ -264,9 +289,9 @@ EOF
 		fi
 	done
 
-	test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2"
+	test $lret -eq 0 && echo "PASS: ping to ns1 was $family NATted to ns2"
 
-	ip netns exec ns0 nft flush chain ip nat output
+	ip netns exec ns0 nft flush chain $family nat output
 
 	reset_counters
 	ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
@@ -313,7 +338,7 @@ EOF
 		fi
 	done
 
-	test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush"
+	test $lret -eq 0 && echo "PASS: ping to ns1 OK after $family nat output chain flush"
 
 	return $lret
 }
@@ -321,6 +346,7 @@ EOF
 
 test_masquerade6()
 {
+	local family=$1
 	local lret=0
 
 	ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -351,16 +377,21 @@ test_masquerade6()
 
 # add masquerading rule
 ip netns exec ns0 nft -f - <<EOF
-table ip6 nat {
+table $family nat {
 	chain postrouting {
 		type nat hook postrouting priority 0; policy accept;
 		meta oif veth0 masquerade
 	}
 }
 EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family masquerade hook"
+		return $ksft_skip
+	fi
+
 	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
 	if [ $? -ne 0 ] ; then
-		echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading"
+		echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading"
 		lret=1
 	fi
 
@@ -397,19 +428,20 @@ EOF
 		fi
 	done
 
-	ip netns exec ns0 nft flush chain ip6 nat postrouting
+	ip netns exec ns0 nft flush chain $family nat postrouting
 	if [ $? -ne 0 ]; then
-		echo "ERROR: Could not flush ip6 nat postrouting" 1>&2
+		echo "ERROR: Could not flush $family nat postrouting" 1>&2
 		lret=1
 	fi
 
-	test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2"
+	test $lret -eq 0 && echo "PASS: $family IPv6 masquerade for ns2"
 
 	return $lret
 }
 
 test_masquerade()
 {
+	local family=$1
 	local lret=0
 
 	ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
@@ -440,16 +472,21 @@ test_masquerade()
 
 # add masquerading rule
 ip netns exec ns0 nft -f - <<EOF
-table ip nat {
+table $family nat {
 	chain postrouting {
 		type nat hook postrouting priority 0; policy accept;
 		meta oif veth0 masquerade
 	}
 }
 EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family masquerade hook"
+		return $ksft_skip
+	fi
+
 	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
 	if [ $? -ne 0 ] ; then
-		echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading"
+		echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading"
 		lret=1
 	fi
 
@@ -485,19 +522,20 @@ EOF
 		fi
 	done
 
-	ip netns exec ns0 nft flush chain ip nat postrouting
+	ip netns exec ns0 nft flush chain $family nat postrouting
 	if [ $? -ne 0 ]; then
-		echo "ERROR: Could not flush nat postrouting" 1>&2
+		echo "ERROR: Could not flush $family nat postrouting" 1>&2
 		lret=1
 	fi
 
-	test $lret -eq 0 && echo "PASS: IP masquerade for ns2"
+	test $lret -eq 0 && echo "PASS: $family IP masquerade for ns2"
 
 	return $lret
 }
 
 test_redirect6()
 {
+	local family=$1
 	local lret=0
 
 	ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -527,16 +565,21 @@ test_redirect6()
 
 # add redirect rule
 ip netns exec ns0 nft -f - <<EOF
-table ip6 nat {
+table $family nat {
 	chain prerouting {
 		type nat hook prerouting priority 0; policy accept;
 		meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect
 	}
 }
 EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family redirect hook"
+		return $ksft_skip
+	fi
+
 	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
 	if [ $? -ne 0 ] ; then
-		echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect"
+		echo "ERROR: cannot ping ns1 from ns2 via ipv6 with active $family redirect"
 		lret=1
 	fi
 
@@ -560,19 +603,20 @@ EOF
 		fi
 	done
 
-	ip netns exec ns0 nft delete table ip6 nat
+	ip netns exec ns0 nft delete table $family nat
 	if [ $? -ne 0 ]; then
-		echo "ERROR: Could not delete ip6 nat table" 1>&2
+		echo "ERROR: Could not delete $family nat table" 1>&2
 		lret=1
 	fi
 
-	test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2"
+	test $lret -eq 0 && echo "PASS: $family IPv6 redirection for ns2"
 
 	return $lret
 }
 
 test_redirect()
 {
+	local family=$1
 	local lret=0
 
 	ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
@@ -603,16 +647,21 @@ test_redirect()
 
 # add redirect rule
 ip netns exec ns0 nft -f - <<EOF
-table ip nat {
+table $family nat {
 	chain prerouting {
 		type nat hook prerouting priority 0; policy accept;
 		meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect
 	}
 }
 EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family redirect hook"
+		return $ksft_skip
+	fi
+
 	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
 	if [ $? -ne 0 ] ; then
-		echo "ERROR: cannot ping ns1 from ns2 with active ip redirect"
+		echo "ERROR: cannot ping ns1 from ns2 with active $family ip redirect"
 		lret=1
 	fi
 
@@ -637,13 +686,13 @@ EOF
 		fi
 	done
 
-	ip netns exec ns0 nft delete table ip nat
+	ip netns exec ns0 nft delete table $family nat
 	if [ $? -ne 0 ]; then
-		echo "ERROR: Could not delete nat table" 1>&2
+		echo "ERROR: Could not delete $family nat table" 1>&2
 		lret=1
 	fi
 
-	test $lret -eq 0 && echo "PASS: IP redirection for ns2"
+	test $lret -eq 0 && echo "PASS: $family IP redirection for ns2"
 
 	return $lret
 }
@@ -746,16 +795,25 @@ if [ $ret -eq 0 ];then
 fi
 
 reset_counters
-test_local_dnat
-test_local_dnat6
+test_local_dnat ip
+test_local_dnat6 ip6
+reset_counters
+$test_inet_nat && test_local_dnat inet
+$test_inet_nat && test_local_dnat6 inet
 
 reset_counters
-test_masquerade
-test_masquerade6
+test_masquerade ip
+test_masquerade6 ip6
+reset_counters
+$test_inet_nat && test_masquerade inet
+$test_inet_nat && test_masquerade6 inet
 
 reset_counters
-test_redirect
-test_redirect6
+test_redirect ip
+test_redirect6 ip6
+reset_counters
+$test_inet_nat && test_redirect inet
+$test_inet_nat && test_redirect6 inet
 
 for i in 0 1 2; do ip netns del ns$i;done
 
-- 
cgit v1.2.3


From 228ddb3315baf03fc32ebb1cdd928c4839b49e13 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 5 Apr 2019 16:30:41 -0700
Subject: selftests: fib_tests: Add tests for ipv6 gateway with ipv4 route

Add tests for ipv6 gateway with ipv4 route. Tests include basic
single path with ping to verify connectivity and multipath.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 70 +++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 1080ff55a788..e941024869ff 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,7 +9,8 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics"
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw"
+
 VERBOSE=0
 PAUSE_ON_FAIL=no
 PAUSE=no
@@ -48,6 +49,7 @@ setup()
 {
 	set -e
 	ip netns add ns1
+	ip netns set ns1 auto
 	$IP link set dev lo up
 	ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
 	ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
@@ -698,6 +700,7 @@ route_setup()
 	set -e
 
 	ip netns add ns2
+	ip netns set ns2 auto
 	ip -netns ns2 link set dev lo up
 	ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
 	ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
@@ -1442,6 +1445,70 @@ ipv4_route_metrics_test()
 	route_cleanup
 }
 
+ipv4_route_v6_gw_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 route with IPv6 gateway tests"
+
+	route_setup
+	sleep 2
+
+	#
+	# single path route
+	#
+	run_cmd "$IP ro add 172.16.104.0/24 via inet6 2001:db8:101::2"
+	rc=$?
+	log_test $rc 0 "Single path route with IPv6 gateway"
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1"
+	fi
+
+	run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1"
+	log_test $rc 0 "Single path route with IPv6 gateway - ping"
+
+	run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2"
+	rc=$?
+	log_test $rc 0 "Single path route delete"
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.112.0/24"
+	fi
+
+	#
+	# multipath - v6 then v4
+	#
+	run_cmd "$IP ro add 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
+	rc=$?
+	log_test $rc 0 "Multipath route add - v6 nexthop then v4"
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	fi
+
+	run_cmd "$IP ro del 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
+	log_test $? 2 "    Multipath route delete - nexthops in wrong order"
+
+	run_cmd "$IP ro del 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
+	log_test $? 0 "    Multipath route delete exact match"
+
+	#
+	# multipath - v4 then v6
+	#
+	run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
+	rc=$?
+	log_test $rc 0 "Multipath route add - v4 nexthop then v6"
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 weight 1 nexthop via inet6 2001:db8:101::2 dev veth1 weight 1"
+	fi
+
+	run_cmd "$IP ro del 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
+	log_test $? 2 "    Multipath route delete - nexthops in wrong order"
+
+	run_cmd "$IP ro del 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
+	log_test $? 0 "    Multipath route delete exact match"
+
+	route_cleanup
+}
 
 ################################################################################
 # usage
@@ -1511,6 +1578,7 @@ do
 	ipv4_addr_metric)		ipv4_addr_metric_test;;
 	ipv6_route_metrics)		ipv6_route_metrics_test;;
 	ipv4_route_metrics)		ipv4_route_metrics_test;;
+	ipv4_route_v6_gw)		ipv4_route_v6_gw_test;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
-- 
cgit v1.2.3


From d8eca5bbb2be9bc7546f9e733786fa2f1a594c67 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Apr 2019 23:20:03 +0200
Subject: bpf: implement lookup-free direct value access for maps

This generic extension to BPF maps allows for directly loading
an address residing inside a BPF map value as a single BPF
ldimm64 instruction!

The idea is similar to what BPF_PSEUDO_MAP_FD does today, which
is a special src_reg flag for ldimm64 instruction that indicates
that inside the first part of the double insns's imm field is a
file descriptor which the verifier then replaces as a full 64bit
address of the map into both imm parts. For the newly added
BPF_PSEUDO_MAP_VALUE src_reg flag, the idea is the following:
the first part of the double insns's imm field is again a file
descriptor corresponding to the map, and the second part of the
imm field is an offset into the value. The verifier will then
replace both imm parts with an address that points into the BPF
map value at the given value offset for maps that support this
operation. Currently supported is array map with single entry.
It is possible to support more than just single map element by
reusing both 16bit off fields of the insns as a map index, so
full array map lookup could be expressed that way. It hasn't
been implemented here due to lack of concrete use case, but
could easily be done so in future in a compatible way, since
both off fields right now have to be 0 and would correctly
denote a map index 0.

The BPF_PSEUDO_MAP_VALUE is a distinct flag as otherwise with
BPF_PSEUDO_MAP_FD we could not differ offset 0 between load of
map pointer versus load of map's value at offset 0, and changing
BPF_PSEUDO_MAP_FD's encoding into off by one to differ between
regular map pointer and map value pointer would add unnecessary
complexity and increases barrier for debugability thus less
suitable. Using the second part of the imm field as an offset
into the value does /not/ come with limitations since maximum
possible value size is in u32 universe anyway.

This optimization allows for efficiently retrieving an address
to a map value memory area without having to issue a helper call
which needs to prepare registers according to calling convention,
etc, without needing the extra NULL test, and without having to
add the offset in an additional instruction to the value base
pointer. The verifier then treats the destination register as
PTR_TO_MAP_VALUE with constant reg->off from the user passed
offset from the second imm field, and guarantees that this is
within bounds of the map value. Any subsequent operations are
normally treated as typical map value handling without anything
extra needed from verification side.

The two map operations for direct value access have been added to
array map for now. In future other types could be supported as
well depending on the use case. The main use case for this commit
is to allow for BPF loader support for global variables that
reside in .data/.rodata/.bss sections such that we can directly
load the address of them with minimal additional infrastructure
required. Loader support has been added in subsequent commits for
libbpf library.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h               |  6 +++
 include/linux/bpf_verifier.h      |  4 ++
 include/uapi/linux/bpf.h          | 13 +++++-
 kernel/bpf/arraymap.c             | 32 +++++++++++++++
 kernel/bpf/core.c                 |  3 +-
 kernel/bpf/disasm.c               |  5 ++-
 kernel/bpf/syscall.c              | 28 +++++++++----
 kernel/bpf/verifier.c             | 86 ++++++++++++++++++++++++++++++---------
 tools/bpf/bpftool/xlated_dumper.c |  3 ++
 9 files changed, 149 insertions(+), 31 deletions(-)

(limited to 'tools')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a445194b5fb6..bd93a592dd29 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -57,6 +57,12 @@ struct bpf_map_ops {
 			     const struct btf *btf,
 			     const struct btf_type *key_type,
 			     const struct btf_type *value_type);
+
+	/* Direct value access helpers. */
+	int (*map_direct_value_addr)(const struct bpf_map *map,
+				     u64 *imm, u32 off);
+	int (*map_direct_value_meta)(const struct bpf_map *map,
+				     u64 imm, u32 *off);
 };
 
 struct bpf_map {
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index fc8254d6b569..b3ab61fe1932 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -224,6 +224,10 @@ struct bpf_insn_aux_data {
 		unsigned long map_state;	/* pointer/poison value for maps */
 		s32 call_imm;			/* saved imm field of call insn */
 		u32 alu_limit;			/* limit for add/sub register with pointer */
+		struct {
+			u32 map_index;		/* index into used_maps[] */
+			u32 map_off;		/* offset from value base address */
+		};
 	};
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
 	int sanitize_stack_off; /* stack slot to be cleared */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 837024512baf..26cfb5b2c964 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -255,8 +255,19 @@ enum bpf_attach_type {
  */
 #define BPF_F_ANY_ALIGNMENT	(1U << 1)
 
-/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
+ * two extensions:
+ *
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm:      map fd              map fd
+ * insn[1].imm:      0                   offset into value
+ * insn[0].off:      0                   0
+ * insn[1].off:      0                   0
+ * ldimm64 rewrite:  address of map      address of map[0]+offset
+ * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
+ */
 #define BPF_PSEUDO_MAP_FD	1
+#define BPF_PSEUDO_MAP_VALUE	2
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c72e0d8e1e65..1a6e9861d554 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -160,6 +160,36 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 	return array->value + array->elem_size * (index & array->index_mask);
 }
 
+static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
+				       u32 off)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+	if (map->max_entries != 1)
+		return -ENOTSUPP;
+	if (off >= map->value_size)
+		return -EINVAL;
+
+	*imm = (unsigned long)array->value;
+	return 0;
+}
+
+static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
+				       u32 *off)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u64 base = (unsigned long)array->value;
+	u64 range = array->elem_size;
+
+	if (map->max_entries != 1)
+		return -ENOTSUPP;
+	if (imm < base || imm >= base + range)
+		return -ENOENT;
+
+	*off = imm - base;
+	return 0;
+}
+
 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
 static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
@@ -419,6 +449,8 @@ const struct bpf_map_ops array_map_ops = {
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,
 	.map_gen_lookup = array_map_gen_lookup,
+	.map_direct_value_addr = array_map_direct_value_addr,
+	.map_direct_value_meta = array_map_direct_value_meta,
 	.map_seq_show_elem = array_map_seq_show_elem,
 	.map_check_btf = array_map_check_btf,
 };
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 2966cb368bf4..ace8c22c8b0e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -292,7 +292,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
 		dst[i] = fp->insnsi[i];
 		if (!was_ld_map &&
 		    dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
-		    dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
+		    (dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
+		     dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
 			was_ld_map = true;
 			dst[i].imm = 0;
 		} else if (was_ld_map &&
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index de73f55e42fd..d9ce383c0f9c 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -205,10 +205,11 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 			 * part of the ldimm64 insn is accessible.
 			 */
 			u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
-			bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
+			bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD ||
+				      insn->src_reg == BPF_PSEUDO_MAP_VALUE;
 			char tmp[64];
 
-			if (map_ptr && !allow_ptr_leaks)
+			if (is_ptr && !allow_ptr_leaks)
 				imm = 0;
 
 			verbose(cbs->private_data, "(%02x) r%d = %s\n",
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1d65e56594db..828518bb947b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2072,13 +2072,26 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
 }
 
 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
-					      unsigned long addr)
+					      unsigned long addr, u32 *off,
+					      u32 *type)
 {
+	const struct bpf_map *map;
 	int i;
 
-	for (i = 0; i < prog->aux->used_map_cnt; i++)
-		if (prog->aux->used_maps[i] == (void *)addr)
-			return prog->aux->used_maps[i];
+	for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
+		map = prog->aux->used_maps[i];
+		if (map == (void *)addr) {
+			*type = BPF_PSEUDO_MAP_FD;
+			return map;
+		}
+		if (!map->ops->map_direct_value_meta)
+			continue;
+		if (!map->ops->map_direct_value_meta(map, addr, off)) {
+			*type = BPF_PSEUDO_MAP_VALUE;
+			return map;
+		}
+	}
+
 	return NULL;
 }
 
@@ -2086,6 +2099,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
 {
 	const struct bpf_map *map;
 	struct bpf_insn *insns;
+	u32 off, type;
 	u64 imm;
 	int i;
 
@@ -2113,11 +2127,11 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
 			continue;
 
 		imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
-		map = bpf_map_from_imm(prog, imm);
+		map = bpf_map_from_imm(prog, imm, &off, &type);
 		if (map) {
-			insns[i].src_reg = BPF_PSEUDO_MAP_FD;
+			insns[i].src_reg = type;
 			insns[i].imm = map->id;
-			insns[i + 1].imm = 0;
+			insns[i + 1].imm = off;
 			continue;
 		}
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 48718e1da16d..6ab7a23fc924 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5056,18 +5056,12 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	return 0;
 }
 
-/* return the map pointer stored inside BPF_LD_IMM64 instruction */
-static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
-{
-	u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
-
-	return (struct bpf_map *) (unsigned long) imm64;
-}
-
 /* verify BPF_LD_IMM64 instruction */
 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
+	struct bpf_insn_aux_data *aux = cur_aux(env);
 	struct bpf_reg_state *regs = cur_regs(env);
+	struct bpf_map *map;
 	int err;
 
 	if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -5091,11 +5085,22 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return 0;
 	}
 
-	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
-	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
+	map = env->used_maps[aux->map_index];
+	mark_reg_known_zero(env, regs, insn->dst_reg);
+	regs[insn->dst_reg].map_ptr = map;
+
+	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
+		regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
+		regs[insn->dst_reg].off = aux->map_off;
+		if (map_value_has_spin_lock(map))
+			regs[insn->dst_reg].id = ++env->id_gen;
+	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
+		regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+	} else {
+		verbose(env, "bpf verifier is misconfigured\n");
+		return -EINVAL;
+	}
 
-	regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
-	regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
 	return 0;
 }
 
@@ -6803,8 +6808,10 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 		}
 
 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
+			struct bpf_insn_aux_data *aux;
 			struct bpf_map *map;
 			struct fd f;
+			u64 addr;
 
 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
@@ -6813,13 +6820,19 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 				return -EINVAL;
 			}
 
-			if (insn->src_reg == 0)
+			if (insn[0].src_reg == 0)
 				/* valid generic load 64-bit imm */
 				goto next_insn;
 
-			if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
-			    insn[1].imm != 0) {
-				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
+			/* In final convert_pseudo_ld_imm64() step, this is
+			 * converted into regular 64-bit imm load insn.
+			 */
+			if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
+			     insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
+			    (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
+			     insn[1].imm != 0)) {
+				verbose(env,
+					"unrecognized bpf_ld_imm64 insn\n");
 				return -EINVAL;
 			}
 
@@ -6837,16 +6850,47 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 				return err;
 			}
 
-			/* store map pointer inside BPF_LD_IMM64 instruction */
-			insn[0].imm = (u32) (unsigned long) map;
-			insn[1].imm = ((u64) (unsigned long) map) >> 32;
+			aux = &env->insn_aux_data[i];
+			if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
+				addr = (unsigned long)map;
+			} else {
+				u32 off = insn[1].imm;
+
+				if (off >= BPF_MAX_VAR_OFF) {
+					verbose(env, "direct value offset of %u is not allowed\n", off);
+					fdput(f);
+					return -EINVAL;
+				}
+
+				if (!map->ops->map_direct_value_addr) {
+					verbose(env, "no direct value access support for this map type\n");
+					fdput(f);
+					return -EINVAL;
+				}
+
+				err = map->ops->map_direct_value_addr(map, &addr, off);
+				if (err) {
+					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
+						map->value_size, off);
+					fdput(f);
+					return err;
+				}
+
+				aux->map_off = off;
+				addr += off;
+			}
+
+			insn[0].imm = (u32)addr;
+			insn[1].imm = addr >> 32;
 
 			/* check whether we recorded this map already */
-			for (j = 0; j < env->used_map_cnt; j++)
+			for (j = 0; j < env->used_map_cnt; j++) {
 				if (env->used_maps[j] == map) {
+					aux->map_index = j;
 					fdput(f);
 					goto next_insn;
 				}
+			}
 
 			if (env->used_map_cnt >= MAX_USED_MAPS) {
 				fdput(f);
@@ -6863,6 +6907,8 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 				fdput(f);
 				return PTR_ERR(map);
 			}
+
+			aux->map_index = env->used_map_cnt;
 			env->used_maps[env->used_map_cnt++] = map;
 
 			if (bpf_map_is_cgroup_storage(map) &&
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 7073dbe1ff27..0bb17bf88b18 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -195,6 +195,9 @@ static const char *print_imm(void *private_data,
 	if (insn->src_reg == BPF_PSEUDO_MAP_FD)
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "map[id:%u]", insn->imm);
+	else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
 	else
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "0x%llx", (unsigned long long)full_imm);
-- 
cgit v1.2.3


From c83fef6bc5627bd0484a151212449270ab294cd9 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Apr 2019 23:20:11 +0200
Subject: bpf: sync {btf, bpf}.h uapi header from tools infrastructure

Pull in latest changes from both headers, so we can make use of
them in libbpf.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
 tools/include/uapi/linux/btf.h | 32 ++++++++++++++++++++++++++++----
 2 files changed, 46 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 837024512baf..af1cbd951f26 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -105,6 +105,7 @@ enum bpf_cmd {
 	BPF_BTF_GET_FD_BY_ID,
 	BPF_TASK_FD_QUERY,
 	BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+	BPF_MAP_FREEZE,
 };
 
 enum bpf_map_type {
@@ -255,8 +256,19 @@ enum bpf_attach_type {
  */
 #define BPF_F_ANY_ALIGNMENT	(1U << 1)
 
-/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
+ * two extensions:
+ *
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm:      map fd              map fd
+ * insn[1].imm:      0                   offset into value
+ * insn[0].off:      0                   0
+ * insn[1].off:      0                   0
+ * ldimm64 rewrite:  address of map      address of map[0]+offset
+ * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
+ */
 #define BPF_PSEUDO_MAP_FD	1
+#define BPF_PSEUDO_MAP_VALUE	2
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
@@ -283,7 +295,7 @@ enum bpf_attach_type {
 
 #define BPF_OBJ_NAME_LEN 16U
 
-/* Flags for accessing BPF object */
+/* Flags for accessing BPF object from syscall side. */
 #define BPF_F_RDONLY		(1U << 3)
 #define BPF_F_WRONLY		(1U << 4)
 
@@ -293,6 +305,10 @@ enum bpf_attach_type {
 /* Zero-initialize hash function seed. This should only be used for testing. */
 #define BPF_F_ZERO_SEED		(1U << 6)
 
+/* Flags for accessing BPF object from program side. */
+#define BPF_F_RDONLY_PROG	(1U << 7)
+#define BPF_F_WRONLY_PROG	(1U << 8)
+
 /* flags for BPF_PROG_QUERY */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 7b7475ef2f17..9310652ca4f9 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -39,11 +39,11 @@ struct btf_type {
 	 *             struct, union and fwd
 	 */
 	__u32 info;
-	/* "size" is used by INT, ENUM, STRUCT and UNION.
+	/* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
 	 * "size" tells the size of the type it is describing.
 	 *
 	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
-	 * FUNC and FUNC_PROTO.
+	 * FUNC, FUNC_PROTO and VAR.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@@ -70,8 +70,10 @@ struct btf_type {
 #define BTF_KIND_RESTRICT	11	/* Restrict	*/
 #define BTF_KIND_FUNC		12	/* Function	*/
 #define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
-#define BTF_KIND_MAX		13
-#define NR_BTF_KINDS		14
+#define BTF_KIND_VAR		14	/* Variable	*/
+#define BTF_KIND_DATASEC	15	/* Section	*/
+#define BTF_KIND_MAX		BTF_KIND_DATASEC
+#define NR_BTF_KINDS		(BTF_KIND_MAX + 1)
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
@@ -138,4 +140,26 @@ struct btf_param {
 	__u32	type;
 };
 
+enum {
+	BTF_VAR_STATIC = 0,
+	BTF_VAR_GLOBAL_ALLOCATED,
+};
+
+/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+ * additional information related to the variable such as its linkage.
+ */
+struct btf_var {
+	__u32	linkage;
+};
+
+/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo"
+ * to describe all BTF_KIND_VAR types it contains along with it's
+ * in-section offset as well as size.
+ */
+struct btf_var_secinfo {
+	__u32	type;
+	__u32	offset;
+	__u32	size;
+};
+
 #endif /* _UAPI__LINUX_BTF_H__ */
-- 
cgit v1.2.3


From f8c7a4d4dc39991c1bc05847ea4378282708f935 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Tue, 9 Apr 2019 23:20:12 +0200
Subject: bpf, libbpf: refactor relocation handling

Adjust the code for relocations slightly with no functional changes,
so that upcoming patches that will introduce support for relocations
into the .data, .rodata and .bss sections can be added independent
of these changes.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 62 ++++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 30 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 1ebfe7943d53..6dba0f01673b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -871,20 +871,20 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 				obj->efile.symbols = data;
 				obj->efile.strtabidx = sh.sh_link;
 			}
-		} else if ((sh.sh_type == SHT_PROGBITS) &&
-			   (sh.sh_flags & SHF_EXECINSTR) &&
-			   (data->d_size > 0)) {
-			if (strcmp(name, ".text") == 0)
-				obj->efile.text_shndx = idx;
-			err = bpf_object__add_program(obj, data->d_buf,
-						      data->d_size, name, idx);
-			if (err) {
-				char errmsg[STRERR_BUFSIZE];
-				char *cp = libbpf_strerror_r(-err, errmsg,
-							     sizeof(errmsg));
-
-				pr_warning("failed to alloc program %s (%s): %s",
-					   name, obj->path, cp);
+		} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
+			if (sh.sh_flags & SHF_EXECINSTR) {
+				if (strcmp(name, ".text") == 0)
+					obj->efile.text_shndx = idx;
+				err = bpf_object__add_program(obj, data->d_buf,
+							      data->d_size, name, idx);
+				if (err) {
+					char errmsg[STRERR_BUFSIZE];
+					char *cp = libbpf_strerror_r(-err, errmsg,
+								     sizeof(errmsg));
+
+					pr_warning("failed to alloc program %s (%s): %s",
+						   name, obj->path, cp);
+				}
 			}
 		} else if (sh.sh_type == SHT_REL) {
 			void *reloc = obj->efile.reloc;
@@ -1046,24 +1046,26 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 			return -LIBBPF_ERRNO__RELOC;
 		}
 
-		/* TODO: 'maps' is sorted. We can use bsearch to make it faster. */
-		for (map_idx = 0; map_idx < nr_maps; map_idx++) {
-			if (maps[map_idx].offset == sym.st_value) {
-				pr_debug("relocation: find map %zd (%s) for insn %u\n",
-					 map_idx, maps[map_idx].name, insn_idx);
-				break;
+		if (sym.st_shndx == maps_shndx) {
+			/* TODO: 'maps' is sorted. We can use bsearch to make it faster. */
+			for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+				if (maps[map_idx].offset == sym.st_value) {
+					pr_debug("relocation: find map %zd (%s) for insn %u\n",
+						 map_idx, maps[map_idx].name, insn_idx);
+					break;
+				}
 			}
-		}
 
-		if (map_idx >= nr_maps) {
-			pr_warning("bpf relocation: map_idx %d large than %d\n",
-				   (int)map_idx, (int)nr_maps - 1);
-			return -LIBBPF_ERRNO__RELOC;
-		}
+			if (map_idx >= nr_maps) {
+				pr_warning("bpf relocation: map_idx %d large than %d\n",
+					   (int)map_idx, (int)nr_maps - 1);
+				return -LIBBPF_ERRNO__RELOC;
+			}
 
-		prog->reloc_desc[i].type = RELO_LD64;
-		prog->reloc_desc[i].insn_idx = insn_idx;
-		prog->reloc_desc[i].map_idx = map_idx;
+			prog->reloc_desc[i].type = RELO_LD64;
+			prog->reloc_desc[i].insn_idx = insn_idx;
+			prog->reloc_desc[i].map_idx = map_idx;
+		}
 	}
 	return 0;
 }
@@ -1425,7 +1427,7 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 			}
 			insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
 			insns[insn_idx].imm = obj->maps[map_idx].fd;
-		} else {
+		} else if (prog->reloc_desc[i].type == RELO_CALL) {
 			err = bpf_program__reloc_text(prog, obj,
 						      &prog->reloc_desc[i]);
 			if (err)
-- 
cgit v1.2.3


From d859900c4c56dc4f0f8894c92a01dad86917453e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Apr 2019 23:20:13 +0200
Subject: bpf, libbpf: support global data/bss/rodata sections

This work adds BPF loader support for global data sections
to libbpf. This allows to write BPF programs in more natural
C-like way by being able to define global variables and const
data.

Back at LPC 2018 [0] we presented a first prototype which
implemented support for global data sections by extending BPF
syscall where union bpf_attr would get additional memory/size
pair for each section passed during prog load in order to later
add this base address into the ldimm64 instruction along with
the user provided offset when accessing a variable. Consensus
from LPC was that for proper upstream support, it would be
more desirable to use maps instead of bpf_attr extension as
this would allow for introspection of these sections as well
as potential live updates of their content. This work follows
this path by taking the following steps from loader side:

 1) In bpf_object__elf_collect() step we pick up ".data",
    ".rodata", and ".bss" section information.

 2) If present, in bpf_object__init_internal_map() we add
    maps to the obj's map array that corresponds to each
    of the present sections. Given section size and access
    properties can differ, a single entry array map is
    created with value size that is corresponding to the
    ELF section size of .data, .bss or .rodata. These
    internal maps are integrated into the normal map
    handling of libbpf such that when user traverses all
    obj maps, they can be differentiated from user-created
    ones via bpf_map__is_internal(). In later steps when
    we actually create these maps in the kernel via
    bpf_object__create_maps(), then for .data and .rodata
    sections their content is copied into the map through
    bpf_map_update_elem(). For .bss this is not necessary
    since array map is already zero-initialized by default.
    Additionally, for .rodata the map is frozen as read-only
    after setup, such that neither from program nor syscall
    side writes would be possible.

 3) In bpf_program__collect_reloc() step, we record the
    corresponding map, insn index, and relocation type for
    the global data.

 4) And last but not least in the actual relocation step in
    bpf_program__relocate(), we mark the ldimm64 instruction
    with src_reg = BPF_PSEUDO_MAP_VALUE where in the first
    imm field the map's file descriptor is stored as similarly
    done as in BPF_PSEUDO_MAP_FD, and in the second imm field
    (as ldimm64 is 2-insn wide) we store the access offset
    into the section. Given these maps have only single element
    ldimm64's off remains zero in both parts.

 5) On kernel side, this special marked BPF_PSEUDO_MAP_VALUE
    load will then store the actual target address in order
    to have a 'map-lookup'-free access. That is, the actual
    map value base address + offset. The destination register
    in the verifier will then be marked as PTR_TO_MAP_VALUE,
    containing the fixed offset as reg->off and backing BPF
    map as reg->map_ptr. Meaning, it's treated as any other
    normal map value from verification side, only with
    efficient, direct value access instead of actual call to
    map lookup helper as in the typical case.

Currently, only support for static global variables has been
added, and libbpf rejects non-static global variables from
loading. This can be lifted until we have proper semantics
for how BPF will treat multi-object BPF loads. From BTF side,
libbpf will set the value type id of the types corresponding
to the ".bss", ".data" and ".rodata" names which LLVM will
emit without the object name prefix. The key type will be
left as zero, thus making use of the key-less BTF option in
array maps.

Simple example dump of program using globals vars in each
section:

  # bpftool prog
  [...]
  6784: sched_cls  name load_static_dat  tag a7e1291567277844  gpl
        loaded_at 2019-03-11T15:39:34+0000  uid 0
        xlated 1776B  jited 993B  memlock 4096B  map_ids 2238,2237,2235,2236,2239,2240

  # bpftool map show id 2237
  2237: array  name test_glo.bss  flags 0x0
        key 4B  value 64B  max_entries 1  memlock 4096B
  # bpftool map show id 2235
  2235: array  name test_glo.data  flags 0x0
        key 4B  value 64B  max_entries 1  memlock 4096B
  # bpftool map show id 2236
  2236: array  name test_glo.rodata  flags 0x80
        key 4B  value 96B  max_entries 1  memlock 4096B

  # bpftool prog dump xlated id 6784
  int load_static_data(struct __sk_buff * skb):
  ; int load_static_data(struct __sk_buff *skb)
     0: (b7) r6 = 0
  ; test_reloc(number, 0, &num0);
     1: (63) *(u32 *)(r10 -4) = r6
     2: (bf) r2 = r10
  ; int load_static_data(struct __sk_buff *skb)
     3: (07) r2 += -4
  ; test_reloc(number, 0, &num0);
     4: (18) r1 = map[id:2238]
     6: (18) r3 = map[id:2237][0]+0    <-- direct addr in .bss area
     8: (b7) r4 = 0
     9: (85) call array_map_update_elem#100464
    10: (b7) r1 = 1
  ; test_reloc(number, 1, &num1);
  [...]
  ; test_reloc(string, 2, str2);
   120: (18) r8 = map[id:2237][0]+16   <-- same here at offset +16
   122: (18) r1 = map[id:2239]
   124: (18) r3 = map[id:2237][0]+16
   126: (b7) r4 = 0
   127: (85) call array_map_update_elem#100464
   128: (b7) r1 = 120
  ; str1[5] = 'x';
   129: (73) *(u8 *)(r9 +5) = r1
  ; test_reloc(string, 3, str1);
   130: (b7) r1 = 3
   131: (63) *(u32 *)(r10 -4) = r1
   132: (b7) r9 = 3
   133: (bf) r2 = r10
  ; int load_static_data(struct __sk_buff *skb)
   134: (07) r2 += -4
  ; test_reloc(string, 3, str1);
   135: (18) r1 = map[id:2239]
   137: (18) r3 = map[id:2235][0]+16   <-- direct addr in .data area
   139: (b7) r4 = 0
   140: (85) call array_map_update_elem#100464
   141: (b7) r1 = 111
  ; __builtin_memcpy(&str2[2], "hello", sizeof("hello"));
   142: (73) *(u8 *)(r8 +6) = r1       <-- further access based on .bss data
   143: (b7) r1 = 108
   144: (73) *(u8 *)(r8 +5) = r1
  [...]

For Cilium use-case in particular, this enables migrating configuration
constants from Cilium daemon's generated header defines into global
data sections such that expensive runtime recompilations with LLVM can
be avoided altogether. Instead, the ELF file becomes effectively a
"template", meaning, it is compiled only once (!) and the Cilium daemon
will then rewrite relevant configuration data from the ELF's .data or
.rodata sections directly instead of recompiling the program. The
updated ELF is then loaded into the kernel and atomically replaces
the existing program in the networking datapath. More info in [0].

Based upon recent fix in LLVM, commit c0db6b6bd444 ("[BPF] Don't fail
for static variables").

  [0] LPC 2018, BPF track, "ELF relocation for static data in BPF",
      http://vger.kernel.org/lpc-bpf2018.html#session-3

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/Makefile   |   2 +-
 tools/lib/bpf/bpf.c      |  10 ++
 tools/lib/bpf/bpf.h      |   1 +
 tools/lib/bpf/libbpf.c   | 342 ++++++++++++++++++++++++++++++++++++++++-------
 tools/lib/bpf/libbpf.h   |   1 +
 tools/lib/bpf/libbpf.map |   6 +
 6 files changed, 314 insertions(+), 48 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 2a578bfc0bca..008344507700 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -3,7 +3,7 @@
 
 BPF_VERSION = 0
 BPF_PATCHLEVEL = 0
-BPF_EXTRAVERSION = 2
+BPF_EXTRAVERSION = 3
 
 MAKEFLAGS += --no-print-directory
 
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index a1db869a6fda..c039094ad3aa 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -429,6 +429,16 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key)
 	return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
 }
 
+int bpf_map_freeze(int fd)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+}
+
 int bpf_obj_pin(int fd, const char *pathname)
 {
 	union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index e2c0df7b831f..c9d218d21453 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -117,6 +117,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
 					      void *value);
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
+LIBBPF_API int bpf_map_freeze(int fd);
 LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
 LIBBPF_API int bpf_obj_get(const char *pathname);
 LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6dba0f01673b..f7b245fbb960 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7,6 +7,7 @@
  * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
  * Copyright (C) 2015 Huawei Inc.
  * Copyright (C) 2017 Nicira, Inc.
+ * Copyright (C) 2019 Isovalent, Inc.
  */
 
 #ifndef _GNU_SOURCE
@@ -149,6 +150,7 @@ struct bpf_program {
 		enum {
 			RELO_LD64,
 			RELO_CALL,
+			RELO_DATA,
 		} type;
 		int insn_idx;
 		union {
@@ -182,6 +184,19 @@ struct bpf_program {
 	__u32 line_info_cnt;
 };
 
+enum libbpf_map_type {
+	LIBBPF_MAP_UNSPEC,
+	LIBBPF_MAP_DATA,
+	LIBBPF_MAP_BSS,
+	LIBBPF_MAP_RODATA,
+};
+
+static const char * const libbpf_type_to_btf_name[] = {
+	[LIBBPF_MAP_DATA]	= ".data",
+	[LIBBPF_MAP_BSS]	= ".bss",
+	[LIBBPF_MAP_RODATA]	= ".rodata",
+};
+
 struct bpf_map {
 	int fd;
 	char *name;
@@ -193,11 +208,18 @@ struct bpf_map {
 	__u32 btf_value_type_id;
 	void *priv;
 	bpf_map_clear_priv_t clear_priv;
+	enum libbpf_map_type libbpf_type;
+};
+
+struct bpf_secdata {
+	void *rodata;
+	void *data;
 };
 
 static LIST_HEAD(bpf_objects_list);
 
 struct bpf_object {
+	char name[BPF_OBJ_NAME_LEN];
 	char license[64];
 	__u32 kern_version;
 
@@ -205,6 +227,7 @@ struct bpf_object {
 	size_t nr_programs;
 	struct bpf_map *maps;
 	size_t nr_maps;
+	struct bpf_secdata sections;
 
 	bool loaded;
 	bool has_pseudo_calls;
@@ -220,6 +243,9 @@ struct bpf_object {
 		Elf *elf;
 		GElf_Ehdr ehdr;
 		Elf_Data *symbols;
+		Elf_Data *data;
+		Elf_Data *rodata;
+		Elf_Data *bss;
 		size_t strtabidx;
 		struct {
 			GElf_Shdr shdr;
@@ -228,6 +254,9 @@ struct bpf_object {
 		int nr_reloc;
 		int maps_shndx;
 		int text_shndx;
+		int data_shndx;
+		int rodata_shndx;
+		int bss_shndx;
 	} efile;
 	/*
 	 * All loaded bpf_object is linked in a list, which is
@@ -449,6 +478,7 @@ static struct bpf_object *bpf_object__new(const char *path,
 					  size_t obj_buf_sz)
 {
 	struct bpf_object *obj;
+	char *end;
 
 	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
 	if (!obj) {
@@ -457,8 +487,14 @@ static struct bpf_object *bpf_object__new(const char *path,
 	}
 
 	strcpy(obj->path, path);
-	obj->efile.fd = -1;
+	/* Using basename() GNU version which doesn't modify arg. */
+	strncpy(obj->name, basename((void *)path),
+		sizeof(obj->name) - 1);
+	end = strchr(obj->name, '.');
+	if (end)
+		*end = 0;
 
+	obj->efile.fd = -1;
 	/*
 	 * Caller of this function should also calls
 	 * bpf_object__elf_finish() after data collection to return
@@ -468,6 +504,9 @@ static struct bpf_object *bpf_object__new(const char *path,
 	obj->efile.obj_buf = obj_buf;
 	obj->efile.obj_buf_sz = obj_buf_sz;
 	obj->efile.maps_shndx = -1;
+	obj->efile.data_shndx = -1;
+	obj->efile.rodata_shndx = -1;
+	obj->efile.bss_shndx = -1;
 
 	obj->loaded = false;
 
@@ -486,6 +525,9 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
 		obj->efile.elf = NULL;
 	}
 	obj->efile.symbols = NULL;
+	obj->efile.data = NULL;
+	obj->efile.rodata = NULL;
+	obj->efile.bss = NULL;
 
 	zfree(&obj->efile.reloc);
 	obj->efile.nr_reloc = 0;
@@ -627,27 +669,76 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
 	return false;
 }
 
+static bool bpf_object__has_maps(const struct bpf_object *obj)
+{
+	return obj->efile.maps_shndx >= 0 ||
+	       obj->efile.data_shndx >= 0 ||
+	       obj->efile.rodata_shndx >= 0 ||
+	       obj->efile.bss_shndx >= 0;
+}
+
+static int
+bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
+			      enum libbpf_map_type type, Elf_Data *data,
+			      void **data_buff)
+{
+	struct bpf_map_def *def = &map->def;
+	char map_name[BPF_OBJ_NAME_LEN];
+
+	map->libbpf_type = type;
+	map->offset = ~(typeof(map->offset))0;
+	snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,
+		 libbpf_type_to_btf_name[type]);
+	map->name = strdup(map_name);
+	if (!map->name) {
+		pr_warning("failed to alloc map name\n");
+		return -ENOMEM;
+	}
+
+	def->type = BPF_MAP_TYPE_ARRAY;
+	def->key_size = sizeof(int);
+	def->value_size = data->d_size;
+	def->max_entries = 1;
+	def->map_flags = type == LIBBPF_MAP_RODATA ?
+			 BPF_F_RDONLY_PROG : 0;
+	if (data_buff) {
+		*data_buff = malloc(data->d_size);
+		if (!*data_buff) {
+			zfree(&map->name);
+			pr_warning("failed to alloc map content buffer\n");
+			return -ENOMEM;
+		}
+		memcpy(*data_buff, data->d_buf, data->d_size);
+	}
+
+	pr_debug("map %ld is \"%s\"\n", map - obj->maps, map->name);
+	return 0;
+}
+
 static int
 bpf_object__init_maps(struct bpf_object *obj, int flags)
 {
+	int i, map_idx, map_def_sz, nr_syms, nr_maps = 0, nr_maps_glob = 0;
 	bool strict = !(flags & MAPS_RELAX_COMPAT);
-	int i, map_idx, map_def_sz, nr_maps = 0;
-	Elf_Scn *scn;
-	Elf_Data *data = NULL;
 	Elf_Data *symbols = obj->efile.symbols;
+	Elf_Data *data = NULL;
+	int ret = 0;
 
-	if (obj->efile.maps_shndx < 0)
-		return -EINVAL;
 	if (!symbols)
 		return -EINVAL;
+	nr_syms = symbols->d_size / sizeof(GElf_Sym);
 
-	scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
-	if (scn)
-		data = elf_getdata(scn, NULL);
-	if (!scn || !data) {
-		pr_warning("failed to get Elf_Data from map section %d\n",
-			   obj->efile.maps_shndx);
-		return -EINVAL;
+	if (obj->efile.maps_shndx >= 0) {
+		Elf_Scn *scn = elf_getscn(obj->efile.elf,
+					  obj->efile.maps_shndx);
+
+		if (scn)
+			data = elf_getdata(scn, NULL);
+		if (!scn || !data) {
+			pr_warning("failed to get Elf_Data from map section %d\n",
+				   obj->efile.maps_shndx);
+			return -EINVAL;
+		}
 	}
 
 	/*
@@ -657,7 +748,13 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 	 *
 	 * TODO: Detect array of map and report error.
 	 */
-	for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+	if (obj->efile.data_shndx >= 0)
+		nr_maps_glob++;
+	if (obj->efile.rodata_shndx >= 0)
+		nr_maps_glob++;
+	if (obj->efile.bss_shndx >= 0)
+		nr_maps_glob++;
+	for (i = 0; data && i < nr_syms; i++) {
 		GElf_Sym sym;
 
 		if (!gelf_getsym(symbols, i, &sym))
@@ -670,19 +767,21 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 	/* Alloc obj->maps and fill nr_maps. */
 	pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path,
 		 nr_maps, data->d_size);
-
-	if (!nr_maps)
+	if (!nr_maps && !nr_maps_glob)
 		return 0;
 
 	/* Assume equally sized map definitions */
-	map_def_sz = data->d_size / nr_maps;
-	if (!data->d_size || (data->d_size % nr_maps) != 0) {
-		pr_warning("unable to determine map definition size "
-			   "section %s, %d maps in %zd bytes\n",
-			   obj->path, nr_maps, data->d_size);
-		return -EINVAL;
+	if (data) {
+		map_def_sz = data->d_size / nr_maps;
+		if (!data->d_size || (data->d_size % nr_maps) != 0) {
+			pr_warning("unable to determine map definition size "
+				   "section %s, %d maps in %zd bytes\n",
+				   obj->path, nr_maps, data->d_size);
+			return -EINVAL;
+		}
 	}
 
+	nr_maps += nr_maps_glob;
 	obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
 	if (!obj->maps) {
 		pr_warning("alloc maps for object failed\n");
@@ -703,7 +802,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 	/*
 	 * Fill obj->maps using data in "maps" section.
 	 */
-	for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+	for (i = 0, map_idx = 0; data && i < nr_syms; i++) {
 		GElf_Sym sym;
 		const char *map_name;
 		struct bpf_map_def *def;
@@ -716,6 +815,8 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 		map_name = elf_strptr(obj->efile.elf,
 				      obj->efile.strtabidx,
 				      sym.st_name);
+
+		obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC;
 		obj->maps[map_idx].offset = sym.st_value;
 		if (sym.st_value + map_def_sz > data->d_size) {
 			pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
@@ -764,8 +865,27 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 		map_idx++;
 	}
 
-	qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map);
-	return 0;
+	/*
+	 * Populate rest of obj->maps with libbpf internal maps.
+	 */
+	if (obj->efile.data_shndx >= 0)
+		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
+						    LIBBPF_MAP_DATA,
+						    obj->efile.data,
+						    &obj->sections.data);
+	if (!ret && obj->efile.rodata_shndx >= 0)
+		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
+						    LIBBPF_MAP_RODATA,
+						    obj->efile.rodata,
+						    &obj->sections.rodata);
+	if (!ret && obj->efile.bss_shndx >= 0)
+		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
+						    LIBBPF_MAP_BSS,
+						    obj->efile.bss, NULL);
+	if (!ret)
+		qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
+		      compare_bpf_map);
+	return ret;
 }
 
 static bool section_have_execinstr(struct bpf_object *obj, int idx)
@@ -885,6 +1005,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 					pr_warning("failed to alloc program %s (%s): %s",
 						   name, obj->path, cp);
 				}
+			} else if (strcmp(name, ".data") == 0) {
+				obj->efile.data = data;
+				obj->efile.data_shndx = idx;
+			} else if (strcmp(name, ".rodata") == 0) {
+				obj->efile.rodata = data;
+				obj->efile.rodata_shndx = idx;
+			} else {
+				pr_debug("skip section(%d) %s\n", idx, name);
 			}
 		} else if (sh.sh_type == SHT_REL) {
 			void *reloc = obj->efile.reloc;
@@ -912,6 +1040,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 				obj->efile.reloc[n].shdr = sh;
 				obj->efile.reloc[n].data = data;
 			}
+		} else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
+			obj->efile.bss = data;
+			obj->efile.bss_shndx = idx;
 		} else {
 			pr_debug("skip section(%d) %s\n", idx, name);
 		}
@@ -938,7 +1069,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 			}
 		}
 	}
-	if (obj->efile.maps_shndx >= 0) {
+	if (bpf_object__has_maps(obj)) {
 		err = bpf_object__init_maps(obj, flags);
 		if (err)
 			goto out;
@@ -974,13 +1105,46 @@ bpf_object__find_program_by_title(struct bpf_object *obj, const char *title)
 	return NULL;
 }
 
+static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
+				      int shndx)
+{
+	return shndx == obj->efile.data_shndx ||
+	       shndx == obj->efile.bss_shndx ||
+	       shndx == obj->efile.rodata_shndx;
+}
+
+static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
+				      int shndx)
+{
+	return shndx == obj->efile.maps_shndx;
+}
+
+static bool bpf_object__relo_in_known_section(const struct bpf_object *obj,
+					      int shndx)
+{
+	return shndx == obj->efile.text_shndx ||
+	       bpf_object__shndx_is_maps(obj, shndx) ||
+	       bpf_object__shndx_is_data(obj, shndx);
+}
+
+static enum libbpf_map_type
+bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
+{
+	if (shndx == obj->efile.data_shndx)
+		return LIBBPF_MAP_DATA;
+	else if (shndx == obj->efile.bss_shndx)
+		return LIBBPF_MAP_BSS;
+	else if (shndx == obj->efile.rodata_shndx)
+		return LIBBPF_MAP_RODATA;
+	else
+		return LIBBPF_MAP_UNSPEC;
+}
+
 static int
 bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 			   Elf_Data *data, struct bpf_object *obj)
 {
 	Elf_Data *symbols = obj->efile.symbols;
-	int text_shndx = obj->efile.text_shndx;
-	int maps_shndx = obj->efile.maps_shndx;
 	struct bpf_map *maps = obj->maps;
 	size_t nr_maps = obj->nr_maps;
 	int i, nrels;
@@ -1000,7 +1164,10 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 		GElf_Sym sym;
 		GElf_Rel rel;
 		unsigned int insn_idx;
+		unsigned int shdr_idx;
 		struct bpf_insn *insns = prog->insns;
+		enum libbpf_map_type type;
+		const char *name;
 		size_t map_idx;
 
 		if (!gelf_getrel(data, i, &rel)) {
@@ -1015,13 +1182,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 				   GELF_R_SYM(rel.r_info));
 			return -LIBBPF_ERRNO__FORMAT;
 		}
-		pr_debug("relo for %lld value %lld name %d\n",
+
+		name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+				  sym.st_name) ? : "<?>";
+
+		pr_debug("relo for %lld value %lld name %d (\'%s\')\n",
 			 (long long) (rel.r_info >> 32),
-			 (long long) sym.st_value, sym.st_name);
+			 (long long) sym.st_value, sym.st_name, name);
 
-		if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
-			pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
-				   prog->section_name, sym.st_shndx);
+		shdr_idx = sym.st_shndx;
+		if (!bpf_object__relo_in_known_section(obj, shdr_idx)) {
+			pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n",
+				   prog->section_name, shdr_idx);
 			return -LIBBPF_ERRNO__RELOC;
 		}
 
@@ -1046,10 +1218,22 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 			return -LIBBPF_ERRNO__RELOC;
 		}
 
-		if (sym.st_shndx == maps_shndx) {
-			/* TODO: 'maps' is sorted. We can use bsearch to make it faster. */
+		if (bpf_object__shndx_is_maps(obj, shdr_idx) ||
+		    bpf_object__shndx_is_data(obj, shdr_idx)) {
+			type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
+			if (type != LIBBPF_MAP_UNSPEC &&
+			    GELF_ST_BIND(sym.st_info) == STB_GLOBAL) {
+				pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n",
+					   name, insn_idx, insns[insn_idx].code);
+				return -LIBBPF_ERRNO__RELOC;
+			}
+
 			for (map_idx = 0; map_idx < nr_maps; map_idx++) {
-				if (maps[map_idx].offset == sym.st_value) {
+				if (maps[map_idx].libbpf_type != type)
+					continue;
+				if (type != LIBBPF_MAP_UNSPEC ||
+				    (type == LIBBPF_MAP_UNSPEC &&
+				     maps[map_idx].offset == sym.st_value)) {
 					pr_debug("relocation: find map %zd (%s) for insn %u\n",
 						 map_idx, maps[map_idx].name, insn_idx);
 					break;
@@ -1062,7 +1246,8 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 				return -LIBBPF_ERRNO__RELOC;
 			}
 
-			prog->reloc_desc[i].type = RELO_LD64;
+			prog->reloc_desc[i].type = type != LIBBPF_MAP_UNSPEC ?
+						   RELO_DATA : RELO_LD64;
 			prog->reloc_desc[i].insn_idx = insn_idx;
 			prog->reloc_desc[i].map_idx = map_idx;
 		}
@@ -1073,18 +1258,27 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
 {
 	struct bpf_map_def *def = &map->def;
-	__u32 key_type_id, value_type_id;
+	__u32 key_type_id = 0, value_type_id = 0;
 	int ret;
 
-	ret = btf__get_map_kv_tids(btf, map->name, def->key_size,
-				   def->value_size, &key_type_id,
-				   &value_type_id);
-	if (ret)
+	if (!bpf_map__is_internal(map)) {
+		ret = btf__get_map_kv_tids(btf, map->name, def->key_size,
+					   def->value_size, &key_type_id,
+					   &value_type_id);
+	} else {
+		/*
+		 * LLVM annotates global data differently in BTF, that is,
+		 * only as '.data', '.bss' or '.rodata'.
+		 */
+		ret = btf__find_by_name(btf,
+				libbpf_type_to_btf_name[map->libbpf_type]);
+	}
+	if (ret < 0)
 		return ret;
 
 	map->btf_key_type_id = key_type_id;
-	map->btf_value_type_id = value_type_id;
-
+	map->btf_value_type_id = bpf_map__is_internal(map) ?
+				 ret : value_type_id;
 	return 0;
 }
 
@@ -1195,6 +1389,34 @@ bpf_object__probe_caps(struct bpf_object *obj)
 	return bpf_object__probe_name(obj);
 }
 
+static int
+bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
+{
+	char *cp, errmsg[STRERR_BUFSIZE];
+	int err, zero = 0;
+	__u8 *data;
+
+	/* Nothing to do here since kernel already zero-initializes .bss map. */
+	if (map->libbpf_type == LIBBPF_MAP_BSS)
+		return 0;
+
+	data = map->libbpf_type == LIBBPF_MAP_DATA ?
+	       obj->sections.data : obj->sections.rodata;
+
+	err = bpf_map_update_elem(map->fd, &zero, data, 0);
+	/* Freeze .rodata map as read-only from syscall side. */
+	if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) {
+		err = bpf_map_freeze(map->fd);
+		if (err) {
+			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+			pr_warning("Error freezing map(%s) as read-only: %s\n",
+				   map->name, cp);
+			err = 0;
+		}
+	}
+	return err;
+}
+
 static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
@@ -1252,6 +1474,7 @@ bpf_object__create_maps(struct bpf_object *obj)
 			size_t j;
 
 			err = *pfd;
+err_out:
 			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 			pr_warning("failed to create map (name: '%s'): %s\n",
 				   map->name, cp);
@@ -1259,6 +1482,15 @@ bpf_object__create_maps(struct bpf_object *obj)
 				zclose(obj->maps[j].fd);
 			return err;
 		}
+
+		if (bpf_map__is_internal(map)) {
+			err = bpf_object__populate_internal_map(obj, map);
+			if (err < 0) {
+				zclose(*pfd);
+				goto err_out;
+			}
+		}
+
 		pr_debug("create map %s: fd=%d\n", map->name, *pfd);
 	}
 
@@ -1413,19 +1645,27 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 		return 0;
 
 	for (i = 0; i < prog->nr_reloc; i++) {
-		if (prog->reloc_desc[i].type == RELO_LD64) {
+		if (prog->reloc_desc[i].type == RELO_LD64 ||
+		    prog->reloc_desc[i].type == RELO_DATA) {
+			bool relo_data = prog->reloc_desc[i].type == RELO_DATA;
 			struct bpf_insn *insns = prog->insns;
 			int insn_idx, map_idx;
 
 			insn_idx = prog->reloc_desc[i].insn_idx;
 			map_idx = prog->reloc_desc[i].map_idx;
 
-			if (insn_idx >= (int)prog->insns_cnt) {
+			if (insn_idx + 1 >= (int)prog->insns_cnt) {
 				pr_warning("relocation out of range: '%s'\n",
 					   prog->section_name);
 				return -LIBBPF_ERRNO__RELOC;
 			}
-			insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+
+			if (!relo_data) {
+				insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+			} else {
+				insns[insn_idx].src_reg = BPF_PSEUDO_MAP_VALUE;
+				insns[insn_idx + 1].imm = insns[insn_idx].imm;
+			}
 			insns[insn_idx].imm = obj->maps[map_idx].fd;
 		} else if (prog->reloc_desc[i].type == RELO_CALL) {
 			err = bpf_program__reloc_text(prog, obj,
@@ -2321,6 +2561,9 @@ void bpf_object__close(struct bpf_object *obj)
 		obj->maps[i].priv = NULL;
 		obj->maps[i].clear_priv = NULL;
 	}
+
+	zfree(&obj->sections.rodata);
+	zfree(&obj->sections.data);
 	zfree(&obj->maps);
 	obj->nr_maps = 0;
 
@@ -2798,6 +3041,11 @@ bool bpf_map__is_offload_neutral(struct bpf_map *map)
 	return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
 }
 
+bool bpf_map__is_internal(struct bpf_map *map)
+{
+	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
+}
+
 void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 {
 	map->map_ifindex = ifindex;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 531323391d07..12db2822c8e7 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -301,6 +301,7 @@ LIBBPF_API void *bpf_map__priv(struct bpf_map *map);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
 LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
 LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_internal(struct bpf_map *map);
 LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f3ce50500cf2..be42bdffc8de 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -157,3 +157,9 @@ LIBBPF_0.0.2 {
 		bpf_program__bpil_addr_to_offs;
 		bpf_program__bpil_offs_to_addr;
 } LIBBPF_0.0.1;
+
+LIBBPF_0.0.3 {
+	global:
+		bpf_map__is_internal;
+		bpf_map_freeze;
+} LIBBPF_0.0.2;
-- 
cgit v1.2.3


From 1713d68b3bf039d029afd74653c9325f5003ccbe Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Apr 2019 23:20:14 +0200
Subject: bpf, libbpf: add support for BTF Var and DataSec

This adds libbpf support for BTF Var and DataSec kinds. Main point
here is that libbpf needs to do some preparatory work before the
whole BTF object can be loaded into the kernel, that is, fixing up
of DataSec size taken from the ELF section size and non-static
variable offset which needs to be taken from the ELF's string section.

Upstream LLVM doesn't fix these up since at time of BTF emission
it is too early in the compilation process thus this information
isn't available yet, hence loader needs to take care of it.

Note, deduplication handling has not been in the scope of this work
and needs to be addressed in a future commit.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://reviews.llvm.org/D59441
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/btf.c      |  97 +++++++++++++++++++++++++++++-
 tools/lib/bpf/btf.h      |   3 +
 tools/lib/bpf/libbpf.c   | 150 +++++++++++++++++++++++++++++++++++++++++------
 tools/lib/bpf/libbpf.h   |   4 ++
 tools/lib/bpf/libbpf.map |   1 +
 5 files changed, 235 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 87e3020ac1bc..0f9079b9539e 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -24,6 +24,8 @@
 		((k) == BTF_KIND_CONST) || \
 		((k) == BTF_KIND_RESTRICT))
 
+#define IS_VAR(k) ((k) == BTF_KIND_VAR)
+
 static struct btf_type btf_void;
 
 struct btf {
@@ -212,6 +214,10 @@ static int btf_type_size(struct btf_type *t)
 		return base_size + vlen * sizeof(struct btf_member);
 	case BTF_KIND_FUNC_PROTO:
 		return base_size + vlen * sizeof(struct btf_param);
+	case BTF_KIND_VAR:
+		return base_size + sizeof(struct btf_var);
+	case BTF_KIND_DATASEC:
+		return base_size + vlen * sizeof(struct btf_var_secinfo);
 	default:
 		pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info));
 		return -EINVAL;
@@ -283,6 +289,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 		case BTF_KIND_STRUCT:
 		case BTF_KIND_UNION:
 		case BTF_KIND_ENUM:
+		case BTF_KIND_DATASEC:
 			size = t->size;
 			goto done;
 		case BTF_KIND_PTR:
@@ -292,6 +299,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 		case BTF_KIND_VOLATILE:
 		case BTF_KIND_CONST:
 		case BTF_KIND_RESTRICT:
+		case BTF_KIND_VAR:
 			type_id = t->type;
 			break;
 		case BTF_KIND_ARRAY:
@@ -326,7 +334,8 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
 	t = btf__type_by_id(btf, type_id);
 	while (depth < MAX_RESOLVE_DEPTH &&
 	       !btf_type_is_void_or_null(t) &&
-	       IS_MODIFIER(BTF_INFO_KIND(t->info))) {
+	       (IS_MODIFIER(BTF_INFO_KIND(t->info)) ||
+		IS_VAR(BTF_INFO_KIND(t->info)))) {
 		type_id = t->type;
 		t = btf__type_by_id(btf, type_id);
 		depth++;
@@ -408,6 +417,92 @@ done:
 	return btf;
 }
 
+static int compare_vsi_off(const void *_a, const void *_b)
+{
+	const struct btf_var_secinfo *a = _a;
+	const struct btf_var_secinfo *b = _b;
+
+	return a->offset - b->offset;
+}
+
+static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
+			     struct btf_type *t)
+{
+	__u32 size = 0, off = 0, i, vars = BTF_INFO_VLEN(t->info);
+	const char *name = btf__name_by_offset(btf, t->name_off);
+	const struct btf_type *t_var;
+	struct btf_var_secinfo *vsi;
+	struct btf_var *var;
+	int ret;
+
+	if (!name) {
+		pr_debug("No name found in string section for DATASEC kind.\n");
+		return -ENOENT;
+	}
+
+	ret = bpf_object__section_size(obj, name, &size);
+	if (ret || !size || (t->size && t->size != size)) {
+		pr_debug("Invalid size for section %s: %u bytes\n", name, size);
+		return -ENOENT;
+	}
+
+	t->size = size;
+
+	for (i = 0, vsi = (struct btf_var_secinfo *)(t + 1);
+	     i < vars; i++, vsi++) {
+		t_var = btf__type_by_id(btf, vsi->type);
+		var = (struct btf_var *)(t_var + 1);
+
+		if (BTF_INFO_KIND(t_var->info) != BTF_KIND_VAR) {
+			pr_debug("Non-VAR type seen in section %s\n", name);
+			return -EINVAL;
+		}
+
+		if (var->linkage == BTF_VAR_STATIC)
+			continue;
+
+		name = btf__name_by_offset(btf, t_var->name_off);
+		if (!name) {
+			pr_debug("No name found in string section for VAR kind\n");
+			return -ENOENT;
+		}
+
+		ret = bpf_object__variable_offset(obj, name, &off);
+		if (ret) {
+			pr_debug("No offset found in symbol table for VAR %s\n", name);
+			return -ENOENT;
+		}
+
+		vsi->offset = off;
+	}
+
+	qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off);
+	return 0;
+}
+
+int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
+{
+	int err = 0;
+	__u32 i;
+
+	for (i = 1; i <= btf->nr_types; i++) {
+		struct btf_type *t = btf->types[i];
+
+		/* Loader needs to fix up some of the things compiler
+		 * couldn't get its hands on while emitting BTF. This
+		 * is section size and global variable offset. We use
+		 * the info from the ELF itself for this purpose.
+		 */
+		if (BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC) {
+			err = btf_fixup_datasec(obj, btf, t);
+			if (err)
+				break;
+		}
+	}
+
+	return err;
+}
+
 int btf__load(struct btf *btf)
 {
 	__u32 log_buf_size = BPF_LOG_BUF_SIZE;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 28a1e1e59861..c7b399e81fce 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -21,6 +21,8 @@ struct btf;
 struct btf_ext;
 struct btf_type;
 
+struct bpf_object;
+
 /*
  * The .BTF.ext ELF section layout defined as
  *   struct btf_ext_header
@@ -57,6 +59,7 @@ struct btf_ext_header {
 
 LIBBPF_API void btf__free(struct btf *btf);
 LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
+LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
 				   const char *type_name);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f7b245fbb960..e2a457e7c318 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -669,6 +669,112 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
 	return false;
 }
 
+static int bpf_object_search_section_size(const struct bpf_object *obj,
+					  const char *name, size_t *d_size)
+{
+	const GElf_Ehdr *ep = &obj->efile.ehdr;
+	Elf *elf = obj->efile.elf;
+	Elf_Scn *scn = NULL;
+	int idx = 0;
+
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		const char *sec_name;
+		Elf_Data *data;
+		GElf_Shdr sh;
+
+		idx++;
+		if (gelf_getshdr(scn, &sh) != &sh) {
+			pr_warning("failed to get section(%d) header from %s\n",
+				   idx, obj->path);
+			return -EIO;
+		}
+
+		sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
+		if (!sec_name) {
+			pr_warning("failed to get section(%d) name from %s\n",
+				   idx, obj->path);
+			return -EIO;
+		}
+
+		if (strcmp(name, sec_name))
+			continue;
+
+		data = elf_getdata(scn, 0);
+		if (!data) {
+			pr_warning("failed to get section(%d) data from %s(%s)\n",
+				   idx, name, obj->path);
+			return -EIO;
+		}
+
+		*d_size = data->d_size;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+			     __u32 *size)
+{
+	int ret = -ENOENT;
+	size_t d_size;
+
+	*size = 0;
+	if (!name) {
+		return -EINVAL;
+	} else if (!strcmp(name, ".data")) {
+		if (obj->efile.data)
+			*size = obj->efile.data->d_size;
+	} else if (!strcmp(name, ".bss")) {
+		if (obj->efile.bss)
+			*size = obj->efile.bss->d_size;
+	} else if (!strcmp(name, ".rodata")) {
+		if (obj->efile.rodata)
+			*size = obj->efile.rodata->d_size;
+	} else {
+		ret = bpf_object_search_section_size(obj, name, &d_size);
+		if (!ret)
+			*size = d_size;
+	}
+
+	return *size ? 0 : ret;
+}
+
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+				__u32 *off)
+{
+	Elf_Data *symbols = obj->efile.symbols;
+	const char *sname;
+	size_t si;
+
+	if (!name || !off)
+		return -EINVAL;
+
+	for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
+		GElf_Sym sym;
+
+		if (!gelf_getsym(symbols, si, &sym))
+			continue;
+		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+		    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
+			continue;
+
+		sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+				   sym.st_name);
+		if (!sname) {
+			pr_warning("failed to get sym name string for var %s\n",
+				   name);
+			return -EIO;
+		}
+		if (strcmp(name, sname) == 0) {
+			*off = sym.st_value;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
 static bool bpf_object__has_maps(const struct bpf_object *obj)
 {
 	return obj->efile.maps_shndx >= 0 ||
@@ -911,6 +1017,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 	Elf *elf = obj->efile.elf;
 	GElf_Ehdr *ep = &obj->efile.ehdr;
 	Elf_Data *btf_ext_data = NULL;
+	Elf_Data *btf_data = NULL;
 	Elf_Scn *scn = NULL;
 	int idx = 0, err = 0;
 
@@ -954,32 +1061,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 			 (int)sh.sh_link, (unsigned long)sh.sh_flags,
 			 (int)sh.sh_type);
 
-		if (strcmp(name, "license") == 0)
+		if (strcmp(name, "license") == 0) {
 			err = bpf_object__init_license(obj,
 						       data->d_buf,
 						       data->d_size);
-		else if (strcmp(name, "version") == 0)
+		} else if (strcmp(name, "version") == 0) {
 			err = bpf_object__init_kversion(obj,
 							data->d_buf,
 							data->d_size);
-		else if (strcmp(name, "maps") == 0)
+		} else if (strcmp(name, "maps") == 0) {
 			obj->efile.maps_shndx = idx;
-		else if (strcmp(name, BTF_ELF_SEC) == 0) {
-			obj->btf = btf__new(data->d_buf, data->d_size);
-			if (IS_ERR(obj->btf)) {
-				pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
-					   BTF_ELF_SEC, PTR_ERR(obj->btf));
-				obj->btf = NULL;
-				continue;
-			}
-			err = btf__load(obj->btf);
-			if (err) {
-				pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n",
-					   BTF_ELF_SEC, err);
-				btf__free(obj->btf);
-				obj->btf = NULL;
-				err = 0;
-			}
+		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
+			btf_data = data;
 		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
 			btf_ext_data = data;
 		} else if (sh.sh_type == SHT_SYMTAB) {
@@ -1054,6 +1147,25 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 		pr_warning("Corrupted ELF file: index of strtab invalid\n");
 		return LIBBPF_ERRNO__FORMAT;
 	}
+	if (btf_data) {
+		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
+		if (IS_ERR(obj->btf)) {
+			pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
+				   BTF_ELF_SEC, PTR_ERR(obj->btf));
+			obj->btf = NULL;
+		} else {
+			err = btf__finalize_data(obj, obj->btf);
+			if (!err)
+				err = btf__load(obj->btf);
+			if (err) {
+				pr_warning("Error finalizing and loading %s into kernel: %d. Ignored and continue.\n",
+					   BTF_ELF_SEC, err);
+				btf__free(obj->btf);
+				obj->btf = NULL;
+				err = 0;
+			}
+		}
+	}
 	if (btf_ext_data) {
 		if (!obj->btf) {
 			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 12db2822c8e7..c5ff00515ce7 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -75,6 +75,10 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
 LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf,
 						      size_t obj_buf_sz,
 						      const char *name);
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+			     __u32 *size);
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+				__u32 *off);
 LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
 LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
 				      const char *path);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index be42bdffc8de..673001787cba 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -162,4 +162,5 @@ LIBBPF_0.0.3 {
 	global:
 		bpf_map__is_internal;
 		bpf_map_freeze;
+		btf__finalize_data;
 } LIBBPF_0.0.2;
-- 
cgit v1.2.3


From 817998afa038c156bbc1a6e69c48aa26282cc41f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Apr 2019 23:20:15 +0200
Subject: bpf: bpftool support for dumping data/bss/rodata sections

Add the ability to bpftool to handle BTF Var and DataSec kinds
in order to dump them out of btf_dumper_type(). The value has a
single object with the section name, which itself holds an array
of variables it dumps. A single variable is an object by itself
printed along with its name. From there further type information
is dumped along with corresponding value information.

Example output from .rodata:

  # ./bpftool m d i 150
  [{
          "value": {
              ".rodata": [{
                      "load_static_data.bar": 18446744073709551615
                  },{
                      "num2": 24
                  },{
                      "num5": 43947
                  },{
                      "num6": 171
                  },{
                      "str0": [97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,0,0,0,0,0,0
                      ]
                  },{
                      "struct0": {
                          "a": 42,
                          "b": 4278120431,
                          "c": 1229782938247303441
                      }
                  },{
                      "struct2": {
                          "a": 0,
                          "b": 0,
                          "c": 0
                      }
                  }
              ]
          }
      }
  ]

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/btf_dumper.c | 59 ++++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/map.c        | 10 ++++---
 2 files changed, 65 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index e63bce0755eb..8cafb9b31467 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -309,6 +309,48 @@ static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id,
 	return ret;
 }
 
+static int btf_dumper_var(const struct btf_dumper *d, __u32 type_id,
+			  __u8 bit_offset, const void *data)
+{
+	const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+	int ret;
+
+	jsonw_start_object(d->jw);
+	jsonw_name(d->jw, btf__name_by_offset(d->btf, t->name_off));
+	ret = btf_dumper_do_type(d, t->type, bit_offset, data);
+	jsonw_end_object(d->jw);
+
+	return ret;
+}
+
+static int btf_dumper_datasec(const struct btf_dumper *d, __u32 type_id,
+			      const void *data)
+{
+	struct btf_var_secinfo *vsi;
+	const struct btf_type *t;
+	int ret = 0, i, vlen;
+
+	t = btf__type_by_id(d->btf, type_id);
+	if (!t)
+		return -EINVAL;
+
+	vlen = BTF_INFO_VLEN(t->info);
+	vsi = (struct btf_var_secinfo *)(t + 1);
+
+	jsonw_start_object(d->jw);
+	jsonw_name(d->jw, btf__name_by_offset(d->btf, t->name_off));
+	jsonw_start_array(d->jw);
+	for (i = 0; i < vlen; i++) {
+		ret = btf_dumper_do_type(d, vsi[i].type, 0, data + vsi[i].offset);
+		if (ret)
+			break;
+	}
+	jsonw_end_array(d->jw);
+	jsonw_end_object(d->jw);
+
+	return ret;
+}
+
 static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
 			      __u8 bit_offset, const void *data)
 {
@@ -341,6 +383,10 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
 	case BTF_KIND_CONST:
 	case BTF_KIND_RESTRICT:
 		return btf_dumper_modifier(d, type_id, bit_offset, data);
+	case BTF_KIND_VAR:
+		return btf_dumper_var(d, type_id, bit_offset, data);
+	case BTF_KIND_DATASEC:
+		return btf_dumper_datasec(d, type_id, data);
 	default:
 		jsonw_printf(d->jw, "(unsupported-kind");
 		return -EINVAL;
@@ -377,6 +423,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
 {
 	const struct btf_type *proto_type;
 	const struct btf_array *array;
+	const struct btf_var *var;
 	const struct btf_type *t;
 
 	if (!type_id) {
@@ -440,6 +487,18 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
 		if (pos == -1)
 			return -1;
 		break;
+	case BTF_KIND_VAR:
+		var = (struct btf_var *)(t + 1);
+		if (var->linkage == BTF_VAR_STATIC)
+			BTF_PRINT_ARG("static ");
+		BTF_PRINT_TYPE(t->type);
+		BTF_PRINT_ARG(" %s",
+			      btf__name_by_offset(btf, t->name_off));
+		break;
+	case BTF_KIND_DATASEC:
+		BTF_PRINT_ARG("section (\"%s\") ",
+			      btf__name_by_offset(btf, t->name_off));
+		break;
 	case BTF_KIND_UNKN:
 	default:
 		return -1;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e0c650d91784..e96903078991 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -153,11 +153,13 @@ static int do_dump_btf(const struct btf_dumper *d,
 	/* start of key-value pair */
 	jsonw_start_object(d->jw);
 
-	jsonw_name(d->jw, "key");
+	if (map_info->btf_key_type_id) {
+		jsonw_name(d->jw, "key");
 
-	ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
-	if (ret)
-		goto err_end_obj;
+		ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
+		if (ret)
+			goto err_end_obj;
+	}
 
 	if (!map_is_per_cpu(map_info->type)) {
 		jsonw_name(d->jw, "value");
-- 
cgit v1.2.3


From fb2abb73e575b6fcb428f803faf928ef04d5bb1e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Apr 2019 23:20:16 +0200
Subject: bpf, selftest: test {rd, wr}only flags and direct value access

Extend test_verifier with various test cases around the two kernel
extensions, that is, {rd,wr}only map support as well as direct map
value access. All passing, one skipped due to xskmap not present
on test machine:

  # ./test_verifier
  [...]
  #948/p XDP pkt read, pkt_meta' <= pkt_data, bad access 1 OK
  #949/p XDP pkt read, pkt_meta' <= pkt_data, bad access 2 OK
  #950/p XDP pkt read, pkt_data <= pkt_meta', good access OK
  #951/p XDP pkt read, pkt_data <= pkt_meta', bad access 1 OK
  #952/p XDP pkt read, pkt_data <= pkt_meta', bad access 2 OK
  Summary: 1410 PASSED, 1 SKIPPED, 0 FAILED

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/linux/filter.h                       |  21 +-
 tools/testing/selftests/bpf/test_verifier.c        |  51 ++-
 .../testing/selftests/bpf/verifier/array_access.c  | 159 ++++++++++
 .../selftests/bpf/verifier/direct_value_access.c   | 347 +++++++++++++++++++++
 4 files changed, 573 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/verifier/direct_value_access.c

(limited to 'tools')

diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index cce0b02c0e28..ca28b6ab8db7 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -278,10 +278,29 @@
 		.off   = 0,					\
 		.imm   = ((__u64) (IMM)) >> 32 })
 
+#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2)	\
+	((struct bpf_insn) {					\
+		.code  = BPF_LD | BPF_DW | BPF_IMM,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF1,					\
+		.imm   = IMM1 }),				\
+	((struct bpf_insn) {					\
+		.code  = 0, /* zero is reserved opcode */	\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = OFF2,					\
+		.imm   = IMM2 })
+
 /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
 
 #define BPF_LD_MAP_FD(DST, MAP_FD)				\
-	BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+	BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_FD, 0, 0,	\
+			      MAP_FD, 0)
+
+#define BPF_LD_MAP_VALUE(DST, MAP_FD, VALUE_OFF)		\
+	BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_VALUE, 0, 0,	\
+			      MAP_FD, VALUE_OFF)
 
 /* Relative call */
 
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 75ef63b42f2c..e2ebcaddbe78 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -52,7 +52,7 @@
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	14
+#define MAX_NR_MAPS	16
 #define MAX_TEST_RUNS	8
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
@@ -82,6 +82,9 @@ struct bpf_test {
 	int fixup_cgroup_storage[MAX_FIXUPS];
 	int fixup_percpu_cgroup_storage[MAX_FIXUPS];
 	int fixup_map_spin_lock[MAX_FIXUPS];
+	int fixup_map_array_ro[MAX_FIXUPS];
+	int fixup_map_array_wo[MAX_FIXUPS];
+	int fixup_map_array_small[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
 	uint32_t retval, retval_unpriv, insn_processed;
@@ -285,13 +288,15 @@ static bool skip_unsupported_map(enum bpf_map_type map_type)
 	return false;
 }
 
-static int create_map(uint32_t type, uint32_t size_key,
-		      uint32_t size_value, uint32_t max_elem)
+static int __create_map(uint32_t type, uint32_t size_key,
+			uint32_t size_value, uint32_t max_elem,
+			uint32_t extra_flags)
 {
 	int fd;
 
 	fd = bpf_create_map(type, size_key, size_value, max_elem,
-			    type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0);
+			    (type == BPF_MAP_TYPE_HASH ?
+			     BPF_F_NO_PREALLOC : 0) | extra_flags);
 	if (fd < 0) {
 		if (skip_unsupported_map(type))
 			return -1;
@@ -301,6 +306,12 @@ static int create_map(uint32_t type, uint32_t size_key,
 	return fd;
 }
 
+static int create_map(uint32_t type, uint32_t size_key,
+		      uint32_t size_value, uint32_t max_elem)
+{
+	return __create_map(type, size_key, size_value, max_elem, 0);
+}
+
 static void update_map(int fd, int index)
 {
 	struct test_val value = {
@@ -527,6 +538,9 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	int *fixup_cgroup_storage = test->fixup_cgroup_storage;
 	int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;
 	int *fixup_map_spin_lock = test->fixup_map_spin_lock;
+	int *fixup_map_array_ro = test->fixup_map_array_ro;
+	int *fixup_map_array_wo = test->fixup_map_array_wo;
+	int *fixup_map_array_small = test->fixup_map_array_small;
 
 	if (test->fill_helper) {
 		test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -652,6 +666,35 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 			fixup_map_spin_lock++;
 		} while (*fixup_map_spin_lock);
 	}
+	if (*fixup_map_array_ro) {
+		map_fds[14] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+					   sizeof(struct test_val), 1,
+					   BPF_F_RDONLY_PROG);
+		update_map(map_fds[14], 0);
+		do {
+			prog[*fixup_map_array_ro].imm = map_fds[14];
+			fixup_map_array_ro++;
+		} while (*fixup_map_array_ro);
+	}
+	if (*fixup_map_array_wo) {
+		map_fds[15] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+					   sizeof(struct test_val), 1,
+					   BPF_F_WRONLY_PROG);
+		update_map(map_fds[15], 0);
+		do {
+			prog[*fixup_map_array_wo].imm = map_fds[15];
+			fixup_map_array_wo++;
+		} while (*fixup_map_array_wo);
+	}
+	if (*fixup_map_array_small) {
+		map_fds[16] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+					   1, 1, 0);
+		update_map(map_fds[16], 0);
+		do {
+			prog[*fixup_map_array_small].imm = map_fds[16];
+			fixup_map_array_small++;
+		} while (*fixup_map_array_small);
+	}
 }
 
 static int set_admin(bool admin)
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index 0dcecaf3ec6f..bcb83196e459 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -217,3 +217,162 @@
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
+{
+	"valid read map access into a read-only array 1",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_ro = { 3 },
+	.result = ACCEPT,
+	.retval = 28,
+},
+{
+	"valid read map access into a read-only array 2",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 4),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_MOV64_IMM(BPF_REG_5, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+		     BPF_FUNC_csum_diff),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_array_ro = { 3 },
+	.result = ACCEPT,
+	.retval = -29,
+},
+{
+	"invalid write map access into a read-only array 1",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_ro = { 3 },
+	.result = REJECT,
+	.errstr = "write into map forbidden",
+},
+{
+	"invalid write map access into a read-only array 2",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_4, 8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+		     BPF_FUNC_skb_load_bytes),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_array_ro = { 4 },
+	.result = REJECT,
+	.errstr = "write into map forbidden",
+},
+{
+	"valid write map access into a write-only array 1",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_wo = { 3 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"valid write map access into a write-only array 2",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_4, 8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+		     BPF_FUNC_skb_load_bytes),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_array_wo = { 4 },
+	.result = ACCEPT,
+	.retval = 0,
+},
+{
+	"invalid read map access into a write-only array 1",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_wo = { 3 },
+	.result = REJECT,
+	.errstr = "read from map forbidden",
+},
+{
+	"invalid read map access into a write-only array 2",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 4),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_MOV64_IMM(BPF_REG_5, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+		     BPF_FUNC_csum_diff),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_array_wo = { 3 },
+	.result = REJECT,
+	.errstr = "read from map forbidden",
+},
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
new file mode 100644
index 000000000000..b9fb28e8e224
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -0,0 +1,347 @@
+{
+	"direct map access, write test 1",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"direct map access, write test 2",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"direct map access, write test 3",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 4242),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"direct map access, write test 4",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 40),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"direct map access, write test 5",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 32),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 4242),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"direct map access, write test 6",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 40),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 4, 4242),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "R1 min value is outside of the array range",
+},
+{
+	"direct map access, write test 7",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, -1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 4, 4242),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "direct value offset of 4294967295 is not allowed",
+},
+{
+	"direct map access, write test 8",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, -1, 4242),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"direct map access, write test 9",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 48),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "invalid access to map value pointer",
+},
+{
+	"direct map access, write test 10",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 47),
+	BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"direct map access, write test 11",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 48),
+	BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "invalid access to map value pointer",
+},
+{
+	"direct map access, write test 12",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, (1<<29)),
+	BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "direct value offset of 536870912 is not allowed",
+},
+{
+	"direct map access, write test 13",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, (1<<29)-1),
+	BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "invalid access to map value pointer, value_size=48 off=536870911",
+},
+{
+	"direct map access, write test 14",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 47),
+	BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46),
+	BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1, 3 },
+	.result = ACCEPT,
+	.retval = 0xff,
+},
+{
+	"direct map access, write test 15",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46),
+	BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46),
+	BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff),
+	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1, 3 },
+	.result = ACCEPT,
+	.retval = 0xffff,
+},
+{
+	"direct map access, write test 16",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46),
+	BPF_LD_MAP_VALUE(BPF_REG_2, 0, 47),
+	BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff),
+	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1, 3 },
+	.result = REJECT,
+	.errstr = "invalid access to map value, value_size=48 off=47 size=2",
+},
+{
+	"direct map access, write test 17",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46),
+	BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46),
+	BPF_ST_MEM(BPF_H, BPF_REG_2, 1, 0xffff),
+	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1, 3 },
+	.result = REJECT,
+	.errstr = "invalid access to map value, value_size=48 off=47 size=2",
+},
+{
+	"direct map access, write test 18",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0),
+	BPF_ST_MEM(BPF_H, BPF_REG_1, 0, 42),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_small = { 1 },
+	.result = REJECT,
+	.errstr = "R1 min value is outside of the array range",
+},
+{
+	"direct map access, write test 19",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0),
+	BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_small = { 1 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"direct map access, write test 20",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1),
+	BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_small = { 1 },
+	.result = REJECT,
+	.errstr = "invalid access to map value pointer",
+},
+{
+	"direct map access, invalid insn test 1",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0, 1, 0, 47),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+	"direct map access, invalid insn test 2",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 1, 0, 0, 47),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "BPF_LD_IMM64 uses reserved fields",
+},
+{
+	"direct map access, invalid insn test 3",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, ~0, 0, 0, 47),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "BPF_LD_IMM64 uses reserved fields",
+},
+{
+	"direct map access, invalid insn test 4",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0, ~0, 0, 47),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+	"direct map access, invalid insn test 5",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, ~0, ~0, 0, 47),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+	"direct map access, invalid insn test 6",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, ~0, 0, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "BPF_LD_IMM64 uses reserved fields",
+},
+{
+	"direct map access, invalid insn test 7",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, ~0, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+	"direct map access, invalid insn test 8",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, ~0, ~0, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+	"direct map access, invalid insn test 9",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 0, 0, 47),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result = REJECT,
+	.errstr = "unrecognized bpf_ld_imm64 insn",
+},
-- 
cgit v1.2.3


From b915ebe6d9c8c6b5427e606c0ecee53df921382b Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Tue, 9 Apr 2019 23:20:17 +0200
Subject: bpf, selftest: test global data/bss/rodata sections

Add tests for libbpf relocation of static variable references
into the .data, .rodata and .bss sections of the ELF, also add
read-only test for .rodata. All passing:

  # ./test_progs
  [...]
  test_global_data:PASS:load program 0 nsec
  test_global_data:PASS:pass global data run 925 nsec
  test_global_data_number:PASS:relocate .bss reference 925 nsec
  test_global_data_number:PASS:relocate .data reference 925 nsec
  test_global_data_number:PASS:relocate .rodata reference 925 nsec
  test_global_data_number:PASS:relocate .bss reference 925 nsec
  test_global_data_number:PASS:relocate .data reference 925 nsec
  test_global_data_number:PASS:relocate .rodata reference 925 nsec
  test_global_data_number:PASS:relocate .bss reference 925 nsec
  test_global_data_number:PASS:relocate .bss reference 925 nsec
  test_global_data_number:PASS:relocate .rodata reference 925 nsec
  test_global_data_number:PASS:relocate .rodata reference 925 nsec
  test_global_data_number:PASS:relocate .rodata reference 925 nsec
  test_global_data_string:PASS:relocate .rodata reference 925 nsec
  test_global_data_string:PASS:relocate .data reference 925 nsec
  test_global_data_string:PASS:relocate .bss reference 925 nsec
  test_global_data_string:PASS:relocate .data reference 925 nsec
  test_global_data_string:PASS:relocate .bss reference 925 nsec
  test_global_data_struct:PASS:relocate .rodata reference 925 nsec
  test_global_data_struct:PASS:relocate .bss reference 925 nsec
  test_global_data_struct:PASS:relocate .rodata reference 925 nsec
  test_global_data_struct:PASS:relocate .data reference 925 nsec
  test_global_data_rdonly:PASS:test .rodata read-only map 925 nsec
  [...]
  Summary: 229 PASSED, 0 FAILED

Note map helper signatures have been changed to avoid warnings
when passing in const data.

Joint work with Daniel Borkmann.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/bpf_helpers.h          |   8 +-
 .../testing/selftests/bpf/prog_tests/global_data.c | 157 +++++++++++++++++++++
 .../testing/selftests/bpf/progs/test_global_data.c | 106 ++++++++++++++
 3 files changed, 267 insertions(+), 4 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/global_data.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_global_data.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 97d140961438..e85d62cb53d0 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -9,14 +9,14 @@
 #define SEC(NAME) __attribute__((section(NAME), used))
 
 /* helper functions called from eBPF programs written in C */
-static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
 	(void *) BPF_FUNC_map_lookup_elem;
-static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+static int (*bpf_map_update_elem)(void *map, const void *key, const void *value,
 				  unsigned long long flags) =
 	(void *) BPF_FUNC_map_update_elem;
-static int (*bpf_map_delete_elem)(void *map, void *key) =
+static int (*bpf_map_delete_elem)(void *map, const void *key) =
 	(void *) BPF_FUNC_map_delete_elem;
-static int (*bpf_map_push_elem)(void *map, void *value,
+static int (*bpf_map_push_elem)(void *map, const void *value,
 				unsigned long long flags) =
 	(void *) BPF_FUNC_map_push_elem;
 static int (*bpf_map_pop_elem)(void *map, void *value) =
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
new file mode 100644
index 000000000000..d011079fb0bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+static void test_global_data_number(struct bpf_object *obj, __u32 duration)
+{
+	int i, err, map_fd;
+	uint64_t num;
+
+	map_fd = bpf_find_map(__func__, obj, "result_number");
+	if (map_fd < 0) {
+		error_cnt++;
+		return;
+	}
+
+	struct {
+		char *name;
+		uint32_t key;
+		uint64_t num;
+	} tests[] = {
+		{ "relocate .bss reference",     0, 0 },
+		{ "relocate .data reference",    1, 42 },
+		{ "relocate .rodata reference",  2, 24 },
+		{ "relocate .bss reference",     3, 0 },
+		{ "relocate .data reference",    4, 0xffeeff },
+		{ "relocate .rodata reference",  5, 0xabab },
+		{ "relocate .bss reference",     6, 1234 },
+		{ "relocate .bss reference",     7, 0 },
+		{ "relocate .rodata reference",  8, 0xab },
+		{ "relocate .rodata reference",  9, 0x1111111111111111 },
+		{ "relocate .rodata reference", 10, ~0 },
+	};
+
+	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+		err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num);
+		CHECK(err || num != tests[i].num, tests[i].name,
+		      "err %d result %lx expected %lx\n",
+		      err, num, tests[i].num);
+	}
+}
+
+static void test_global_data_string(struct bpf_object *obj, __u32 duration)
+{
+	int i, err, map_fd;
+	char str[32];
+
+	map_fd = bpf_find_map(__func__, obj, "result_string");
+	if (map_fd < 0) {
+		error_cnt++;
+		return;
+	}
+
+	struct {
+		char *name;
+		uint32_t key;
+		char str[32];
+	} tests[] = {
+		{ "relocate .rodata reference", 0, "abcdefghijklmnopqrstuvwxyz" },
+		{ "relocate .data reference",   1, "abcdefghijklmnopqrstuvwxyz" },
+		{ "relocate .bss reference",    2, "" },
+		{ "relocate .data reference",   3, "abcdexghijklmnopqrstuvwxyz" },
+		{ "relocate .bss reference",    4, "\0\0hello" },
+	};
+
+	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+		err = bpf_map_lookup_elem(map_fd, &tests[i].key, str);
+		CHECK(err || memcmp(str, tests[i].str, sizeof(str)),
+		      tests[i].name, "err %d result \'%s\' expected \'%s\'\n",
+		      err, str, tests[i].str);
+	}
+}
+
+struct foo {
+	__u8  a;
+	__u32 b;
+	__u64 c;
+};
+
+static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
+{
+	int i, err, map_fd;
+	struct foo val;
+
+	map_fd = bpf_find_map(__func__, obj, "result_struct");
+	if (map_fd < 0) {
+		error_cnt++;
+		return;
+	}
+
+	struct {
+		char *name;
+		uint32_t key;
+		struct foo val;
+	} tests[] = {
+		{ "relocate .rodata reference", 0, { 42, 0xfefeefef, 0x1111111111111111ULL, } },
+		{ "relocate .bss reference",    1, { } },
+		{ "relocate .rodata reference", 2, { } },
+		{ "relocate .data reference",   3, { 41, 0xeeeeefef, 0x2111111111111111ULL, } },
+	};
+
+	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+		err = bpf_map_lookup_elem(map_fd, &tests[i].key, &val);
+		CHECK(err || memcmp(&val, &tests[i].val, sizeof(val)),
+		      tests[i].name, "err %d result { %u, %u, %llu } expected { %u, %u, %llu }\n",
+		      err, val.a, val.b, val.c, tests[i].val.a, tests[i].val.b, tests[i].val.c);
+	}
+}
+
+static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
+{
+	int err = -ENOMEM, map_fd, zero = 0;
+	struct bpf_map *map;
+	__u8 *buff;
+
+	map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
+	if (!map || !bpf_map__is_internal(map)) {
+		error_cnt++;
+		return;
+	}
+
+	map_fd = bpf_map__fd(map);
+	if (map_fd < 0) {
+		error_cnt++;
+		return;
+	}
+
+	buff = malloc(bpf_map__def(map)->value_size);
+	if (buff)
+		err = bpf_map_update_elem(map_fd, &zero, buff, 0);
+	free(buff);
+	CHECK(!err || errno != EPERM, "test .rodata read-only map",
+	      "err %d errno %d\n", err, errno);
+}
+
+void test_global_data(void)
+{
+	const char *file = "./test_global_data.o";
+	__u32 duration = 0, retval;
+	struct bpf_object *obj;
+	int err, prog_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	if (CHECK(err, "load program", "error %d loading %s\n", err, file))
+		return;
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || retval, "pass global data run",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+
+	test_global_data_number(obj, duration);
+	test_global_data_string(obj, duration);
+	test_global_data_struct(obj, duration);
+	test_global_data_rdonly(obj, duration);
+
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
new file mode 100644
index 000000000000..5ab14e941980
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Isovalent, Inc.
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <string.h>
+
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") result_number = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.key_size	= sizeof(__u32),
+	.value_size	= sizeof(__u64),
+	.max_entries	= 11,
+};
+
+struct bpf_map_def SEC("maps") result_string = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.key_size	= sizeof(__u32),
+	.value_size	= 32,
+	.max_entries	= 5,
+};
+
+struct foo {
+	__u8  a;
+	__u32 b;
+	__u64 c;
+};
+
+struct bpf_map_def SEC("maps") result_struct = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.key_size	= sizeof(__u32),
+	.value_size	= sizeof(struct foo),
+	.max_entries	= 5,
+};
+
+/* Relocation tests for __u64s. */
+static       __u64 num0;
+static       __u64 num1 = 42;
+static const __u64 num2 = 24;
+static       __u64 num3 = 0;
+static       __u64 num4 = 0xffeeff;
+static const __u64 num5 = 0xabab;
+static const __u64 num6 = 0xab;
+
+/* Relocation tests for strings. */
+static const char str0[32] = "abcdefghijklmnopqrstuvwxyz";
+static       char str1[32] = "abcdefghijklmnopqrstuvwxyz";
+static       char str2[32];
+
+/* Relocation tests for structs. */
+static const struct foo struct0 = {
+	.a = 42,
+	.b = 0xfefeefef,
+	.c = 0x1111111111111111ULL,
+};
+static struct foo struct1;
+static const struct foo struct2;
+static struct foo struct3 = {
+	.a = 41,
+	.b = 0xeeeeefef,
+	.c = 0x2111111111111111ULL,
+};
+
+#define test_reloc(map, num, var)					\
+	do {								\
+		__u32 key = num;					\
+		bpf_map_update_elem(&result_##map, &key, var, 0);	\
+	} while (0)
+
+SEC("static_data_load")
+int load_static_data(struct __sk_buff *skb)
+{
+	static const __u64 bar = ~0;
+
+	test_reloc(number, 0, &num0);
+	test_reloc(number, 1, &num1);
+	test_reloc(number, 2, &num2);
+	test_reloc(number, 3, &num3);
+	test_reloc(number, 4, &num4);
+	test_reloc(number, 5, &num5);
+	num4 = 1234;
+	test_reloc(number, 6, &num4);
+	test_reloc(number, 7, &num0);
+	test_reloc(number, 8, &num6);
+
+	test_reloc(string, 0, str0);
+	test_reloc(string, 1, str1);
+	test_reloc(string, 2, str2);
+	str1[5] = 'x';
+	test_reloc(string, 3, str1);
+	__builtin_memcpy(&str2[2], "hello", sizeof("hello"));
+	test_reloc(string, 4, str2);
+
+	test_reloc(struct, 0, &struct0);
+	test_reloc(struct, 1, &struct1);
+	test_reloc(struct, 2, &struct2);
+	test_reloc(struct, 3, &struct3);
+
+	test_reloc(number,  9, &struct0.c);
+	test_reloc(number, 10, &bar);
+
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From c861168b7c219838637aaa8c3acc81707aa495f6 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Apr 2019 23:20:18 +0200
Subject: bpf, selftest: add test cases for BTF Var and DataSec

Extend test_btf with various positive and negative tests around
BTF verification of kind Var and DataSec. All passing as well:

  # ./test_btf
  [...]
  BTF raw test[4] (global data test #1): OK
  BTF raw test[5] (global data test #2): OK
  BTF raw test[6] (global data test #3): OK
  BTF raw test[7] (global data test #4, unsupported linkage): OK
  BTF raw test[8] (global data test #5, invalid var type): OK
  BTF raw test[9] (global data test #6, invalid var type (fwd type)): OK
  BTF raw test[10] (global data test #7, invalid var type (fwd type)): OK
  BTF raw test[11] (global data test #8, invalid var size): OK
  BTF raw test[12] (global data test #9, invalid var size): OK
  BTF raw test[13] (global data test #10, invalid var size): OK
  BTF raw test[14] (global data test #11, multiple section members): OK
  BTF raw test[15] (global data test #12, invalid offset): OK
  BTF raw test[16] (global data test #13, invalid offset): OK
  BTF raw test[17] (global data test #14, invalid offset): OK
  BTF raw test[18] (global data test #15, not var kind): OK
  BTF raw test[19] (global data test #16, invalid var referencing sec): OK
  BTF raw test[20] (global data test #17, invalid var referencing var): OK
  BTF raw test[21] (global data test #18, invalid var loop): OK
  BTF raw test[22] (global data test #19, invalid var referencing var): OK
  BTF raw test[23] (global data test #20, invalid ptr referencing var): OK
  BTF raw test[24] (global data test #21, var included in struct): OK
  BTF raw test[25] (global data test #22, array of var): OK
  [...]
  PASS:167 SKIP:0 FAIL:0

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_btf.c | 665 ++++++++++++++++++++++++++++++++-
 1 file changed, 663 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 5afc3f7dff81..08010e619a1c 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -85,6 +85,11 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
 #define BTF_UNION_ENC(name, nr_elems, sz)	\
 	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz)
 
+#define BTF_VAR_ENC(name, type, linkage)	\
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), type), (linkage)
+#define BTF_VAR_SECINFO_ENC(type, offset, size)	\
+	(type), (offset), (size)
+
 #define BTF_MEMBER_ENC(name, type, bits_offset)	\
 	(name), (type), (bits_offset)
 #define BTF_ENUM_ENC(name, val) (name), (val)
@@ -291,7 +296,6 @@ static struct btf_raw_test raw_tests[] = {
 	.value_type_id = 3,
 	.max_entries = 4,
 },
-
 {
 	.descr = "struct test #3 Invalid member offset",
 	.raw_types = {
@@ -319,7 +323,664 @@ static struct btf_raw_test raw_tests[] = {
 	.btf_load_err = true,
 	.err_str = "Invalid member bits_offset",
 },
-
+/*
+ * struct A {
+ *	unsigned long long m;
+ *	int n;
+ *	char o;
+ *	[3 bytes hole]
+ *	int p[8];
+ * };
+ */
+{
+	.descr = "global data test #1",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_test1_map",
+	.key_size = sizeof(int),
+	.value_size = 48,
+	.key_type_id = 1,
+	.value_type_id = 5,
+	.max_entries = 4,
+},
+/*
+ * struct A {
+ *	unsigned long long m;
+ *	int n;
+ *	char o;
+ *	[3 bytes hole]
+ *	int p[8];
+ * };
+ * static struct A t; <- in .bss
+ */
+{
+	.descr = "global data test #2",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		/* } */
+		/* static struct A t */
+		BTF_VAR_ENC(NAME_TBD, 5, 0),			/* [6] */
+		/* .bss section */				/* [7] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48),
+		BTF_VAR_SECINFO_ENC(6, 0, 48),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 48,
+	.key_type_id = 0,
+	.value_type_id = 7,
+	.max_entries = 1,
+},
+{
+	.descr = "global data test #3",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* static int t */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		/* .bss section */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(2, 0, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0t\0.bss",
+	.str_sec_size = sizeof("\0t\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 3,
+	.max_entries = 1,
+},
+{
+	.descr = "global data test #4, unsupported linkage",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* static int t */
+		BTF_VAR_ENC(NAME_TBD, 1, 2),			/* [2] */
+		/* .bss section */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(2, 0, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0t\0.bss",
+	.str_sec_size = sizeof("\0t\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 3,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Linkage not supported",
+},
+{
+	.descr = "global data test #5, invalid var type",
+	.raw_types = {
+		/* static void t */
+		BTF_VAR_ENC(NAME_TBD, 0, 0),			/* [1] */
+		/* .bss section */				/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(1, 0, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0t\0.bss",
+	.str_sec_size = sizeof("\0t\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type_id",
+},
+{
+	.descr = "global data test #6, invalid var type (fwd type)",
+	.raw_types = {
+		/* union A */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */
+		/* static union A t */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		/* .bss section */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(2, 0, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0t\0.bss",
+	.str_sec_size = sizeof("\0A\0t\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type",
+},
+{
+	.descr = "global data test #7, invalid var type (fwd type)",
+	.raw_types = {
+		/* union A */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */
+		/* static union A t */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		/* .bss section */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(1, 0, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0t\0.bss",
+	.str_sec_size = sizeof("\0A\0t\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type",
+},
+{
+	.descr = "global data test #8, invalid var size",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		/* } */
+		/* static struct A t */
+		BTF_VAR_ENC(NAME_TBD, 5, 0),			/* [6] */
+		/* .bss section */				/* [7] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48),
+		BTF_VAR_SECINFO_ENC(6, 0, 47),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 48,
+	.key_type_id = 0,
+	.value_type_id = 7,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid size",
+},
+{
+	.descr = "global data test #9, invalid var size",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		/* } */
+		/* static struct A t */
+		BTF_VAR_ENC(NAME_TBD, 5, 0),			/* [6] */
+		/* .bss section */				/* [7] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46),
+		BTF_VAR_SECINFO_ENC(6, 0, 48),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 48,
+	.key_type_id = 0,
+	.value_type_id = 7,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid size",
+},
+{
+	.descr = "global data test #10, invalid var size",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		/* } */
+		/* static struct A t */
+		BTF_VAR_ENC(NAME_TBD, 5, 0),			/* [6] */
+		/* .bss section */				/* [7] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46),
+		BTF_VAR_SECINFO_ENC(6, 0, 46),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 48,
+	.key_type_id = 0,
+	.value_type_id = 7,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid size",
+},
+{
+	.descr = "global data test #11, multiple section members",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		/* } */
+		/* static struct A t */
+		BTF_VAR_ENC(NAME_TBD, 5, 0),			/* [6] */
+		/* static int u */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [7] */
+		/* .bss section */				/* [8] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+		BTF_VAR_SECINFO_ENC(6, 10, 48),
+		BTF_VAR_SECINFO_ENC(7, 58, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 62,
+	.key_type_id = 0,
+	.value_type_id = 8,
+	.max_entries = 1,
+},
+{
+	.descr = "global data test #12, invalid offset",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		/* } */
+		/* static struct A t */
+		BTF_VAR_ENC(NAME_TBD, 5, 0),			/* [6] */
+		/* static int u */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [7] */
+		/* .bss section */				/* [8] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+		BTF_VAR_SECINFO_ENC(6, 10, 48),
+		BTF_VAR_SECINFO_ENC(7, 60, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 62,
+	.key_type_id = 0,
+	.value_type_id = 8,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid offset+size",
+},
+{
+	.descr = "global data test #13, invalid offset",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		/* } */
+		/* static struct A t */
+		BTF_VAR_ENC(NAME_TBD, 5, 0),			/* [6] */
+		/* static int u */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [7] */
+		/* .bss section */				/* [8] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+		BTF_VAR_SECINFO_ENC(6, 10, 48),
+		BTF_VAR_SECINFO_ENC(7, 12, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 62,
+	.key_type_id = 0,
+	.value_type_id = 8,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid offset",
+},
+{
+	.descr = "global data test #14, invalid offset",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		/* } */
+		/* static struct A t */
+		BTF_VAR_ENC(NAME_TBD, 5, 0),			/* [6] */
+		/* static int u */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [7] */
+		/* .bss section */				/* [8] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+		BTF_VAR_SECINFO_ENC(7, 58, 4),
+		BTF_VAR_SECINFO_ENC(6, 10, 48),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 62,
+	.key_type_id = 0,
+	.value_type_id = 8,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid offset",
+},
+{
+	.descr = "global data test #15, not var kind",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		/* .bss section */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(1, 0, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0t\0.bss",
+	.str_sec_size = sizeof("\0A\0t\0.bss"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 3,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Not a VAR kind member",
+},
+{
+	.descr = "global data test #16, invalid var referencing sec",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 5, 0),			/* [2] */
+		BTF_VAR_ENC(NAME_TBD, 2, 0),			/* [3] */
+		/* a section */					/* [4] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(3, 0, 4),
+		/* a section */					/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(6, 0, 4),
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [6] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0t\0s\0a\0a",
+	.str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 4,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type_id",
+},
+{
+	.descr = "global data test #17, invalid var referencing var",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_VAR_ENC(NAME_TBD, 2, 0),			/* [3] */
+		/* a section */					/* [4] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(3, 0, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0t\0s\0a\0a",
+	.str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 4,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type_id",
+},
+{
+	.descr = "global data test #18, invalid var loop",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 2, 0),			/* [2] */
+		/* .bss section */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(2, 0, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0t\0aaa",
+	.str_sec_size = sizeof("\0A\0t\0aaa"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 4,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type_id",
+},
+{
+	.descr = "global data test #19, invalid var referencing var",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 3, 0),			/* [2] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [3] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0t\0s\0a\0a",
+	.str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 4,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type_id",
+},
+{
+	.descr = "global data test #20, invalid ptr referencing var",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* PTR type_id=3	*/			/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [3] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0t\0s\0a\0a",
+	.str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 4,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type_id",
+},
+{
+	.descr = "global data test #21, var included in struct",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* struct A { */				/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* VAR type_id=3; */
+		/* } */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [3] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0t\0s\0a\0a",
+	.str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 4,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid member",
+},
+{
+	.descr = "global data test #22, array of var",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 4),			/* [2] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [3] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0t\0s\0a\0a",
+	.str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = ".bss",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 0,
+	.value_type_id = 4,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid elem",
+},
 /* Test member exceeds the size of struct.
  *
  * struct A {
-- 
cgit v1.2.3


From 69a0f9ecef22131982ba328e6b74ebb082bc0992 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 9 Apr 2019 17:37:41 -0700
Subject: bpf, bpftool: fix a few ubsan warnings

The issue is reported at https://github.com/libbpf/libbpf/issues/28.

Basically, per C standard, for
  void *memcpy(void *dest, const void *src, size_t n)
if "dest" or "src" is NULL, regardless of whether "n" is 0 or not,
the result of memcpy is undefined. clang ubsan reported three such
instances in bpf.c with the following pattern:
  memcpy(dest, 0, 0).

Although in practice, no known compiler will cause issues when
copy size is 0. Let us still fix the issue to silence ubsan
warnings.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/bpf.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index c039094ad3aa..dababce68f0b 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -79,7 +79,6 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
 
 int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 {
-	__u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
 	union bpf_attr attr;
 
 	memset(&attr, '\0', sizeof(attr));
@@ -89,8 +88,9 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 	attr.value_size = create_attr->value_size;
 	attr.max_entries = create_attr->max_entries;
 	attr.map_flags = create_attr->map_flags;
-	memcpy(attr.map_name, create_attr->name,
-	       min(name_len, BPF_OBJ_NAME_LEN - 1));
+	if (create_attr->name)
+		memcpy(attr.map_name, create_attr->name,
+		       min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1));
 	attr.numa_node = create_attr->numa_node;
 	attr.btf_fd = create_attr->btf_fd;
 	attr.btf_key_type_id = create_attr->btf_key_type_id;
@@ -155,7 +155,6 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
 			       int key_size, int inner_map_fd, int max_entries,
 			       __u32 map_flags, int node)
 {
-	__u32 name_len = name ? strlen(name) : 0;
 	union bpf_attr attr;
 
 	memset(&attr, '\0', sizeof(attr));
@@ -166,7 +165,9 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
 	attr.inner_map_fd = inner_map_fd;
 	attr.max_entries = max_entries;
 	attr.map_flags = map_flags;
-	memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
+	if (name)
+		memcpy(attr.map_name, name,
+		       min(strlen(name), BPF_OBJ_NAME_LEN - 1));
 
 	if (node >= 0) {
 		attr.map_flags |= BPF_F_NUMA_NODE;
@@ -216,7 +217,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	void *finfo = NULL, *linfo = NULL;
 	union bpf_attr attr;
 	__u32 log_level;
-	__u32 name_len;
 	int fd;
 
 	if (!load_attr || !log_buf != !log_buf_sz)
@@ -226,8 +226,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
 		return -EINVAL;
 
-	name_len = load_attr->name ? strlen(load_attr->name) : 0;
-
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = load_attr->prog_type;
 	attr.expected_attach_type = load_attr->expected_attach_type;
@@ -253,8 +251,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	attr.line_info_rec_size = load_attr->line_info_rec_size;
 	attr.line_info_cnt = load_attr->line_info_cnt;
 	attr.line_info = ptr_to_u64(load_attr->line_info);
-	memcpy(attr.prog_name, load_attr->name,
-	       min(name_len, BPF_OBJ_NAME_LEN - 1));
+	if (load_attr->name)
+		memcpy(attr.prog_name, load_attr->name,
+		       min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
 
 	fd = sys_bpf_prog_load(&attr, sizeof(attr));
 	if (fd >= 0)
-- 
cgit v1.2.3


From 50bd645b3a21a374dbd0fa8273a5f4e98001fb05 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 10 Apr 2019 08:54:16 +0200
Subject: libbpf: fix crash in XDP socket part with new larger BPF_LOG_BUF_SIZE

In commit da11b417583e ("libbpf: teach libbpf about log_level bit 2"),
the BPF_LOG_BUF_SIZE was increased to 16M. The XDP socket part of
libbpf allocated the log_buf on the stack, but for the new 16M buffer
size this is not going to work. Change the code so it uses a 16K buffer
instead.

Fixes: da11b417583e ("libbpf: teach libbpf about log_level bit 2")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/xsk.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 8d0078b65486..557ef8d1250d 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -259,7 +259,8 @@ out_umem_alloc:
 
 static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 {
-	char bpf_log_buf[BPF_LOG_BUF_SIZE];
+	static const int log_buf_size = 16 * 1024;
+	char log_buf[log_buf_size];
 	int err, prog_fd;
 
 	/* This is the C-program:
@@ -308,10 +309,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
 
 	prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
-				   "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf,
-				   BPF_LOG_BUF_SIZE);
+				   "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
+				   log_buf_size);
 	if (prog_fd < 0) {
-		pr_warning("BPF log buffer:\n%s", bpf_log_buf);
+		pr_warning("BPF log buffer:\n%s", log_buf);
 		return prog_fd;
 	}
 
-- 
cgit v1.2.3


From d5adbdd77ecc1d841af244a10958792141830d30 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Wed, 10 Apr 2019 18:36:43 -0700
Subject: libbpf: Fix build with gcc-8

Reported in [1].

With gcc 8.3.0 the following error is issued:

  cc -Ibpf@sta -I. -I.. -I.././include -I.././include/uapi
  -fdiagnostics-color=always -fsanitize=address,undefined -fno-omit-frame-pointer
  -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Werror -g -fPIC -g -O2
  -Werror -Wall -Wno-pointer-arith -Wno-sign-compare  -MD -MQ
  'bpf@sta/src_libbpf.c.o' -MF 'bpf@sta/src_libbpf.c.o.d' -o
  'bpf@sta/src_libbpf.c.o' -c ../src/libbpf.c
  ../src/libbpf.c: In function 'bpf_object__elf_collect':
  ../src/libbpf.c:947:18: error: 'map_def_sz' may be used uninitialized in this
  function [-Werror=maybe-uninitialized]
     if (map_def_sz <= sizeof(struct bpf_map_def)) {
         ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ../src/libbpf.c:827:18: note: 'map_def_sz' was declared here
    int i, map_idx, map_def_sz, nr_syms, nr_maps = 0, nr_maps_glob = 0;
                    ^~~~~~~~~~

According to [2] -Wmaybe-uninitialized is enabled by -Wall.
Same error is generated by clang's -Wconditional-uninitialized.

[1] https://github.com/libbpf/libbpf/pull/29#issuecomment-481902601
[2] https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html

Fixes: d859900c4c56 ("bpf, libbpf: support global data/bss/rodata sections")
Reported-by: Evgeny Vereshchagin <evvers@ya.ru>
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e2a457e7c318..67484cf32b2e 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -824,7 +824,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
 static int
 bpf_object__init_maps(struct bpf_object *obj, int flags)
 {
-	int i, map_idx, map_def_sz, nr_syms, nr_maps = 0, nr_maps_glob = 0;
+	int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0;
 	bool strict = !(flags & MAPS_RELAX_COMPAT);
 	Elf_Data *symbols = obj->efile.symbols;
 	Elf_Data *data = NULL;
-- 
cgit v1.2.3


From 569b0c77735d1567963cb4601390877b30458be4 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Wed, 10 Apr 2019 13:56:42 +0900
Subject: tools/bpftool: show btf id in program information

Let's add a way to know whether a program has btf context.
Patch adds 'btf_id' in the output of program listing.
When btf_id is present, it means program has btf context.

Sample output:
user@test# bpftool prog list
25: xdp  name xdp_prog1  tag 539ec6ce11b52f98  gpl
	loaded_at 2019-04-10T11:44:20+0900  uid 0
	xlated 488B  not jited  memlock 4096B  map_ids 23
	btf_id 1

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/prog.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index d2be5a06c339..81067803189e 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -249,6 +249,9 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
 	if (info->nr_map_ids)
 		show_prog_maps(fd, info->nr_map_ids);
 
+	if (info->btf_id)
+		jsonw_int_field(json_wtr, "btf_id", info->btf_id);
+
 	if (!hash_empty(prog_table.table)) {
 		struct pinned_obj *obj;
 
@@ -319,6 +322,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 		}
 	}
 
+	if (info->btf_id)
+		printf("\n\tbtf_id %d\n", info->btf_id);
+
 	printf("\n");
 }
 
-- 
cgit v1.2.3


From 5e903c656b98614698a891c6e098186272cbba14 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 9 Apr 2019 11:49:10 -0700
Subject: libbpf: add support for ctx_{size, }_{in, out} in BPF_PROG_TEST_RUN

Support recently introduced input/output context for test runs.
We extend only bpf_prog_test_run_xattr. bpf_prog_test_run is
unextendable and left as is.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 7 +++++++
 tools/lib/bpf/bpf.c            | 5 +++++
 tools/lib/bpf/bpf.h            | 5 +++++
 3 files changed, 17 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index af1cbd951f26..31a27dd337dc 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -412,6 +412,13 @@ union bpf_attr {
 		__aligned_u64	data_out;
 		__u32		repeat;
 		__u32		duration;
+		__u32		ctx_size_in;	/* input: len of ctx_in */
+		__u32		ctx_size_out;	/* input/output: len of ctx_out
+						 *   returns ENOSPC if ctx_out
+						 *   is too small.
+						 */
+		__aligned_u64	ctx_in;
+		__aligned_u64	ctx_out;
 	} test;
 
 	struct { /* anonymous struct used by BPF_*_GET_*_ID */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index dababce68f0b..955191c64b64 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -554,10 +554,15 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
 	attr.test.data_out = ptr_to_u64(test_attr->data_out);
 	attr.test.data_size_in = test_attr->data_size_in;
 	attr.test.data_size_out = test_attr->data_size_out;
+	attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
+	attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
+	attr.test.ctx_size_in = test_attr->ctx_size_in;
+	attr.test.ctx_size_out = test_attr->ctx_size_out;
 	attr.test.repeat = test_attr->repeat;
 
 	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
 	test_attr->data_size_out = attr.test.data_size_out;
+	test_attr->ctx_size_out = attr.test.ctx_size_out;
 	test_attr->retval = attr.test.retval;
 	test_attr->duration = attr.test.duration;
 	return ret;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index c9d218d21453..bc30783d1403 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -136,6 +136,11 @@ struct bpf_prog_test_run_attr {
 			      * out: length of data_out */
 	__u32 retval;        /* out: return code of the BPF program */
 	__u32 duration;      /* out: average per repetition in ns */
+	const void *ctx_in; /* optional */
+	__u32 ctx_size_in;
+	void *ctx_out;      /* optional */
+	__u32 ctx_size_out; /* in: max length of ctx_out
+			     * out: length of cxt_out */
 };
 
 LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
-- 
cgit v1.2.3


From 3daf8e703ec3dcf73a27a7dcabbac152793eb114 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 9 Apr 2019 11:49:11 -0700
Subject: selftests: bpf: add selftest for __sk_buff context in
 BPF_PROG_TEST_RUN

Simple test that sets cb to {1,2,3,4,5} and priority to 6, runs bpf
program that fails if cb is not what we expect and increments cb[i] and
priority. When the test finishes, we check that cb is now {2,3,4,5,6}
and priority is 7.

We also test the sanity checks:
* ctx_in is provided, but ctx_size_in is zero (same for
  ctx_out/ctx_size_out)
* unexpected non-zero fields in __sk_buff return EINVAL

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/prog_tests/skb_ctx.c | 89 ++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_skb_ctx.c | 21 ++++++
 2 files changed, 110 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/skb_ctx.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_skb_ctx.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
new file mode 100644
index 000000000000..e95baa32e277
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_skb_ctx(void)
+{
+	struct __sk_buff skb = {
+		.cb[0] = 1,
+		.cb[1] = 2,
+		.cb[2] = 3,
+		.cb[3] = 4,
+		.cb[4] = 5,
+		.priority = 6,
+	};
+	struct bpf_prog_test_run_attr tattr = {
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.ctx_in = &skb,
+		.ctx_size_in = sizeof(skb),
+		.ctx_out = &skb,
+		.ctx_size_out = sizeof(skb),
+	};
+	struct bpf_object *obj;
+	int err;
+	int i;
+
+	err = bpf_prog_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+			    &tattr.prog_fd);
+	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+		return;
+
+	/* ctx_in != NULL, ctx_size_in == 0 */
+
+	tattr.ctx_size_in = 0;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "ctx_size_in", "err %d errno %d\n", err, errno);
+	tattr.ctx_size_in = sizeof(skb);
+
+	/* ctx_out != NULL, ctx_size_out == 0 */
+
+	tattr.ctx_size_out = 0;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "ctx_size_out", "err %d errno %d\n", err, errno);
+	tattr.ctx_size_out = sizeof(skb);
+
+	/* non-zero [len, tc_index] fields should be rejected*/
+
+	skb.len = 1;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "len", "err %d errno %d\n", err, errno);
+	skb.len = 0;
+
+	skb.tc_index = 1;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "tc_index", "err %d errno %d\n", err, errno);
+	skb.tc_index = 0;
+
+	/* non-zero [hash, sk] fields should be rejected */
+
+	skb.hash = 1;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "hash", "err %d errno %d\n", err, errno);
+	skb.hash = 0;
+
+	skb.sk = (struct bpf_sock *)1;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err == 0, "sk", "err %d errno %d\n", err, errno);
+	skb.sk = 0;
+
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err != 0 || tattr.retval,
+		   "run",
+		   "err %d errno %d retval %d\n",
+		   err, errno, tattr.retval);
+
+	CHECK_ATTR(tattr.ctx_size_out != sizeof(skb),
+		   "ctx_size_out",
+		   "incorrect output size, want %lu have %u\n",
+		   sizeof(skb), tattr.ctx_size_out);
+
+	for (i = 0; i < 5; i++)
+		CHECK_ATTR(skb.cb[i] != i + 2,
+			   "ctx_out_cb",
+			   "skb->cb[i] == %d, expected %d\n",
+			   skb.cb[i], i + 2);
+	CHECK_ATTR(skb.priority != 7,
+		   "ctx_out_priority",
+		   "skb->priority == %d, expected %d\n",
+		   skb.priority, 7);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
new file mode 100644
index 000000000000..7a80960d7df1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+SEC("skb_ctx")
+int process(struct __sk_buff *skb)
+{
+	#pragma clang loop unroll(full)
+	for (int i = 0; i < 5; i++) {
+		if (skb->cb[i] != i + 1)
+			return 1;
+		skb->cb[i]++;
+	}
+	skb->priority++;
+
+	return 0;
+}
-- 
cgit v1.2.3


From 9e35552ae1eafd666e7388a1a94a321665d2f911 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 11 Apr 2019 19:12:20 +0300
Subject: net: sched: flower: use correct ht function to prevent duplicates

Implementation of function rhashtable_insert_fast() check if its internal
helper function __rhashtable_insert_fast() returns non-NULL pointer and
seemingly return -EEXIST in such case. However, since
__rhashtable_insert_fast() is called with NULL key pointer, it never
actually checks for duplicates, which means that -EEXIST is never returned
to the user. Use rhashtable_lookup_insert_fast() hash table API instead. In
order to verify that it works as expected and prevent the problem from
happening in future, extend tc-tests with new test that verifies that no
new filters with existing key can be inserted to flower classifier.

Fixes: 1f17f7742eeb ("net: sched: flower: insert filter to ht before offloading it to hw")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c                               |  6 +++---
 .../selftests/tc-testing/tc-tests/filters/tests.json | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 2763176e369c..9cd8122a5c38 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1466,9 +1466,9 @@ static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
 	struct fl_flow_mask *mask = fnew->mask;
 	int err;
 
-	err = rhashtable_insert_fast(&mask->ht,
-				     &fnew->ht_node,
-				     mask->filter_ht_params);
+	err = rhashtable_lookup_insert_fast(&mask->ht,
+					    &fnew->ht_node,
+					    mask->filter_ht_params);
 	if (err) {
 		*in_ht = false;
 		/* It is okay if filter with same key exists when
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 2d096b2abf2c..e2f92cefb8d5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -58,5 +58,25 @@
             "$TC qdisc del dev $DEV2 ingress",
             "/bin/rm $BATCH_FILE"
         ]
+    },
+    {
+        "id": "4cbd",
+        "name": "Try to add filter with duplicate key",
+        "category": [
+            "filter",
+            "flower"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV2 ingress",
+            "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
+        "expExitCode": "2",
+        "verifyCmd": "$TC -s filter show dev $DEV2 ingress",
+        "matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV2 ingress"
+        ]
     }
 ]
-- 
cgit v1.2.3


From 166b5a7f2ca3803ab0a7bb33ac2300e616de2470 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Tue, 9 Apr 2019 15:06:40 +0100
Subject: selftests_bpf: extend test_tc_tunnel for UDP encap

commit 868d523535c2 ("bpf: add bpf_skb_adjust_room encap flags")
introduced support to bpf_skb_adjust_room for GSO-friendly GRE
and UDP encapsulation and later introduced associated test_tc_tunnel
tests.  Here those tests are extended to cover UDP encapsulation also.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/config                 |   4 +
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 140 ++++++++++++++-------
 tools/testing/selftests/bpf/test_tc_tunnel.sh      |  47 ++++++-
 3 files changed, 143 insertions(+), 48 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index a42f4fc4dc11..8e749c1f5bf6 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -25,3 +25,7 @@ CONFIG_XDP_SOCKETS=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_IPV6_TUNNEL=y
 CONFIG_IPV6_GRE=y
+CONFIG_NET_FOU=m
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_IPV6_FOU=m
+CONFIG_IPV6_FOU_TUNNEL=m
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index f541c2de947d..33b338e6cf09 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -12,6 +12,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
+#include <linux/udp.h>
 #include <linux/pkt_cls.h>
 #include <linux/types.h>
 
@@ -20,16 +21,27 @@
 
 static const int cfg_port = 8000;
 
-struct grev4hdr {
-	struct iphdr ip;
+static const int cfg_udp_src = 20000;
+static const int cfg_udp_dst = 5555;
+
+struct gre_hdr {
 	__be16 flags;
 	__be16 protocol;
 } __attribute__((packed));
 
-struct grev6hdr {
+union l4hdr {
+	struct udphdr udp;
+	struct gre_hdr gre;
+};
+
+struct v4hdr {
+	struct iphdr ip;
+	union l4hdr l4hdr;
+} __attribute__((packed));
+
+struct v6hdr {
 	struct ipv6hdr ip;
-	__be16 flags;
-	__be16 protocol;
+	union l4hdr l4hdr;
 } __attribute__((packed));
 
 static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -47,10 +59,10 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
 	iph->check = ~((csum & 0xffff) + (csum >> 16));
 }
 
-static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto)
 {
-	struct grev4hdr h_outer;
 	struct iphdr iph_inner;
+	struct v4hdr h_outer;
 	struct tcphdr tcph;
 	__u64 flags;
 	int olen;
@@ -70,12 +82,29 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
 	if (tcph.dest != __bpf_constant_htons(cfg_port))
 		return TC_ACT_OK;
 
+	olen = sizeof(h_outer.ip);
+
 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
-	if (with_gre) {
+	switch (encap_proto) {
+	case IPPROTO_GRE:
 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
-		olen = sizeof(h_outer);
-	} else {
-		olen = sizeof(h_outer.ip);
+		olen += sizeof(h_outer.l4hdr.gre);
+		h_outer.l4hdr.gre.protocol = bpf_htons(ETH_P_IP);
+		h_outer.l4hdr.gre.flags = 0;
+		break;
+	case IPPROTO_UDP:
+		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
+		olen += sizeof(h_outer.l4hdr.udp);
+		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
+		h_outer.l4hdr.udp.dest = __bpf_constant_htons(cfg_udp_dst);
+		h_outer.l4hdr.udp.check = 0;
+		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
+						  sizeof(h_outer.l4hdr.udp));
+		break;
+	case IPPROTO_IPIP:
+		break;
+	default:
+		return TC_ACT_OK;
 	}
 
 	/* add room between mac and network header */
@@ -85,16 +114,10 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
 	/* prepare new outer network header */
 	h_outer.ip = iph_inner;
 	h_outer.ip.tot_len = bpf_htons(olen +
-				      bpf_htons(h_outer.ip.tot_len));
-	if (with_gre) {
-		h_outer.ip.protocol = IPPROTO_GRE;
-		h_outer.protocol = bpf_htons(ETH_P_IP);
-		h_outer.flags = 0;
-	} else {
-		h_outer.ip.protocol = IPPROTO_IPIP;
-	}
+				       bpf_ntohs(h_outer.ip.tot_len));
+	h_outer.ip.protocol = encap_proto;
 
-	set_ipv4_csum((void *)&h_outer.ip);
+	set_ipv4_csum(&h_outer.ip);
 
 	/* store new outer network header */
 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
@@ -104,11 +127,12 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
 	return TC_ACT_OK;
 }
 
-static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto)
 {
 	struct ipv6hdr iph_inner;
-	struct grev6hdr h_outer;
+	struct v6hdr h_outer;
 	struct tcphdr tcph;
+	__u16 tot_len;
 	__u64 flags;
 	int olen;
 
@@ -124,15 +148,32 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
 	if (tcph.dest != __bpf_constant_htons(cfg_port))
 		return TC_ACT_OK;
 
+	olen = sizeof(h_outer.ip);
+
 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
-	if (with_gre) {
+	switch (encap_proto) {
+	case IPPROTO_GRE:
 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
-		olen = sizeof(h_outer);
-	} else {
-		olen = sizeof(h_outer.ip);
+		olen += sizeof(h_outer.l4hdr.gre);
+		h_outer.l4hdr.gre.protocol = bpf_htons(ETH_P_IPV6);
+		h_outer.l4hdr.gre.flags = 0;
+		break;
+	case IPPROTO_UDP:
+		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
+		olen += sizeof(h_outer.l4hdr.udp);
+		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
+		h_outer.l4hdr.udp.dest = __bpf_constant_htons(cfg_udp_dst);
+		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
+			  sizeof(h_outer.l4hdr.udp);
+		h_outer.l4hdr.udp.check = 0;
+		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
+		break;
+	case IPPROTO_IPV6:
+		break;
+	default:
+		return TC_ACT_OK;
 	}
 
-
 	/* add room between mac and network header */
 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
 		return TC_ACT_SHOT;
@@ -141,13 +182,8 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
 	h_outer.ip = iph_inner;
 	h_outer.ip.payload_len = bpf_htons(olen +
 					   bpf_ntohs(h_outer.ip.payload_len));
-	if (with_gre) {
-		h_outer.ip.nexthdr = IPPROTO_GRE;
-		h_outer.protocol = bpf_htons(ETH_P_IPV6);
-		h_outer.flags = 0;
-	} else {
-		h_outer.ip.nexthdr = IPPROTO_IPV6;
-	}
+
+	h_outer.ip.nexthdr = encap_proto;
 
 	/* store new outer network header */
 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
@@ -161,7 +197,7 @@ SEC("encap_ipip")
 int __encap_ipip(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
-		return encap_ipv4(skb, false);
+		return encap_ipv4(skb, IPPROTO_IPIP);
 	else
 		return TC_ACT_OK;
 }
@@ -170,7 +206,16 @@ SEC("encap_gre")
 int __encap_gre(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
-		return encap_ipv4(skb, true);
+		return encap_ipv4(skb, IPPROTO_GRE);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_udp")
+int __encap_udp(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv4(skb, IPPROTO_UDP);
 	else
 		return TC_ACT_OK;
 }
@@ -179,7 +224,7 @@ SEC("encap_ip6tnl")
 int __encap_ip6tnl(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
-		return encap_ipv6(skb, false);
+		return encap_ipv6(skb, IPPROTO_IPV6);
 	else
 		return TC_ACT_OK;
 }
@@ -188,23 +233,34 @@ SEC("encap_ip6gre")
 int __encap_ip6gre(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
-		return encap_ipv6(skb, true);
+		return encap_ipv6(skb, IPPROTO_GRE);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_ip6udp")
+int __encap_ip6udp(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+		return encap_ipv6(skb, IPPROTO_UDP);
 	else
 		return TC_ACT_OK;
 }
 
 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 {
-	char buf[sizeof(struct grev6hdr)];
-	int olen;
+	char buf[sizeof(struct v6hdr)];
+	int olen = len;
 
 	switch (proto) {
 	case IPPROTO_IPIP:
 	case IPPROTO_IPV6:
-		olen = len;
 		break;
 	case IPPROTO_GRE:
-		olen = len + 4 /* gre hdr */;
+		olen += sizeof(struct gre_hdr);
+		break;
+	case IPPROTO_UDP:
+		olen += sizeof(struct udphdr);
 		break;
 	default:
 		return TC_ACT_OK;
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index c805adb88f3a..64f30910d39a 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -15,6 +15,9 @@ readonly ns2_v4=192.168.1.2
 readonly ns1_v6=fd::1
 readonly ns2_v6=fd::2
 
+# Must match port used by bpf program
+readonly udpport=5555
+
 readonly infile="$(mktemp)"
 readonly outfile="$(mktemp)"
 
@@ -38,8 +41,8 @@ setup() {
 	# clamp route to reserve room for tunnel headers
 	ip -netns "${ns1}" -4 route flush table main
 	ip -netns "${ns1}" -6 route flush table main
-	ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1476 dev veth1
-	ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1456 dev veth1
+	ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1472 dev veth1
+	ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1452 dev veth1
 
 	sleep 1
 
@@ -103,6 +106,18 @@ if [[ "$#" -eq "0" ]]; then
 	echo "ip6 gre gso"
 	$0 ipv6 ip6gre 2000
 
+	echo "ip udp"
+	$0 ipv4 udp 100
+
+	echo "ip6 udp"
+	$0 ipv6 ip6udp 100
+
+	echo "ip udp gso"
+	$0 ipv4 udp 2000
+
+	echo "ip6 udp gso"
+	$0 ipv6 ip6udp 2000
+
 	echo "OK. All tests passed"
 	exit 0
 fi
@@ -117,12 +132,20 @@ case "$1" in
 "ipv4")
 	readonly addr1="${ns1_v4}"
 	readonly addr2="${ns2_v4}"
-	readonly netcat_opt=-4
+	readonly ipproto=4
+	readonly netcat_opt=-${ipproto}
+	readonly foumod=fou
+	readonly foutype=ipip
+	readonly fouproto=4
 	;;
 "ipv6")
 	readonly addr1="${ns1_v6}"
 	readonly addr2="${ns2_v6}"
-	readonly netcat_opt=-6
+	readonly ipproto=6
+	readonly netcat_opt=-${ipproto}
+	readonly foumod=fou6
+	readonly foutype=ip6tnl
+	readonly fouproto="41 -6"
 	;;
 *)
 	echo "unknown arg: $1"
@@ -155,11 +178,23 @@ echo "test bpf encap without decap (expect failure)"
 server_listen
 ! client_connect
 
+if [[ "$tuntype" =~ "udp" ]]; then
+	# Set up fou tunnel.
+	ttype="${foutype}"
+	targs="encap fou encap-sport auto encap-dport $udpport"
+	# fou may be a module; allow this to fail.
+	modprobe "${foumod}" ||true
+	ip netns exec "${ns2}" ip fou add port 5555 ipproto ${fouproto}
+else
+	ttype=$tuntype
+	targs=""
+fi
+
 # serverside, insert decap module
 # server is still running
 # client can connect again
-ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
-	remote "${addr1}" local "${addr2}"
+ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
+	remote "${addr1}" local "${addr2}" $targs
 # Because packets are decapped by the tunnel they arrive on testtun0 from
 # the IP stack perspective.  Ensure reverse path filtering is disabled
 # otherwise we drop the TCP SYN as arriving on testtun0 instead of the
-- 
cgit v1.2.3


From 1db04c300a41e17892bf83ed0d1aa681416ee150 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Tue, 9 Apr 2019 15:06:42 +0100
Subject: bpf: sync bpf.h to tools/ for BPF_F_ADJ_ROOM_ENCAP_L2

Sync include/uapi/linux/bpf.h with tools/ equivalent to add
BPF_F_ADJ_ROOM_ENCAP_L2(len) macro.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 31a27dd337dc..2e96d0b4bf65 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1523,6 +1523,10 @@ union bpf_attr {
  *		* **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
  *		  Use with ENCAP_L3 flags to further specify the tunnel type.
  *
+ *		* **BPF_F_ADJ_ROOM_ENCAP_L2(len) **:
+ *		  Use with ENCAP_L3/L4 flags to further specify the tunnel
+ *		  type; **len** is the length of the inner MAC header.
+ *
  * 		A call to this helper is susceptible to change the underlaying
  * 		packet buffer. Therefore, at load time, all checks on pointers
  * 		previously done by the verifier are invalidated and must be
@@ -2664,10 +2668,16 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_adjust_room flags. */
 #define BPF_F_ADJ_ROOM_FIXED_GSO	(1ULL << 0)
 
+#define	BPF_ADJ_ROOM_ENCAP_L2_MASK	0xff
+#define	BPF_ADJ_ROOM_ENCAP_L2_SHIFT	56
+
 #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	(1ULL << 1)
 #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	(1ULL << 2)
 #define BPF_F_ADJ_ROOM_ENCAP_L4_GRE	(1ULL << 3)
 #define BPF_F_ADJ_ROOM_ENCAP_L4_UDP	(1ULL << 4)
+#define	BPF_F_ADJ_ROOM_ENCAP_L2(len)	(((__u64)len & \
+					  BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+					 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
 
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
-- 
cgit v1.2.3


From 3ec61df82ba0c2d2455da838ee46bf60f2256b56 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Tue, 9 Apr 2019 15:06:43 +0100
Subject: selftests_bpf: add L2 encap to test_tc_tunnel

Update test_tc_tunnel to verify adding inner L2 header
encapsulation (an MPLS label or ethernet header) works.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/config                 |   4 +
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 219 ++++++++++++++++++---
 tools/testing/selftests/bpf/test_tc_tunnel.sh      | 113 ++++++++---
 3 files changed, 277 insertions(+), 59 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 8e749c1f5bf6..8c976476f6fd 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -29,3 +29,7 @@ CONFIG_NET_FOU=m
 CONFIG_NET_FOU_IP_TUNNELS=y
 CONFIG_IPV6_FOU=m
 CONFIG_IPV6_FOU_TUNNEL=m
+CONFIG_MPLS=y
+CONFIG_NET_MPLS_GSO=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 33b338e6cf09..bcb00d737e95 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -11,6 +11,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/mpls.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/pkt_cls.h>
@@ -22,7 +23,14 @@
 static const int cfg_port = 8000;
 
 static const int cfg_udp_src = 20000;
-static const int cfg_udp_dst = 5555;
+
+#define	UDP_PORT		5555
+#define	MPLS_OVER_UDP_PORT	6635
+#define	ETH_OVER_UDP_PORT	7777
+
+/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
+static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
+						     MPLS_LS_S_MASK | 0xff);
 
 struct gre_hdr {
 	__be16 flags;
@@ -37,11 +45,13 @@ union l4hdr {
 struct v4hdr {
 	struct iphdr ip;
 	union l4hdr l4hdr;
+	__u8 pad[16];			/* enough space for L2 header */
 } __attribute__((packed));
 
 struct v6hdr {
 	struct ipv6hdr ip;
 	union l4hdr l4hdr;
+	__u8 pad[16];			/* enough space for L2 header */
 } __attribute__((packed));
 
 static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -59,13 +69,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
 	iph->check = ~((csum & 0xffff) + (csum >> 16));
 }
 
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto)
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+				      __u16 l2_proto)
 {
+	__u16 udp_dst = UDP_PORT;
 	struct iphdr iph_inner;
 	struct v4hdr h_outer;
 	struct tcphdr tcph;
+	int olen, l2_len;
 	__u64 flags;
-	int olen;
 
 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
 			       sizeof(iph_inner)) < 0)
@@ -83,23 +95,38 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto)
 		return TC_ACT_OK;
 
 	olen = sizeof(h_outer.ip);
+	l2_len = 0;
 
 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
+
+	switch (l2_proto) {
+	case ETH_P_MPLS_UC:
+		l2_len = sizeof(mpls_label);
+		udp_dst = MPLS_OVER_UDP_PORT;
+		break;
+	case ETH_P_TEB:
+		l2_len = ETH_HLEN;
+		udp_dst = ETH_OVER_UDP_PORT;
+		break;
+	}
+	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
+
 	switch (encap_proto) {
 	case IPPROTO_GRE:
 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
 		olen += sizeof(h_outer.l4hdr.gre);
-		h_outer.l4hdr.gre.protocol = bpf_htons(ETH_P_IP);
+		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
 		h_outer.l4hdr.gre.flags = 0;
 		break;
 	case IPPROTO_UDP:
 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
 		olen += sizeof(h_outer.l4hdr.udp);
 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
-		h_outer.l4hdr.udp.dest = __bpf_constant_htons(cfg_udp_dst);
+		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
 		h_outer.l4hdr.udp.check = 0;
 		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
-						  sizeof(h_outer.l4hdr.udp));
+						  sizeof(h_outer.l4hdr.udp) +
+						  l2_len);
 		break;
 	case IPPROTO_IPIP:
 		break;
@@ -107,6 +134,19 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto)
 		return TC_ACT_OK;
 	}
 
+	/* add L2 encap (if specified) */
+	switch (l2_proto) {
+	case ETH_P_MPLS_UC:
+		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+		break;
+	case ETH_P_TEB:
+		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
+				       ETH_HLEN))
+			return TC_ACT_SHOT;
+		break;
+	}
+	olen += l2_len;
+
 	/* add room between mac and network header */
 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
 		return TC_ACT_SHOT;
@@ -127,14 +167,16 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto)
 	return TC_ACT_OK;
 }
 
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto)
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+				      __u16 l2_proto)
 {
+	__u16 udp_dst = UDP_PORT;
 	struct ipv6hdr iph_inner;
 	struct v6hdr h_outer;
 	struct tcphdr tcph;
+	int olen, l2_len;
 	__u16 tot_len;
 	__u64 flags;
-	int olen;
 
 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
 			       sizeof(iph_inner)) < 0)
@@ -149,20 +191,34 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto)
 		return TC_ACT_OK;
 
 	olen = sizeof(h_outer.ip);
+	l2_len = 0;
 
 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
+
+	switch (l2_proto) {
+	case ETH_P_MPLS_UC:
+		l2_len = sizeof(mpls_label);
+		udp_dst = MPLS_OVER_UDP_PORT;
+		break;
+	case ETH_P_TEB:
+		l2_len = ETH_HLEN;
+		udp_dst = ETH_OVER_UDP_PORT;
+		break;
+	}
+	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
+
 	switch (encap_proto) {
 	case IPPROTO_GRE:
 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
 		olen += sizeof(h_outer.l4hdr.gre);
-		h_outer.l4hdr.gre.protocol = bpf_htons(ETH_P_IPV6);
+		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
 		h_outer.l4hdr.gre.flags = 0;
 		break;
 	case IPPROTO_UDP:
 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
 		olen += sizeof(h_outer.l4hdr.udp);
 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
-		h_outer.l4hdr.udp.dest = __bpf_constant_htons(cfg_udp_dst);
+		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
 		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
 			  sizeof(h_outer.l4hdr.udp);
 		h_outer.l4hdr.udp.check = 0;
@@ -174,6 +230,19 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto)
 		return TC_ACT_OK;
 	}
 
+	/* add L2 encap (if specified) */
+	switch (l2_proto) {
+	case ETH_P_MPLS_UC:
+		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+		break;
+	case ETH_P_TEB:
+		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
+				       ETH_HLEN))
+			return TC_ACT_SHOT;
+		break;
+	}
+	olen += l2_len;
+
 	/* add room between mac and network header */
 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
 		return TC_ACT_SHOT;
@@ -193,56 +262,128 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto)
 	return TC_ACT_OK;
 }
 
-SEC("encap_ipip")
-int __encap_ipip(struct __sk_buff *skb)
+SEC("encap_ipip_none")
+int __encap_ipip_none(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_gre_none")
+int __encap_gre_none(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_gre_mpls")
+int __encap_gre_mpls(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_gre_eth")
+int __encap_gre_eth(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
-		return encap_ipv4(skb, IPPROTO_IPIP);
+		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
 	else
 		return TC_ACT_OK;
 }
 
-SEC("encap_gre")
-int __encap_gre(struct __sk_buff *skb)
+SEC("encap_udp_none")
+int __encap_udp_none(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
-		return encap_ipv4(skb, IPPROTO_GRE);
+		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
 	else
 		return TC_ACT_OK;
 }
 
-SEC("encap_udp")
-int __encap_udp(struct __sk_buff *skb)
+SEC("encap_udp_mpls")
+int __encap_udp_mpls(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
-		return encap_ipv4(skb, IPPROTO_UDP);
+		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_udp_eth")
+int __encap_udp_eth(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_ip6tnl_none")
+int __encap_ip6tnl_none(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_ip6gre_none")
+int __encap_ip6gre_none(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_ip6gre_mpls")
+int __encap_ip6gre_mpls(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_ip6gre_eth")
+int __encap_ip6gre_eth(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
 	else
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6tnl")
-int __encap_ip6tnl(struct __sk_buff *skb)
+SEC("encap_ip6udp_none")
+int __encap_ip6udp_none(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
-		return encap_ipv6(skb, IPPROTO_IPV6);
+		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
 	else
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6gre")
-int __encap_ip6gre(struct __sk_buff *skb)
+SEC("encap_ip6udp_mpls")
+int __encap_ip6udp_mpls(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
-		return encap_ipv6(skb, IPPROTO_GRE);
+		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
 	else
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6udp")
-int __encap_ip6udp(struct __sk_buff *skb)
+SEC("encap_ip6udp_eth")
+int __encap_ip6udp_eth(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
-		return encap_ipv6(skb, IPPROTO_UDP);
+		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
 	else
 		return TC_ACT_OK;
 }
@@ -250,6 +391,8 @@ int __encap_ip6udp(struct __sk_buff *skb)
 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 {
 	char buf[sizeof(struct v6hdr)];
+	struct gre_hdr greh;
+	struct udphdr udph;
 	int olen = len;
 
 	switch (proto) {
@@ -258,9 +401,29 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 		break;
 	case IPPROTO_GRE:
 		olen += sizeof(struct gre_hdr);
+		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
+			return TC_ACT_OK;
+		switch (bpf_ntohs(greh.protocol)) {
+		case ETH_P_MPLS_UC:
+			olen += sizeof(mpls_label);
+			break;
+		case ETH_P_TEB:
+			olen += ETH_HLEN;
+			break;
+		}
 		break;
 	case IPPROTO_UDP:
 		olen += sizeof(struct udphdr);
+		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
+			return TC_ACT_OK;
+		switch (bpf_ntohs(udph.dest)) {
+		case MPLS_OVER_UDP_PORT:
+			olen += sizeof(mpls_label);
+			break;
+		case ETH_OVER_UDP_PORT:
+			olen += ETH_HLEN;
+			break;
+		}
 		break;
 	default:
 		return TC_ACT_OK;
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 64f30910d39a..d4d8d5d3b06e 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -17,6 +17,9 @@ readonly ns2_v6=fd::2
 
 # Must match port used by bpf program
 readonly udpport=5555
+# MPLSoverUDP
+readonly mplsudpport=6635
+readonly mplsproto=137
 
 readonly infile="$(mktemp)"
 readonly outfile="$(mktemp)"
@@ -41,8 +44,8 @@ setup() {
 	# clamp route to reserve room for tunnel headers
 	ip -netns "${ns1}" -4 route flush table main
 	ip -netns "${ns1}" -6 route flush table main
-	ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1472 dev veth1
-	ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1452 dev veth1
+	ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
+	ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
 
 	sleep 1
 
@@ -89,42 +92,44 @@ set -e
 # no arguments: automated test, run all
 if [[ "$#" -eq "0" ]]; then
 	echo "ipip"
-	$0 ipv4 ipip 100
+	$0 ipv4 ipip none 100
 
 	echo "ip6ip6"
-	$0 ipv6 ip6tnl 100
+	$0 ipv6 ip6tnl none 100
 
-	echo "ip gre"
-	$0 ipv4 gre 100
+	for mac in none mpls eth ; do
+		echo "ip gre $mac"
+		$0 ipv4 gre $mac 100
 
-	echo "ip6 gre"
-	$0 ipv6 ip6gre 100
+		echo "ip6 gre $mac"
+		$0 ipv6 ip6gre $mac 100
 
-	echo "ip gre gso"
-	$0 ipv4 gre 2000
+		echo "ip gre $mac gso"
+		$0 ipv4 gre $mac 2000
 
-	echo "ip6 gre gso"
-	$0 ipv6 ip6gre 2000
+		echo "ip6 gre $mac gso"
+		$0 ipv6 ip6gre $mac 2000
 
-	echo "ip udp"
-	$0 ipv4 udp 100
+		echo "ip udp $mac"
+		$0 ipv4 udp $mac 100
 
-	echo "ip6 udp"
-	$0 ipv6 ip6udp 100
+		echo "ip6 udp $mac"
+		$0 ipv6 ip6udp $mac 100
 
-	echo "ip udp gso"
-	$0 ipv4 udp 2000
+		echo "ip udp $mac gso"
+		$0 ipv4 udp $mac 2000
 
-	echo "ip6 udp gso"
-	$0 ipv6 ip6udp 2000
+		echo "ip6 udp $mac gso"
+		$0 ipv6 ip6udp $mac 2000
+	done
 
 	echo "OK. All tests passed"
 	exit 0
 fi
 
-if [[ "$#" -ne "3" ]]; then
+if [[ "$#" -ne "4" ]]; then
 	echo "Usage: $0"
-	echo "   or: $0 <ipv4|ipv6> <tuntype> <data_len>"
+	echo "   or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>"
 	exit 1
 fi
 
@@ -137,6 +142,8 @@ case "$1" in
 	readonly foumod=fou
 	readonly foutype=ipip
 	readonly fouproto=4
+	readonly fouproto_mpls=${mplsproto}
+	readonly gretaptype=gretap
 	;;
 "ipv6")
 	readonly addr1="${ns1_v6}"
@@ -146,6 +153,8 @@ case "$1" in
 	readonly foumod=fou6
 	readonly foutype=ip6tnl
 	readonly fouproto="41 -6"
+	readonly fouproto_mpls="${mplsproto} -6"
+	readonly gretaptype=ip6gretap
 	;;
 *)
 	echo "unknown arg: $1"
@@ -154,9 +163,10 @@ case "$1" in
 esac
 
 readonly tuntype=$2
-readonly datalen=$3
+readonly mac=$3
+readonly datalen=$4
 
-echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}"
+echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}"
 
 trap cleanup EXIT
 
@@ -173,7 +183,7 @@ verify_data
 ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
 ip netns exec "${ns1}" tc filter add dev veth1 egress \
 	bpf direct-action object-file ./test_tc_tunnel.o \
-	section "encap_${tuntype}"
+	section "encap_${tuntype}_${mac}"
 echo "test bpf encap without decap (expect failure)"
 server_listen
 ! client_connect
@@ -184,7 +194,18 @@ if [[ "$tuntype" =~ "udp" ]]; then
 	targs="encap fou encap-sport auto encap-dport $udpport"
 	# fou may be a module; allow this to fail.
 	modprobe "${foumod}" ||true
-	ip netns exec "${ns2}" ip fou add port 5555 ipproto ${fouproto}
+	if [[ "$mac" == "mpls" ]]; then
+		dport=${mplsudpport}
+		dproto=${fouproto_mpls}
+		tmode="mode any ttl 255"
+	else
+		dport=${udpport}
+		dproto=${fouproto}
+	fi
+	ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto}
+	targs="encap fou encap-sport auto encap-dport $dport"
+elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+	ttype=$gretaptype
 else
 	ttype=$tuntype
 	targs=""
@@ -194,7 +215,31 @@ fi
 # server is still running
 # client can connect again
 ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
-	remote "${addr1}" local "${addr2}" $targs
+	${tmode} remote "${addr1}" local "${addr2}" $targs
+
+expect_tun_fail=0
+
+if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
+	# No support for MPLS IPv6 fou tunnel; expect failure.
+	expect_tun_fail=1
+elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
+	# No support for TEB fou tunnel; expect failure.
+	expect_tun_fail=1
+elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+	# Share ethernet address between tunnel/veth2 so L2 decap works.
+	ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
+		  awk '/ether/ { print $2 }')
+	ip netns exec "${ns2}" ip link set testtun0 address $ethaddr
+elif [[ "$mac" == "mpls" ]]; then
+	modprobe mpls_iptunnel ||true
+	modprobe mpls_gso ||true
+	ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536
+	ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo
+	ip netns exec "${ns2}" ip link set lo up
+	ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1
+	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0
+fi
+
 # Because packets are decapped by the tunnel they arrive on testtun0 from
 # the IP stack perspective.  Ensure reverse path filtering is disabled
 # otherwise we drop the TCP SYN as arriving on testtun0 instead of the
@@ -204,16 +249,22 @@ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
 # selected as the max of the "all" and device-specific values.
 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
 ip netns exec "${ns2}" ip link set dev testtun0 up
-echo "test bpf encap with tunnel device decap"
-client_connect
-verify_data
+if [[ "$expect_tun_fail" == 1 ]]; then
+	# This tunnel mode is not supported, so we expect failure.
+	echo "test bpf encap with tunnel device decap (expect failure)"
+	! client_connect
+else
+	echo "test bpf encap with tunnel device decap"
+	client_connect
+	verify_data
+	server_listen
+fi
 
 # serverside, use BPF for decap
 ip netns exec "${ns2}" ip link del dev testtun0
 ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
 ip netns exec "${ns2}" tc filter add dev veth2 ingress \
 	bpf direct-action object-file ./test_tc_tunnel.o section decap
-server_listen
 echo "test bpf encap with bpf decap"
 client_connect
 verify_data
-- 
cgit v1.2.3


From 6b7a21140fca461c6d8d5c65a3746e7da50a409e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Apr 2019 11:44:46 +0200
Subject: tools: add smp_* barrier variants to include infrastructure

Add the definition for smp_rmb(), smp_wmb(), and smp_mb() to the
tools include infrastructure: this patch adds the implementation
for x86-64 and arm64, and have it fall back as currently is for
other archs which do not have it implemented at this point. The
x86-64 one uses lock + add combination for smp_mb() with address
below red zone.

This is on top of 09d62154f613 ("tools, perf: add and use optimized
ring_buffer_{read_head, write_tail} helpers"), which didn't touch
smp_* barrier implementations. Magnus recently rightfully reported
however that the latter on x86-64 still wrongly falls back to sfence,
lfence and mfence respectively, thus fix that for applications under
tools making use of these to avoid such ugly surprises. The main
header under tools (include/asm/barrier.h) will in that case not
select the fallback implementation.

Reported-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/arch/arm64/include/asm/barrier.h | 10 ++++++++++
 tools/arch/x86/include/asm/barrier.h   |  7 +++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
index 378c051fa177..3b9b41331c4f 100644
--- a/tools/arch/arm64/include/asm/barrier.h
+++ b/tools/arch/arm64/include/asm/barrier.h
@@ -14,6 +14,16 @@
 #define wmb()		asm volatile("dmb ishst" ::: "memory")
 #define rmb()		asm volatile("dmb ishld" ::: "memory")
 
+/*
+ * Kernel uses dmb variants on arm64 for smp_*() barriers. Pretty much the same
+ * implementation as above mb()/wmb()/rmb(), though for the latter kernel uses
+ * dsb. In any case, should above mb()/wmb()/rmb() change, make sure the below
+ * smp_*() don't.
+ */
+#define smp_mb()	asm volatile("dmb ish" ::: "memory")
+#define smp_wmb()	asm volatile("dmb ishst" ::: "memory")
+#define smp_rmb()	asm volatile("dmb ishld" ::: "memory")
+
 #define smp_store_release(p, v)						\
 do {									\
 	union { typeof(*p) __val; char __c[1]; } __u =			\
diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h
index 58919868473c..0adf295dd5b6 100644
--- a/tools/arch/x86/include/asm/barrier.h
+++ b/tools/arch/x86/include/asm/barrier.h
@@ -21,9 +21,12 @@
 #define rmb()	asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 #define wmb()	asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 #elif defined(__x86_64__)
-#define mb() 	asm volatile("mfence":::"memory")
-#define rmb()	asm volatile("lfence":::"memory")
+#define mb()	asm volatile("mfence" ::: "memory")
+#define rmb()	asm volatile("lfence" ::: "memory")
 #define wmb()	asm volatile("sfence" ::: "memory")
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_mb()  asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
 #endif
 
 #if defined(__x86_64__)
-- 
cgit v1.2.3


From 26f7fe4a5db5b41d2abe53e37100c8984b157fb2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 11 Apr 2019 16:36:39 +0200
Subject: selftests: netfilter: add ebtables broute test case

ebtables -t broute allows to redirect packets in a way that
they get pushed up the stack, even if the interface is part
of a bridge.

In case of IP packets to non-local address, this means
those IP packets are routed instead of bridged-forwarded, just
as if the bridge would not have existed.

Expected test output is:
PASS: netns connectivity: ns1 and ns2 can reach each other
PASS: ns1/ns2 connectivity with active broute rule
PASS: ns1/ns2 connectivity with active broute rule and bridge forward drop

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/netfilter/Makefile         |   2 +-
 .../testing/selftests/netfilter/bridge_brouter.sh  | 146 +++++++++++++++++++++
 2 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/bridge_brouter.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index c9ff2b47bd1c..80dae72a25c7 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for netfilter selftests
 
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/bridge_brouter.sh b/tools/testing/selftests/netfilter/bridge_brouter.sh
new file mode 100755
index 000000000000..29f3955b9af7
--- /dev/null
+++ b/tools/testing/selftests/netfilter/bridge_brouter.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+#
+# This test is for bridge 'brouting', i.e. make some packets being routed
+# rather than getting bridged even though they arrive on interface that is
+# part of a bridge.
+
+#           eth0    br0     eth0
+# setup is: ns1 <-> ns0 <-> ns2
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+ebtables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ebtables"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth0 netns ns0 type veth peer name eth0 netns ns1
+if [ $? -ne 0 ]; then
+	echo "SKIP: Can't create veth device"
+	exit $ksft_skip
+fi
+ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
+
+ip -net ns0 link set lo up
+ip -net ns0 link set veth0 up
+ip -net ns0 link set veth1 up
+
+ip -net ns0 link add br0 type bridge
+if [ $? -ne 0 ]; then
+	echo "SKIP: Can't create bridge br0"
+	exit $ksft_skip
+fi
+
+ip -net ns0 link set veth0 master br0
+ip -net ns0 link set veth1 master br0
+ip -net ns0 link set br0 up
+ip -net ns0 addr add 10.0.0.1/24 dev br0
+
+# place both in same subnet, ns1 and ns2 connected via ns0:br0
+for i in 1 2; do
+  ip -net ns$i link set lo up
+  ip -net ns$i link set eth0 up
+  ip -net ns$i addr add 10.0.0.1$i/24 dev eth0
+done
+
+test_ebtables_broute()
+{
+	local cipt
+
+	# redirect is needed so the dstmac is rewritten to the bridge itself,
+	# ip stack won't process OTHERHOST (foreign unicast mac) packets.
+	ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add ebtables broute redirect rule"
+		return $ksft_skip
+	fi
+
+	# ping netns1, expected to not work (ip forwarding is off)
+	ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1
+	if [ $? -eq 0 ]; then
+		echo "ERROR: ping works, should have failed" 1>&2
+		return 1
+	fi
+
+	# enable forwarding on both interfaces.
+	# neither needs an ip address, but at least the bridge needs
+	# an ip address in same network segment as ns1 and ns2 (ns0
+	# needs to be able to determine route for to-be-forwarded packet).
+	ip netns exec ns0 sysctl -q net.ipv4.conf.veth0.forwarding=1
+	ip netns exec ns0 sysctl -q net.ipv4.conf.veth1.forwarding=1
+
+	sleep 1
+
+	ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null
+	if [ $? -ne 0 ]; then
+		echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2
+		return 1
+	fi
+
+	echo "PASS: ns1/ns2 connectivity with active broute rule"
+	ip netns exec ns0 ebtables -t broute -F
+
+	# ping netns1, expected to work (frames are bridged)
+	ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null
+	if [ $? -ne 0 ]; then
+		echo "ERROR: ping did not work, but it should (bridged)" 1>&2
+		return 1
+	fi
+
+	ip netns exec ns0 ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP
+
+	# ping netns1, expected to not work (DROP in bridge forward)
+	ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1
+	if [ $? -eq 0 ]; then
+		echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2
+		return 1
+	fi
+
+	# re-activate brouter
+	ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
+
+	ip netns exec ns2 ping -q -c 1 10.0.0.11 > /dev/null
+	if [ $? -ne 0 ]; then
+		echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2
+		return 1
+	fi
+
+	echo "PASS: ns1/ns2 connectivity with active broute rule and bridge forward drop"
+	return 0
+}
+
+# test basic connectivity
+ip netns exec ns1 ping -c 1 -q 10.0.0.12 > /dev/null
+if [ $? -ne 0 ]; then
+    echo "ERROR: Could not reach ns2 from ns1" 1>&2
+    ret=1
+fi
+
+ip netns exec ns2 ping -c 1 -q 10.0.0.11 > /dev/null
+if [ $? -ne 0 ]; then
+    echo "ERROR: Could not reach ns1 from ns2" 1>&2
+    ret=1
+fi
+
+if [ $ret -eq 0 ];then
+    echo "PASS: netns connectivity: ns1 and ns2 can reach each other"
+fi
+
+test_ebtables_broute
+ret=$?
+for i in 0 1 2; do ip netns del ns$i;done
+
+exit $ret
-- 
cgit v1.2.3


From 56490b623aa0ffa8526611e3e76a8ac546fe78f6 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 11 Apr 2019 11:51:50 -0700
Subject: selftests: Add debugging options to pmtu.sh

pmtu.sh script runs a number of tests and dumps a summary of pass/fail.
If a test fails, it is near impossible to debug why. For example:

    TEST: ipv6: PMTU exceptions                       [FAIL]

There are a lot of commands run behind the scenes for this test. Which
one is failing?

Add a VERBOSE option to show commands that are run and any output from
those commands. Add a PAUSE_ON_FAIL option to halt the script if a test
fails allowing users to poke around with the setup in the failed state.

In the process, rename tracing to TRACING and move declaration to top
with the new variables.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 213 +++++++++++++++++++++---------------
 1 file changed, 124 insertions(+), 89 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 912b2dc50be3..524b15dabb3c 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -116,6 +116,10 @@
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
+PAUSE_ON_FAIL=no
+VERBOSE=0
+TRACING=0
+
 # Some systems don't have a ping6 binary anymore
 which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 
@@ -222,6 +226,23 @@ err_flush() {
 	err_buf=
 }
 
+run_cmd() {
+	cmd="$*"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: $cmd\n"
+	fi
+
+	out="$($cmd 2>&1)"
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+		echo
+	fi
+
+	return $rc
+}
+
 # Find the auto-generated name for this namespace
 nsname() {
 	eval echo \$NS_$1
@@ -258,22 +279,22 @@ setup_fou_or_gue() {
 		fi
 	fi
 
-	${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
-	${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
+	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
+	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
 
-	${ns_b} ip fou add port 5556 ipproto ${ipproto}
-	${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
+	run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
+	run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
 
 	if [ "${inner}" = "4" ]; then
-		${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
-		${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
+		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
+		run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
 	else
-		${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
-		${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
+		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
+		run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
 	fi
 
-	${ns_a} ip link set ${encap}_a up
-	${ns_b} ip link set ${encap}_b up
+	run_cmd ${ns_a} ip link set ${encap}_a up
+	run_cmd ${ns_b} ip link set ${encap}_b up
 }
 
 setup_fou44() {
@@ -319,17 +340,17 @@ setup_namespaces() {
 }
 
 setup_veth() {
-	${ns_a} ip link add veth_a type veth peer name veth_b || return 1
-	${ns_a} ip link set veth_b netns ${NS_B}
+	run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
+	run_cmd ${ns_a} ip link set veth_b netns ${NS_B}
 
-	${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
-	${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
+	run_cmd ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
+	run_cmd ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
 
-	${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
-	${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
+	run_cmd ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
+	run_cmd ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
 
-	${ns_a} ip link set veth_a up
-	${ns_b} ip link set veth_b up
+	run_cmd ${ns_a} ip link set veth_a up
+	run_cmd ${ns_b} ip link set veth_b up
 }
 
 setup_vti() {
@@ -342,14 +363,14 @@ setup_vti() {
 
 	[ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
 
-	${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
-	${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
+	run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
+	run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
 
-	${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
-	${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
+	run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
+	run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
 
-	${ns_a} ip link set vti${proto}_a up
-	${ns_b} ip link set vti${proto}_b up
+	run_cmd ${ns_a} ip link set vti${proto}_a up
+	run_cmd ${ns_b} ip link set vti${proto}_b up
 }
 
 setup_vti4() {
@@ -375,17 +396,17 @@ setup_vxlan_or_geneve() {
 		opts_b=""
 	fi
 
-	${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
-	${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
+	run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
+	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
 
-	${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
-	${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+	run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
+	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
 
-	${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
-	${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
+	run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
 
-	${ns_a} ip link set ${type}_a up
-	${ns_b} ip link set ${type}_b up
+	run_cmd ${ns_a} ip link set ${type}_a up
+	run_cmd ${ns_b} ip link set ${type}_b up
 }
 
 setup_geneve4() {
@@ -409,15 +430,15 @@ setup_xfrm() {
 	veth_a_addr="${2}"
 	veth_b_addr="${3}"
 
-	${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
-	${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
-	${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
-	${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+	run_cmd "${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" || return 1
+	run_cmd "${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel"
+	run_cmd "${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel"
+	run_cmd "${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel"
 
-	${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
-	${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
-	${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
-	${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+	run_cmd "${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel"
+	run_cmd "${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel"
+	run_cmd "${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel"
+	run_cmd "${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel"
 }
 
 setup_xfrm4() {
@@ -481,7 +502,7 @@ setup() {
 }
 
 trace() {
-	[ $tracing -eq 0 ] && return
+	[ $TRACING -eq 0 ] && return
 
 	for arg do
 		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
@@ -597,8 +618,8 @@ test_pmtu_ipvX() {
 	mtu "${ns_b}"  veth_B-R2 1500
 
 	# Create route exceptions
-	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} > /dev/null
-	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} > /dev/null
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
 
 	# Check that exceptions have been created with the correct PMTU
 	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
@@ -630,7 +651,7 @@ test_pmtu_ipvX() {
 	# Decrease remote MTU on path via R2, get new exception
 	mtu "${ns_r2}" veth_R2-B 400
 	mtu "${ns_b}"  veth_B-R2 400
-	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
 	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
 	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
 
@@ -647,7 +668,7 @@ test_pmtu_ipvX() {
 	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1
 
 	# Get new exception
-	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
 	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
 	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
 }
@@ -696,7 +717,7 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
 
 	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
 	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
-	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
 
 	# Check that exception was created
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
@@ -776,7 +797,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() {
 
 	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
 	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
-	${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
 
 	# Check that exception was created
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
@@ -834,13 +855,13 @@ test_pmtu_vti4_exception() {
 
 	# Send DF packet without exceeding link layer MTU, check that no
 	# exception is created
-	${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
 	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
 
 	# Now exceed link layer MTU by one byte, check that exception is created
 	# with the right PMTU value
-	${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
 	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
 }
@@ -856,7 +877,7 @@ test_pmtu_vti6_exception() {
 	mtu "${ns_b}" veth_b 4000
 	mtu "${ns_a}" vti6_a 5000
 	mtu "${ns_b}" vti6_b 5000
-	${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} > /dev/null
+	run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
 
 	# Check that exception was created
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
@@ -902,9 +923,9 @@ test_pmtu_vti6_default_mtu() {
 test_pmtu_vti4_link_add_mtu() {
 	setup namespaces || return 2
 
-	${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+	run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
 	[ $? -ne 0 ] && err "  vti not supported" && return 2
-	${ns_a} ip link del vti4_a
+	run_cmd ${ns_a} ip link del vti4_a
 
 	fail=0
 
@@ -912,7 +933,7 @@ test_pmtu_vti4_link_add_mtu() {
 	max=$((65535 - 20))
 	# Check invalid values first
 	for v in $((min - 1)) $((max + 1)); do
-		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
+		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
 		# This can fail, or MTU can be adjusted to a proper value
 		[ $? -ne 0 ] && continue
 		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
@@ -920,14 +941,14 @@ test_pmtu_vti4_link_add_mtu() {
 			err "  vti tunnel created with invalid MTU ${mtu}"
 			fail=1
 		fi
-		${ns_a} ip link del vti4_a
+		run_cmd ${ns_a} ip link del vti4_a
 	done
 
 	# Now check valid values
 	for v in ${min} 1300 ${max}; do
-		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
 		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
-		${ns_a} ip link del vti4_a
+		run_cmd ${ns_a} ip link del vti4_a
 		if [ "${mtu}" != "${v}" ]; then
 			err "  vti MTU ${mtu} doesn't match configured value ${v}"
 			fail=1
@@ -940,9 +961,9 @@ test_pmtu_vti4_link_add_mtu() {
 test_pmtu_vti6_link_add_mtu() {
 	setup namespaces || return 2
 
-	${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+	run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
 	[ $? -ne 0 ] && err "  vti6 not supported" && return 2
-	${ns_a} ip link del vti6_a
+	run_cmd ${ns_a} ip link del vti6_a
 
 	fail=0
 
@@ -950,7 +971,7 @@ test_pmtu_vti6_link_add_mtu() {
 	max=$((65535 - 40))
 	# Check invalid values first
 	for v in $((min - 1)) $((max + 1)); do
-		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
+		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
 		# This can fail, or MTU can be adjusted to a proper value
 		[ $? -ne 0 ] && continue
 		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
@@ -958,14 +979,14 @@ test_pmtu_vti6_link_add_mtu() {
 			err "  vti6 tunnel created with invalid MTU ${v}"
 			fail=1
 		fi
-		${ns_a} ip link del vti6_a
+		run_cmd ${ns_a} ip link del vti6_a
 	done
 
 	# Now check valid values
 	for v in 68 1280 1300 $((65535 - 40)); do
-		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
 		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
-		${ns_a} ip link del vti6_a
+		run_cmd ${ns_a} ip link del vti6_a
 		if [ "${mtu}" != "${v}" ]; then
 			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
 			fail=1
@@ -978,19 +999,19 @@ test_pmtu_vti6_link_add_mtu() {
 test_pmtu_vti6_link_change_mtu() {
 	setup namespaces || return 2
 
-	${ns_a} ip link add dummy0 mtu 1500 type dummy
+	run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
 	[ $? -ne 0 ] && err "  dummy not supported" && return 2
-	${ns_a} ip link add dummy1 mtu 3000 type dummy
-	${ns_a} ip link set dummy0 up
-	${ns_a} ip link set dummy1 up
+	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
+	run_cmd ${ns_a} ip link set dummy0 up
+	run_cmd ${ns_a} ip link set dummy1 up
 
-	${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
-	${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+	run_cmd ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
+	run_cmd ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
 
 	fail=0
 
 	# Create vti6 interface bound to device, passing MTU, check it
-	${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+	run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
 	if [ ${mtu} -ne 1300 ]; then
 		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
@@ -999,7 +1020,7 @@ test_pmtu_vti6_link_change_mtu() {
 
 	# Move to another device with different MTU, without passing MTU, check
 	# MTU is adjusted
-	${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+	run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
 	if [ ${mtu} -ne $((3000 - 40)) ]; then
 		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
@@ -1007,7 +1028,7 @@ test_pmtu_vti6_link_change_mtu() {
 	fi
 
 	# Move it back, passing MTU, check MTU is not overridden
-	${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+	run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
 	if [ ${mtu} -ne 1280 ]; then
 		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
@@ -1052,7 +1073,7 @@ test_cleanup_vxlanX_exception() {
 	# Fill exception cache for multiple CPUs (2)
 	# we can always use inner IPv4 for that
 	for cpu in ${cpu_list}; do
-		taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr} > /dev/null
+		run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr}
 	done
 
 	${ns_a} ip link del dev veth_A-R1 &
@@ -1084,29 +1105,33 @@ usage() {
 	exit 1
 }
 
+################################################################################
+#
 exitcode=0
 desc=0
+
+while getopts :ptv o
+do
+	case $o in
+	p) PAUSE_ON_FAIL=yes;;
+	v) VERBOSE=1;;
+	t) if which tcpdump > /dev/null 2>&1; then
+		TRACING=1
+	   else
+		echo "=== tcpdump not available, tracing disabled"
+	   fi
+	   ;;
+	*) usage;;
+	esac
+done
+shift $(($OPTIND-1))
+
 IFS="	
 "
 
-tracing=0
 for arg do
-	if [ "${arg}" != "${arg#--*}" ]; then
-		opt="${arg#--}"
-		if [ "${opt}" = "trace" ]; then
-			if which tcpdump > /dev/null 2>&1; then
-				tracing=1
-			else
-				echo "=== tcpdump not available, tracing disabled"
-			fi
-		else
-			usage
-		fi
-	else
-		# Check first that all requested tests are available before
-		# running any
-		command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
-	fi
+	# Check first that all requested tests are available before running any
+	command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
 done
 
 trap cleanup EXIT
@@ -1124,6 +1149,11 @@ for t in ${tests}; do
 
 	(
 		unset IFS
+
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\n##########################################################################\n\n"
+		fi
+
 		eval test_${name}
 		ret=$?
 		cleanup
@@ -1132,6 +1162,11 @@ for t in ${tests}; do
 			printf "TEST: %-60s  [ OK ]\n" "${t}"
 		elif [ $ret -eq 1 ]; then
 			printf "TEST: %-60s  [FAIL]\n" "${t}"
+			if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+				echo
+				echo "Pausing. Hit enter to continue"
+				read a
+			fi
 			err_flush
 			exit 1
 		elif [ $ret -eq 2 ]; then
-- 
cgit v1.2.3


From 196398d4c0ac3de6f016f7774c952a069e116e71 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Thu, 7 Mar 2019 19:09:29 -0800
Subject: bpf: Sync bpf.h to tools/

Sync BPF_PROG_TYPE_CGROUP_SYSCTL related bpf UAPI changes to tools/.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 90 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2e96d0b4bf65..89976de909af 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -167,6 +167,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LIRC_MODE2,
 	BPF_PROG_TYPE_SK_REUSEPORT,
 	BPF_PROG_TYPE_FLOW_DISSECTOR,
+	BPF_PROG_TYPE_CGROUP_SYSCTL,
 };
 
 enum bpf_attach_type {
@@ -188,6 +189,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP6_SENDMSG,
 	BPF_LIRC_MODE2,
 	BPF_FLOW_DISSECTOR,
+	BPF_CGROUP_SYSCTL,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -2504,6 +2506,75 @@ union bpf_attr {
  * 	Return
  * 		0 if iph and th are a valid SYN cookie ACK, or a negative error
  * 		otherwise.
+ *
+ * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ *	Description
+ *		Get name of sysctl in /proc/sys/ and copy it into provided by
+ *		program buffer *buf* of size *buf_len*.
+ *
+ *		The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ *		If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
+ *		copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
+ *		only (e.g. "tcp_mem").
+ *	Return
+ *		Number of character copied (not including the trailing NUL).
+ *
+ *		**-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ *		truncated name in this case).
+ *
+ * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ *	Description
+ *		Get current value of sysctl as it is presented in /proc/sys
+ *		(incl. newline, etc), and copy it as a string into provided
+ *		by program buffer *buf* of size *buf_len*.
+ *
+ *		The whole value is copied, no matter what file position user
+ *		space issued e.g. sys_read at.
+ *
+ *		The buffer is always NUL terminated, unless it's zero-sized.
+ *	Return
+ *		Number of character copied (not including the trailing NUL).
+ *
+ *		**-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ *		truncated name in this case).
+ *
+ *		**-EINVAL** if current value was unavailable, e.g. because
+ *		sysctl is uninitialized and read returns -EIO for it.
+ *
+ * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ *	Description
+ *		Get new value being written by user space to sysctl (before
+ *		the actual write happens) and copy it as a string into
+ *		provided by program buffer *buf* of size *buf_len*.
+ *
+ *		User space may write new value at file position > 0.
+ *
+ *		The buffer is always NUL terminated, unless it's zero-sized.
+ *	Return
+ *		Number of character copied (not including the trailing NUL).
+ *
+ *		**-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ *		truncated name in this case).
+ *
+ *		**-EINVAL** if sysctl is being read.
+ *
+ * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ *	Description
+ *		Override new value being written by user space to sysctl with
+ *		value provided by program in buffer *buf* of size *buf_len*.
+ *
+ *		*buf* should contain a string in same form as provided by user
+ *		space on sysctl write.
+ *
+ *		User space may write new value at file position > 0. To override
+ *		the whole sysctl value file position should be set to zero.
+ *	Return
+ *		0 on success.
+ *
+ *		**-E2BIG** if the *buf_len* is too big.
+ *
+ *		**-EINVAL** if sysctl is being read.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2606,7 +2677,11 @@ union bpf_attr {
 	FN(skb_ecn_set_ce),		\
 	FN(get_listener_sock),		\
 	FN(skc_lookup_tcp),		\
-	FN(tcp_check_syncookie),
+	FN(tcp_check_syncookie),	\
+	FN(sysctl_get_name),		\
+	FN(sysctl_get_current_value),	\
+	FN(sysctl_get_new_value),	\
+	FN(sysctl_set_new_value),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2679,6 +2754,9 @@ enum bpf_func_id {
 					  BPF_ADJ_ROOM_ENCAP_L2_MASK) \
 					 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
 
+/* BPF_FUNC_sysctl_get_name flags. */
+#define BPF_F_SYSCTL_BASE_NAME		(1ULL << 0)
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
@@ -3308,4 +3386,14 @@ struct bpf_line_info {
 struct bpf_spin_lock {
 	__u32	val;
 };
+
+struct bpf_sysctl {
+	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
+				 * Allows 1,2,4-byte read, but no write.
+				 */
+	__u32	file_pos;	/* Sysctl file position to read from, write to.
+				 * Allows 1,2,4-byte read an 4-byte write.
+				 */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.2.3


From 063cc9f06ee6cac12dbc68a504bc4ff56bde790d Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 8 Mar 2019 09:15:26 -0800
Subject: libbpf: Support sysctl hook

Support BPF_PROG_TYPE_CGROUP_SYSCTL program in libbpf: identifying
program and attach types by section name, probe.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c        | 3 +++
 tools/lib/bpf/libbpf_probes.c | 1 +
 2 files changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 67484cf32b2e..e5b77ad97795 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2064,6 +2064,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_TRACEPOINT:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
 	case BPF_PROG_TYPE_PERF_EVENT:
+	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 		return false;
 	case BPF_PROG_TYPE_KPROBE:
 	default:
@@ -3004,6 +3005,8 @@ static const struct {
 						BPF_CGROUP_UDP4_SENDMSG),
 	BPF_EAPROG_SEC("cgroup/sendmsg6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 						BPF_CGROUP_UDP6_SENDMSG),
+	BPF_EAPROG_SEC("cgroup/sysctl",		BPF_PROG_TYPE_CGROUP_SYSCTL,
+						BPF_CGROUP_SYSCTL),
 };
 
 #undef BPF_PROG_SEC_IMPL
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 8c3a1c04dcb2..0f25541632e3 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -97,6 +97,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_LIRC_MODE2:
 	case BPF_PROG_TYPE_SK_REUSEPORT:
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
+	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 7007af63da3bf6764c9208029a3585756260b55f Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 8 Mar 2019 09:17:45 -0800
Subject: selftests/bpf: Test sysctl section name

Add unit test to verify that program and attach types are properly
identified for "cgroup/sysctl" section name.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_section_names.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c
index 7c4f41572b1c..bebd4fbca1f4 100644
--- a/tools/testing/selftests/bpf/test_section_names.c
+++ b/tools/testing/selftests/bpf/test_section_names.c
@@ -119,6 +119,11 @@ static struct sec_name_test tests[] = {
 		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG},
 		{0, BPF_CGROUP_UDP6_SENDMSG},
 	},
+	{
+		"cgroup/sysctl",
+		{0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
+		{0, BPF_CGROUP_SYSCTL},
+	},
 };
 
 static int test_prog_type_by_name(const struct sec_name_test *test)
-- 
cgit v1.2.3


From 1f5fa9ab6e2eb16ba81284280a498fe9c543a2d8 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 8 Mar 2019 15:08:21 -0800
Subject: selftests/bpf: Test BPF_CGROUP_SYSCTL

Add unit test for BPF_PROG_TYPE_CGROUP_SYSCTL program type.

Test that program can allow/deny access.
Test both valid and invalid accesses to ctx->write.

Example of output:
  # ./test_sysctl
  Test case: sysctl wrong attach_type .. [PASS]
  Test case: sysctl:read allow all .. [PASS]
  Test case: sysctl:read deny all .. [PASS]
  Test case: ctx:write sysctl:read read ok .. [PASS]
  Test case: ctx:write sysctl:write read ok .. [PASS]
  Test case: ctx:write sysctl:read write reject .. [PASS]
  Summary: 6 PASSED, 0 FAILED

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile      |   3 +-
 tools/testing/selftests/bpf/test_sysctl.c | 291 ++++++++++++++++++++++++++++++
 2 files changed, 293 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_sysctl.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 078283d073b0..f9d83ba7843e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
 	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
-	test_netcnt test_tcpnotify_user test_sock_fields
+	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -93,6 +93,7 @@ $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
 $(OUTPUT)/test_netcnt: cgroup_helpers.c
 $(OUTPUT)/test_sock_fields: cgroup_helpers.c
+$(OUTPUT)/test_sysctl: cgroup_helpers.c
 
 .PHONY: force
 
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
new file mode 100644
index 000000000000..6d0ab09789ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH			"/foo"
+#define MAX_INSNS		512
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sysctl_test {
+	const char *descr;
+	struct bpf_insn	insns[MAX_INSNS];
+	enum bpf_attach_type attach_type;
+	const char *sysctl;
+	int open_flags;
+	const char *newval;
+	enum {
+		LOAD_REJECT,
+		ATTACH_REJECT,
+		OP_EPERM,
+		SUCCESS,
+	} result;
+};
+
+static struct sysctl_test tests[] = {
+	{
+		.descr = "sysctl wrong attach_type",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = 0,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.result = ATTACH_REJECT,
+	},
+	{
+		.descr = "sysctl:read allow all",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "sysctl:read deny all",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.result = OP_EPERM,
+	},
+	{
+		.descr = "ctx:write sysctl:read read ok",
+		.insns = {
+			/* If (write) */
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct bpf_sysctl, write)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "ctx:write sysctl:write read ok",
+		.insns = {
+			/* If (write) */
+			BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct bpf_sysctl, write)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/domainname",
+		.open_flags = O_WRONLY,
+		.newval = "(none)", /* same as default, should fail anyway */
+		.result = OP_EPERM,
+	},
+	{
+		.descr = "ctx:write sysctl:read write reject",
+		.insns = {
+			/* write = X */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sysctl, write)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.result = LOAD_REJECT,
+	},
+};
+
+static size_t probe_prog_length(const struct bpf_insn *fp)
+{
+	size_t len;
+
+	for (len = MAX_INSNS - 1; len > 0; --len)
+		if (fp[len].code != 0 || fp[len].imm != 0)
+			break;
+	return len + 1;
+}
+
+static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
+{
+	struct bpf_insn *prog = test->insns;
+	struct bpf_load_program_attr attr;
+	int ret;
+
+	memset(&attr, 0, sizeof(struct bpf_load_program_attr));
+	attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
+	attr.insns = prog;
+	attr.insns_cnt = probe_prog_length(attr.insns);
+	attr.license = "GPL";
+
+	ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+	if (ret < 0 && test->result != LOAD_REJECT) {
+		log_err(">>> Loading program error.\n"
+			">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
+	}
+
+	return ret;
+}
+
+static int access_sysctl(const char *sysctl_path,
+			 const struct sysctl_test *test)
+{
+	int err = 0;
+	int fd;
+
+	fd = open(sysctl_path, test->open_flags | O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	if (test->open_flags == O_RDONLY) {
+		char buf[128];
+
+		if (read(fd, buf, sizeof(buf)) == -1)
+			goto err;
+	} else if (test->open_flags == O_WRONLY) {
+		if (!test->newval) {
+			log_err("New value for sysctl is not set");
+			goto err;
+		}
+		if (write(fd, test->newval, strlen(test->newval)) == -1)
+			goto err;
+	} else {
+		log_err("Unexpected sysctl access: neither read nor write");
+		goto err;
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(fd);
+	return err;
+}
+
+static int run_test_case(int cgfd, struct sysctl_test *test)
+{
+	enum bpf_attach_type atype = test->attach_type;
+	char sysctl_path[128];
+	int progfd = -1;
+	int err = 0;
+
+	printf("Test case: %s .. ", test->descr);
+
+	snprintf(sysctl_path, sizeof(sysctl_path), "/proc/sys/%s",
+		 test->sysctl);
+
+	progfd = load_sysctl_prog(test, sysctl_path);
+	if (progfd < 0) {
+		if (test->result == LOAD_REJECT)
+			goto out;
+		else
+			goto err;
+	}
+
+	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
+		if (test->result == ATTACH_REJECT)
+			goto out;
+		else
+			goto err;
+	}
+
+	if (access_sysctl(sysctl_path, test) == -1) {
+		if (test->result == OP_EPERM && errno == EPERM)
+			goto out;
+		else
+			goto err;
+	}
+
+	if (test->result != SUCCESS) {
+		log_err("Unexpected failure");
+		goto err;
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	/* Detaching w/o checking return code: best effort attempt. */
+	if (progfd != -1)
+		bpf_prog_detach(cgfd, atype);
+	close(progfd);
+	printf("[%s]\n", err ? "FAIL" : "PASS");
+	return err;
+}
+
+static int run_tests(int cgfd)
+{
+	int passes = 0;
+	int fails = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+		if (run_test_case(cgfd, &tests[i]))
+			++fails;
+		else
+			++passes;
+	}
+	printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+	return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+	int cgfd = -1;
+	int err = 0;
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cgfd = create_and_get_cgroup(CG_PATH);
+	if (cgfd < 0)
+		goto err;
+
+	if (join_cgroup(CG_PATH))
+		goto err;
+
+	if (run_tests(cgfd))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	return err;
+}
-- 
cgit v1.2.3


From 6041c67f28d8b297ef53ab47abdcd319626c765e Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 8 Mar 2019 15:13:43 -0800
Subject: selftests/bpf: Test bpf_sysctl_get_name helper

Test w/ and w/o BPF_F_SYSCTL_BASE_NAME, buffers with enough space and
too small buffers to get E2BIG and truncated result, etc.

  # ./test_sysctl
  ...
  Test case: sysctl_get_name sysctl_value:base ok .. [PASS]
  Test case: sysctl_get_name sysctl_value:base E2BIG truncated .. [PASS]
  Test case: sysctl_get_name sysctl:full ok .. [PASS]
  Test case: sysctl_get_name sysctl:full E2BIG truncated .. [PASS]
  Test case: sysctl_get_name sysctl:full E2BIG truncated small .. [PASS]
  Summary: 11 PASSED, 0 FAILED

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_sysctl.c | 222 ++++++++++++++++++++++++++++++
 1 file changed, 222 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 6d0ab09789ad..2e7ef04503a8 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -128,6 +128,228 @@ static struct sysctl_test tests[] = {
 		.open_flags = O_RDONLY,
 		.result = LOAD_REJECT,
 	},
+	{
+		.descr = "sysctl_get_name sysctl_value:base ok",
+		.insns = {
+			/* sysctl_get_name arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_name arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 8),
+
+			/* sysctl_get_name arg4 (flags) */
+			BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),
+
+			/* sysctl_get_name(ctx, buf, buf_len, flags) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6),
+			/*     buf == "tcp_mem\0") */
+			BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/tcp_mem",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "sysctl_get_name sysctl_value:base E2BIG truncated",
+		.insns = {
+			/* sysctl_get_name arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_name arg3 (buf_len) too small */
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+
+			/* sysctl_get_name arg4 (flags) */
+			BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),
+
+			/* sysctl_get_name(ctx, buf, buf_len, flags) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
+
+			/*     buf[0:7] == "tcp_me\0") */
+			BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/tcp_mem",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "sysctl_get_name sysctl:full ok",
+		.insns = {
+			/* sysctl_get_name arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_name arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 17),
+
+			/* sysctl_get_name arg4 (flags) */
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+
+			/* sysctl_get_name(ctx, buf, buf_len, flags) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14),
+
+			/*     buf[0:8] == "net/ipv4" && */
+			BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
+
+			/*     buf[8:16] == "/tcp_mem" && */
+			BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
+
+			/*     buf[16:24] == "\0") */
+			BPF_LD_IMM64(BPF_REG_8, 0x0ULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/tcp_mem",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "sysctl_get_name sysctl:full E2BIG truncated",
+		.insns = {
+			/* sysctl_get_name arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_name arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 16),
+
+			/* sysctl_get_name arg4 (flags) */
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+
+			/* sysctl_get_name(ctx, buf, buf_len, flags) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10),
+
+			/*     buf[0:8] == "net/ipv4" && */
+			BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
+
+			/*     buf[8:16] == "/tcp_me\0") */
+			BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/tcp_mem",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "sysctl_get_name sysctl:full E2BIG truncated small",
+		.insns = {
+			/* sysctl_get_name arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_name arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+
+			/* sysctl_get_name arg4 (flags) */
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+
+			/* sysctl_get_name(ctx, buf, buf_len, flags) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
+
+			/*     buf[0:8] == "net/ip\0") */
+			BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/tcp_mem",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
 };
 
 static size_t probe_prog_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From 11ff34f74e32f5a4ec1f71621508789cd60775b3 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 8 Mar 2019 15:17:51 -0800
Subject: selftests/bpf: Test sysctl_get_current_value helper

Test sysctl_get_current_value on sysctl read and write, buffers with
enough space and too small buffers to get E2BIG and truncated result,
etc.

  # ./test_sysctl
  ...
  Test case: sysctl_get_current_value sysctl:read ok, gt .. [PASS]
  Test case: sysctl_get_current_value sysctl:read ok, eq .. [PASS]
  Test case: sysctl_get_current_value sysctl:read E2BIG truncated ..  [PASS]
  Test case: sysctl_get_current_value sysctl:read EINVAL .. [PASS]
  Test case: sysctl_get_current_value sysctl:write ok .. [PASS]
  Summary: 16 PASSED, 0 FAILED

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_sysctl.c | 228 ++++++++++++++++++++++++++++++
 1 file changed, 228 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 2e7ef04503a8..9fb6560fa775 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -18,11 +18,13 @@
 
 #define CG_PATH			"/foo"
 #define MAX_INSNS		512
+#define FIXUP_SYSCTL_VALUE	0
 
 char bpf_log_buf[BPF_LOG_BUF_SIZE];
 
 struct sysctl_test {
 	const char *descr;
+	size_t fixup_value_insn;
 	struct bpf_insn	insns[MAX_INSNS];
 	enum bpf_attach_type attach_type;
 	const char *sysctl;
@@ -350,6 +352,190 @@ static struct sysctl_test tests[] = {
 		.open_flags = O_RDONLY,
 		.result = SUCCESS,
 	},
+	{
+		.descr = "sysctl_get_current_value sysctl:read ok, gt",
+		.insns = {
+			/* sysctl_get_current_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_current_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 8),
+
+			/* sysctl_get_current_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
+
+			/*     buf[0:6] == "Linux\n\0") */
+			BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "sysctl_get_current_value sysctl:read ok, eq",
+		.insns = {
+			/* sysctl_get_current_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 7),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_current_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+
+			/* sysctl_get_current_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
+
+			/*     buf[0:6] == "Linux\n\0") */
+			BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "sysctl_get_current_value sysctl:read E2BIG truncated",
+		.insns = {
+			/* sysctl_get_current_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_7, BPF_REG_0, 6),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_current_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 6),
+
+			/* sysctl_get_current_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
+
+			/*     buf[0:6] == "Linux\0") */
+			BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "sysctl_get_current_value sysctl:read EINVAL",
+		.insns = {
+			/* sysctl_get_current_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_current_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 8),
+
+			/* sysctl_get_current_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 4),
+
+			/*     buf[0:8] is NUL-filled) */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv6/conf/lo/stable_secret", /* -EIO */
+		.open_flags = O_RDONLY,
+		.result = OP_EPERM,
+	},
+	{
+		.descr = "sysctl_get_current_value sysctl:write ok",
+		.fixup_value_insn = 6,
+		.insns = {
+			/* sysctl_get_current_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_current_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 8),
+
+			/* sysctl_get_current_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 6),
+
+			/*     buf[0:4] == expected) */
+			BPF_LD_IMM64(BPF_REG_8, FIXUP_SYSCTL_VALUE),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_WRONLY,
+		.newval = "600", /* same as default, should fail anyway */
+		.result = OP_EPERM,
+	},
 };
 
 static size_t probe_prog_length(const struct bpf_insn *fp)
@@ -362,6 +548,27 @@ static size_t probe_prog_length(const struct bpf_insn *fp)
 	return len + 1;
 }
 
+static int fixup_sysctl_value(const char *buf, size_t buf_len,
+			      struct bpf_insn *prog, size_t insn_num)
+{
+	uint32_t value_num = 0;
+	uint8_t c, i;
+
+	if (buf_len > sizeof(value_num)) {
+		log_err("Value is too big (%zd) to use in fixup", buf_len);
+		return -1;
+	}
+
+	for (i = 0; i < buf_len; ++i) {
+		c = buf[i];
+		value_num |= (c << i * 8);
+	}
+
+	prog[insn_num].imm = value_num;
+
+	return 0;
+}
+
 static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
 {
 	struct bpf_insn *prog = test->insns;
@@ -374,6 +581,27 @@ static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
 	attr.insns_cnt = probe_prog_length(attr.insns);
 	attr.license = "GPL";
 
+	if (test->fixup_value_insn) {
+		char buf[128];
+		ssize_t len;
+		int fd;
+
+		fd = open(sysctl_path, O_RDONLY | O_CLOEXEC);
+		if (fd < 0) {
+			log_err("open(%s) failed", sysctl_path);
+			return -1;
+		}
+		len = read(fd, buf, sizeof(buf));
+		if (len == -1) {
+			log_err("read(%s) failed", sysctl_path);
+			close(fd);
+			return -1;
+		}
+		close(fd);
+		if (fixup_sysctl_value(buf, len, prog, test->fixup_value_insn))
+			return -1;
+	}
+
 	ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
 	if (ret < 0 && test->result != LOAD_REJECT) {
 		log_err(">>> Loading program error.\n"
-- 
cgit v1.2.3


From 786047dd08de3334722af036e2827ce3b34205cc Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 8 Mar 2019 15:22:03 -0800
Subject: selftests/bpf: Test bpf_sysctl_{get,set}_new_value helpers

Test that new value provided by user space on sysctl write can be read
by bpf_sysctl_get_new_value and overridden by bpf_sysctl_set_new_value.

  # ./test_sysctl
  ...
  Test case: sysctl_get_new_value sysctl:read EINVAL .. [PASS]
  Test case: sysctl_get_new_value sysctl:write ok .. [PASS]
  Test case: sysctl_get_new_value sysctl:write ok long .. [PASS]
  Test case: sysctl_get_new_value sysctl:write E2BIG .. [PASS]
  Test case: sysctl_set_new_value sysctl:read EINVAL .. [PASS]
  Test case: sysctl_set_new_value sysctl:write ok .. [PASS]
  Summary: 22 PASSED, 0 FAILED

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_sysctl.c | 222 ++++++++++++++++++++++++++++++
 1 file changed, 222 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 9fb6560fa775..95437b72404f 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -536,6 +536,228 @@ static struct sysctl_test tests[] = {
 		.newval = "600", /* same as default, should fail anyway */
 		.result = OP_EPERM,
 	},
+	{
+		.descr = "sysctl_get_new_value sysctl:read EINVAL",
+		.insns = {
+			/* sysctl_get_new_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_new_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 8),
+
+			/* sysctl_get_new_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
+
+			/* if (ret == expected) */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/tcp_mem",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "sysctl_get_new_value sysctl:write ok",
+		.insns = {
+			/* sysctl_get_new_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_new_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 4),
+
+			/* sysctl_get_new_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
+
+			/*     buf[0:4] == "606\0") */
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x00363036, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_WRONLY,
+		.newval = "606",
+		.result = OP_EPERM,
+	},
+	{
+		.descr = "sysctl_get_new_value sysctl:write ok long",
+		.insns = {
+			/* sysctl_get_new_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_new_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 24),
+
+			/* sysctl_get_new_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14),
+
+			/*     buf[0:8] == "3000000 " && */
+			BPF_LD_IMM64(BPF_REG_8, 0x2030303030303033ULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
+
+			/*     buf[8:16] == "4000000 " && */
+			BPF_LD_IMM64(BPF_REG_8, 0x2030303030303034ULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
+
+			/*     buf[16:24] == "6000000\0") */
+			BPF_LD_IMM64(BPF_REG_8, 0x0030303030303036ULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/tcp_mem",
+		.open_flags = O_WRONLY,
+		.newval = "3000000 4000000 6000000",
+		.result = OP_EPERM,
+	},
+	{
+		.descr = "sysctl_get_new_value sysctl:write E2BIG",
+		.insns = {
+			/* sysctl_get_new_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 3),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_get_new_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 3),
+
+			/* sysctl_get_new_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 4),
+
+			/*     buf[0:3] == "60\0") */
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x003036, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_WRONLY,
+		.newval = "606",
+		.result = OP_EPERM,
+	},
+	{
+		.descr = "sysctl_set_new_value sysctl:read EINVAL",
+		.insns = {
+			/* sysctl_set_new_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_set_new_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 3),
+
+			/* sysctl_set_new_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value),
+
+			/* if (ret == expected) */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "sysctl_set_new_value sysctl:write ok",
+		.fixup_value_insn = 2,
+		.insns = {
+			/* sysctl_set_new_value arg2 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, FIXUP_SYSCTL_VALUE),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+
+			/* sysctl_set_new_value arg3 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_3, 3),
+
+			/* sysctl_set_new_value(ctx, buf, buf_len) */
+			BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value),
+
+			/* if (ret == expected) */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_WRONLY,
+		.newval = "606",
+		.result = SUCCESS,
+	},
 };
 
 static size_t probe_prog_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From 9a1027e52535db1a0adf7831afdfce745dc0a061 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 8 Mar 2019 15:25:02 -0800
Subject: selftests/bpf: Test file_pos field in bpf_sysctl ctx

Test access to file_pos field of bpf_sysctl context, both read (incl.
narrow read) and write.

  # ./test_sysctl
  ...
  Test case: ctx:file_pos sysctl:read read ok .. [PASS]
  Test case: ctx:file_pos sysctl:read read ok narrow .. [PASS]
  Test case: ctx:file_pos sysctl:read write ok .. [PASS]
  ...

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_sysctl.c | 64 +++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 95437b72404f..43008aae32d3 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -30,6 +30,7 @@ struct sysctl_test {
 	const char *sysctl;
 	int open_flags;
 	const char *newval;
+	const char *oldval;
 	enum {
 		LOAD_REJECT,
 		ATTACH_REJECT,
@@ -130,6 +131,64 @@ static struct sysctl_test tests[] = {
 		.open_flags = O_RDONLY,
 		.result = LOAD_REJECT,
 	},
+	{
+		.descr = "ctx:file_pos sysctl:read read ok",
+		.insns = {
+			/* If (file_pos == X) */
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct bpf_sysctl, file_pos)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "ctx:file_pos sysctl:read read ok narrow",
+		.insns = {
+			/* If (file_pos == X) */
+			BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct bpf_sysctl, file_pos)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		.descr = "ctx:file_pos sysctl:read write ok",
+		.insns = {
+			/* file_pos = X */
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sysctl, file_pos)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "kernel/ostype",
+		.open_flags = O_RDONLY,
+		.oldval = "nux\n",
+		.result = SUCCESS,
+	},
 	{
 		.descr = "sysctl_get_name sysctl_value:base ok",
 		.insns = {
@@ -848,6 +907,11 @@ static int access_sysctl(const char *sysctl_path,
 
 		if (read(fd, buf, sizeof(buf)) == -1)
 			goto err;
+		if (test->oldval &&
+		    strncmp(buf, test->oldval, strlen(test->oldval))) {
+			log_err("Read value %s != %s", buf, test->oldval);
+			goto err;
+		}
 	} else if (test->open_flags == O_WRONLY) {
 		if (!test->newval) {
 			log_err("New value for sysctl is not set");
-- 
cgit v1.2.3


From b457e5534c9988bc6bcf30a22fa7cd765b14ecbd Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Mon, 18 Mar 2019 18:07:01 -0700
Subject: bpf: Sync bpf.h to tools/

Sync bpf_strtoX related bpf UAPI changes to tools/.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 51 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 89976de909af..c26be24fd5e2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2575,6 +2575,53 @@ union bpf_attr {
  *		**-E2BIG** if the *buf_len* is too big.
  *
  *		**-EINVAL** if sysctl is being read.
+ *
+ * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ *	Description
+ *		Convert the initial part of the string from buffer *buf* of
+ *		size *buf_len* to a long integer according to the given base
+ *		and save the result in *res*.
+ *
+ *		The string may begin with an arbitrary amount of white space
+ *		(as determined by isspace(3)) followed by a single optional '-'
+ *		sign.
+ *
+ *		Five least significant bits of *flags* encode base, other bits
+ *		are currently unused.
+ *
+ *		Base must be either 8, 10, 16 or 0 to detect it automatically
+ *		similar to user space strtol(3).
+ *	Return
+ *		Number of characters consumed on success. Must be positive but
+ *		no more than buf_len.
+ *
+ *		**-EINVAL** if no valid digits were found or unsupported base
+ *		was provided.
+ *
+ *		**-ERANGE** if resulting value was out of range.
+ *
+ * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ *	Description
+ *		Convert the initial part of the string from buffer *buf* of
+ *		size *buf_len* to an unsigned long integer according to the
+ *		given base and save the result in *res*.
+ *
+ *		The string may begin with an arbitrary amount of white space
+ *		(as determined by isspace(3)).
+ *
+ *		Five least significant bits of *flags* encode base, other bits
+ *		are currently unused.
+ *
+ *		Base must be either 8, 10, 16 or 0 to detect it automatically
+ *		similar to user space strtoul(3).
+ *	Return
+ *		Number of characters consumed on success. Must be positive but
+ *		no more than buf_len.
+ *
+ *		**-EINVAL** if no valid digits were found or unsupported base
+ *		was provided.
+ *
+ *		**-ERANGE** if resulting value was out of range.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2681,7 +2728,9 @@ union bpf_attr {
 	FN(sysctl_get_name),		\
 	FN(sysctl_get_current_value),	\
 	FN(sysctl_get_new_value),	\
-	FN(sysctl_set_new_value),
+	FN(sysctl_set_new_value),	\
+	FN(strtol),			\
+	FN(strtoul),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From 99f57973ac5b8e38ee5b5e5f518cd5bf95e3b1e3 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Sat, 23 Mar 2019 15:47:05 -0700
Subject: selftests/bpf: Add sysctl and strtoX helpers to bpf_helpers.h

Add bpf_sysctl_* and bpf_strtoX helpers to bpf_helpers.h.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/bpf_helpers.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index e85d62cb53d0..59e221586cf7 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -192,6 +192,25 @@ static int (*bpf_skb_ecn_set_ce)(void *ctx) =
 static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk,
 	    void *ip, int ip_len, void *tcp, int tcp_len) =
 	(void *) BPF_FUNC_tcp_check_syncookie;
+static int (*bpf_sysctl_get_name)(void *ctx, char *buf,
+				  unsigned long long buf_len,
+				  unsigned long long flags) =
+	(void *) BPF_FUNC_sysctl_get_name;
+static int (*bpf_sysctl_get_current_value)(void *ctx, char *buf,
+					   unsigned long long buf_len) =
+	(void *) BPF_FUNC_sysctl_get_current_value;
+static int (*bpf_sysctl_get_new_value)(void *ctx, char *buf,
+				       unsigned long long buf_len) =
+	(void *) BPF_FUNC_sysctl_get_new_value;
+static int (*bpf_sysctl_set_new_value)(void *ctx, const char *buf,
+				       unsigned long long buf_len) =
+	(void *) BPF_FUNC_sysctl_set_new_value;
+static int (*bpf_strtol)(const char *buf, unsigned long long buf_len,
+			 unsigned long long flags, long *res) =
+	(void *) BPF_FUNC_strtol;
+static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len,
+			  unsigned long long flags, unsigned long *res) =
+	(void *) BPF_FUNC_strtoul;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
-- 
cgit v1.2.3


From c2d5f12e4c6cf7e94cfcb98389db51557cf05f9a Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Mon, 18 Mar 2019 18:17:03 -0700
Subject: selftests/bpf: Test ARG_PTR_TO_LONG arg type

Test that verifier handles new argument types properly, including
uninitialized or partially initialized value, misaligned stack access,
etc.

Example of output:
  #456/p ARG_PTR_TO_LONG uninitialized OK
  #457/p ARG_PTR_TO_LONG half-uninitialized OK
  #458/p ARG_PTR_TO_LONG misaligned OK
  #459/p ARG_PTR_TO_LONG size < sizeof(long) OK
  #460/p ARG_PTR_TO_LONG initialized OK

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/int_ptr.c | 160 +++++++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/verifier/int_ptr.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c
new file mode 100644
index 000000000000..ca3b4729df66
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/int_ptr.c
@@ -0,0 +1,160 @@
+{
+	"ARG_PTR_TO_LONG uninitialized",
+	.insns = {
+		/* bpf_strtoul arg1 (buf) */
+		BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+		BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+		BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+		/* bpf_strtoul arg2 (buf_len) */
+		BPF_MOV64_IMM(BPF_REG_2, 4),
+
+		/* bpf_strtoul arg3 (flags) */
+		BPF_MOV64_IMM(BPF_REG_3, 0),
+
+		/* bpf_strtoul arg4 (res) */
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+		BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+		/* bpf_strtoul() */
+		BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+	.errstr = "invalid indirect read from stack off -16+0 size 8",
+},
+{
+	"ARG_PTR_TO_LONG half-uninitialized",
+	.insns = {
+		/* bpf_strtoul arg1 (buf) */
+		BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+		BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+		BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+		/* bpf_strtoul arg2 (buf_len) */
+		BPF_MOV64_IMM(BPF_REG_2, 4),
+
+		/* bpf_strtoul arg3 (flags) */
+		BPF_MOV64_IMM(BPF_REG_3, 0),
+
+		/* bpf_strtoul arg4 (res) */
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+		BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
+		BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+		/* bpf_strtoul() */
+		BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+	.errstr = "invalid indirect read from stack off -16+4 size 8",
+},
+{
+	"ARG_PTR_TO_LONG misaligned",
+	.insns = {
+		/* bpf_strtoul arg1 (buf) */
+		BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+		BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+		BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+		/* bpf_strtoul arg2 (buf_len) */
+		BPF_MOV64_IMM(BPF_REG_2, 4),
+
+		/* bpf_strtoul arg3 (flags) */
+		BPF_MOV64_IMM(BPF_REG_3, 0),
+
+		/* bpf_strtoul arg4 (res) */
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -12),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
+		BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
+		BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+		/* bpf_strtoul() */
+		BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+	.errstr = "misaligned stack access off (0x0; 0x0)+-20+0 size 8",
+},
+{
+	"ARG_PTR_TO_LONG size < sizeof(long)",
+	.insns = {
+		/* bpf_strtoul arg1 (buf) */
+		BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
+		BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+		BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+		/* bpf_strtoul arg2 (buf_len) */
+		BPF_MOV64_IMM(BPF_REG_2, 4),
+
+		/* bpf_strtoul arg3 (flags) */
+		BPF_MOV64_IMM(BPF_REG_3, 0),
+
+		/* bpf_strtoul arg4 (res) */
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 12),
+		BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
+		BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+		/* bpf_strtoul() */
+		BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+	.errstr = "invalid stack type R4 off=-4 access_size=8",
+},
+{
+	"ARG_PTR_TO_LONG initialized",
+	.insns = {
+		/* bpf_strtoul arg1 (buf) */
+		BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+		BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+		BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+		/* bpf_strtoul arg2 (buf_len) */
+		BPF_MOV64_IMM(BPF_REG_2, 4),
+
+		/* bpf_strtoul arg3 (flags) */
+		BPF_MOV64_IMM(BPF_REG_3, 0),
+
+		/* bpf_strtoul arg4 (res) */
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+		BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+		BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+		/* bpf_strtoul() */
+		BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+},
-- 
cgit v1.2.3


From 8549ddc832d6f36be47279d201e95cc8ade6faa9 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Mon, 18 Mar 2019 18:21:18 -0700
Subject: selftests/bpf: Test bpf_strtol and bpf_strtoul helpers

Test that bpf_strtol and  bpf_strtoul helpers can be used to convert
provided buffer to long or unsigned long correspondingly and return both
correct result and number of consumed bytes, or proper errno.

Example of output:
  # ./test_sysctl
  ..
  Test case: bpf_strtoul one number string .. [PASS]
  Test case: bpf_strtoul multi number string .. [PASS]
  Test case: bpf_strtoul buf_len = 0, reject .. [PASS]
  Test case: bpf_strtoul supported base, ok .. [PASS]
  Test case: bpf_strtoul unsupported base, EINVAL .. [PASS]
  Test case: bpf_strtoul buf with spaces only, EINVAL .. [PASS]
  Test case: bpf_strtoul negative number, EINVAL .. [PASS]
  Test case: bpf_strtol negative number, ok .. [PASS]
  Test case: bpf_strtol hex number, ok .. [PASS]
  Test case: bpf_strtol max long .. [PASS]
  Test case: bpf_strtol overflow, ERANGE .. [PASS]
  Summary: 36 PASSED, 0 FAILED

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_sysctl.c | 485 ++++++++++++++++++++++++++++++
 1 file changed, 485 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 43008aae32d3..885675480af9 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -817,6 +817,491 @@ static struct sysctl_test tests[] = {
 		.newval = "606",
 		.result = SUCCESS,
 	},
+	{
+		"bpf_strtoul one number string",
+		.insns = {
+			/* arg1 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
+			/*     res == expected) */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		"bpf_strtoul multi number string",
+		.insns = {
+			/* arg1 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			/* "600 602\0" */
+			BPF_LD_IMM64(BPF_REG_0, 0x0032303620303036ULL),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 18),
+			/*     res == expected) */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 16),
+
+			/*     arg1 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/*     arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0),
+
+			/*     arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+
+			/*     arg4 (res) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+			/*     if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4),
+			/*         res == expected) */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 602, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/tcp_mem",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		"bpf_strtoul buf_len = 0, reject",
+		.insns = {
+			/* arg1 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = LOAD_REJECT,
+	},
+	{
+		"bpf_strtoul supported base, ok",
+		.insns = {
+			/* arg1 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0x00373730),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 8),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
+			/*     res == expected) */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 63, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		"bpf_strtoul unsupported base, EINVAL",
+		.insns = {
+			/* arg1 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 3),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+			/* if (ret == expected) */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		"bpf_strtoul buf with spaces only, EINVAL",
+		.insns = {
+			/* arg1 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0x090a0c0d),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+			/* if (ret == expected) */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		"bpf_strtoul negative number, EINVAL",
+		.insns = {
+			/* arg1 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtoul),
+
+			/* if (ret == expected) */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		"bpf_strtol negative number, ok",
+		.insns = {
+			/* arg1 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 10),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtol),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
+			/*     res == expected) */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, -6, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		"bpf_strtol hex number, ok",
+		.insns = {
+			/* arg1 (buf) */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0x65667830), /* "0xfe" */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtol),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4),
+			/*     res == expected) */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 254, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		"bpf_strtol max long",
+		.insns = {
+			/* arg1 (buf) 9223372036854775807 */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
+			BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
+			BPF_LD_IMM64(BPF_REG_0, 0x0000000000373038ULL),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
+
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 19),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtol),
+
+			/* if (ret == expected && */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 19, 6),
+			/*     res == expected) */
+			BPF_LD_IMM64(BPF_REG_8, 0x7fffffffffffffffULL),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
+	{
+		"bpf_strtol overflow, ERANGE",
+		.insns = {
+			/* arg1 (buf) 9223372036854775808 */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
+			BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
+			BPF_LD_IMM64(BPF_REG_0, 0x0000000000383038ULL),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
+
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+
+			/* arg2 (buf_len) */
+			BPF_MOV64_IMM(BPF_REG_2, 19),
+
+			/* arg3 (flags) */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+
+			/* arg4 (res) */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
+
+			BPF_EMIT_CALL(BPF_FUNC_strtol),
+
+			/* if (ret == expected) */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -ERANGE, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
 };
 
 static size_t probe_prog_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From 7568f4cbbeae687e4c545516275479f50c15a7cc Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Sat, 23 Mar 2019 15:51:00 -0700
Subject: selftests/bpf: C based test for sysctl and strtoX

Add C based test for a few bpf_sysctl_* helpers and bpf_strtoul.

Make sure that sysctl can be identified by name and that multiple
integers can be parsed from sysctl value with bpf_strtoul.

net/ipv4/tcp_mem is chosen as a testing sysctl, it contains 3 unsigned
longs, they all are parsed and compared (val[0] < val[1] < val[2]).

Example of output:
  # ./test_sysctl
  ...
  Test case: C prog: deny all writes .. [PASS]
  Test case: C prog: deny access by name .. [PASS]
  Test case: C prog: read tcp_mem .. [PASS]
  Summary: 39 PASSED, 0 FAILED

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/progs/test_sysctl_prog.c | 70 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_sysctl.c          | 57 +++++++++++++++++-
 2 files changed, 126 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_sysctl_prog.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
new file mode 100644
index 000000000000..a295cad805d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+
+/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
+#define MAX_ULONG_STR_LEN 0xF
+
+/* Max supported length of sysctl value string (pow2). */
+#define MAX_VALUE_STR_LEN 0x40
+
+static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
+{
+	char tcp_mem_name[] = "net/ipv4/tcp_mem";
+	unsigned char i;
+	char name[64];
+	int ret;
+
+	memset(name, 0, sizeof(name));
+	ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
+	if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
+		return 0;
+
+#pragma clang loop unroll(full)
+	for (i = 0; i < sizeof(tcp_mem_name); ++i)
+		if (name[i] != tcp_mem_name[i])
+			return 0;
+
+	return 1;
+}
+
+SEC("cgroup/sysctl")
+int sysctl_tcp_mem(struct bpf_sysctl *ctx)
+{
+	unsigned long tcp_mem[3] = {0, 0, 0};
+	char value[MAX_VALUE_STR_LEN];
+	unsigned char i, off = 0;
+	int ret;
+
+	if (ctx->write)
+		return 0;
+
+	if (!is_tcp_mem(ctx))
+		return 0;
+
+	ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
+	if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
+		return 0;
+
+#pragma clang loop unroll(full)
+	for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
+		ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
+				  tcp_mem + i);
+		if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
+			return 0;
+		off += ret & MAX_ULONG_STR_LEN;
+	}
+
+
+	return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 885675480af9..a3bebd7c68dd 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -11,6 +11,7 @@
 #include <linux/filter.h>
 
 #include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 
 #include "bpf_rlimit.h"
 #include "bpf_util.h"
@@ -26,6 +27,7 @@ struct sysctl_test {
 	const char *descr;
 	size_t fixup_value_insn;
 	struct bpf_insn	insns[MAX_INSNS];
+	const char *prog_file;
 	enum bpf_attach_type attach_type;
 	const char *sysctl;
 	int open_flags;
@@ -1302,6 +1304,31 @@ static struct sysctl_test tests[] = {
 		.open_flags = O_RDONLY,
 		.result = SUCCESS,
 	},
+	{
+		"C prog: deny all writes",
+		.prog_file = "./test_sysctl_prog.o",
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/tcp_mem",
+		.open_flags = O_WRONLY,
+		.newval = "123 456 789",
+		.result = OP_EPERM,
+	},
+	{
+		"C prog: deny access by name",
+		.prog_file = "./test_sysctl_prog.o",
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/route/mtu_expires",
+		.open_flags = O_RDONLY,
+		.result = OP_EPERM,
+	},
+	{
+		"C prog: read tcp_mem",
+		.prog_file = "./test_sysctl_prog.o",
+		.attach_type = BPF_CGROUP_SYSCTL,
+		.sysctl = "net/ipv4/tcp_mem",
+		.open_flags = O_RDONLY,
+		.result = SUCCESS,
+	},
 };
 
 static size_t probe_prog_length(const struct bpf_insn *fp)
@@ -1335,7 +1362,8 @@ static int fixup_sysctl_value(const char *buf, size_t buf_len,
 	return 0;
 }
 
-static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
+static int load_sysctl_prog_insns(struct sysctl_test *test,
+				  const char *sysctl_path)
 {
 	struct bpf_insn *prog = test->insns;
 	struct bpf_load_program_attr attr;
@@ -1377,6 +1405,33 @@ static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
 	return ret;
 }
 
+static int load_sysctl_prog_file(struct sysctl_test *test)
+{
+	struct bpf_prog_load_attr attr;
+	struct bpf_object *obj;
+	int prog_fd;
+
+	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+	attr.file = test->prog_file;
+	attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
+
+	if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+		if (test->result != LOAD_REJECT)
+			log_err(">>> Loading program (%s) error.\n",
+				test->prog_file);
+		return -1;
+	}
+
+	return prog_fd;
+}
+
+static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
+{
+		return test->prog_file
+			? load_sysctl_prog_file(test)
+			: load_sysctl_prog_insns(test, sysctl_path);
+}
+
 static int access_sysctl(const char *sysctl_path,
 			 const struct sysctl_test *test)
 {
-- 
cgit v1.2.3


From 38f58c972334833e0e0804a32e8cee8d8d475cb7 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 12 Apr 2019 14:49:27 +0200
Subject: netdevsim: move sdev specific bpf debugfs files to sdev dir

Some netdevsim bpf debugfs files are per-sdev, yet they are defined per
netdevsim instance. Move them under sdev directory.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/bpf.c                 | 18 +++++++++---------
 drivers/net/netdevsim/netdevsim.h           |  6 +++---
 tools/testing/selftests/bpf/test_offload.py |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index f92c43453ec6..ae8a2da43471 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -65,8 +65,8 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
 	struct nsim_bpf_bound_prog *state;
 
 	state = env->prog->aux->offload->dev_priv;
-	if (state->ns->bpf_bind_verifier_delay && !insn_idx)
-		msleep(state->ns->bpf_bind_verifier_delay);
+	if (state->ns->sdev->bpf_bind_verifier_delay && !insn_idx)
+		msleep(state->ns->sdev->bpf_bind_verifier_delay);
 
 	if (insn_idx == env->prog->len - 1)
 		pr_vlog(env, "Hello from netdevsim!\n");
@@ -250,7 +250,7 @@ static int nsim_bpf_verifier_prep(struct bpf_prog *prog)
 {
 	struct netdevsim *ns = bpf_offload_dev_priv(prog->aux->offload->offdev);
 
-	if (!ns->bpf_bind_accept)
+	if (!ns->sdev->bpf_bind_accept)
 		return -EOPNOTSUPP;
 
 	return nsim_bpf_create_prog(ns, prog);
@@ -594,6 +594,12 @@ int nsim_bpf_init(struct netdevsim *ns)
 		err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
 		if (err)
 			return err;
+
+		ns->sdev->bpf_bind_accept = true;
+		debugfs_create_bool("bpf_bind_accept", 0600, ns->sdev->ddir,
+				    &ns->sdev->bpf_bind_accept);
+		debugfs_create_u32("bpf_bind_verifier_delay", 0600, ns->sdev->ddir,
+				   &ns->sdev->bpf_bind_verifier_delay);
 	}
 
 	err = bpf_offload_dev_netdev_register(ns->sdev->bpf_dev, ns->netdev);
@@ -603,12 +609,6 @@ int nsim_bpf_init(struct netdevsim *ns)
 	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
 			   &ns->bpf_offloaded_id);
 
-	ns->bpf_bind_accept = true;
-	debugfs_create_bool("bpf_bind_accept", 0600, ns->ddir,
-			    &ns->bpf_bind_accept);
-	debugfs_create_u32("bpf_bind_verifier_delay", 0600, ns->ddir,
-			   &ns->bpf_bind_verifier_delay);
-
 	ns->bpf_tc_accept = true;
 	debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir,
 			    &ns->bpf_tc_accept);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index e5d6fea246a5..2667f9b0e1f9 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -39,6 +39,9 @@ struct netdevsim_shared_dev {
 
 	struct bpf_offload_dev *bpf_dev;
 
+	bool bpf_bind_accept;
+	u32 bpf_bind_verifier_delay;
+
 	struct dentry *ddir_bpf_bound_progs;
 	u32 prog_id_gen;
 
@@ -95,9 +98,6 @@ struct netdevsim {
 	struct xdp_attachment_info xdp;
 	struct xdp_attachment_info xdp_hw;
 
-	bool bpf_bind_accept;
-	u32 bpf_bind_verifier_delay;
-
 	bool bpf_tc_accept;
 	bool bpf_tc_non_bound_accept;
 	bool bpf_xdpdrv_accept;
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 84bea3985d64..a7f95106119f 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -1055,7 +1055,7 @@ try:
 
     start_test("Test if netdev removal waits for translation...")
     delay_msec = 500
-    sim.dfs["bpf_bind_verifier_delay"] = delay_msec
+    sim.dfs["sdev/bpf_bind_verifier_delay"] = delay_msec
     start = time.time()
     cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
                (sim['ifname'], obj)
-- 
cgit v1.2.3


From 3321cff3c5706833651314fa5a3ba20ce63089fc Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 14 Apr 2019 18:57:50 +0000
Subject: selftests: mlxsw: Test neighbour offload indication

Test that neighbour entries are marked as offloaded.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/rtnetlink.sh       | 26 ++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index c4cf6e6d800e..017d839fcefa 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -26,6 +26,7 @@ ALL_TESTS="
 	lag_dev_deletion_test
 	vlan_interface_uppers_test
 	bridge_extern_learn_test
+	neigh_offload_test
 	devlink_reload_test
 "
 NUM_NETIFS=2
@@ -561,6 +562,31 @@ bridge_extern_learn_test()
 	ip link del dev br0
 }
 
+neigh_offload_test()
+{
+	# Test that IPv4 and IPv6 neighbour entries are marked as offloaded
+	RET=0
+
+	ip -4 address add 192.0.2.1/24 dev $swp1
+	ip -6 address add 2001:db8:1::1/64 dev $swp1
+
+	ip -4 neigh add 192.0.2.2 lladdr de:ad:be:ef:13:37 nud perm dev $swp1
+	ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \
+		dev $swp1
+
+	ip -4 neigh show dev $swp1 | grep 192.0.2.2 | grep -q offload
+	check_err $? "ipv4 neigh entry not marked as offloaded when should"
+	ip -6 neigh show dev $swp1 | grep 2001:db8:1::2 | grep -q offload
+	check_err $? "ipv6 neigh entry not marked as offloaded when should"
+
+	log_test "neighbour offload indication"
+
+	ip -6 neigh del 2001:db8:1::2 dev $swp1
+	ip -4 neigh del 192.0.2.2 dev $swp1
+	ip -6 address del 2001:db8:1::1/64 dev $swp1
+	ip -4 address del 192.0.2.1/24 dev $swp1
+}
+
 devlink_reload_test()
 {
 	# Test that after executing all the above configuration tests, a
-- 
cgit v1.2.3


From 189cf5a4a7d5e0349f3079f05a8ccf4e4a2f0c3e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 15 Apr 2019 16:48:07 -0700
Subject: btf: add support for VAR and DATASEC in btf_dedup()

This patch adds support for VAR and DATASEC in btf_dedup(). VAR/DATASEC
are never deduplicated, but they need to be processed anyway as types
they refer to might need to be remapped due to deduplication and
compaction.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Yonghong Song <yhs@fb.com>
Cc: Alexei Starovoitov <ast@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/btf.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 701e7e28ada3..75eaf10b9e1a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1354,8 +1354,16 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 	}
 	/* special BTF "void" type is made canonical immediately */
 	d->map[0] = 0;
-	for (i = 1; i <= btf->nr_types; i++)
-		d->map[i] = BTF_UNPROCESSED_ID;
+	for (i = 1; i <= btf->nr_types; i++) {
+		struct btf_type *t = d->btf->types[i];
+		__u16 kind = BTF_INFO_KIND(t->info);
+
+		/* VAR and DATASEC are never deduped and are self-canonical */
+		if (kind == BTF_KIND_VAR || kind == BTF_KIND_DATASEC)
+			d->map[i] = i;
+		else
+			d->map[i] = BTF_UNPROCESSED_ID;
+	}
 
 	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
 	if (!d->hypot_map) {
@@ -1946,6 +1954,8 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 	case BTF_KIND_UNION:
 	case BTF_KIND_FUNC:
 	case BTF_KIND_FUNC_PROTO:
+	case BTF_KIND_VAR:
+	case BTF_KIND_DATASEC:
 		return 0;
 
 	case BTF_KIND_INT:
@@ -2699,6 +2709,7 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
 	case BTF_KIND_PTR:
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
+	case BTF_KIND_VAR:
 		r = btf_dedup_remap_type_id(d, t->type);
 		if (r < 0)
 			return r;
@@ -2753,6 +2764,20 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
 		break;
 	}
 
+	case BTF_KIND_DATASEC: {
+		struct btf_var_secinfo *var = (struct btf_var_secinfo *)(t + 1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+
+		for (i = 0; i < vlen; i++) {
+			r = btf_dedup_remap_type_id(d, var->type);
+			if (r < 0)
+				return r;
+			var->type = r;
+			var++;
+		}
+		break;
+	}
+
 	default:
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From efb2ddc4ce5dba9b6c5ec106528d18a645424f3f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 15 Apr 2019 16:48:08 -0700
Subject: selftests/btf: add VAR and DATASEC case for dedup tests

Add test case verifying that dedup happens (INTs are deduped in this
case) and VAR/DATASEC types are not deduped, but have their referenced
type IDs adjusted correctly.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Yonghong Song <yhs@fb.com>
Cc: Alexei Starovoitov <ast@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_btf.c | 49 ++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 44cd3378d216..f8eb7987b794 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -6642,6 +6642,51 @@ const struct btf_dedup_test dedup_tests[] = {
 		.dont_resolve_fwds = false,
 	},
 },
+{
+	.descr = "dedup: datasec and vars pass-through",
+	.input = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			/* static int t */
+			BTF_VAR_ENC(NAME_NTH(2), 1, 0),			/* [2] */
+			/* .bss section */				/* [3] */
+			BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+			BTF_VAR_SECINFO_ENC(2, 0, 4),
+			/* int, referenced from [5] */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [4] */
+			/* another static int t */
+			BTF_VAR_ENC(NAME_NTH(2), 4, 0),			/* [5] */
+			/* another .bss section */			/* [6] */
+			BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+			BTF_VAR_SECINFO_ENC(5, 0, 4),
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0.bss\0t"),
+	},
+	.expect = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			/* static int t */
+			BTF_VAR_ENC(NAME_NTH(2), 1, 0),			/* [2] */
+			/* .bss section */				/* [3] */
+			BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+			BTF_VAR_SECINFO_ENC(2, 0, 4),
+			/* another static int t */
+			BTF_VAR_ENC(NAME_NTH(2), 1, 0),			/* [4] */
+			/* another .bss section */			/* [5] */
+			BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+			BTF_VAR_SECINFO_ENC(4, 0, 4),
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0.bss\0t"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+		.dedup_table_size = 1
+	},
+},
 
 };
 
@@ -6671,6 +6716,10 @@ static int btf_type_size(const struct btf_type *t)
 		return base_size + vlen * sizeof(struct btf_member);
 	case BTF_KIND_FUNC_PROTO:
 		return base_size + vlen * sizeof(struct btf_param);
+	case BTF_KIND_VAR:
+		return base_size + sizeof(struct btf_var);
+	case BTF_KIND_DATASEC:
+		return base_size + vlen * sizeof(struct btf_var_secinfo);
 	default:
 		fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind);
 		return -EINVAL;
-- 
cgit v1.2.3


From bcbccad694b7ef0de9a993ecd918231c10a1496a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 11 Apr 2019 15:53:16 -0700
Subject: selftests/bpf: bring back (void *) cast to set_ipv4_csum in
 test_tc_tunnel

It was removed in commit 166b5a7f2ca3 ("selftests_bpf: extend
test_tc_tunnel for UDP encap") without any explanation.

Otherwise I see:
progs/test_tc_tunnel.c:160:17: warning: taking address of packed member 'ip' of class or structure
      'v4hdr' may result in an unaligned pointer value [-Waddress-of-packed-member]
        set_ipv4_csum(&h_outer.ip);
                       ^~~~~~~~~~
1 warning generated.

Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Fixes: 166b5a7f2ca3 ("selftests_bpf: extend test_tc_tunnel for UDP encap")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index bcb00d737e95..ab56a6a72b7a 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -157,7 +157,7 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 				       bpf_ntohs(h_outer.ip.tot_len));
 	h_outer.ip.protocol = encap_proto;
 
-	set_ipv4_csum(&h_outer.ip);
+	set_ipv4_csum((void *)&h_outer.ip);
 
 	/* store new outer network header */
 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
-- 
cgit v1.2.3


From bfb35c27c65fce60a12e188135ae1344d1b89e24 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Fri, 12 Apr 2019 12:27:34 +0100
Subject: bpf: fix whitespace for ENCAP_L2 defines in bpf.h

replace tab after #define with space in line with rest of definitions

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h       | 6 +++---
 tools/include/uapi/linux/bpf.h | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c26be24fd5e2..704bb69514a2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2792,14 +2792,14 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_adjust_room flags. */
 #define BPF_F_ADJ_ROOM_FIXED_GSO	(1ULL << 0)
 
-#define	BPF_ADJ_ROOM_ENCAP_L2_MASK	0xff
-#define	BPF_ADJ_ROOM_ENCAP_L2_SHIFT	56
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK	0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT	56
 
 #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	(1ULL << 1)
 #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	(1ULL << 2)
 #define BPF_F_ADJ_ROOM_ENCAP_L4_GRE	(1ULL << 3)
 #define BPF_F_ADJ_ROOM_ENCAP_L4_UDP	(1ULL << 4)
-#define	BPF_F_ADJ_ROOM_ENCAP_L2(len)	(((__u64)len & \
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len)	(((__u64)len & \
 					  BPF_ADJ_ROOM_ENCAP_L2_MASK) \
 					 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c26be24fd5e2..704bb69514a2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2792,14 +2792,14 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_adjust_room flags. */
 #define BPF_F_ADJ_ROOM_FIXED_GSO	(1ULL << 0)
 
-#define	BPF_ADJ_ROOM_ENCAP_L2_MASK	0xff
-#define	BPF_ADJ_ROOM_ENCAP_L2_SHIFT	56
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK	0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT	56
 
 #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	(1ULL << 1)
 #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	(1ULL << 2)
 #define BPF_F_ADJ_ROOM_ENCAP_L4_GRE	(1ULL << 3)
 #define BPF_F_ADJ_ROOM_ENCAP_L4_UDP	(1ULL << 4)
-#define	BPF_F_ADJ_ROOM_ENCAP_L2(len)	(((__u64)len & \
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len)	(((__u64)len & \
 					  BPF_ADJ_ROOM_ENCAP_L2_MASK) \
 					 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
 
-- 
cgit v1.2.3


From 031ebc1aac3dc26c5ef9f5236ad35366bd57de61 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 12 Apr 2019 14:29:34 +0100
Subject: tools: bpftool: remove blank line after btf_id when listing programs

Commit 569b0c77735d ("tools/bpftool: show btf id in program information")
made bpftool print an empty line after each program entry when listing
the BPF programs loaded on the system (plain output). This is especially
confusing when some programs have an associated BTF id, and others
don't. Let's remove the blank line.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/prog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 81067803189e..4b7a307b9fc9 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -323,7 +323,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 	}
 
 	if (info->btf_id)
-		printf("\n\tbtf_id %d\n", info->btf_id);
+		printf("\n\tbtf_id %d", info->btf_id);
 
 	printf("\n");
 }
-- 
cgit v1.2.3


From 39c9f10639a3e8ead677492a7ab41cee5fa481a7 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 12 Apr 2019 14:29:35 +0100
Subject: tools: bpftool: reset errno for "bpftool cgroup tree"

When trying to dump the tree of all cgroups under a given root node,
bpftool attempts to query programs of all available attach types. Some
of those attach types do not support queries, therefore several of the
calls are actually expected to fail.

Those calls set errno to EINVAL, which has no consequence for dumping
the rest of the tree. It does have consequences however if errno is
inspected at a later time. For example, bpftool batch mode relies on
errno to determine whether a command has succeeded, and whether it
should carry on with the next command. Setting errno to EINVAL when
everything worked as expected would therefore make such command fail:

        # echo 'cgroup tree \n net show' | \
                bpftool batch file -

To improve this, reset errno when its value is EINVAL after attempting
to show programs for all existing attach types in do_show_tree_fn().

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/cgroup.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 4b5c8da2a7c0..a81b34343eb8 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -248,6 +248,13 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
 	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
 		show_attached_bpf_progs(cgroup_fd, type, ftw->level);
 
+	if (errno == EINVAL)
+		/* Last attach type does not support query.
+		 * Do not report an error for this, especially because batch
+		 * mode would stop processing commands.
+		 */
+		errno = 0;
+
 	if (json_output) {
 		jsonw_end_array(json_wtr);
 		jsonw_end_object(json_wtr);
-- 
cgit v1.2.3


From 9a487883bd6bffbfb99e5f58bbfab290a42cd1a6 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 12 Apr 2019 14:29:36 +0100
Subject: tools: bpftool: fix man page documentation for "pinmaps" keyword

The "pinmaps" keyword is present in the man page, in the verbose
description of the "bpftool prog load" command. However, it is missing
from the summary of available commands at the beginning of the file. Add
it there as well.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-prog.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 9386bd6e0396..49c33939239f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -25,7 +25,7 @@ PROG COMMANDS
 |	**bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}]
 |	**bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes** | **linum**}]
 |	**bpftool** **prog pin** *PROG* *FILE*
-|	**bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
+|	**bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
 |	**bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog tracelog**
-- 
cgit v1.2.3


From 88b3eed805e9edf2646d7ba3e701b616225572e1 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 12 Apr 2019 14:29:37 +0100
Subject: tools: bpftool: fix short option name for printing version in man
 pages

Manual pages would tell that option "-v" (lower case) would print the
version number for bpftool. This is wrong: the short name of the option
is "-V" (upper case). Fix the documentation accordingly.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst  | 2 +-
 tools/bpf/bpftool/Documentation/bpftool-feature.rst | 2 +-
 tools/bpf/bpftool/Documentation/bpftool-map.rst     | 2 +-
 tools/bpf/bpftool/Documentation/bpftool-net.rst     | 2 +-
 tools/bpf/bpftool/Documentation/bpftool-perf.rst    | 2 +-
 tools/bpf/bpftool/Documentation/bpftool-prog.rst    | 2 +-
 tools/bpf/bpftool/Documentation/bpftool.rst         | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 9bb9ace54ba8..5e3b7d9d7599 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -99,7 +99,7 @@ OPTIONS
 	-h, --help
 		  Print short generic help message (similar to **bpftool help**).
 
-	-v, --version
+	-V, --version
 		  Print version number (similar to **bpftool version**).
 
 	-j, --json
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 82de03dd8f52..10177343b7ce 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -63,7 +63,7 @@ OPTIONS
 	-h, --help
 		  Print short generic help message (similar to **bpftool help**).
 
-	-v, --version
+	-V, --version
 		  Print version number (similar to **bpftool version**).
 
 	-j, --json
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 5c984ffc9f01..55ecf80ca03e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -135,7 +135,7 @@ OPTIONS
 	-h, --help
 		  Print short generic help message (similar to **bpftool help**).
 
-	-v, --version
+	-V, --version
 		  Print version number (similar to **bpftool version**).
 
 	-j, --json
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 779dab3650ee..6b692b428373 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -55,7 +55,7 @@ OPTIONS
 	-h, --help
 		  Print short generic help message (similar to **bpftool help**).
 
-	-v, --version
+	-V, --version
 		  Print version number (similar to **bpftool version**).
 
 	-j, --json
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index bca5590a80d0..86154740fabb 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -43,7 +43,7 @@ OPTIONS
 	-h, --help
 		  Print short generic help message (similar to **bpftool help**).
 
-	-v, --version
+	-V, --version
 		  Print version number (similar to **bpftool version**).
 
 	-j, --json
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 49c33939239f..f2beb79a77e7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -144,7 +144,7 @@ OPTIONS
 	-h, --help
 		  Print short generic help message (similar to **bpftool help**).
 
-	-v, --version
+	-V, --version
 		  Print version number (similar to **bpftool version**).
 
 	-j, --json
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 4f2188845dd8..deb2ca911ddf 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -49,7 +49,7 @@ OPTIONS
 	-h, --help
 		  Print short help message (similar to **bpftool help**).
 
-	-v, --version
+	-V, --version
 		  Print version number (similar to **bpftool version**).
 
 	-j, --json
-- 
cgit v1.2.3


From 25df480def17eb792860d86b8f90fda00035f0f9 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 12 Apr 2019 14:29:38 +0100
Subject: tools: bpftool: add a note on program statistics in man page

Linux kernel now supports statistics for BPF programs, and bpftool is
able to dump them. However, these statistics are not enabled by default,
and administrators may not know how to access them.

Add a paragraph in bpftool documentation, under the description of the
"bpftool prog show" command, to explain that such statistics are
available and that their collection is controlled via a dedicated sysctl
knob.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-prog.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index f2beb79a77e7..bb9bb00c0c2c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -56,6 +56,14 @@ DESCRIPTION
 		  Output will start with program ID followed by program type and
 		  zero or more named attributes (depending on kernel version).
 
+		  Since Linux 5.1 the kernel can collect statistics on BPF
+		  programs (such as the total time spent running the program,
+		  and the number of times it was run). If available, bpftool
+		  shows such statistics. However, the kernel does not collect
+		  them by defaults, as it slightly impacts performance on each
+		  program run. Activation or deactivation of the feature is
+		  performed via the **kernel.bpf_stats_enabled** sysctl knob.
+
 	**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }]
 		  Dump eBPF instructions of the program from the kernel. By
 		  default, eBPF will be disassembled and printed to standard
-- 
cgit v1.2.3


From 0478c3bf812451315da6eeb79f8cf151db094767 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Mon, 15 Apr 2019 16:15:35 +0900
Subject: bpftool: Use print_entry_error() in case of ENOENT when dumping

Commit bf598a8f0f77 ("bpftool: Improve handling of ENOENT on map dumps")
used print_entry_plain() in case of ENOENT. However, that commit introduces
dead code. Per-cpu maps are zero-filled. When reading them, it's all or
nothing. There will never be a case where some cpus have an entry and
others don't.

The truth is that ENOENT is an error case. Use print_entry_error() to
output the desired message. That function's "value" parameter is also
renamed to indicate that we never use it for an actual map value.

The output format is unchanged.

Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/map.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e96903078991..df958af56b6c 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -261,20 +261,20 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
 }
 
 static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
-			      const char *value)
+			      const char *error_msg)
 {
-	int value_size = strlen(value);
+	int msg_size = strlen(error_msg);
 	bool single_line, break_names;
 
-	break_names = info->key_size > 16 || value_size > 16;
-	single_line = info->key_size + value_size <= 24 && !break_names;
+	break_names = info->key_size > 16 || msg_size > 16;
+	single_line = info->key_size + msg_size <= 24 && !break_names;
 
 	printf("key:%c", break_names ? '\n' : ' ');
 	fprint_hex(stdout, key, info->key_size, " ");
 
 	printf(single_line ? "  " : "\n");
 
-	printf("value:%c%s", break_names ? '\n' : ' ', value);
+	printf("value:%c%s", break_names ? '\n' : ' ', error_msg);
 
 	printf("\n");
 }
@@ -298,11 +298,7 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 
 		if (info->value_size) {
 			printf("value:%c", break_names ? '\n' : ' ');
-			if (value)
-				fprint_hex(stdout, value, info->value_size,
-					   " ");
-			else
-				printf("<no entry>");
+			fprint_hex(stdout, value, info->value_size, " ");
 		}
 
 		printf("\n");
@@ -321,11 +317,8 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 			for (i = 0; i < n; i++) {
 				printf("value (CPU %02d):%c",
 				       i, info->value_size > 16 ? '\n' : ' ');
-				if (value)
-					fprint_hex(stdout, value + i * step,
-						   info->value_size, " ");
-				else
-					printf("<no entry>");
+				fprint_hex(stdout, value + i * step,
+					   info->value_size, " ");
 				printf("\n");
 			}
 		}
@@ -722,11 +715,13 @@ static int dump_map_elem(int fd, void *key, void *value,
 		jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
 		jsonw_end_object(json_wtr);
 	} else {
+		const char *msg = NULL;
+
 		if (errno == ENOENT)
-			print_entry_plain(map_info, key, NULL);
-		else
-			print_entry_error(map_info, key,
-					  strerror(lookup_errno));
+			msg = "<no entry>";
+
+		print_entry_error(map_info, key,
+				  msg ? : strerror(lookup_errno));
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 3da6e7e408b9c229e5e5cc1ddab7445c7561afc3 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Mon, 15 Apr 2019 16:15:36 +0900
Subject: bpftool: Improve handling of ENOSPC on reuseport_array map dumps

avoids outputting a series of
	value:
	No space left on device

The value itself is not wrong but bpf_fd_reuseport_array_lookup_elem() can
only return it if the map was created with value_size = 8. There's nothing
bpftool can do about it. Instead of repeating this error for every key in
the map, print an explanatory warning and a specialized error.

example before:
key: 00 00 00 00
value:
No space left on device
key: 01 00 00 00
value:
No space left on device
key: 02 00 00 00
value:
No space left on device
Found 0 elements

example after:
Warning: cannot read values from reuseport_sockarray map with value_size != 8
key: 00 00 00 00  value: <cannot read>
key: 01 00 00 00  value: <cannot read>
key: 02 00 00 00  value: <cannot read>
Found 0 elements

Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/map.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index df958af56b6c..44b192e87708 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -719,6 +719,9 @@ static int dump_map_elem(int fd, void *key, void *value,
 
 		if (errno == ENOENT)
 			msg = "<no entry>";
+		else if (lookup_errno == ENOSPC &&
+			 map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+			msg = "<cannot read>";
 
 		print_entry_error(map_info, key,
 				  msg ? : strerror(lookup_errno));
@@ -775,6 +778,10 @@ static int do_dump(int argc, char **argv)
 			}
 		}
 
+	if (info.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
+	    info.value_size != 8)
+		p_info("Warning: cannot read values from %s map with value_size != 8",
+		       map_type_name[info.type]);
 	while (true) {
 		err = bpf_map_get_next_key(fd, prev_key, key);
 		if (err) {
-- 
cgit v1.2.3


From 08de198c95438fcbfad7bc06121176794ec92c6e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 12 Apr 2019 14:41:32 -0700
Subject: selftests/bpf: two scale tests

Add two tests to check that sequence of 1024 jumps is verifiable.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c  | 70 ++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/verifier/scale.c | 18 +++++++
 2 files changed, 88 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/verifier/scale.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index e2ebcaddbe78..6cb6a1074fd1 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -208,6 +208,76 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 	self->retval = (uint32_t)res;
 }
 
+/* test the sequence of 1k jumps */
+static void bpf_fill_scale1(struct bpf_test *self)
+{
+	struct bpf_insn *insn = self->fill_insns;
+	int i = 0, k = 0;
+
+	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+	/* test to check that the sequence of 1024 jumps is acceptable */
+	while (k++ < 1024) {
+		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+					 BPF_FUNC_get_prandom_u32);
+		insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
+		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
+					-8 * (k % 64 + 1));
+	}
+	/* every jump adds 1024 steps to insn_processed, so to stay exactly
+	 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+	 */
+	while (i < MAX_TEST_INSNS - 1025)
+		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
+	insn[i] = BPF_EXIT_INSN();
+	self->prog_len = i + 1;
+	self->retval = 42;
+}
+
+/* test the sequence of 1k jumps in inner most function (function depth 8)*/
+static void bpf_fill_scale2(struct bpf_test *self)
+{
+	struct bpf_insn *insn = self->fill_insns;
+	int i = 0, k = 0;
+
+#define FUNC_NEST 7
+	for (k = 0; k < FUNC_NEST; k++) {
+		insn[i++] = BPF_CALL_REL(1);
+		insn[i++] = BPF_EXIT_INSN();
+	}
+	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+	/* test to check that the sequence of 1024 jumps is acceptable */
+	while (k++ < 1024) {
+		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+					 BPF_FUNC_get_prandom_u32);
+		insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
+		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
+					-8 * (k % (64 - 4 * FUNC_NEST) + 1));
+	}
+	/* every jump adds 1024 steps to insn_processed, so to stay exactly
+	 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+	 */
+	while (i < MAX_TEST_INSNS - 1025)
+		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
+	insn[i] = BPF_EXIT_INSN();
+	self->prog_len = i + 1;
+	self->retval = 42;
+}
+
+static void bpf_fill_scale(struct bpf_test *self)
+{
+	switch (self->retval) {
+	case 1:
+		return bpf_fill_scale1(self);
+	case 2:
+		return bpf_fill_scale2(self);
+	default:
+		self->prog_len = 0;
+		break;
+	}
+}
+
 /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
 #define BPF_SK_LOOKUP(func)						\
 	/* struct bpf_sock_tuple tuple = {} */				\
diff --git a/tools/testing/selftests/bpf/verifier/scale.c b/tools/testing/selftests/bpf/verifier/scale.c
new file mode 100644
index 000000000000..7f868d4802e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/scale.c
@@ -0,0 +1,18 @@
+{
+	"scale: scale test 1",
+	.insns = { },
+	.data = { },
+	.fill_helper = bpf_fill_scale,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"scale: scale test 2",
+	.insns = { },
+	.data = { },
+	.fill_helper = bpf_fill_scale,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.retval = 2,
+},
-- 
cgit v1.2.3


From a5cb33464e53482ecc645b4b7703f5b6d151095f Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 12 Apr 2019 16:43:10 -0700
Subject: selftests/bpf: make flow dissector tests more extensible

Rewrite selftest to iterate over an array with input packet and
expected flow_keys. This should make it easier to extend this test
with additional cases without too much boilerplate.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/prog_tests/flow_dissector.c      | 197 ++++++++++++---------
 1 file changed, 116 insertions(+), 81 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index fc818bc1d729..1a73937826b0 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -2,7 +2,7 @@
 #include <test_progs.h>
 
 #define CHECK_FLOW_KEYS(desc, got, expected)				\
-	CHECK(memcmp(&got, &expected, sizeof(got)) != 0,		\
+	CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0,		\
 	      desc,							\
 	      "nhoff=%u/%u "						\
 	      "thoff=%u/%u "						\
@@ -10,6 +10,7 @@
 	      "is_frag=%u/%u "						\
 	      "is_first_frag=%u/%u "					\
 	      "is_encap=%u/%u "						\
+	      "ip_proto=0x%x/0x%x "					\
 	      "n_proto=0x%x/0x%x "					\
 	      "sport=%u/%u "						\
 	      "dport=%u/%u\n",						\
@@ -19,53 +20,32 @@
 	      got.is_frag, expected.is_frag,				\
 	      got.is_first_frag, expected.is_first_frag,		\
 	      got.is_encap, expected.is_encap,				\
+	      got.ip_proto, expected.ip_proto,				\
 	      got.n_proto, expected.n_proto,				\
 	      got.sport, expected.sport,				\
 	      got.dport, expected.dport)
 
-static struct bpf_flow_keys pkt_v4_flow_keys = {
-	.nhoff = 0,
-	.thoff = sizeof(struct iphdr),
-	.addr_proto = ETH_P_IP,
-	.ip_proto = IPPROTO_TCP,
-	.n_proto = __bpf_constant_htons(ETH_P_IP),
-};
-
-static struct bpf_flow_keys pkt_v6_flow_keys = {
-	.nhoff = 0,
-	.thoff = sizeof(struct ipv6hdr),
-	.addr_proto = ETH_P_IPV6,
-	.ip_proto = IPPROTO_TCP,
-	.n_proto = __bpf_constant_htons(ETH_P_IPV6),
-};
-
-#define VLAN_HLEN	4
+struct ipv4_pkt {
+	struct ethhdr eth;
+	struct iphdr iph;
+	struct tcphdr tcp;
+} __packed;
 
-static struct {
+struct svlan_ipv4_pkt {
 	struct ethhdr eth;
 	__u16 vlan_tci;
 	__u16 vlan_proto;
 	struct iphdr iph;
 	struct tcphdr tcp;
-} __packed pkt_vlan_v4 = {
-	.eth.h_proto = __bpf_constant_htons(ETH_P_8021Q),
-	.vlan_proto = __bpf_constant_htons(ETH_P_IP),
-	.iph.ihl = 5,
-	.iph.protocol = IPPROTO_TCP,
-	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
-	.tcp.urg_ptr = 123,
-	.tcp.doff = 5,
-};
+} __packed;
 
-static struct bpf_flow_keys pkt_vlan_v4_flow_keys = {
-	.nhoff = VLAN_HLEN,
-	.thoff = VLAN_HLEN + sizeof(struct iphdr),
-	.addr_proto = ETH_P_IP,
-	.ip_proto = IPPROTO_TCP,
-	.n_proto = __bpf_constant_htons(ETH_P_IP),
-};
+struct ipv6_pkt {
+	struct ethhdr eth;
+	struct ipv6hdr iph;
+	struct tcphdr tcp;
+} __packed;
 
-static struct {
+struct dvlan_ipv6_pkt {
 	struct ethhdr eth;
 	__u16 vlan_tci;
 	__u16 vlan_proto;
@@ -73,31 +53,97 @@ static struct {
 	__u16 vlan_proto2;
 	struct ipv6hdr iph;
 	struct tcphdr tcp;
-} __packed pkt_vlan_v6 = {
-	.eth.h_proto = __bpf_constant_htons(ETH_P_8021AD),
-	.vlan_proto = __bpf_constant_htons(ETH_P_8021Q),
-	.vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6),
-	.iph.nexthdr = IPPROTO_TCP,
-	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
-	.tcp.urg_ptr = 123,
-	.tcp.doff = 5,
+} __packed;
+
+struct test {
+	const char *name;
+	union {
+		struct ipv4_pkt ipv4;
+		struct svlan_ipv4_pkt svlan_ipv4;
+		struct ipv6_pkt ipv6;
+		struct dvlan_ipv6_pkt dvlan_ipv6;
+	} pkt;
+	struct bpf_flow_keys keys;
 };
 
-static struct bpf_flow_keys pkt_vlan_v6_flow_keys = {
-	.nhoff = VLAN_HLEN * 2,
-	.thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr),
-	.addr_proto = ETH_P_IPV6,
-	.ip_proto = IPPROTO_TCP,
-	.n_proto = __bpf_constant_htons(ETH_P_IPV6),
+#define VLAN_HLEN	4
+
+struct test tests[] = {
+	{
+		.name = "ipv4",
+		.pkt.ipv4 = {
+			.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+			.iph.ihl = 5,
+			.iph.protocol = IPPROTO_TCP,
+			.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+			.tcp.doff = 5,
+		},
+		.keys = {
+			.nhoff = 0,
+			.thoff = sizeof(struct iphdr),
+			.addr_proto = ETH_P_IP,
+			.ip_proto = IPPROTO_TCP,
+			.n_proto = __bpf_constant_htons(ETH_P_IP),
+		},
+	},
+	{
+		.name = "ipv6",
+		.pkt.ipv6 = {
+			.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+			.iph.nexthdr = IPPROTO_TCP,
+			.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+			.tcp.doff = 5,
+		},
+		.keys = {
+			.nhoff = 0,
+			.thoff = sizeof(struct ipv6hdr),
+			.addr_proto = ETH_P_IPV6,
+			.ip_proto = IPPROTO_TCP,
+			.n_proto = __bpf_constant_htons(ETH_P_IPV6),
+		},
+	},
+	{
+		.name = "802.1q-ipv4",
+		.pkt.svlan_ipv4 = {
+			.eth.h_proto = __bpf_constant_htons(ETH_P_8021Q),
+			.vlan_proto = __bpf_constant_htons(ETH_P_IP),
+			.iph.ihl = 5,
+			.iph.protocol = IPPROTO_TCP,
+			.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+			.tcp.doff = 5,
+		},
+		.keys = {
+			.nhoff = VLAN_HLEN,
+			.thoff = VLAN_HLEN + sizeof(struct iphdr),
+			.addr_proto = ETH_P_IP,
+			.ip_proto = IPPROTO_TCP,
+			.n_proto = __bpf_constant_htons(ETH_P_IP),
+		},
+	},
+	{
+		.name = "802.1ad-ipv6",
+		.pkt.dvlan_ipv6 = {
+			.eth.h_proto = __bpf_constant_htons(ETH_P_8021AD),
+			.vlan_proto = __bpf_constant_htons(ETH_P_8021Q),
+			.vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6),
+			.iph.nexthdr = IPPROTO_TCP,
+			.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+			.tcp.doff = 5,
+		},
+		.keys = {
+			.nhoff = VLAN_HLEN * 2,
+			.thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr),
+			.addr_proto = ETH_P_IPV6,
+			.ip_proto = IPPROTO_TCP,
+			.n_proto = __bpf_constant_htons(ETH_P_IPV6),
+		},
+	},
 };
 
 void test_flow_dissector(void)
 {
-	struct bpf_flow_keys flow_keys;
 	struct bpf_object *obj;
-	__u32 duration, retval;
 	int err, prog_fd;
-	__u32 size;
 
 	err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
 			    "jmp_table", &prog_fd);
@@ -106,35 +152,24 @@ void test_flow_dissector(void)
 		return;
 	}
 
-	err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
-				&flow_keys, &size, &retval, &duration);
-	CHECK(size != sizeof(flow_keys) || err || retval != 1, "ipv4",
-	      "err %d errno %d retval %d duration %d size %u/%lu\n",
-	      err, errno, retval, duration, size, sizeof(flow_keys));
-	CHECK_FLOW_KEYS("ipv4_flow_keys", flow_keys, pkt_v4_flow_keys);
-
-	err = bpf_prog_test_run(prog_fd, 10, &pkt_v6, sizeof(pkt_v6),
-				&flow_keys, &size, &retval, &duration);
-	CHECK(size != sizeof(flow_keys) || err || retval != 1, "ipv6",
-	      "err %d errno %d retval %d duration %d size %u/%lu\n",
-	      err, errno, retval, duration, size, sizeof(flow_keys));
-	CHECK_FLOW_KEYS("ipv6_flow_keys", flow_keys, pkt_v6_flow_keys);
+	for (int i = 0; i < ARRAY_SIZE(tests); i++) {
+		struct bpf_flow_keys flow_keys;
+		struct bpf_prog_test_run_attr tattr = {
+			.prog_fd = prog_fd,
+			.data_in = &tests[i].pkt,
+			.data_size_in = sizeof(tests[i].pkt),
+			.data_out = &flow_keys,
+		};
 
-	err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v4, sizeof(pkt_vlan_v4),
-				&flow_keys, &size, &retval, &duration);
-	CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv4",
-	      "err %d errno %d retval %d duration %d size %u/%lu\n",
-	      err, errno, retval, duration, size, sizeof(flow_keys));
-	CHECK_FLOW_KEYS("vlan_ipv4_flow_keys", flow_keys,
-			pkt_vlan_v4_flow_keys);
-
-	err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v6, sizeof(pkt_vlan_v6),
-				&flow_keys, &size, &retval, &duration);
-	CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv6",
-	      "err %d errno %d retval %d duration %d size %u/%lu\n",
-	      err, errno, retval, duration, size, sizeof(flow_keys));
-	CHECK_FLOW_KEYS("vlan_ipv6_flow_keys", flow_keys,
-			pkt_vlan_v6_flow_keys);
+		err = bpf_prog_test_run_xattr(&tattr);
+		CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) ||
+			   err || tattr.retval != 1,
+			   tests[i].name,
+			   "err %d errno %d retval %d duration %d size %u/%lu\n",
+			   err, errno, tattr.retval, tattr.duration,
+			   tattr.data_size_out, sizeof(flow_keys));
+		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+	}
 
 	bpf_object__close(obj);
 }
-- 
cgit v1.2.3


From 809041e765055ba311f3a0b8751db1c65739ad51 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Wed, 3 Apr 2019 08:43:38 -0700
Subject: selftests: bpf: add VRF test cases to lwt_ip_encap test.

This patch adds tests validating that VRF and BPF-LWT
encap work together well, as requested by David Ahern.

Signed-off-by: Peter Oskolkov <posk@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_lwt_ip_encap.sh | 134 +++++++++++++++--------
 1 file changed, 86 insertions(+), 48 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index d4d3391cc13a..acf7a74f97cd 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -129,6 +129,24 @@ setup()
 	ip link set veth7 netns ${NS2}
 	ip link set veth8 netns ${NS3}
 
+	if [ ! -z "${VRF}" ] ; then
+		ip -netns ${NS1} link add red type vrf table 1001
+		ip -netns ${NS1} link set red up
+		ip -netns ${NS1} route add table 1001 unreachable default metric 8192
+		ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192
+		ip -netns ${NS1} link set veth1 vrf red
+		ip -netns ${NS1} link set veth5 vrf red
+
+		ip -netns ${NS2} link add red type vrf table 1001
+		ip -netns ${NS2} link set red up
+		ip -netns ${NS2} route add table 1001 unreachable default metric 8192
+		ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192
+		ip -netns ${NS2} link set veth2 vrf red
+		ip -netns ${NS2} link set veth3 vrf red
+		ip -netns ${NS2} link set veth6 vrf red
+		ip -netns ${NS2} link set veth7 vrf red
+	fi
+
 	# configure addesses: the top route (1-2-3-4)
 	ip -netns ${NS1}    addr add ${IPv4_1}/24  dev veth1
 	ip -netns ${NS2}    addr add ${IPv4_2}/24  dev veth2
@@ -163,29 +181,29 @@ setup()
 
 	# NS1
 	# top route
-	ip -netns ${NS1}    route add ${IPv4_2}/32  dev veth1
-	ip -netns ${NS1}    route add default dev veth1 via ${IPv4_2}  # go top by default
-	ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1
-	ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2}  # go top by default
+	ip -netns ${NS1}    route add ${IPv4_2}/32  dev veth1 ${VRF}
+	ip -netns ${NS1}    route add default dev veth1 via ${IPv4_2} ${VRF}  # go top by default
+	ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF}
+	ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF}  # go top by default
 	# bottom route
-	ip -netns ${NS1}    route add ${IPv4_6}/32  dev veth5
-	ip -netns ${NS1}    route add ${IPv4_7}/32  dev veth5 via ${IPv4_6}
-	ip -netns ${NS1}    route add ${IPv4_8}/32  dev veth5 via ${IPv4_6}
-	ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5
-	ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6}
-	ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6}
+	ip -netns ${NS1}    route add ${IPv4_6}/32  dev veth5 ${VRF}
+	ip -netns ${NS1}    route add ${IPv4_7}/32  dev veth5 via ${IPv4_6} ${VRF}
+	ip -netns ${NS1}    route add ${IPv4_8}/32  dev veth5 via ${IPv4_6} ${VRF}
+	ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF}
+	ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF}
+	ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF}
 
 	# NS2
 	# top route
-	ip -netns ${NS2}    route add ${IPv4_1}/32  dev veth2
-	ip -netns ${NS2}    route add ${IPv4_4}/32  dev veth3
-	ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2
-	ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3
+	ip -netns ${NS2}    route add ${IPv4_1}/32  dev veth2 ${VRF}
+	ip -netns ${NS2}    route add ${IPv4_4}/32  dev veth3 ${VRF}
+	ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF}
+	ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF}
 	# bottom route
-	ip -netns ${NS2}    route add ${IPv4_5}/32  dev veth6
-	ip -netns ${NS2}    route add ${IPv4_8}/32  dev veth7
-	ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6
-	ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7
+	ip -netns ${NS2}    route add ${IPv4_5}/32  dev veth6 ${VRF}
+	ip -netns ${NS2}    route add ${IPv4_8}/32  dev veth7 ${VRF}
+	ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF}
+	ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF}
 
 	# NS3
 	# top route
@@ -207,16 +225,16 @@ setup()
 	ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
 	ip -netns ${NS3} link set gre_dev up
 	ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev
-	ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6}
-	ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8}
+	ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF}
+	ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF}
 
 
 	# configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
 	ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
 	ip -netns ${NS3} link set gre6_dev up
 	ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
-	ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6}
-	ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8}
+	ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
+	ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
 
 	# rp_filter gets confused by what these tests are doing, so disable it
 	ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
@@ -244,18 +262,18 @@ trap cleanup EXIT
 
 remove_routes_to_gredev()
 {
-	ip -netns ${NS1} route del ${IPv4_GRE} dev veth5
-	ip -netns ${NS2} route del ${IPv4_GRE} dev veth7
-	ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5
-	ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7
+	ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF}
+	ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF}
+	ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF}
+	ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF}
 }
 
 add_unreachable_routes_to_gredev()
 {
-	ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32
-	ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32
-	ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128
-	ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128
+	ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF}
+	ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 ${VRF}
+	ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
+	ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
 }
 
 test_ping()
@@ -265,10 +283,10 @@ test_ping()
 	local RET=0
 
 	if [ "${PROTO}" == "IPv4" ] ; then
-		ip netns exec ${NS1} ping  -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} 2>&1 > /dev/null
+		ip netns exec ${NS1} ping  -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
 		RET=$?
 	elif [ "${PROTO}" == "IPv6" ] ; then
-		ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} 2>&1 > /dev/null
+		ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
 		RET=$?
 	else
 		echo "    test_ping: unknown PROTO: ${PROTO}"
@@ -328,7 +346,7 @@ test_gso()
 test_egress()
 {
 	local readonly ENCAP=$1
-	echo "starting egress ${ENCAP} encap test"
+	echo "starting egress ${ENCAP} encap test ${VRF}"
 	setup
 
 	# by default, pings work
@@ -336,26 +354,35 @@ test_egress()
 	test_ping IPv6 0
 
 	# remove NS2->DST routes, ping fails
-	ip -netns ${NS2}    route del ${IPv4_DST}/32  dev veth3
-	ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
+	ip -netns ${NS2}    route del ${IPv4_DST}/32  dev veth3 ${VRF}
+	ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
 	test_ping IPv4 1
 	test_ping IPv6 1
 
 	# install replacement routes (LWT/eBPF), pings succeed
 	if [ "${ENCAP}" == "IPv4" ] ; then
-		ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
-		ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
+		ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
+			test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF}
+		ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
+			test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF}
 	elif [ "${ENCAP}" == "IPv6" ] ; then
-		ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
-		ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
+		ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
+			test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF}
+		ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
+			test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF}
 	else
 		echo "    unknown encap ${ENCAP}"
 		TEST_STATUS=1
 	fi
 	test_ping IPv4 0
 	test_ping IPv6 0
-	test_gso IPv4
-	test_gso IPv6
+
+	# skip GSO tests with VRF: VRF routing needs properly assigned
+	# source IP/device, which is easy to do with ping and hard with dd/nc.
+	if [ -z "${VRF}" ] ; then
+		test_gso IPv4
+		test_gso IPv6
+	fi
 
 	# a negative test: remove routes to GRE devices: ping fails
 	remove_routes_to_gredev
@@ -374,7 +401,7 @@ test_egress()
 test_ingress()
 {
 	local readonly ENCAP=$1
-	echo "starting ingress ${ENCAP} encap test"
+	echo "starting ingress ${ENCAP} encap test ${VRF}"
 	setup
 
 	# need to wait a bit for IPv6 to autoconf, otherwise
@@ -385,18 +412,22 @@ test_ingress()
 	test_ping IPv6 0
 
 	# remove NS2->DST routes, pings fail
-	ip -netns ${NS2}    route del ${IPv4_DST}/32  dev veth3
-	ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
+	ip -netns ${NS2}    route del ${IPv4_DST}/32  dev veth3 ${VRF}
+	ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
 	test_ping IPv4 1
 	test_ping IPv6 1
 
 	# install replacement routes (LWT/eBPF), pings succeed
 	if [ "${ENCAP}" == "IPv4" ] ; then
-		ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
-		ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
+		ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
+			test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF}
+		ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
+			test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF}
 	elif [ "${ENCAP}" == "IPv6" ] ; then
-		ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
-		ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
+		ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
+			test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF}
+		ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
+			test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF}
 	else
 		echo "FAIL: unknown encap ${ENCAP}"
 		TEST_STATUS=1
@@ -418,6 +449,13 @@ test_ingress()
 	process_test_results
 }
 
+VRF=""
+test_egress IPv4
+test_egress IPv6
+test_ingress IPv4
+test_ingress IPv6
+
+VRF="vrf red"
 test_egress IPv4
 test_egress IPv6
 test_ingress IPv4
-- 
cgit v1.2.3


From e1d1dc4653ecdea55cb0e96844f88da62c65cd4f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 16 Apr 2019 11:47:17 -0700
Subject: libbpf: fix printf formatter for ptrdiff_t argument

Using %ld for printing out value of ptrdiff_t type is not portable
between 32-bit and 64-bit archs. This is causing compilation errors for
libbpf on 32-bit platform (discovered as part of an effort to integrate
libbpf into systemd ([0])). Proper formatter is %td, which is used in
this patch.

v2->v1:
  - add Reported-by
  - provide more context on how this issue was discovered

[0] https://github.com/systemd/systemd/pull/12151

Reported-by: Evgeny Vereshchagin <evvers@ya.ru>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e5b77ad97795..d817bf20f3d6 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -817,7 +817,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
 		memcpy(*data_buff, data->d_buf, data->d_size);
 	}
 
-	pr_debug("map %ld is \"%s\"\n", map - obj->maps, map->name);
+	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
 	return 0;
 }
 
-- 
cgit v1.2.3


From f25377ee4fb1118650a08b403234aa6f57ce25a9 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Tue, 16 Apr 2019 13:13:47 -0700
Subject: bpftool: Support sysctl hook

Add support for recently added BPF_PROG_TYPE_CGROUP_SYSCTL program type
and BPF_CGROUP_SYSCTL attach type.

Example of bpftool output with sysctl program from selftests:

  # bpftool p load ./test_sysctl_prog.o /mnt/bpf/sysctl_prog type cgroup/sysctl
  # bpftool p l
  9: cgroup_sysctl  name sysctl_tcp_mem  tag 0dd05f81a8d0d52e  gpl
          loaded_at 2019-04-16T12:57:27-0700  uid 0
          xlated 1008B  jited 623B  memlock 4096B
  # bpftool c a /mnt/cgroup2/bla sysctl id 9
  # bpftool c t
  CgroupPath
  ID       AttachType      AttachFlags     Name
  /mnt/cgroup2/bla
      9        sysctl                          sysctl_tcp_mem
  # bpftool c d /mnt/cgroup2/bla sysctl id 9
  # bpftool c t
  CgroupPath
  ID       AttachType      AttachFlags     Name

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 5 +++--
 tools/bpf/bpftool/Documentation/bpftool-prog.rst   | 3 ++-
 tools/bpf/bpftool/bash-completion/bpftool          | 7 ++++---
 tools/bpf/bpftool/cgroup.c                         | 3 ++-
 tools/bpf/bpftool/main.h                           | 1 +
 tools/bpf/bpftool/prog.c                           | 2 +-
 6 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 5e3b7d9d7599..89b6b10e2183 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -29,7 +29,7 @@ CGROUP COMMANDS
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
 |	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
 |		**bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-|               **sendmsg4** | **sendmsg6** }
+|		**sendmsg4** | **sendmsg6** | **sysctl** }
 |	*ATTACH_FLAGS* := { **multi** | **override** }
 
 DESCRIPTION
@@ -85,7 +85,8 @@ DESCRIPTION
 		  **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an
 		  unconnected udp4 socket (since 4.18);
 		  **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an
-		  unconnected udp6 socket (since 4.18).
+		  unconnected udp6 socket (since 4.18);
+		  **sysctl** sysctl access (since 5.2).
 
 	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 		  Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index bb9bb00c0c2c..2f183ffd8351 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -39,7 +39,8 @@ PROG COMMANDS
 |		**cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** |
 |		**lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
 |		**cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
-|		**cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6**
+|		**cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
+|		**cgroup/sysctl**
 |	}
 |       *ATTACH_TYPE* := {
 |		**msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index b803827d01e8..9f3ffe1e26ab 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -370,7 +370,8 @@ _bpftool()
                                 lirc_mode2 cgroup/bind4 cgroup/bind6 \
                                 cgroup/connect4 cgroup/connect6 \
                                 cgroup/sendmsg4 cgroup/sendmsg6 \
-                                cgroup/post_bind4 cgroup/post_bind6" -- \
+                                cgroup/post_bind4 cgroup/post_bind6 \
+                                cgroup/sysctl" -- \
                                                    "$cur" ) )
                             return 0
                             ;;
@@ -619,7 +620,7 @@ _bpftool()
                 attach|detach)
                     local ATTACH_TYPES='ingress egress sock_create sock_ops \
                         device bind4 bind6 post_bind4 post_bind6 connect4 \
-                        connect6 sendmsg4 sendmsg6'
+                        connect6 sendmsg4 sendmsg6 sysctl'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag'
                     case $prev in
@@ -629,7 +630,7 @@ _bpftool()
                             ;;
                         ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
                         post_bind4|post_bind6|connect4|connect6|sendmsg4|\
-                        sendmsg6)
+                        sendmsg6|sysctl)
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index a81b34343eb8..7e22f115c8c1 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -25,7 +25,7 @@
 	"       ATTACH_TYPE := { ingress | egress | sock_create |\n"	       \
 	"                        sock_ops | device | bind4 | bind6 |\n"	       \
 	"                        post_bind4 | post_bind6 | connect4 |\n"       \
-	"                        connect6 | sendmsg4 | sendmsg6 }"
+	"                        connect6 | sendmsg4 | sendmsg6 | sysctl }"
 
 static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_INET_INGRESS] = "ingress",
@@ -41,6 +41,7 @@ static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
 	[BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
 	[BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
+	[BPF_CGROUP_SYSCTL] = "sysctl",
 	[__MAX_BPF_ATTACH_TYPE] = NULL,
 };
 
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index d7dd84d3c660..1ccc46169a19 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -73,6 +73,7 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_LIRC_MODE2]		= "lirc_mode2",
 	[BPF_PROG_TYPE_SK_REUSEPORT]		= "sk_reuseport",
 	[BPF_PROG_TYPE_FLOW_DISSECTOR]		= "flow_dissector",
+	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
 };
 
 extern const char * const map_type_name[];
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 4b7a307b9fc9..fc495b27f0fc 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1060,7 +1060,7 @@ static int do_help(int argc, char **argv)
 		"                 tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n"
 		"                 cgroup/sock | cgroup/dev | lwt_in | lwt_out | lwt_xmit |\n"
 		"                 lwt_seg6local | sockops | sk_skb | sk_msg | lirc_mode2 |\n"
-		"                 sk_reuseport | flow_dissector |\n"
+		"                 sk_reuseport | flow_dissector | cgroup/sysctl |\n"
 		"                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
 		"                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
 		"                 cgroup/sendmsg4 | cgroup/sendmsg6 }\n"
-- 
cgit v1.2.3


From d459b59ee0f5ed2fa96e77bbd919e095b1aeceb0 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Wed, 17 Apr 2019 09:22:58 +0900
Subject: tools/bpftool: re-organize newline printing for map listing

Let's move the final newline printing in show_map_close_plain() at
the end of the function because it looks correct and consistent with
prog.c. Also let's do related changes for the line which prints
pinned file name.

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/map.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 44b192e87708..846d2e5c64b4 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -597,15 +597,16 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 	}
 	close(fd);
 
-	printf("\n");
 	if (!hash_empty(map_table.table)) {
 		struct pinned_obj *obj;
 
 		hash_for_each_possible(map_table.table, obj, hash, info->id) {
 			if (obj->id == info->id)
-				printf("\tpinned %s\n", obj->path);
+				printf("\n\tpinned %s", obj->path);
 		}
 	}
+
+	printf("\n");
 	return 0;
 }
 
-- 
cgit v1.2.3


From d1b7725dfea3e6c1aff2ee94baf3658aba450fbe Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Wed, 17 Apr 2019 09:22:59 +0900
Subject: tools/bpftool: show btf_id in map listing

Let's print btf id of map similar to the way we are printing it
for programs.

Sample output:
user@test# bpftool map -f
61: lpm_trie  flags 0x1
	key 20B  value 8B  max_entries 1  memlock 4096B
133: array  name test_btf_id  flags 0x0
	key 4B  value 4B  max_entries 4  memlock 4096B
	pinned /sys/fs/bpf/test100
	btf_id 174
170: array  name test_btf_id  flags 0x0
	key 4B  value 4B  max_entries 4  memlock 4096B
	btf_id 240

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/map.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 846d2e5c64b4..10b6c9d3e525 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -531,6 +531,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
 	}
 	close(fd);
 
+	if (info->btf_id)
+		jsonw_int_field(json_wtr, "btf_id", info->btf_id);
+
 	if (!hash_empty(map_table.table)) {
 		struct pinned_obj *obj;
 
@@ -606,6 +609,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 		}
 	}
 
+	if (info->btf_id)
+		printf("\n\tbtf_id %d", info->btf_id);
+
 	printf("\n");
 	return 0;
 }
-- 
cgit v1.2.3


From d5e63fdd443378531fd1a67a15a720a799635d93 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 16 Apr 2019 14:58:09 +0200
Subject: libbpf: fix XDP socket ring buffer memory ordering

The ring buffer code of	XDP sockets is missing a memory	barrier	on the
consumer side between the load of the data and the write that signals
that it is ok for the producer to put new data into the buffer. On
architectures that does not guarantee that stores are not reordered
with older loads, the producer might put data into the ring before the
consumer had the chance to read it. As IA does guarantee this
ordering, it would only need a compiler barrier here, but there are no
primitives in barrier.h for this specific case (hinder writes to be ordered
before older reads) so I had to add a smp_mb() here which will
translate into a run-time synch operation on IA.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/xsk.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index a497f00e2962..1b35c40dff73 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -36,6 +36,10 @@ struct name { \
 DEFINE_XSK_RING(xsk_ring_prod);
 DEFINE_XSK_RING(xsk_ring_cons);
 
+/* For a detailed explanation on the memory barriers associated with the
+ * ring, please take a look at net/xdp/xsk_queue.h.
+ */
+
 struct xsk_umem;
 struct xsk_socket;
 
@@ -116,8 +120,8 @@ static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
 
 static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
 {
-	/* Make sure everything has been written to the ring before signalling
-	 * this to the kernel.
+	/* Make sure everything has been written to the ring before indicating
+	 * this to the kernel by writing the producer pointer.
 	 */
 	smp_wmb();
 
@@ -144,6 +148,11 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
 
 static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
 {
+	/* Make sure data has been read before indicating we are done
+	 * with the entries by updating the consumer pointer.
+	 */
+	smp_mb();
+
 	*cons->consumer += nb;
 }
 
-- 
cgit v1.2.3


From a06d729646e87afc5ce06e539aecf762cc26c6e3 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 16 Apr 2019 14:58:10 +0200
Subject: libbpf: remove likely/unlikely in xsk.h

This patch removes the use of likely and unlikely in xsk.h since they
create a dependency on Linux headers as reported by several
users. There have also been reports that the use of these decreases
performance as the compiler puts the code on two different cache lines
instead of on a single one. All in all, I think we are better off
without them.

Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/xsk.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 1b35c40dff73..f264f24f06ac 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -109,7 +109,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
 static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
 					    size_t nb, __u32 *idx)
 {
-	if (unlikely(xsk_prod_nb_free(prod, nb) < nb))
+	if (xsk_prod_nb_free(prod, nb) < nb)
 		return 0;
 
 	*idx = prod->cached_prod;
@@ -133,7 +133,7 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
 {
 	size_t entries = xsk_cons_nb_avail(cons, nb);
 
-	if (likely(entries > 0)) {
+	if (entries > 0) {
 		/* Make sure we do not speculatively read the data before
 		 * we have received the packet buffers from the ring.
 		 */
-- 
cgit v1.2.3


From b7e3a28019c92ffe1f55de278c5641de33b6259a Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 16 Apr 2019 14:58:11 +0200
Subject: libbpf: remove dependency on barrier.h in xsk.h

The use of smp_rmb() and smp_wmb() creates a Linux header dependency
on barrier.h that is unnecessary in most parts. This patch implements
the two small defines that are needed from barrier.h. As a bonus, the
new implementations are faster than the default ones as they default
to sfence and lfence for x86, while we only need a compiler barrier in
our case. Just as it is when the same ring access code is compiled in
the kernel.

Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf_util.h | 25 +++++++++++++++++++++++++
 tools/lib/bpf/xsk.h         |  7 ++++---
 2 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
index 81ecda0cb9c9..71fbb2898b87 100644
--- a/tools/lib/bpf/libbpf_util.h
+++ b/tools/lib/bpf/libbpf_util.h
@@ -23,6 +23,31 @@ do {				\
 #define pr_info(fmt, ...)	__pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
 #define pr_debug(fmt, ...)	__pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
 
+/* Use these barrier functions instead of smp_[rw]mb() when they are
+ * used in a libbpf header file. That way they can be built into the
+ * application that uses libbpf.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+# define libbpf_smp_rmb() asm volatile("" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("" : : : "memory")
+# define libbpf_smp_mb() \
+	asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
+#elif defined(__aarch64__)
+# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
+# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
+#elif defined(__arm__)
+/* These are only valid for armv7 and above */
+# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
+# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
+#else
+# warning Architecture missing native barrier functions in libbpf_util.h.
+# define libbpf_smp_rmb() __sync_synchronize()
+# define libbpf_smp_wmb() __sync_synchronize()
+# define libbpf_smp_mb() __sync_synchronize()
+#endif
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index f264f24f06ac..2377c7a7f1b1 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -16,6 +16,7 @@
 #include <linux/if_xdp.h>
 
 #include "libbpf.h"
+#include "libbpf_util.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -123,7 +124,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
 	/* Make sure everything has been written to the ring before indicating
 	 * this to the kernel by writing the producer pointer.
 	 */
-	smp_wmb();
+	libbpf_smp_wmb();
 
 	*prod->producer += nb;
 }
@@ -137,7 +138,7 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
 		/* Make sure we do not speculatively read the data before
 		 * we have received the packet buffers from the ring.
 		 */
-		smp_rmb();
+		libbpf_smp_rmb();
 
 		*idx = cons->cached_cons;
 		cons->cached_cons += entries;
@@ -151,7 +152,7 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
 	/* Make sure data has been read before indicating we are done
 	 * with the entries by updating the consumer pointer.
 	 */
-	smp_mb();
+	libbpf_smp_mb();
 
 	*cons->consumer += nb;
 }
-- 
cgit v1.2.3


From 2c5935f1b2b642cee8e1562396ec8a7781fc4c6d Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 16 Apr 2019 14:58:12 +0200
Subject: libbpf: optimize barrier for XDP socket rings

The full memory barrier in the XDP socket rings on the consumer side
between the load of the data and the store of the consumer ring is
there to protect the store from being executed before the load of the
data. If this was allowed to happen, the producer might overwrite the
data field with a new entry before the consumer got the chance to read
it.

On x86, stores are guaranteed not to be reordered with older loads, so
it does not need a full memory barrier here. A compile time barrier
would be enough. This patch introdcues a new primitive in
libbpf_util.h that implements a new barrier type (libbpf_smp_rwmb)
hindering stores to be reordered with older loads. It is then used in
the XDP socket ring access code in libbpf to improve performance.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf_util.h | 5 +++++
 tools/lib/bpf/xsk.h         | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
index 71fbb2898b87..172b707e007b 100644
--- a/tools/lib/bpf/libbpf_util.h
+++ b/tools/lib/bpf/libbpf_util.h
@@ -32,20 +32,25 @@ do {				\
 # define libbpf_smp_wmb() asm volatile("" : : : "memory")
 # define libbpf_smp_mb() \
 	asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
+/* Hinders stores to be observed before older loads. */
+# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
 #elif defined(__aarch64__)
 # define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
 # define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
 # define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_rwmb() libbpf_smp_mb()
 #elif defined(__arm__)
 /* These are only valid for armv7 and above */
 # define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
 # define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
 # define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_rwmb() libbpf_smp_mb()
 #else
 # warning Architecture missing native barrier functions in libbpf_util.h.
 # define libbpf_smp_rmb() __sync_synchronize()
 # define libbpf_smp_wmb() __sync_synchronize()
 # define libbpf_smp_mb() __sync_synchronize()
+# define libbpf_smp_rwmb() __sync_synchronize()
 #endif
 
 #ifdef __cplusplus
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 2377c7a7f1b1..82ea71a0f3ec 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -152,7 +152,7 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
 	/* Make sure data has been read before indicating we are done
 	 * with the entries by updating the consumer pointer.
 	 */
-	libbpf_smp_mb();
+	libbpf_smp_rwmb();
 
 	*cons->consumer += nb;
 }
-- 
cgit v1.2.3


From ba02de1aa04e392e15ef503c6dd5166915d9d4de Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 17 Apr 2019 22:23:30 -0700
Subject: selftests/bpf: fix a compilation error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I hit the following compilation error with gcc 4.8.5.

  prog_tests/flow_dissector.c: In function ‘test_flow_dissector’:
  prog_tests/flow_dissector.c:155:2: error: ‘for’ loop initial declarations are only allowed in C99 mode
    for (int i = 0; i < ARRAY_SIZE(tests); i++) {
    ^
  prog_tests/flow_dissector.c:155:2: note: use option -std=c99 or -std=gnu99 to compile your code

Let us fix the issue by avoiding this particular c99 feature.

Fixes: a5cb33464e53 ("selftests/bpf: make flow dissector tests more extensible")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/flow_dissector.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 1a73937826b0..126319f9a97c 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -143,7 +143,7 @@ struct test tests[] = {
 void test_flow_dissector(void)
 {
 	struct bpf_object *obj;
-	int err, prog_fd;
+	int i, err, prog_fd;
 
 	err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
 			    "jmp_table", &prog_fd);
@@ -152,7 +152,7 @@ void test_flow_dissector(void)
 		return;
 	}
 
-	for (int i = 0; i < ARRAY_SIZE(tests); i++) {
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		struct bpf_flow_keys flow_keys;
 		struct bpf_prog_test_run_attr tattr = {
 			.prog_fd = prog_fd,
-- 
cgit v1.2.3


From 79b1b30e4c209c2ff1ddd6559b41dba1dfc8bcd8 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Thu, 18 Apr 2019 09:21:10 +0200
Subject: libbpf: remove compile time warning from libbpf_util.h

Having a helpful compile time warning in libbpf_util.h is not a good
idea since all warnings are treated as errors. Change this into a
comment in the code instead.

Fixes: b7e3a28019c9 ("libbpf: remove dependency on barrier.h in xsk.h")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
index 172b707e007b..da94c4cb2e4d 100644
--- a/tools/lib/bpf/libbpf_util.h
+++ b/tools/lib/bpf/libbpf_util.h
@@ -46,7 +46,7 @@ do {				\
 # define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
 # define libbpf_smp_rwmb() libbpf_smp_mb()
 #else
-# warning Architecture missing native barrier functions in libbpf_util.h.
+/* Architecture missing native barrier functions. */
 # define libbpf_smp_rmb() __sync_synchronize()
 # define libbpf_smp_wmb() __sync_synchronize()
 # define libbpf_smp_mb() __sync_synchronize()
-- 
cgit v1.2.3


From 5de35e3ae9d01796a4909fd0dd8c3660d9316a77 Mon Sep 17 00:00:00 2001
From: Wang YanQing <udknight@gmail.com>
Date: Fri, 19 Apr 2019 02:05:13 +0800
Subject: selftests/bpf: fix compile errors due to unsync linux/in6.h and
 netinet/in.h

I meet below compile errors:
"
In file included from test_tcpnotify_kern.c:12:
/usr/include/netinet/in.h:101:5: error: expected identifier
    IPPROTO_HOPOPTS = 0,   /* IPv6 Hop-by-Hop options.  */
    ^
/usr/include/linux/in6.h:131:26: note: expanded from macro 'IPPROTO_HOPOPTS'
                                ^
In file included from test_tcpnotify_kern.c:12:
/usr/include/netinet/in.h:103:5: error: expected identifier
    IPPROTO_ROUTING = 43,  /* IPv6 routing header.  */
    ^
/usr/include/linux/in6.h:132:26: note: expanded from macro 'IPPROTO_ROUTING'
                                ^
In file included from test_tcpnotify_kern.c:12:
/usr/include/netinet/in.h:105:5: error: expected identifier
    IPPROTO_FRAGMENT = 44, /* IPv6 fragmentation header.  */
    ^
/usr/include/linux/in6.h:133:26: note: expanded from macro 'IPPROTO_FRAGMENT'
"
The same compile errors are reported for test_tcpbpf_kern.c too.

My environment:
lsb_release -a:
No LSB modules are available.
Distributor ID: Ubuntu
Description:    Ubuntu 16.04.6 LTS
Release:        16.04
Codename:       xenial

dpkg -l | grep libc-dev:
ii  libc-dev-bin              2.23-0ubuntu11           amd64        GNU C Library: Development binaries
ii  linux-libc-dev:amd64      4.4.0-145.171            amd64        Linux Kernel Headers for development.

The reason is linux/in6.h and netinet/in.h aren't synchronous about how to
handle the same definitions, IPPROTO_HOPOPTS, etc.

This patch fixes the compile errors by moving <netinet/in.h> to before the
<linux/*.h>.

Signed-off-by: Wang YanQing <udknight@gmail.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c    | 2 +-
 tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 74f73b33a7b0..c7c3240e0dd4 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stddef.h>
 #include <string.h>
+#include <netinet/in.h>
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
@@ -9,7 +10,6 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
-#include <netinet/in.h>
 #include "bpf_helpers.h"
 #include "bpf_endian.h"
 #include "test_tcpbpf.h"
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index edbca203ce2d..ec6db6e64c41 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stddef.h>
 #include <string.h>
+#include <netinet/in.h>
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
@@ -9,7 +10,6 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
-#include <netinet/in.h>
 #include "bpf_helpers.h"
 #include "bpf_endian.h"
 #include "test_tcpnotify.h"
-- 
cgit v1.2.3


From 849f257f61ff7dde49d59c62802e5913ff7a7cbb Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 18 Apr 2019 14:33:30 -0700
Subject: bpf: Increase MAX_NR_MAPS to 17 in test_verifier.c

map_fds[16] is the last one index-ed by fixup_map_array_small.
Hence, the MAX_NR_MAPS should be 17 instead.

Fixes: fb2abb73e575 ("bpf, selftest: test {rd, wr}only flags and direct value access")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 6cb6a1074fd1..ed9e894afef3 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -52,7 +52,7 @@
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	16
+#define MAX_NR_MAPS	17
 #define MAX_TEST_RUNS	8
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
-- 
cgit v1.2.3


From 4519efa6f8ea343e43ade21b0189b0b295439202 Mon Sep 17 00:00:00 2001
From: "McCabe, Robert J" <robert.mccabe@rockwellcollins.com>
Date: Fri, 19 Apr 2019 18:37:20 -0500
Subject: libbpf: fix BPF_LOG_BUF_SIZE off-by-one error

The BPF_PROG_LOAD condition for kernel version <= 5.1 is

   log->len_total > UINT_MAX >> 8 /* (16 * 1024 * 1024) - 1 */

Signed-off-by: McCabe, Robert J <robert.mccabe@rockwellcollins.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index bc30783d1403..ff2506fe6bd2 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -92,7 +92,7 @@ struct bpf_load_program_attr {
 #define MAPS_RELAX_COMPAT	0x01
 
 /* Recommend log buffer size */
-#define BPF_LOG_BUF_SIZE (16 * 1024 * 1024) /* verifier maximum in kernels <= 5.1 */
+#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
 LIBBPF_API int
 bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 		       char *log_buf, size_t log_buf_sz);
-- 
cgit v1.2.3


From c9cb2c1e11cee75b3af5699add3302a3997f78e4 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 22 Apr 2019 08:55:49 -0700
Subject: selftests/bpf: add flow dissector bpf_skb_load_bytes helper test

When flow dissector is called without skb, we want to make sure
bpf_skb_load_bytes invocations return error. Add small test which tries
to read single byte from a packet.

bpf_skb_load_bytes should always fail under BPF_PROG_TEST_RUN because
it was converted to the skb-less mode.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../bpf/prog_tests/flow_dissector_load_bytes.c     | 48 ++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
new file mode 100644
index 000000000000..dc5ef155ec28
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_flow_dissector_load_bytes(void)
+{
+	struct bpf_flow_keys flow_keys;
+	__u32 duration = 0, retval, size;
+	struct bpf_insn prog[] = {
+		// BPF_REG_1 - 1st argument: context
+		// BPF_REG_2 - 2nd argument: offset, start at first byte
+		BPF_MOV64_IMM(BPF_REG_2, 0),
+		// BPF_REG_3 - 3rd argument: destination, reserve byte on stack
+		BPF_ALU64_REG(BPF_MOV, BPF_REG_3, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -1),
+		// BPF_REG_4 - 4th argument: copy one byte
+		BPF_MOV64_IMM(BPF_REG_4, 1),
+		// bpf_skb_load_bytes(ctx, sizeof(pkt_v4), ptr, 1)
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_skb_load_bytes),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+		// if (ret == 0) return BPF_DROP (2)
+		BPF_MOV64_IMM(BPF_REG_0, BPF_DROP),
+		BPF_EXIT_INSN(),
+		// if (ret != 0) return BPF_OK (0)
+		BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
+		BPF_EXIT_INSN(),
+	};
+	int fd, err;
+
+	/* make sure bpf_skb_load_bytes is not allowed from skb-less context
+	 */
+	fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
+			      ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+	CHECK(fd < 0,
+	      "flow_dissector-bpf_skb_load_bytes-load",
+	      "fd %d errno %d\n",
+	      fd, errno);
+
+	err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4),
+				&flow_keys, &size, &retval, &duration);
+	CHECK(size != sizeof(flow_keys) || err || retval != 1,
+	      "flow_dissector-bpf_skb_load_bytes",
+	      "err %d errno %d retval %d duration %d size %u/%zu\n",
+	      err, errno, retval, duration, size, sizeof(flow_keys));
+
+	if (fd >= -1)
+		close(fd);
+}
-- 
cgit v1.2.3


From 0905beec9f52caf2c7065a8a88c08bc370850710 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 22 Apr 2019 08:55:50 -0700
Subject: selftests/bpf: run flow dissector tests in skb-less mode

Export last_dissection map from flow dissector and use a known place in
tun driver to trigger BPF flow dissection.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/flow_dissector_load.c  |   2 +-
 tools/testing/selftests/bpf/flow_dissector_load.h  |  16 +++-
 .../selftests/bpf/prog_tests/flow_dissector.c      | 102 ++++++++++++++++++++-
 tools/testing/selftests/bpf/progs/bpf_flow.c       |  79 ++++++++++------
 4 files changed, 165 insertions(+), 34 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 7136ab9ffa73..3fd83b9dc1bf 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -26,7 +26,7 @@ static void load_and_attach_program(void)
 	struct bpf_object *obj;
 
 	ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name,
-			    cfg_map_name, &prog_fd);
+			    cfg_map_name, NULL, &prog_fd, NULL);
 	if (ret)
 		error(1, 0, "bpf_flow_load %s", cfg_path_name);
 
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index 41dd6959feb0..eeb48b6fc827 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -9,10 +9,12 @@ static inline int bpf_flow_load(struct bpf_object **obj,
 				const char *path,
 				const char *section_name,
 				const char *map_name,
-				int *prog_fd)
+				const char *keys_map_name,
+				int *prog_fd,
+				int *keys_fd)
 {
 	struct bpf_program *prog, *main_prog;
-	struct bpf_map *prog_array;
+	struct bpf_map *prog_array, *keys;
 	int prog_array_fd;
 	int ret, fd, i;
 
@@ -37,6 +39,16 @@ static inline int bpf_flow_load(struct bpf_object **obj,
 	if (prog_array_fd < 0)
 		return ret;
 
+	if (keys_map_name && keys_fd) {
+		keys = bpf_object__find_map_by_name(*obj, keys_map_name);
+		if (!keys)
+			return -1;
+
+		*keys_fd = bpf_map__fd(keys);
+		if (*keys_fd < 0)
+			return -1;
+	}
+
 	i = 0;
 	bpf_object__for_each_program(prog, *obj) {
 		fd = bpf_program__fd(prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 126319f9a97c..51758a0ca55e 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -1,5 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <error.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
 
 #define CHECK_FLOW_KEYS(desc, got, expected)				\
 	CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0,		\
@@ -140,13 +143,73 @@ struct test tests[] = {
 	},
 };
 
+static int create_tap(const char *ifname)
+{
+	struct ifreq ifr = {
+		.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS,
+	};
+	int fd, ret;
+
+	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+	fd = open("/dev/net/tun", O_RDWR);
+	if (fd < 0)
+		return -1;
+
+	ret = ioctl(fd, TUNSETIFF, &ifr);
+	if (ret)
+		return -1;
+
+	return fd;
+}
+
+static int tx_tap(int fd, void *pkt, size_t len)
+{
+	struct iovec iov[] = {
+		{
+			.iov_len = len,
+			.iov_base = pkt,
+		},
+	};
+	return writev(fd, iov, ARRAY_SIZE(iov));
+}
+
+static int ifup(const char *ifname)
+{
+	struct ifreq ifr = {};
+	int sk, ret;
+
+	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+	sk = socket(PF_INET, SOCK_DGRAM, 0);
+	if (sk < 0)
+		return -1;
+
+	ret = ioctl(sk, SIOCGIFFLAGS, &ifr);
+	if (ret) {
+		close(sk);
+		return -1;
+	}
+
+	ifr.ifr_flags |= IFF_UP;
+	ret = ioctl(sk, SIOCSIFFLAGS, &ifr);
+	if (ret) {
+		close(sk);
+		return -1;
+	}
+
+	close(sk);
+	return 0;
+}
+
 void test_flow_dissector(void)
 {
+	int i, err, prog_fd, keys_fd = -1, tap_fd;
 	struct bpf_object *obj;
-	int i, err, prog_fd;
+	__u32 duration = 0;
 
 	err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
-			    "jmp_table", &prog_fd);
+			    "jmp_table", "last_dissection", &prog_fd, &keys_fd);
 	if (err) {
 		error_cnt++;
 		return;
@@ -171,5 +234,40 @@ void test_flow_dissector(void)
 		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
 	}
 
+	/* Do the same tests but for skb-less flow dissector.
+	 * We use a known path in the net/tun driver that calls
+	 * eth_get_headlen and we manually export bpf_flow_keys
+	 * via BPF map in this case.
+	 *
+	 * Note, that since eth_get_headlen operates on a L2 level,
+	 * we adjust exported nhoff/thoff by ETH_HLEN.
+	 */
+
+	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+	CHECK(err, "bpf_prog_attach", "err %d errno %d", err, errno);
+
+	tap_fd = create_tap("tap0");
+	CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d", tap_fd, errno);
+	err = ifup("tap0");
+	CHECK(err, "ifup", "err %d errno %d", err, errno);
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		struct bpf_flow_keys flow_keys = {};
+		struct bpf_prog_test_run_attr tattr = {};
+		__u32 key = 0;
+
+		err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
+		CHECK(err < 0, "tx_tap", "err %d errno %d", err, errno);
+
+		err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
+		CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+
+		flow_keys.nhoff -= ETH_HLEN;
+		flow_keys.thoff -= ETH_HLEN;
+
+		CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
+		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+	}
+
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 75b17cada539..81ad9a0b29d0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -64,6 +64,25 @@ struct bpf_map_def SEC("maps") jmp_table = {
 	.max_entries = 8
 };
 
+struct bpf_map_def SEC("maps") last_dissection = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct bpf_flow_keys),
+	.max_entries = 1,
+};
+
+static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
+					    int ret)
+{
+	struct bpf_flow_keys *val;
+	__u32 key = 0;
+
+	val = bpf_map_lookup_elem(&last_dissection, &key);
+	if (val)
+		memcpy(val, keys, sizeof(*val));
+	return ret;
+}
+
 static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
 							 __u16 hdr_size,
 							 void *buffer)
@@ -109,10 +128,10 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
 		break;
 	default:
 		/* Protocol not supported */
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 	}
 
-	return BPF_DROP;
+	return export_flow_keys(keys, BPF_DROP);
 }
 
 SEC("flow_dissector")
@@ -139,8 +158,8 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
 	case IPPROTO_ICMP:
 		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
 		if (!icmp)
-			return BPF_DROP;
-		return BPF_OK;
+			return export_flow_keys(keys, BPF_DROP);
+		return export_flow_keys(keys, BPF_OK);
 	case IPPROTO_IPIP:
 		keys->is_encap = true;
 		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
@@ -150,11 +169,11 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
 	case IPPROTO_GRE:
 		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
 		if (!gre)
-			return BPF_DROP;
+			return export_flow_keys(keys, BPF_DROP);
 
 		if (bpf_htons(gre->flags & GRE_VERSION))
 			/* Only inspect standard GRE packets with version 0 */
-			return BPF_OK;
+			return export_flow_keys(keys, BPF_OK);
 
 		keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
 		if (GRE_IS_CSUM(gre->flags))
@@ -170,7 +189,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
 			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
 							  &_eth);
 			if (!eth)
-				return BPF_DROP;
+				return export_flow_keys(keys, BPF_DROP);
 
 			keys->thoff += sizeof(*eth);
 
@@ -181,31 +200,31 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
 	case IPPROTO_TCP:
 		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
 		if (!tcp)
-			return BPF_DROP;
+			return export_flow_keys(keys, BPF_DROP);
 
 		if (tcp->doff < 5)
-			return BPF_DROP;
+			return export_flow_keys(keys, BPF_DROP);
 
 		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
-			return BPF_DROP;
+			return export_flow_keys(keys, BPF_DROP);
 
 		keys->sport = tcp->source;
 		keys->dport = tcp->dest;
-		return BPF_OK;
+		return export_flow_keys(keys, BPF_OK);
 	case IPPROTO_UDP:
 	case IPPROTO_UDPLITE:
 		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
 		if (!udp)
-			return BPF_DROP;
+			return export_flow_keys(keys, BPF_DROP);
 
 		keys->sport = udp->source;
 		keys->dport = udp->dest;
-		return BPF_OK;
+		return export_flow_keys(keys, BPF_OK);
 	default:
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 	}
 
-	return BPF_DROP;
+	return export_flow_keys(keys, BPF_DROP);
 }
 
 static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
@@ -225,7 +244,7 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
 		return parse_ip_proto(skb, nexthdr);
 	}
 
-	return BPF_DROP;
+	return export_flow_keys(keys, BPF_DROP);
 }
 
 PROG(IP)(struct __sk_buff *skb)
@@ -238,11 +257,11 @@ PROG(IP)(struct __sk_buff *skb)
 
 	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
 	if (!iph)
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 
 	/* IP header cannot be smaller than 20 bytes */
 	if (iph->ihl < 5)
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 
 	keys->addr_proto = ETH_P_IP;
 	keys->ipv4_src = iph->saddr;
@@ -250,7 +269,7 @@ PROG(IP)(struct __sk_buff *skb)
 
 	keys->thoff += iph->ihl << 2;
 	if (data + keys->thoff > data_end)
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 
 	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
 		keys->is_frag = true;
@@ -264,7 +283,7 @@ PROG(IP)(struct __sk_buff *skb)
 	}
 
 	if (done)
-		return BPF_OK;
+		return export_flow_keys(keys, BPF_OK);
 
 	return parse_ip_proto(skb, iph->protocol);
 }
@@ -276,7 +295,7 @@ PROG(IPV6)(struct __sk_buff *skb)
 
 	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
 	if (!ip6h)
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 
 	keys->addr_proto = ETH_P_IPV6;
 	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
@@ -288,11 +307,12 @@ PROG(IPV6)(struct __sk_buff *skb)
 
 PROG(IPV6OP)(struct __sk_buff *skb)
 {
+	struct bpf_flow_keys *keys = skb->flow_keys;
 	struct ipv6_opt_hdr *ip6h, _ip6h;
 
 	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
 	if (!ip6h)
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 
 	/* hlen is in 8-octets and does not include the first 8 bytes
 	 * of the header
@@ -309,7 +329,7 @@ PROG(IPV6FR)(struct __sk_buff *skb)
 
 	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
 	if (!fragh)
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 
 	keys->thoff += sizeof(*fragh);
 	keys->is_frag = true;
@@ -321,13 +341,14 @@ PROG(IPV6FR)(struct __sk_buff *skb)
 
 PROG(MPLS)(struct __sk_buff *skb)
 {
+	struct bpf_flow_keys *keys = skb->flow_keys;
 	struct mpls_label *mpls, _mpls;
 
 	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
 	if (!mpls)
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 
-	return BPF_OK;
+	return export_flow_keys(keys, BPF_OK);
 }
 
 PROG(VLAN)(struct __sk_buff *skb)
@@ -339,10 +360,10 @@ PROG(VLAN)(struct __sk_buff *skb)
 	if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
 		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
 		if (!vlan)
-			return BPF_DROP;
+			return export_flow_keys(keys, BPF_DROP);
 
 		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
-			return BPF_DROP;
+			return export_flow_keys(keys, BPF_DROP);
 
 		keys->nhoff += sizeof(*vlan);
 		keys->thoff += sizeof(*vlan);
@@ -350,14 +371,14 @@ PROG(VLAN)(struct __sk_buff *skb)
 
 	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
 	if (!vlan)
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 
 	keys->nhoff += sizeof(*vlan);
 	keys->thoff += sizeof(*vlan);
 	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
 	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
 	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
-		return BPF_DROP;
+		return export_flow_keys(keys, BPF_DROP);
 
 	keys->n_proto = vlan->h_vlan_encapsulated_proto;
 	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
-- 
cgit v1.2.3


From fe993c646831105f579976fec28d1842608bd551 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 22 Apr 2019 08:55:51 -0700
Subject: selftests/bpf: properly return error from bpf_flow_load

Right now we incorrectly return 'ret' which is always zero at that
point.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/flow_dissector_load.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index eeb48b6fc827..daeaeb518894 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -25,19 +25,19 @@ static inline int bpf_flow_load(struct bpf_object **obj,
 
 	main_prog = bpf_object__find_program_by_title(*obj, section_name);
 	if (!main_prog)
-		return ret;
+		return -1;
 
 	*prog_fd = bpf_program__fd(main_prog);
 	if (*prog_fd < 0)
-		return ret;
+		return -1;
 
 	prog_array = bpf_object__find_map_by_name(*obj, map_name);
 	if (!prog_array)
-		return ret;
+		return -1;
 
 	prog_array_fd = bpf_map__fd(prog_array);
 	if (prog_array_fd < 0)
-		return ret;
+		return -1;
 
 	if (keys_map_name && keys_fd) {
 		keys = bpf_object__find_map_by_name(*obj, keys_map_name);
-- 
cgit v1.2.3


From 02ee0658362d3713421851bb7487af77a4098bb5 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 22 Apr 2019 08:55:52 -0700
Subject: bpf/flow_dissector: don't adjust nhoff by ETH_HLEN in
 BPF_PROG_TEST_RUN

Now that we use skb-less flow dissector let's return true nhoff and
thoff. We used to adjust them by ETH_HLEN because that's how it was
done in the skb case. For VLAN tests that looks confusing: nhoff is
pointing to vlan parts :-\

Warning, this is an API change for BPF_PROG_TEST_RUN! Feel free to drop
if you think that it's too late at this point to fix it.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/bpf/test_run.c                                 |  3 ---
 .../selftests/bpf/prog_tests/flow_dissector.c      | 23 +++++++++-------------
 2 files changed, 9 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index db2ec88ab129..8606e5aef0b6 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -418,9 +418,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 		retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
 					  size);
 
-		flow_keys.nhoff -= ETH_HLEN;
-		flow_keys.thoff -= ETH_HLEN;
-
 		if (signal_pending(current)) {
 			preempt_enable();
 			rcu_read_unlock();
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 51758a0ca55e..8b54adfd6264 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -82,8 +82,8 @@ struct test tests[] = {
 			.tcp.doff = 5,
 		},
 		.keys = {
-			.nhoff = 0,
-			.thoff = sizeof(struct iphdr),
+			.nhoff = ETH_HLEN,
+			.thoff = ETH_HLEN + sizeof(struct iphdr),
 			.addr_proto = ETH_P_IP,
 			.ip_proto = IPPROTO_TCP,
 			.n_proto = __bpf_constant_htons(ETH_P_IP),
@@ -98,8 +98,8 @@ struct test tests[] = {
 			.tcp.doff = 5,
 		},
 		.keys = {
-			.nhoff = 0,
-			.thoff = sizeof(struct ipv6hdr),
+			.nhoff = ETH_HLEN,
+			.thoff = ETH_HLEN + sizeof(struct ipv6hdr),
 			.addr_proto = ETH_P_IPV6,
 			.ip_proto = IPPROTO_TCP,
 			.n_proto = __bpf_constant_htons(ETH_P_IPV6),
@@ -116,8 +116,8 @@ struct test tests[] = {
 			.tcp.doff = 5,
 		},
 		.keys = {
-			.nhoff = VLAN_HLEN,
-			.thoff = VLAN_HLEN + sizeof(struct iphdr),
+			.nhoff = ETH_HLEN + VLAN_HLEN,
+			.thoff = ETH_HLEN + VLAN_HLEN + sizeof(struct iphdr),
 			.addr_proto = ETH_P_IP,
 			.ip_proto = IPPROTO_TCP,
 			.n_proto = __bpf_constant_htons(ETH_P_IP),
@@ -134,8 +134,9 @@ struct test tests[] = {
 			.tcp.doff = 5,
 		},
 		.keys = {
-			.nhoff = VLAN_HLEN * 2,
-			.thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr),
+			.nhoff = ETH_HLEN + VLAN_HLEN * 2,
+			.thoff = ETH_HLEN + VLAN_HLEN * 2 +
+				sizeof(struct ipv6hdr),
 			.addr_proto = ETH_P_IPV6,
 			.ip_proto = IPPROTO_TCP,
 			.n_proto = __bpf_constant_htons(ETH_P_IPV6),
@@ -238,9 +239,6 @@ void test_flow_dissector(void)
 	 * We use a known path in the net/tun driver that calls
 	 * eth_get_headlen and we manually export bpf_flow_keys
 	 * via BPF map in this case.
-	 *
-	 * Note, that since eth_get_headlen operates on a L2 level,
-	 * we adjust exported nhoff/thoff by ETH_HLEN.
 	 */
 
 	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
@@ -262,9 +260,6 @@ void test_flow_dissector(void)
 		err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
 		CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
 
-		flow_keys.nhoff -= ETH_HLEN;
-		flow_keys.thoff -= ETH_HLEN;
-
 		CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
 		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
 	}
-- 
cgit v1.2.3


From f6ad6accaa99dfa7462d18687961b8421d707c1e Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 23 Apr 2019 14:43:49 -0400
Subject: selftests/bpf: expand test_tc_tunnel with SIT encap

So far, all BPF tc tunnel testcases encapsulate in the same network
protocol. Add an encap testcase that requires updating skb->protocol.

The 6in4 tunnel encapsulates an IPv6 packet inside an IPv4 tunnel.
Verify that bpf_skb_net_grow correctly updates skb->protocol to
select the right protocol handler in __netif_receive_skb_core.

The BPF program should also manually update the link layer header to
encode the right network protocol.

Changes v1->v2
  - improve documentation of non-obvious logic

Signed-off-by: Willem de Bruijn <willemb@google.com>
Tested-by: Alan Maguire <alan.maguire@oracle.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/config                 |  1 +
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 64 ++++++++++++++++++++--
 tools/testing/selftests/bpf/test_tc_tunnel.sh      | 20 ++++++-
 3 files changed, 80 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 8c976476f6fd..f7a0744db31e 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -33,3 +33,4 @@ CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
+CONFIG_IPV6_SIT=m
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index ab56a6a72b7a..74370e7e286d 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -77,17 +77,52 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 	struct v4hdr h_outer;
 	struct tcphdr tcph;
 	int olen, l2_len;
+	int tcp_off;
 	__u64 flags;
 
-	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
-			       sizeof(iph_inner)) < 0)
-		return TC_ACT_OK;
+	/* Most tests encapsulate a packet into a tunnel with the same
+	 * network protocol, and derive the outer header fields from
+	 * the inner header.
+	 *
+	 * The 6in4 case tests different inner and outer protocols. As
+	 * the inner is ipv6, but the outer expects an ipv4 header as
+	 * input, manually build a struct iphdr based on the ipv6hdr.
+	 */
+	if (encap_proto == IPPROTO_IPV6) {
+		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
+		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
+		struct ipv6hdr iph6_inner;
+
+		/* Read the IPv6 header */
+		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
+				       sizeof(iph6_inner)) < 0)
+			return TC_ACT_OK;
+
+		/* Derive the IPv4 header fields from the IPv6 header */
+		memset(&iph_inner, 0, sizeof(iph_inner));
+		iph_inner.version = 4;
+		iph_inner.ihl = 5;
+		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
+				    bpf_ntohs(iph6_inner.payload_len));
+		iph_inner.ttl = iph6_inner.hop_limit - 1;
+		iph_inner.protocol = iph6_inner.nexthdr;
+		iph_inner.saddr = __bpf_constant_htonl(saddr);
+		iph_inner.daddr = __bpf_constant_htonl(daddr);
+
+		tcp_off = sizeof(iph6_inner);
+	} else {
+		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+				       sizeof(iph_inner)) < 0)
+			return TC_ACT_OK;
+
+		tcp_off = sizeof(iph_inner);
+	}
 
 	/* filter only packets we want */
 	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
 		return TC_ACT_OK;
 
-	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
 			       &tcph, sizeof(tcph)) < 0)
 		return TC_ACT_OK;
 
@@ -129,6 +164,7 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 						  l2_len);
 		break;
 	case IPPROTO_IPIP:
+	case IPPROTO_IPV6:
 		break;
 	default:
 		return TC_ACT_OK;
@@ -164,6 +200,17 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 				BPF_F_INVALIDATE_HASH) < 0)
 		return TC_ACT_SHOT;
 
+	/* if changing outer proto type, update eth->h_proto */
+	if (encap_proto == IPPROTO_IPV6) {
+		struct ethhdr eth;
+
+		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
+			return TC_ACT_SHOT;
+		eth.h_proto = bpf_htons(ETH_P_IP);
+		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
+			return TC_ACT_SHOT;
+	}
+
 	return TC_ACT_OK;
 }
 
@@ -325,6 +372,15 @@ int __encap_udp_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
+SEC("encap_sit_none")
+int __encap_sit_none(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
+	else
+		return TC_ACT_OK;
+}
+
 SEC("encap_ip6tnl_none")
 int __encap_ip6tnl_none(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index d4d8d5d3b06e..ff0d31d38061 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -97,6 +97,9 @@ if [[ "$#" -eq "0" ]]; then
 	echo "ip6ip6"
 	$0 ipv6 ip6tnl none 100
 
+	echo "sit"
+	$0 ipv6 sit none 100
+
 	for mac in none mpls eth ; do
 		echo "ip gre $mac"
 		$0 ipv4 gre $mac 100
@@ -211,11 +214,20 @@ else
 	targs=""
 fi
 
+# tunnel address family differs from inner for SIT
+if [[ "${tuntype}" == "sit" ]]; then
+	link_addr1="${ns1_v4}"
+	link_addr2="${ns2_v4}"
+else
+	link_addr1="${addr1}"
+	link_addr2="${addr2}"
+fi
+
 # serverside, insert decap module
 # server is still running
 # client can connect again
 ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
-	${tmode} remote "${addr1}" local "${addr2}" $targs
+	${tmode} remote "${link_addr1}" local "${link_addr2}" $targs
 
 expect_tun_fail=0
 
@@ -260,6 +272,12 @@ else
 	server_listen
 fi
 
+# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3.
+if [[ "${tuntype}" == "sit" ]]; then
+	echo OK
+	exit 0
+fi
+
 # serverside, use BPF for decap
 ip netns exec "${ns2}" ip link del dev testtun0
 ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
-- 
cgit v1.2.3


From 8837fe5dd09bd0331b3e2d1b6e400b7fcda8963a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 24 Apr 2019 00:45:56 +0200
Subject: bpf, libbpf: handle old kernels more graceful wrt global data
 sections

Andrii reported a corner case where e.g. global static data is present
in the BPF ELF file in form of .data/.bss/.rodata section, but without
any relocations to it. Such programs could be loaded before commit
d859900c4c56 ("bpf, libbpf: support global data/bss/rodata sections"),
whereas afterwards if kernel lacks support then loading would fail.

Add a probing mechanism which skips setting up libbpf internal maps
in case of missing kernel support. In presence of relocation entries,
we abort the load attempt.

Fixes: d859900c4c56 ("bpf, libbpf: support global data/bss/rodata sections")
Reported-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 99 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 86 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d817bf20f3d6..85315dedbde4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -126,6 +126,8 @@ static inline __u64 ptr_to_u64(const void *ptr)
 struct bpf_capabilities {
 	/* v4.14: kernel support for program & map names. */
 	__u32 name:1;
+	/* v5.2: kernel support for global data sections. */
+	__u32 global_data:1;
 };
 
 /*
@@ -854,12 +856,15 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 	 *
 	 * TODO: Detect array of map and report error.
 	 */
-	if (obj->efile.data_shndx >= 0)
-		nr_maps_glob++;
-	if (obj->efile.rodata_shndx >= 0)
-		nr_maps_glob++;
-	if (obj->efile.bss_shndx >= 0)
-		nr_maps_glob++;
+	if (obj->caps.global_data) {
+		if (obj->efile.data_shndx >= 0)
+			nr_maps_glob++;
+		if (obj->efile.rodata_shndx >= 0)
+			nr_maps_glob++;
+		if (obj->efile.bss_shndx >= 0)
+			nr_maps_glob++;
+	}
+
 	for (i = 0; data && i < nr_syms; i++) {
 		GElf_Sym sym;
 
@@ -971,6 +976,9 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 		map_idx++;
 	}
 
+	if (!obj->caps.global_data)
+		goto finalize;
+
 	/*
 	 * Populate rest of obj->maps with libbpf internal maps.
 	 */
@@ -988,6 +996,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
 						    LIBBPF_MAP_BSS,
 						    obj->efile.bss, NULL);
+finalize:
 	if (!ret)
 		qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
 		      compare_bpf_map);
@@ -1333,11 +1342,17 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 		if (bpf_object__shndx_is_maps(obj, shdr_idx) ||
 		    bpf_object__shndx_is_data(obj, shdr_idx)) {
 			type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
-			if (type != LIBBPF_MAP_UNSPEC &&
-			    GELF_ST_BIND(sym.st_info) == STB_GLOBAL) {
-				pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n",
-					   name, insn_idx, insns[insn_idx].code);
-				return -LIBBPF_ERRNO__RELOC;
+			if (type != LIBBPF_MAP_UNSPEC) {
+				if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL) {
+					pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n",
+						   name, insn_idx, insns[insn_idx].code);
+					return -LIBBPF_ERRNO__RELOC;
+				}
+				if (!obj->caps.global_data) {
+					pr_warning("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n",
+						   name, insn_idx);
+					return -LIBBPF_ERRNO__RELOC;
+				}
 			}
 
 			for (map_idx = 0; map_idx < nr_maps; map_idx++) {
@@ -1495,10 +1510,68 @@ bpf_object__probe_name(struct bpf_object *obj)
 	return 0;
 }
 
+static int
+bpf_object__probe_global_data(struct bpf_object *obj)
+{
+	struct bpf_load_program_attr prg_attr;
+	struct bpf_create_map_attr map_attr;
+	char *cp, errmsg[STRERR_BUFSIZE];
+	struct bpf_insn insns[] = {
+		BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
+		BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	int ret, map;
+
+	memset(&map_attr, 0, sizeof(map_attr));
+	map_attr.map_type = BPF_MAP_TYPE_ARRAY;
+	map_attr.key_size = sizeof(int);
+	map_attr.value_size = 32;
+	map_attr.max_entries = 1;
+
+	map = bpf_create_map_xattr(&map_attr);
+	if (map < 0) {
+		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+		pr_warning("Error in %s():%s(%d). Couldn't create simple array map.\n",
+			   __func__, cp, errno);
+		return -errno;
+	}
+
+	insns[0].imm = map;
+
+	memset(&prg_attr, 0, sizeof(prg_attr));
+	prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	prg_attr.insns = insns;
+	prg_attr.insns_cnt = ARRAY_SIZE(insns);
+	prg_attr.license = "GPL";
+
+	ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
+	if (ret >= 0) {
+		obj->caps.global_data = 1;
+		close(ret);
+	}
+
+	close(map);
+	return 0;
+}
+
 static int
 bpf_object__probe_caps(struct bpf_object *obj)
 {
-	return bpf_object__probe_name(obj);
+	int (*probe_fn[])(struct bpf_object *obj) = {
+		bpf_object__probe_name,
+		bpf_object__probe_global_data,
+	};
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(probe_fn); i++) {
+		ret = probe_fn[i](obj);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
 }
 
 static int
@@ -2100,6 +2173,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
 
 	CHECK_ERR(bpf_object__elf_init(obj), err, out);
 	CHECK_ERR(bpf_object__check_endianness(obj), err, out);
+	CHECK_ERR(bpf_object__probe_caps(obj), err, out);
 	CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out);
 	CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
 	CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
@@ -2193,7 +2267,6 @@ int bpf_object__load(struct bpf_object *obj)
 
 	obj->loaded = true;
 
-	CHECK_ERR(bpf_object__probe_caps(obj), err, out);
 	CHECK_ERR(bpf_object__create_maps(obj), err, out);
 	CHECK_ERR(bpf_object__relocate(obj), err, out);
 	CHECK_ERR(bpf_object__load_progs(obj), err, out);
-- 
cgit v1.2.3


From 4f8827d2b61ed32133e51f6a782bb69d80c7c3d4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 24 Apr 2019 00:45:57 +0200
Subject: bpf, libbpf: fix segfault in bpf_object__init_maps' pr_debug
 statement

Ran into it while testing; in bpf_object__init_maps() data can be NULL
in the case where no map section is present. Therefore we simply cannot
access data->d_size before NULL test. Move the pr_debug() where it's
safe to access.

Fixes: d859900c4c56 ("bpf, libbpf: support global data/bss/rodata sections")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 85315dedbde4..9052061ba7fc 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -875,14 +875,14 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 		nr_maps++;
 	}
 
-	/* Alloc obj->maps and fill nr_maps. */
-	pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path,
-		 nr_maps, data->d_size);
 	if (!nr_maps && !nr_maps_glob)
 		return 0;
 
 	/* Assume equally sized map definitions */
 	if (data) {
+		pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path,
+			 nr_maps, data->d_size);
+
 		map_def_sz = data->d_size / nr_maps;
 		if (!data->d_size || (data->d_size % nr_maps) != 0) {
 			pr_warning("unable to determine map definition size "
-- 
cgit v1.2.3


From 32e621e55496a0009f44fe4914cd4a23cade4984 Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Wed, 24 Apr 2019 05:24:56 +0900
Subject: libbpf: fix samples/bpf build failure due to undefined UINT32_MAX

Currently, building bpf samples will cause the following error.

    ./tools/lib/bpf/bpf.h:132:27: error: 'UINT32_MAX' undeclared here (not in a function) ..
     #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
                               ^
    ./samples/bpf/bpf_load.h:31:25: note: in expansion of macro 'BPF_LOG_BUF_SIZE'
     extern char bpf_log_buf[BPF_LOG_BUF_SIZE];
                             ^~~~~~~~~~~~~~~~

Due to commit 4519efa6f8ea ("libbpf: fix BPF_LOG_BUF_SIZE off-by-one error")
hard-coded size of BPF_LOG_BUF_SIZE has been replaced with UINT32_MAX which is
defined in <stdint.h> header.

Even with this change, bpf selftests are running fine since these are built
with clang and it includes header(-idirafter) from clang/6.0.0/include.
(it has <stdint.h>)

    clang -I. -I./include/uapi -I../../../include/uapi -idirafter /usr/local/include -idirafter /usr/include \
    -idirafter /usr/lib/llvm-6.0/lib/clang/6.0.0/include -idirafter /usr/include/x86_64-linux-gnu \
    -Wno-compare-distinct-pointer-types -O2 -target bpf -emit-llvm -c progs/test_sysctl_prog.c -o - | \
    llc -march=bpf -mcpu=generic  -filetype=obj -o /linux/tools/testing/selftests/bpf/test_sysctl_prog.o

But bpf samples are compiled with GCC, and it only searches and includes
headers declared at the target file. As '#include <stdint.h>' hasn't been
declared in tools/lib/bpf/bpf.h, it causes build failure of bpf samples.

    gcc -Wp,-MD,./samples/bpf/.sockex3_user.o.d -Wall -Wmissing-prototypes -Wstrict-prototypes \
    -O2 -fomit-frame-pointer -std=gnu89 -I./usr/include -I./tools/lib/ -I./tools/testing/selftests/bpf/ \
    -I./tools/  lib/ -I./tools/include -I./tools/perf -c -o ./samples/bpf/sockex3_user.o ./samples/bpf/sockex3_user.c;

This commit add declaration of '#include <stdint.h>' to tools/lib/bpf/bpf.h
to fix this problem.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index ff2506fe6bd2..9593fec75652 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -26,6 +26,7 @@
 #include <linux/bpf.h>
 #include <stdbool.h>
 #include <stddef.h>
+#include <stdint.h>
 
 #ifdef __cplusplus
 extern "C" {
-- 
cgit v1.2.3


From 7f0c57fec80f198ae9fcd06e5bbca13196815a4b Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 25 Apr 2019 14:37:24 -0700
Subject: bpftool: show flow_dissector attachment status

Right now there is no way to query whether BPF flow_dissector program
is attached to a network namespace or not. In previous commit, I added
support for querying that info, show it when doing `bpftool net`:

$ bpftool prog loadall ./bpf_flow.o \
	/sys/fs/bpf/flow type flow_dissector \
	pinmaps /sys/fs/bpf/flow
$ bpftool prog
3: flow_dissector  name _dissect  tag 8c9e917b513dd5cc  gpl
        loaded_at 2019-04-23T16:14:48-0700  uid 0
        xlated 656B  jited 461B  memlock 4096B  map_ids 1,2
        btf_id 1
...

$ bpftool net -j
[{"xdp":[],"tc":[],"flow_dissector":[]}]

$ bpftool prog attach pinned \
	/sys/fs/bpf/flow/flow_dissector flow_dissector
$ bpftool net -j
[{"xdp":[],"tc":[],"flow_dissector":["id":3]}]

Doesn't show up in a different net namespace:
$ ip netns add test
$ ip netns exec test bpftool net -j
[{"xdp":[],"tc":[],"flow_dissector":[]}]

Non-json output:
$ bpftool net
xdp:

tc:

flow_dissector:
id 3

v2:
* initialization order (Jakub Kicinski)
* clear errno for batch mode (Quentin Monnet)

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/net.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index db0e7de49d49..67e99c56bc88 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -3,6 +3,7 @@
 
 #define _GNU_SOURCE
 #include <errno.h>
+#include <fcntl.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -12,6 +13,8 @@
 #include <linux/rtnetlink.h>
 #include <linux/tc_act/tc_bpf.h>
 #include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 
 #include <bpf.h>
 #include <nlattr.h>
@@ -48,6 +51,10 @@ struct bpf_filter_t {
 	int		ifindex;
 };
 
+struct bpf_attach_info {
+	__u32 flow_dissector_id;
+};
+
 static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
 {
 	struct bpf_netdev_t *netinfo = cookie;
@@ -180,8 +187,45 @@ out:
 	return 0;
 }
 
+static int query_flow_dissector(struct bpf_attach_info *attach_info)
+{
+	__u32 attach_flags;
+	__u32 prog_ids[1];
+	__u32 prog_cnt;
+	int err;
+	int fd;
+
+	fd = open("/proc/self/ns/net", O_RDONLY);
+	if (fd < 0) {
+		p_err("can't open /proc/self/ns/net: %d",
+		      strerror(errno));
+		return -1;
+	}
+	prog_cnt = ARRAY_SIZE(prog_ids);
+	err = bpf_prog_query(fd, BPF_FLOW_DISSECTOR, 0,
+			     &attach_flags, prog_ids, &prog_cnt);
+	close(fd);
+	if (err) {
+		if (errno == EINVAL) {
+			/* Older kernel's don't support querying
+			 * flow dissector programs.
+			 */
+			errno = 0;
+			return 0;
+		}
+		p_err("can't query prog: %s", strerror(errno));
+		return -1;
+	}
+
+	if (prog_cnt == 1)
+		attach_info->flow_dissector_id = prog_ids[0];
+
+	return 0;
+}
+
 static int do_show(int argc, char **argv)
 {
+	struct bpf_attach_info attach_info = {};
 	int i, sock, ret, filter_idx = -1;
 	struct bpf_netdev_t dev_array;
 	unsigned int nl_pid;
@@ -199,6 +243,10 @@ static int do_show(int argc, char **argv)
 		usage();
 	}
 
+	ret = query_flow_dissector(&attach_info);
+	if (ret)
+		return -1;
+
 	sock = libbpf_netlink_open(&nl_pid);
 	if (sock < 0) {
 		fprintf(stderr, "failed to open netlink sock\n");
@@ -227,6 +275,12 @@ static int do_show(int argc, char **argv)
 		}
 		NET_END_ARRAY("\n");
 	}
+
+	NET_START_ARRAY("flow_dissector", "%s:\n");
+	if (attach_info.flow_dissector_id > 0)
+		NET_DUMP_UINT("id", "id %u", attach_info.flow_dissector_id);
+	NET_END_ARRAY("\n");
+
 	NET_END_OBJECT;
 	if (json_output)
 		jsonw_end_array(json_wtr);
-- 
cgit v1.2.3


From 77d764263d1165a2edf13735915fc39bc44abf1d Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Thu, 11 Apr 2019 17:03:32 +0900
Subject: bpftool: Fix errno variable usage

The test meant to use the saved value of errno. Given the current code, it
makes no practical difference however.

Fixes: bf598a8f0f77 ("bpftool: Improve handling of ENOENT on map dumps")
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/map.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 10b6c9d3e525..e6dcb3653a77 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -724,7 +724,7 @@ static int dump_map_elem(int fd, void *key, void *value,
 	} else {
 		const char *msg = NULL;
 
-		if (errno == ENOENT)
+		if (lookup_errno == ENOENT)
 			msg = "<no entry>";
 		else if (lookup_errno == ENOSPC &&
 			 map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
-- 
cgit v1.2.3


From c93cc69004df340d71a9ab3433b8e5c9fd1fca7a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 25 Apr 2019 15:30:08 -0700
Subject: bpftool: add ability to dump BTF types

Add new `btf dump` sub-command to bpftool. It allows to dump
human-readable low-level BTF types representation of BTF types. BTF can
be retrieved from few different sources:
  - from BTF object by ID;
  - from PROG, if it has associated BTF;
  - from MAP, if it has associated BTF data; it's possible to narrow
    down types to either key type, value type, both, or all BTF types;
  - from ELF file (.BTF section).

Output format mostly follows BPF verifier log format with few notable
exceptions:
  - all the type/field/param/etc names are enclosed in single quotes to
    allow easier grepping and to stand out a little bit more;
  - FUNC_PROTO output follows STRUCT/UNION/ENUM format of having one
    line per each argument; this is more uniform and allows easy
    grepping, as opposed to succinct, but inconvenient format that BPF
    verifier log is using.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/btf.c  | 586 +++++++++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.c |   3 +-
 tools/bpf/bpftool/main.h |   1 +
 3 files changed, 589 insertions(+), 1 deletion(-)
 create mode 100644 tools/bpf/bpftool/btf.c

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
new file mode 100644
index 000000000000..58a2cd002a4b
--- /dev/null
+++ b/tools/bpf/bpftool/btf.c
@@ -0,0 +1,586 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Facebook */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/err.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <gelf.h>
+#include <bpf.h>
+#include <linux/btf.h>
+
+#include "btf.h"
+#include "json_writer.h"
+#include "main.h"
+
+static const char * const btf_kind_str[NR_BTF_KINDS] = {
+	[BTF_KIND_UNKN]		= "UNKNOWN",
+	[BTF_KIND_INT]		= "INT",
+	[BTF_KIND_PTR]		= "PTR",
+	[BTF_KIND_ARRAY]	= "ARRAY",
+	[BTF_KIND_STRUCT]	= "STRUCT",
+	[BTF_KIND_UNION]	= "UNION",
+	[BTF_KIND_ENUM]		= "ENUM",
+	[BTF_KIND_FWD]		= "FWD",
+	[BTF_KIND_TYPEDEF]	= "TYPEDEF",
+	[BTF_KIND_VOLATILE]	= "VOLATILE",
+	[BTF_KIND_CONST]	= "CONST",
+	[BTF_KIND_RESTRICT]	= "RESTRICT",
+	[BTF_KIND_FUNC]		= "FUNC",
+	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
+	[BTF_KIND_VAR]		= "VAR",
+	[BTF_KIND_DATASEC]	= "DATASEC",
+};
+
+static const char *btf_int_enc_str(__u8 encoding)
+{
+	switch (encoding) {
+	case 0:
+		return "(none)";
+	case BTF_INT_SIGNED:
+		return "SIGNED";
+	case BTF_INT_CHAR:
+		return "CHAR";
+	case BTF_INT_BOOL:
+		return "BOOL";
+	default:
+		return "UNKN";
+	}
+}
+
+static const char *btf_var_linkage_str(__u32 linkage)
+{
+	switch (linkage) {
+	case BTF_VAR_STATIC:
+		return "static";
+	case BTF_VAR_GLOBAL_ALLOCATED:
+		return "global-alloc";
+	default:
+		return "(unknown)";
+	}
+}
+
+static const char *btf_str(const struct btf *btf, __u32 off)
+{
+	if (!off)
+		return "(anon)";
+	return btf__name_by_offset(btf, off) ? : "(invalid)";
+}
+
+static int dump_btf_type(const struct btf *btf, __u32 id,
+			 const struct btf_type *t)
+{
+	json_writer_t *w = json_wtr;
+	int kind, safe_kind;
+
+	kind = BTF_INFO_KIND(t->info);
+	safe_kind = kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN;
+
+	if (json_output) {
+		jsonw_start_object(w);
+		jsonw_uint_field(w, "id", id);
+		jsonw_string_field(w, "kind", btf_kind_str[safe_kind]);
+		jsonw_string_field(w, "name", btf_str(btf, t->name_off));
+	} else {
+		printf("[%u] %s '%s'", id, btf_kind_str[safe_kind],
+		       btf_str(btf, t->name_off));
+	}
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_INT: {
+		__u32 v = *(__u32 *)(t + 1);
+		const char *enc;
+
+		enc = btf_int_enc_str(BTF_INT_ENCODING(v));
+
+		if (json_output) {
+			jsonw_uint_field(w, "size", t->size);
+			jsonw_uint_field(w, "bits_offset", BTF_INT_OFFSET(v));
+			jsonw_uint_field(w, "nr_bits", BTF_INT_BITS(v));
+			jsonw_string_field(w, "encoding", enc);
+		} else {
+			printf(" size=%u bits_offset=%u nr_bits=%u encoding=%s",
+			       t->size, BTF_INT_OFFSET(v), BTF_INT_BITS(v),
+			       enc);
+		}
+		break;
+	}
+	case BTF_KIND_PTR:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPEDEF:
+		if (json_output)
+			jsonw_uint_field(w, "type_id", t->type);
+		else
+			printf(" type_id=%u", t->type);
+		break;
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *arr = (const void *)(t + 1);
+
+		if (json_output) {
+			jsonw_uint_field(w, "type_id", arr->type);
+			jsonw_uint_field(w, "index_type_id", arr->index_type);
+			jsonw_uint_field(w, "nr_elems", arr->nelems);
+		} else {
+			printf(" type_id=%u index_type_id=%u nr_elems=%u",
+			       arr->type, arr->index_type, arr->nelems);
+		}
+		break;
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = (const void *)(t + 1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int i;
+
+		if (json_output) {
+			jsonw_uint_field(w, "size", t->size);
+			jsonw_uint_field(w, "vlen", vlen);
+			jsonw_name(w, "members");
+			jsonw_start_array(w);
+		} else {
+			printf(" size=%u vlen=%u", t->size, vlen);
+		}
+		for (i = 0; i < vlen; i++, m++) {
+			const char *name = btf_str(btf, m->name_off);
+			__u32 bit_off, bit_sz;
+
+			if (BTF_INFO_KFLAG(t->info)) {
+				bit_off = BTF_MEMBER_BIT_OFFSET(m->offset);
+				bit_sz = BTF_MEMBER_BITFIELD_SIZE(m->offset);
+			} else {
+				bit_off = m->offset;
+				bit_sz = 0;
+			}
+
+			if (json_output) {
+				jsonw_start_object(w);
+				jsonw_string_field(w, "name", name);
+				jsonw_uint_field(w, "type_id", m->type);
+				jsonw_uint_field(w, "bits_offset", bit_off);
+				if (bit_sz) {
+					jsonw_uint_field(w, "bitfield_size",
+							 bit_sz);
+				}
+				jsonw_end_object(w);
+			} else {
+				printf("\n\t'%s' type_id=%u bits_offset=%u",
+				       name, m->type, bit_off);
+				if (bit_sz)
+					printf(" bitfield_size=%u", bit_sz);
+			}
+		}
+		if (json_output)
+			jsonw_end_array(w);
+		break;
+	}
+	case BTF_KIND_ENUM: {
+		const struct btf_enum *v = (const void *)(t + 1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int i;
+
+		if (json_output) {
+			jsonw_uint_field(w, "size", t->size);
+			jsonw_uint_field(w, "vlen", vlen);
+			jsonw_name(w, "values");
+			jsonw_start_array(w);
+		} else {
+			printf(" size=%u vlen=%u", t->size, vlen);
+		}
+		for (i = 0; i < vlen; i++, v++) {
+			const char *name = btf_str(btf, v->name_off);
+
+			if (json_output) {
+				jsonw_start_object(w);
+				jsonw_string_field(w, "name", name);
+				jsonw_uint_field(w, "val", v->val);
+				jsonw_end_object(w);
+			} else {
+				printf("\n\t'%s' val=%u", name, v->val);
+			}
+		}
+		if (json_output)
+			jsonw_end_array(w);
+		break;
+	}
+	case BTF_KIND_FWD: {
+		const char *fwd_kind = BTF_INFO_KIND(t->info) ? "union"
+							      : "struct";
+
+		if (json_output)
+			jsonw_string_field(w, "fwd_kind", fwd_kind);
+		else
+			printf(" fwd_kind=%s", fwd_kind);
+		break;
+	}
+	case BTF_KIND_FUNC:
+		if (json_output)
+			jsonw_uint_field(w, "type_id", t->type);
+		else
+			printf(" type_id=%u", t->type);
+		break;
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p = (const void *)(t + 1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int i;
+
+		if (json_output) {
+			jsonw_uint_field(w, "ret_type_id", t->type);
+			jsonw_uint_field(w, "vlen", vlen);
+			jsonw_name(w, "params");
+			jsonw_start_array(w);
+		} else {
+			printf(" ret_type_id=%u vlen=%u", t->type, vlen);
+		}
+		for (i = 0; i < vlen; i++, p++) {
+			const char *name = btf_str(btf, p->name_off);
+
+			if (json_output) {
+				jsonw_start_object(w);
+				jsonw_string_field(w, "name", name);
+				jsonw_uint_field(w, "type_id", p->type);
+				jsonw_end_object(w);
+			} else {
+				printf("\n\t'%s' type_id=%u", name, p->type);
+			}
+		}
+		if (json_output)
+			jsonw_end_array(w);
+		break;
+	}
+	case BTF_KIND_VAR: {
+		const struct btf_var *v = (const void *)(t + 1);
+		const char *linkage;
+
+		linkage = btf_var_linkage_str(v->linkage);
+
+		if (json_output) {
+			jsonw_uint_field(w, "type_id", t->type);
+			jsonw_string_field(w, "linkage", linkage);
+		} else {
+			printf(" type_id=%u, linkage=%s", t->type, linkage);
+		}
+		break;
+	}
+	case BTF_KIND_DATASEC: {
+		const struct btf_var_secinfo *v = (const void *)(t+1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int i;
+
+		if (json_output) {
+			jsonw_uint_field(w, "size", t->size);
+			jsonw_uint_field(w, "vlen", vlen);
+			jsonw_name(w, "vars");
+			jsonw_start_array(w);
+		} else {
+			printf(" size=%u vlen=%u", t->size, vlen);
+		}
+		for (i = 0; i < vlen; i++, v++) {
+			if (json_output) {
+				jsonw_start_object(w);
+				jsonw_uint_field(w, "type_id", v->type);
+				jsonw_uint_field(w, "offset", v->offset);
+				jsonw_uint_field(w, "size", v->size);
+				jsonw_end_object(w);
+			} else {
+				printf("\n\ttype_id=%u offset=%u size=%u",
+				       v->type, v->offset, v->size);
+			}
+		}
+		if (json_output)
+			jsonw_end_array(w);
+		break;
+	}
+	default:
+		break;
+	}
+
+	if (json_output)
+		jsonw_end_object(json_wtr);
+	else
+		printf("\n");
+
+	return 0;
+}
+
+static int dump_btf_raw(const struct btf *btf,
+			__u32 *root_type_ids, int root_type_cnt)
+{
+	const struct btf_type *t;
+	int i;
+
+	if (json_output) {
+		jsonw_start_object(json_wtr);
+		jsonw_name(json_wtr, "types");
+		jsonw_start_array(json_wtr);
+	}
+
+	if (root_type_cnt) {
+		for (i = 0; i < root_type_cnt; i++) {
+			t = btf__type_by_id(btf, root_type_ids[i]);
+			dump_btf_type(btf, root_type_ids[i], t);
+		}
+	} else {
+		int cnt = btf__get_nr_types(btf);
+
+		for (i = 1; i <= cnt; i++) {
+			t = btf__type_by_id(btf, i);
+			dump_btf_type(btf, i, t);
+		}
+	}
+
+	if (json_output) {
+		jsonw_end_array(json_wtr);
+		jsonw_end_object(json_wtr);
+	}
+	return 0;
+}
+
+static bool check_btf_endianness(GElf_Ehdr *ehdr)
+{
+	static unsigned int const endian = 1;
+
+	switch (ehdr->e_ident[EI_DATA]) {
+	case ELFDATA2LSB:
+		return *(unsigned char const *)&endian == 1;
+	case ELFDATA2MSB:
+		return *(unsigned char const *)&endian == 0;
+	default:
+		return 0;
+	}
+}
+
+static int btf_load_from_elf(const char *path, struct btf **btf)
+{
+	int err = -1, fd = -1, idx = 0;
+	Elf_Data *btf_data = NULL;
+	Elf_Scn *scn = NULL;
+	Elf *elf = NULL;
+	GElf_Ehdr ehdr;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		p_err("failed to init libelf for %s", path);
+		return -1;
+	}
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		p_err("failed to open %s: %s", path, strerror(errno));
+		return -1;
+	}
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (!elf) {
+		p_err("failed to open %s as ELF file", path);
+		goto done;
+	}
+	if (!gelf_getehdr(elf, &ehdr)) {
+		p_err("failed to get EHDR from %s", path);
+		goto done;
+	}
+	if (!check_btf_endianness(&ehdr)) {
+		p_err("non-native ELF endianness is not supported");
+		goto done;
+	}
+	if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
+		p_err("failed to get e_shstrndx from %s\n", path);
+		goto done;
+	}
+
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		GElf_Shdr sh;
+		char *name;
+
+		idx++;
+		if (gelf_getshdr(scn, &sh) != &sh) {
+			p_err("failed to get section(%d) header from %s",
+			      idx, path);
+			goto done;
+		}
+		name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+		if (!name) {
+			p_err("failed to get section(%d) name from %s",
+			      idx, path);
+			goto done;
+		}
+		if (strcmp(name, BTF_ELF_SEC) == 0) {
+			btf_data = elf_getdata(scn, 0);
+			if (!btf_data) {
+				p_err("failed to get section(%d, %s) data from %s",
+				      idx, name, path);
+				goto done;
+			}
+			break;
+		}
+	}
+
+	if (!btf_data) {
+		p_err("%s ELF section not found in %s", BTF_ELF_SEC, path);
+		goto done;
+	}
+
+	*btf = btf__new(btf_data->d_buf, btf_data->d_size);
+	if (IS_ERR(*btf)) {
+		err = PTR_ERR(*btf);
+		*btf = NULL;
+		p_err("failed to load BTF data from %s: %s",
+		      path, strerror(err));
+		goto done;
+	}
+
+	err = 0;
+done:
+	if (err) {
+		if (*btf) {
+			btf__free(*btf);
+			*btf = NULL;
+		}
+	}
+	if (elf)
+		elf_end(elf);
+	close(fd);
+	return err;
+}
+
+static int do_dump(int argc, char **argv)
+{
+	struct btf *btf = NULL;
+	__u32 root_type_ids[2];
+	int root_type_cnt = 0;
+	__u32 btf_id = -1;
+	const char *src;
+	int fd = -1;
+	int err;
+
+	if (!REQ_ARGS(2)) {
+		usage();
+		return -1;
+	}
+	src = GET_ARG();
+
+	if (is_prefix(src, "map")) {
+		struct bpf_map_info info = {};
+		__u32 len = sizeof(info);
+
+		if (!REQ_ARGS(2)) {
+			usage();
+			return -1;
+		}
+
+		fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+		if (fd < 0)
+			return -1;
+
+		btf_id = info.btf_id;
+		if (argc && is_prefix(*argv, "key")) {
+			root_type_ids[root_type_cnt++] = info.btf_key_type_id;
+			NEXT_ARG();
+		} else if (argc && is_prefix(*argv, "value")) {
+			root_type_ids[root_type_cnt++] = info.btf_value_type_id;
+			NEXT_ARG();
+		} else if (argc && is_prefix(*argv, "all")) {
+			NEXT_ARG();
+		} else if (argc && is_prefix(*argv, "kv")) {
+			root_type_ids[root_type_cnt++] = info.btf_key_type_id;
+			root_type_ids[root_type_cnt++] = info.btf_value_type_id;
+			NEXT_ARG();
+		} else {
+			root_type_ids[root_type_cnt++] = info.btf_key_type_id;
+			root_type_ids[root_type_cnt++] = info.btf_value_type_id;
+		}
+	} else if (is_prefix(src, "prog")) {
+		struct bpf_prog_info info = {};
+		__u32 len = sizeof(info);
+
+		if (!REQ_ARGS(2)) {
+			usage();
+			return -1;
+		}
+
+		fd = prog_parse_fd(&argc, &argv);
+		if (fd < 0)
+			return -1;
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			p_err("can't get prog info: %s", strerror(errno));
+			goto done;
+		}
+
+		btf_id = info.btf_id;
+	} else if (is_prefix(src, "id")) {
+		char *endptr;
+
+		btf_id = strtoul(*argv, &endptr, 0);
+		if (*endptr) {
+			p_err("can't parse %s as ID", **argv);
+			return -1;
+		}
+		NEXT_ARG();
+	} else if (is_prefix(src, "file")) {
+		err = btf_load_from_elf(*argv, &btf);
+		if (err)
+			goto done;
+		NEXT_ARG();
+	} else {
+		err = -1;
+		p_err("unrecognized BTF source specifier: '%s'", src);
+		goto done;
+	}
+
+	if (!btf) {
+		err = btf__get_from_id(btf_id, &btf);
+		if (err) {
+			p_err("get btf by id (%u): %s", btf_id, strerror(err));
+			goto done;
+		}
+		if (!btf) {
+			err = ENOENT;
+			p_err("can't find btf with ID (%u)", btf_id);
+			goto done;
+		}
+	}
+
+	dump_btf_raw(btf, root_type_ids, root_type_cnt);
+
+done:
+	close(fd);
+	btf__free(btf);
+	return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %s btf dump BTF_SRC\n"
+		"       %s btf help\n"
+		"\n"
+		"       BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
+		"       " HELP_SPEC_MAP "\n"
+		"       " HELP_SPEC_PROGRAM "\n"
+		"       " HELP_SPEC_OPTIONS "\n"
+		"",
+		bin_name, bin_name);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "help",	do_help },
+	{ "dump",	do_dump },
+	{ 0 }
+};
+
+int do_btf(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index a9d5e9e6a732..1ac1fc520e6a 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -56,7 +56,7 @@ static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | cgroup | perf | net | feature }\n"
+		"       OBJECT := { prog | map | cgroup | perf | net | feature | btf }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -188,6 +188,7 @@ static const struct cmd cmds[] = {
 	{ "perf",	do_perf },
 	{ "net",	do_net },
 	{ "feature",	do_feature },
+	{ "btf",	do_btf },
 	{ "version",	do_version },
 	{ 0 }
 };
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 1ccc46169a19..3d63feb7f852 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -150,6 +150,7 @@ int do_perf(int argc, char **arg);
 int do_net(int argc, char **arg);
 int do_tracelog(int argc, char **arg);
 int do_feature(int argc, char **argv);
+int do_btf(int argc, char **argv);
 
 int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
-- 
cgit v1.2.3


From ca253339af928528ceb34770b076d3230ed7d629 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 25 Apr 2019 15:30:09 -0700
Subject: bpftool/docs: add btf sub-command documentation

Document usage and sample output format for `btf dump` sub-command.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/Documentation/bpftool-btf.rst    | 222 +++++++++++++++++++++
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst |   3 +-
 .../bpf/bpftool/Documentation/bpftool-feature.rst  |   3 +-
 tools/bpf/bpftool/Documentation/bpftool-map.rst    |   3 +-
 tools/bpf/bpftool/Documentation/bpftool-net.rst    |   3 +-
 tools/bpf/bpftool/Documentation/bpftool-perf.rst   |   3 +-
 tools/bpf/bpftool/Documentation/bpftool-prog.rst   |   3 +-
 tools/bpf/bpftool/Documentation/bpftool.rst        |   3 +-
 8 files changed, 236 insertions(+), 7 deletions(-)
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-btf.rst

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
new file mode 100644
index 000000000000..2dbc1413fabd
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -0,0 +1,222 @@
+================
+bpftool-btf
+================
+-------------------------------------------------------------------------------
+tool for inspection of BTF data
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **btf** *COMMAND*
+
+	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+
+	*COMMANDS* := { **dump** | **help** }
+
+BTF COMMANDS
+=============
+
+|	**bpftool** **btf dump** *BTF_SRC*
+|	**bpftool** **btf help**
+|
+|	*BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* }
+|	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
+|	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+
+DESCRIPTION
+===========
+	**bpftool btf dump** *BTF_SRC*
+		  Dump BTF entries from a given *BTF_SRC*.
+
+                  When **id** is specified, BTF object with that ID will be
+                  loaded and all its BTF types emitted.
+
+                  When **map** is provided, it's expected that map has
+                  associated BTF object with BTF types describing key and
+                  value. It's possible to select whether to dump only BTF
+                  type(s) associated with key (**key**), value (**value**),
+                  both key and value (**kv**), or all BTF types present in
+                  associated BTF object (**all**). If not specified, **kv**
+                  is assumed.
+
+                  When **prog** is provided, it's expected that program has
+                  associated BTF object with BTF types.
+
+                  When specifying *FILE*, an ELF file is expected, containing
+                  .BTF section with well-defined BTF binary format data,
+                  typically produced by clang or pahole.
+
+	**bpftool btf help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-V, --version
+		  Print version number (similar to **bpftool version**).
+
+	-j, --json
+		  Generate JSON output. For commands that cannot produce JSON, this
+		  option has no effect.
+
+	-p, --pretty
+		  Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+**# bpftool btf dump id 1226**
+::
+
+  [1] PTR '(anon)' type_id=2
+  [2] STRUCT 'dummy_tracepoint_args' size=16 vlen=2
+          'pad' type_id=3 bits_offset=0
+          'sock' type_id=4 bits_offset=64
+  [3] INT 'long long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none)
+  [4] PTR '(anon)' type_id=5
+  [5] FWD 'sock' fwd_kind=union
+
+This gives an example of default output for all supported BTF kinds.
+
+**$ cat prog.c**
+::
+
+  struct fwd_struct;
+
+  enum my_enum {
+          VAL1 = 3,
+          VAL2 = 7,
+  };
+
+  typedef struct my_struct my_struct_t;
+
+  struct my_struct {
+          const unsigned int const_int_field;
+          int bitfield_field: 4;
+          char arr_field[16];
+          const struct fwd_struct *restrict fwd_field;
+          enum my_enum enum_field;
+          volatile my_struct_t *typedef_ptr_field;
+  };
+
+  union my_union {
+          int a;
+          struct my_struct b;
+  };
+
+  struct my_struct struct_global_var __attribute__((section("data_sec"))) = {
+          .bitfield_field = 3,
+          .enum_field = VAL1,
+  };
+  int global_var __attribute__((section("data_sec"))) = 7;
+
+  __attribute__((noinline))
+  int my_func(union my_union *arg1, int arg2)
+  {
+          static int static_var __attribute__((section("data_sec"))) = 123;
+          static_var++;
+          return static_var;
+  }
+
+**$ bpftool btf dump file prog.o**
+::
+
+  [1] PTR '(anon)' type_id=2
+  [2] UNION 'my_union' size=48 vlen=2
+          'a' type_id=3 bits_offset=0
+          'b' type_id=4 bits_offset=0
+  [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+  [4] STRUCT 'my_struct' size=48 vlen=6
+          'const_int_field' type_id=5 bits_offset=0
+          'bitfield_field' type_id=3 bits_offset=32 bitfield_size=4
+          'arr_field' type_id=8 bits_offset=40
+          'fwd_field' type_id=10 bits_offset=192
+          'enum_field' type_id=14 bits_offset=256
+          'typedef_ptr_field' type_id=15 bits_offset=320
+  [5] CONST '(anon)' type_id=6
+  [6] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)
+  [7] INT 'char' size=1 bits_offset=0 nr_bits=8 encoding=SIGNED
+  [8] ARRAY '(anon)' type_id=7 index_type_id=9 nr_elems=16
+  [9] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none)
+  [10] RESTRICT '(anon)' type_id=11
+  [11] PTR '(anon)' type_id=12
+  [12] CONST '(anon)' type_id=13
+  [13] FWD 'fwd_struct' fwd_kind=union
+  [14] ENUM 'my_enum' size=4 vlen=2
+          'VAL1' val=3
+          'VAL2' val=7
+  [15] PTR '(anon)' type_id=16
+  [16] VOLATILE '(anon)' type_id=17
+  [17] TYPEDEF 'my_struct_t' type_id=4
+  [18] FUNC_PROTO '(anon)' ret_type_id=3 vlen=2
+          'arg1' type_id=1
+          'arg2' type_id=3
+  [19] FUNC 'my_func' type_id=18
+  [20] VAR 'struct_global_var' type_id=4, linkage=global-alloc
+  [21] VAR 'global_var' type_id=3, linkage=global-alloc
+  [22] VAR 'my_func.static_var' type_id=3, linkage=static
+  [23] DATASEC 'data_sec' size=0 vlen=3
+          type_id=20 offset=0 size=48
+          type_id=21 offset=0 size=4
+          type_id=22 offset=52 size=4
+
+The following commands print BTF types associated with specified map's key,
+value, both key and value, and all BTF types, respectively. By default, both
+key and value types will be printed.
+
+**# bpftool btf dump map id 123 key**
+
+::
+
+  [39] TYPEDEF 'u32' type_id=37
+
+**# bpftool btf dump map id 123 value**
+
+::
+
+  [86] PTR '(anon)' type_id=87
+
+**# bpftool btf dump map id 123 kv**
+
+::
+
+  [39] TYPEDEF 'u32' type_id=37
+  [86] PTR '(anon)' type_id=87
+
+**# bpftool btf dump map id 123 all**
+
+::
+
+  [1] PTR '(anon)' type_id=0
+  .
+  .
+  .
+  [2866] ARRAY '(anon)' type_id=52 index_type_id=51 nr_elems=4
+
+All the standard ways to specify map or program are supported:
+
+**# bpftool btf dump map id 123**
+
+**# bpftool btf dump map pinned /sys/fs/bpf/map_name**
+
+**# bpftool btf dump prog id 456**
+
+**# bpftool btf dump prog tag b88e0a09b1d9759d**
+
+**# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
+
+SEE ALSO
+========
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-feature**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 89b6b10e2183..ac26876389c2 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -145,4 +145,5 @@ SEE ALSO
 	**bpftool-map**\ (8),
 	**bpftool-feature**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 10177343b7ce..14180e887082 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -82,4 +82,5 @@ SEE ALSO
 	**bpftool-map**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 55ecf80ca03e..13ef27b39f20 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -258,4 +258,5 @@ SEE ALSO
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 6b692b428373..934580850f42 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -143,4 +143,5 @@ SEE ALSO
 	**bpftool-map**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index 86154740fabb..0c7576523a21 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -85,4 +85,5 @@ SEE ALSO
 	**bpftool-map**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
-	**bpftool-net**\ (8)
+	**bpftool-net**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 2f183ffd8351..e8118544d118 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -271,4 +271,5 @@ SEE ALSO
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index deb2ca911ddf..3e562d7fd56f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -76,4 +76,5 @@ SEE ALSO
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
-- 
cgit v1.2.3


From 4a714feefd99c25c7304b43ac58c9d5c0304e7cb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 25 Apr 2019 15:30:10 -0700
Subject: bpftool: add bash completions for btf command

Add full support for btf command in bash-completion script.

Cc: Quentin Monnet <quentin.monnet@netronome.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/bash-completion/bpftool | 46 +++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 9f3ffe1e26ab..bca91d04ed35 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -217,6 +217,7 @@ _bpftool()
     done
     cur=${words[cword]}
     prev=${words[cword - 1]}
+    pprev=${words[cword - 2]}
 
     local object=${words[1]} command=${words[2]}
 
@@ -607,6 +608,51 @@ _bpftool()
                     ;;
             esac
             ;;
+        btf)
+            local PROG_TYPE='id pinned tag'
+            local MAP_TYPE='id pinned'
+            case $command in
+                dump)
+                    case $prev in
+                        $command)
+                            COMPREPLY+=( $( compgen -W "id map prog file" -- \
+                                "$cur" ) )
+                            return 0
+                            ;;
+                        prog)
+                            COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                        map)
+                            COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                        id)
+                            case $pprev in
+                                prog)
+                                    _bpftool_get_prog_ids
+                                    ;;
+                                map)
+                                    _bpftool_get_map_ids
+                                    ;;
+                            esac
+                            return 0
+                            ;;
+                        *)
+                            if [[ $cword == 6 ]] && [[ ${words[3]} == "map" ]]; then
+                                 COMPREPLY+=( $( compgen -W 'key value kv all' -- \
+                                     "$cur" ) )
+                            fi
+                            return 0
+                            ;;
+                    esac
+                    ;;
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'dump help' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
         cgroup)
             case $command in
                 show|list)
-- 
cgit v1.2.3


From 8ed1875bf3a77d1653b895e41774aec9a341a97f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 25 Apr 2019 15:30:11 -0700
Subject: bpftool: fix indendation in bash-completion/bpftool

Fix misaligned default case branch for `prog dump` sub-command.

Reported-by: Quentin Monnet <quentin.monnet@netronome.com>
Cc: Yonghong Song <yhs@fb.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/bash-completion/bpftool | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index bca91d04ed35..50e402a5a9c8 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -273,17 +273,17 @@ _bpftool()
                                 "$cur" ) )
                             return 0
                             ;;
-                    *)
-                        _bpftool_once_attr 'file'
-                        if _bpftool_search_list 'xlated'; then
-                            COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \
-                                "$cur" ) )
-                        else
-                            COMPREPLY+=( $( compgen -W 'opcodes linum' -- \
-                                "$cur" ) )
-                        fi
-                        return 0
-                        ;;
+                        *)
+                            _bpftool_once_attr 'file'
+                            if _bpftool_search_list 'xlated'; then
+                                COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \
+                                    "$cur" ) )
+                            else
+                                COMPREPLY+=( $( compgen -W 'opcodes linum' -- \
+                                    "$cur" ) )
+                            fi
+                            return 0
+                            ;;
                     esac
                     ;;
                 pin)
-- 
cgit v1.2.3


From d514f41e793d2cbc737ba107d7ae26f387f5eecf Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 25 Apr 2019 15:59:50 +0200
Subject: netdevsim: merge sdev into dev

As previously introduce dev which is mapped 1:1 to a bus device covers
the purpose of the original shared device, merge the sdev code into dev.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/Makefile              |  2 +-
 drivers/net/netdevsim/bpf.c                 | 79 ++++++++++++-----------------
 drivers/net/netdevsim/dev.c                 | 47 +++++++++++++++++
 drivers/net/netdevsim/netdev.c              | 49 +++++-------------
 drivers/net/netdevsim/netdevsim.h           | 54 ++++++++------------
 tools/testing/selftests/bpf/test_offload.py |  8 +--
 6 files changed, 120 insertions(+), 119 deletions(-)

(limited to 'tools')

diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile
index ab7e2c42088f..09f1315d2f2a 100644
--- a/drivers/net/netdevsim/Makefile
+++ b/drivers/net/netdevsim/Makefile
@@ -3,7 +3,7 @@
 obj-$(CONFIG_NETDEVSIM) += netdevsim.o
 
 netdevsim-objs := \
-	netdev.o dev.o fib.o sdev.o bus.o
+	netdev.o dev.o fib.o bus.o
 
 ifeq ($(CONFIG_BPF_SYSCALL),y)
 netdevsim-objs += \
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index a93aafe87db3..89980b223adc 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -27,7 +27,7 @@
 	bpf_verifier_log_write(env, "[netdevsim] " fmt, ##__VA_ARGS__)
 
 struct nsim_bpf_bound_prog {
-	struct netdevsim_shared_dev *sdev;
+	struct nsim_dev *nsim_dev;
 	struct bpf_prog *prog;
 	struct dentry *ddir;
 	const char *state;
@@ -65,8 +65,8 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
 	struct nsim_bpf_bound_prog *state;
 
 	state = env->prog->aux->offload->dev_priv;
-	if (state->sdev->bpf_bind_verifier_delay && !insn_idx)
-		msleep(state->sdev->bpf_bind_verifier_delay);
+	if (state->nsim_dev->bpf_bind_verifier_delay && !insn_idx)
+		msleep(state->nsim_dev->bpf_bind_verifier_delay);
 
 	if (insn_idx == env->prog->len - 1)
 		pr_vlog(env, "Hello from netdevsim!\n");
@@ -213,7 +213,7 @@ nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf,
 	return 0;
 }
 
-static int nsim_bpf_create_prog(struct netdevsim_shared_dev *sdev,
+static int nsim_bpf_create_prog(struct nsim_dev *nsim_dev,
 				struct bpf_prog *prog)
 {
 	struct nsim_bpf_bound_prog *state;
@@ -223,13 +223,13 @@ static int nsim_bpf_create_prog(struct netdevsim_shared_dev *sdev,
 	if (!state)
 		return -ENOMEM;
 
-	state->sdev = sdev;
+	state->nsim_dev = nsim_dev;
 	state->prog = prog;
 	state->state = "verify";
 
 	/* Program id is not populated yet when we create the state. */
-	sprintf(name, "%u", sdev->prog_id_gen++);
-	state->ddir = debugfs_create_dir(name, sdev->ddir_bpf_bound_progs);
+	sprintf(name, "%u", nsim_dev->prog_id_gen++);
+	state->ddir = debugfs_create_dir(name, nsim_dev->ddir_bpf_bound_progs);
 	if (IS_ERR_OR_NULL(state->ddir)) {
 		kfree(state);
 		return -ENOMEM;
@@ -240,7 +240,7 @@ static int nsim_bpf_create_prog(struct netdevsim_shared_dev *sdev,
 			    &state->state, &nsim_bpf_string_fops);
 	debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded);
 
-	list_add_tail(&state->l, &sdev->bpf_bound_progs);
+	list_add_tail(&state->l, &nsim_dev->bpf_bound_progs);
 
 	prog->aux->offload->dev_priv = state;
 
@@ -249,13 +249,13 @@ static int nsim_bpf_create_prog(struct netdevsim_shared_dev *sdev,
 
 static int nsim_bpf_verifier_prep(struct bpf_prog *prog)
 {
-	struct netdevsim_shared_dev *sdev =
+	struct nsim_dev *nsim_dev =
 			bpf_offload_dev_priv(prog->aux->offload->offdev);
 
-	if (!sdev->bpf_bind_accept)
+	if (!nsim_dev->bpf_bind_accept)
 		return -EOPNOTSUPP;
 
-	return nsim_bpf_create_prog(sdev, prog);
+	return nsim_bpf_create_prog(nsim_dev, prog);
 }
 
 static int nsim_bpf_translate(struct bpf_prog *prog)
@@ -514,7 +514,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap)
 	}
 
 	offmap->dev_ops = &nsim_bpf_map_ops;
-	list_add_tail(&nmap->l, &ns->sdev->bpf_bound_maps);
+	list_add_tail(&nmap->l, &ns->nsim_dev->bpf_bound_maps);
 
 	return 0;
 
@@ -578,51 +578,46 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 	}
 }
 
-static int nsim_bpf_sdev_init(struct netdevsim_shared_dev *sdev)
+int nsim_bpf_dev_init(struct nsim_dev *nsim_dev)
 {
 	int err;
 
-	INIT_LIST_HEAD(&sdev->bpf_bound_progs);
-	INIT_LIST_HEAD(&sdev->bpf_bound_maps);
+	INIT_LIST_HEAD(&nsim_dev->bpf_bound_progs);
+	INIT_LIST_HEAD(&nsim_dev->bpf_bound_maps);
 
-	sdev->ddir_bpf_bound_progs =
-		debugfs_create_dir("bpf_bound_progs", sdev->ddir);
-	if (IS_ERR_OR_NULL(sdev->ddir_bpf_bound_progs))
+	nsim_dev->ddir_bpf_bound_progs = debugfs_create_dir("bpf_bound_progs",
+							    nsim_dev->ddir);
+	if (IS_ERR_OR_NULL(nsim_dev->ddir_bpf_bound_progs))
 		return -ENOMEM;
 
-	sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops, sdev);
-	err = PTR_ERR_OR_ZERO(sdev->bpf_dev);
+	nsim_dev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops, nsim_dev);
+	err = PTR_ERR_OR_ZERO(nsim_dev->bpf_dev);
 	if (err)
 		return err;
 
-	sdev->bpf_bind_accept = true;
-	debugfs_create_bool("bpf_bind_accept", 0600, sdev->ddir,
-			    &sdev->bpf_bind_accept);
-	debugfs_create_u32("bpf_bind_verifier_delay", 0600, sdev->ddir,
-			   &sdev->bpf_bind_verifier_delay);
+	nsim_dev->bpf_bind_accept = true;
+	debugfs_create_bool("bpf_bind_accept", 0600, nsim_dev->ddir,
+			    &nsim_dev->bpf_bind_accept);
+	debugfs_create_u32("bpf_bind_verifier_delay", 0600, nsim_dev->ddir,
+			   &nsim_dev->bpf_bind_verifier_delay);
 	return 0;
 }
 
-static void nsim_bpf_sdev_uninit(struct netdevsim_shared_dev *sdev)
+void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev)
 {
-	WARN_ON(!list_empty(&sdev->bpf_bound_progs));
-	WARN_ON(!list_empty(&sdev->bpf_bound_maps));
-	bpf_offload_dev_destroy(sdev->bpf_dev);
+	WARN_ON(!list_empty(&nsim_dev->bpf_bound_progs));
+	WARN_ON(!list_empty(&nsim_dev->bpf_bound_maps));
+	bpf_offload_dev_destroy(nsim_dev->bpf_dev);
 }
 
 int nsim_bpf_init(struct netdevsim *ns)
 {
 	int err;
 
-	if (ns->sdev->refcnt == 1) {
-		err = nsim_bpf_sdev_init(ns->sdev);
-		if (err)
-			return err;
-	}
-
-	err = bpf_offload_dev_netdev_register(ns->sdev->bpf_dev, ns->netdev);
+	err = bpf_offload_dev_netdev_register(ns->nsim_dev->bpf_dev,
+					      ns->netdev);
 	if (err)
-		goto err_bpf_sdev_uninit;
+		return err;
 
 	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
 			   &ns->bpf_offloaded_id);
@@ -644,11 +639,6 @@ int nsim_bpf_init(struct netdevsim *ns)
 			    &ns->bpf_map_accept);
 
 	return 0;
-
-err_bpf_sdev_uninit:
-	if (ns->sdev->refcnt == 1)
-		nsim_bpf_sdev_uninit(ns->sdev);
-	return err;
 }
 
 void nsim_bpf_uninit(struct netdevsim *ns)
@@ -656,8 +646,5 @@ void nsim_bpf_uninit(struct netdevsim *ns)
 	WARN_ON(ns->xdp.prog);
 	WARN_ON(ns->xdp_hw.prog);
 	WARN_ON(ns->bpf_offloaded);
-	bpf_offload_dev_netdev_unregister(ns->sdev->bpf_dev, ns->netdev);
-
-	if (ns->sdev->refcnt == 1)
-		nsim_bpf_sdev_uninit(ns->sdev);
+	bpf_offload_dev_netdev_unregister(ns->nsim_dev->bpf_dev, ns->netdev);
 }
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 664b9329a65c..93abd0b6c1f9 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -15,12 +15,31 @@
  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
  */
 
+#include <linux/debugfs.h>
 #include <linux/device.h>
 #include <linux/rtnetlink.h>
 #include <net/devlink.h>
 
 #include "netdevsim.h"
 
+static struct dentry *nsim_dev_ddir;
+
+static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
+{
+	char dev_ddir_name[10];
+
+	sprintf(dev_ddir_name, "%u", nsim_dev->nsim_bus_dev->dev.id);
+	nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir);
+	if (IS_ERR_OR_NULL(nsim_dev->ddir))
+		return PTR_ERR_OR_ZERO(nsim_dev->ddir) ?: -EINVAL;
+	return 0;
+}
+
+static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev)
+{
+	debugfs_remove_recursive(nsim_dev->ddir);
+}
+
 static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv)
 {
 	struct nsim_dev *nsim_dev = priv;
@@ -184,6 +203,7 @@ static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev)
 	if (!devlink)
 		return ERR_PTR(-ENOMEM);
 	nsim_dev = devlink_priv(devlink);
+	nsim_dev->nsim_bus_dev = nsim_bus_dev;
 
 	nsim_dev->fib_data = nsim_fib_create();
 	if (IS_ERR(nsim_dev->fib_data)) {
@@ -199,8 +219,20 @@ static struct nsim_dev *nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev)
 	if (err)
 		goto err_resources_unregister;
 
+	err = nsim_dev_debugfs_init(nsim_dev);
+	if (err)
+		goto err_dl_unregister;
+
+	err = nsim_bpf_dev_init(nsim_dev);
+	if (err)
+		goto err_debugfs_exit;
+
 	return nsim_dev;
 
+err_debugfs_exit:
+	nsim_dev_debugfs_exit(nsim_dev);
+err_dl_unregister:
+	devlink_unregister(devlink);
 err_resources_unregister:
 	devlink_resources_unregister(devlink, NULL);
 err_fib_destroy:
@@ -228,8 +260,23 @@ void nsim_dev_destroy(struct nsim_dev *nsim_dev)
 {
 	struct devlink *devlink = priv_to_devlink(nsim_dev);
 
+	nsim_bpf_dev_exit(nsim_dev);
+	nsim_dev_debugfs_exit(nsim_dev);
 	devlink_unregister(devlink);
 	devlink_resources_unregister(devlink, NULL);
 	nsim_fib_destroy(nsim_dev->fib_data);
 	devlink_free(devlink);
 }
+
+int nsim_dev_init(void)
+{
+	nsim_dev_ddir = debugfs_create_dir(DRV_NAME "_dev", NULL);
+	if (IS_ERR_OR_NULL(nsim_dev_ddir))
+		return -ENOMEM;
+	return 0;
+}
+
+void nsim_dev_exit(void)
+{
+	debugfs_remove_recursive(nsim_dev_ddir);
+}
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 28231bfbc989..c5f4bbb9716f 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -32,24 +32,24 @@ static int nsim_get_port_parent_id(struct net_device *dev,
 {
 	struct netdevsim *ns = netdev_priv(dev);
 
-	ppid->id_len = sizeof(ns->sdev->switch_id);
-	memcpy(&ppid->id, &ns->sdev->switch_id, ppid->id_len);
+	ppid->id_len = sizeof(ns->nsim_dev->nsim_bus_dev->dev.id);
+	memcpy(&ppid->id, &ns->nsim_dev->nsim_bus_dev->dev.id, ppid->id_len);
 	return 0;
 }
 
 static int nsim_init(struct net_device *dev)
 {
 	struct netdevsim *ns = netdev_priv(dev);
-	char sdev_link_name[32];
+	char dev_link_name[32];
 	int err;
 
 	ns->ddir = debugfs_create_dir(netdev_name(dev), nsim_ddir);
 	if (IS_ERR_OR_NULL(ns->ddir))
 		return -ENOMEM;
 
-	sprintf(sdev_link_name, "../../" DRV_NAME "_sdev/%u",
-		ns->sdev->switch_id);
-	debugfs_create_symlink("sdev", ns->ddir, sdev_link_name);
+	sprintf(dev_link_name, "../../" DRV_NAME "_dev/%u",
+		ns->nsim_dev->nsim_bus_dev->dev.id);
+	debugfs_create_symlink("dev", ns->ddir, dev_link_name);
 
 	err = nsim_bpf_init(ns);
 	if (err)
@@ -80,7 +80,6 @@ static void nsim_free(struct net_device *dev)
 	nsim_dev_destroy(ns->nsim_dev);
 	nsim_bus_dev_del(ns->nsim_bus_dev);
 	/* netdev and vf state will be freed out of device_release() */
-	nsim_sdev_put(ns->sdev);
 }
 
 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -366,31 +365,11 @@ static int nsim_newlink(struct net *src_net, struct net_device *dev,
 			struct netlink_ext_ack *extack)
 {
 	struct netdevsim *ns = netdev_priv(dev);
-	struct netdevsim *joinns = NULL;
 	int err;
 
-	if (tb[IFLA_LINK]) {
-		struct net_device *joindev;
-
-		joindev = __dev_get_by_index(src_net,
-					     nla_get_u32(tb[IFLA_LINK]));
-		if (!joindev)
-			return -ENODEV;
-		if (joindev->netdev_ops != &nsim_netdev_ops)
-			return -EINVAL;
-
-		joinns = netdev_priv(joindev);
-	}
-
-	ns->sdev = nsim_sdev_get(joinns);
-	if (IS_ERR(ns->sdev))
-		return PTR_ERR(ns->sdev);
-
 	ns->nsim_bus_dev = nsim_bus_dev_new(~0, 0);
-	if (IS_ERR(ns->nsim_bus_dev)) {
-		err = PTR_ERR(ns->nsim_bus_dev);
-		goto err_sdev_put;
-	}
+	if (IS_ERR(ns->nsim_bus_dev))
+		return PTR_ERR(ns->nsim_bus_dev);
 
 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
 	ns->netdev = dev;
@@ -410,8 +389,6 @@ err_dev_destroy:
 	nsim_dev_destroy(ns->nsim_dev);
 err_dev_del:
 	nsim_bus_dev_del(ns->nsim_bus_dev);
-err_sdev_put:
-	nsim_sdev_put(ns->sdev);
 	return err;
 }
 
@@ -431,13 +408,13 @@ static int __init nsim_module_init(void)
 	if (IS_ERR_OR_NULL(nsim_ddir))
 		return -ENOMEM;
 
-	err = nsim_sdev_init();
+	err = nsim_dev_init();
 	if (err)
 		goto err_debugfs_destroy;
 
 	err = nsim_bus_init();
 	if (err)
-		goto err_sdev_exit;
+		goto err_dev_exit;
 
 	err = rtnl_link_register(&nsim_link_ops);
 	if (err)
@@ -447,8 +424,8 @@ static int __init nsim_module_init(void)
 
 err_bus_exit:
 	nsim_bus_exit();
-err_sdev_exit:
-	nsim_sdev_exit();
+err_dev_exit:
+	nsim_dev_exit();
 err_debugfs_destroy:
 	debugfs_remove_recursive(nsim_ddir);
 	return err;
@@ -458,7 +435,7 @@ static void __exit nsim_module_exit(void)
 {
 	rtnl_link_unregister(&nsim_link_ops);
 	nsim_bus_exit();
-	nsim_sdev_exit();
+	nsim_dev_exit();
 	debugfs_remove_recursive(nsim_ddir);
 }
 
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index d6b3668f9afd..4ef44a9538db 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -26,37 +26,6 @@
 
 #define NSIM_EA(extack, msg)	NL_SET_ERR_MSG_MOD((extack), msg)
 
-struct bpf_prog;
-struct bpf_offload_dev;
-struct dentry;
-struct nsim_vf_config;
-struct nsim_fib_data;
-
-struct netdevsim_shared_dev {
-	unsigned int refcnt;
-	u32 switch_id;
-
-	struct dentry *ddir;
-
-	struct bpf_offload_dev *bpf_dev;
-
-	bool bpf_bind_accept;
-	u32 bpf_bind_verifier_delay;
-
-	struct dentry *ddir_bpf_bound_progs;
-	u32 prog_id_gen;
-
-	struct list_head bpf_bound_progs;
-	struct list_head bpf_bound_maps;
-};
-
-struct netdevsim;
-
-struct netdevsim_shared_dev *nsim_sdev_get(struct netdevsim *joinns);
-void nsim_sdev_put(struct netdevsim_shared_dev *sdev);
-int nsim_sdev_init(void);
-void nsim_sdev_exit(void);
-
 #define NSIM_IPSEC_MAX_SA_COUNT		33
 #define NSIM_IPSEC_VALID		BIT(31)
 
@@ -87,7 +56,6 @@ struct netdevsim {
 	struct u64_stats_sync syncp;
 
 	struct nsim_bus_dev *nsim_bus_dev;
-	struct netdevsim_shared_dev *sdev;
 
 	struct dentry *ddir;
 
@@ -107,6 +75,8 @@ struct netdevsim {
 };
 
 #ifdef CONFIG_BPF_SYSCALL
+int nsim_bpf_dev_init(struct nsim_dev *nsim_dev);
+void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev);
 int nsim_bpf_init(struct netdevsim *ns);
 void nsim_bpf_uninit(struct netdevsim *ns);
 int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf);
@@ -114,6 +84,15 @@ int nsim_bpf_disable_tc(struct netdevsim *ns);
 int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
 			       void *type_data, void *cb_priv);
 #else
+
+static inline int nsim_bpf_dev_init(struct nsim_dev *nsim_dev)
+{
+	return 0;
+}
+
+static inline void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev)
+{
+}
 static inline int nsim_bpf_init(struct netdevsim *ns)
 {
 	return 0;
@@ -152,13 +131,24 @@ enum nsim_resource_id {
 };
 
 struct nsim_dev {
+	struct nsim_bus_dev *nsim_bus_dev;
 	struct nsim_fib_data *fib_data;
+	struct dentry *ddir;
+	struct bpf_offload_dev *bpf_dev;
+	bool bpf_bind_accept;
+	u32 bpf_bind_verifier_delay;
+	struct dentry *ddir_bpf_bound_progs;
+	u32 prog_id_gen;
+	struct list_head bpf_bound_progs;
+	struct list_head bpf_bound_maps;
 };
 
 struct nsim_dev *
 nsim_dev_create_with_ns(struct nsim_bus_dev *nsim_bus_dev,
 			struct netdevsim *ns);
 void nsim_dev_destroy(struct nsim_dev *nsim_dev);
+int nsim_dev_init(void);
+void nsim_dev_exit(void);
 
 struct nsim_fib_data *nsim_fib_create(void);
 void nsim_fib_destroy(struct nsim_fib_data *fib_data);
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index a7f95106119f..91dfe876a707 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -335,7 +335,7 @@ class NetdevSim:
         self.ns = ""
 
         self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
-        self.sdev_dir = self.dfs_dir + '/sdev/'
+        self.dev_dir = self.dfs_dir + '/dev/'
         self.dfs_refresh()
 
     def __getitem__(self, key):
@@ -368,12 +368,12 @@ class NetdevSim:
         return data.strip()
 
     def dfs_num_bound_progs(self):
-        path = os.path.join(self.sdev_dir, "bpf_bound_progs")
+        path = os.path.join(self.dev_dir, "bpf_bound_progs")
         _, progs = cmd('ls %s' % (path))
         return len(progs.split())
 
     def dfs_get_bound_progs(self, expected):
-        progs = DebugfsDir(os.path.join(self.sdev_dir, "bpf_bound_progs"))
+        progs = DebugfsDir(os.path.join(self.dev_dir, "bpf_bound_progs"))
         if expected is not None:
             if len(progs) != expected:
                 fail(True, "%d BPF programs bound, expected %d" %
@@ -1055,7 +1055,7 @@ try:
 
     start_test("Test if netdev removal waits for translation...")
     delay_msec = 500
-    sim.dfs["sdev/bpf_bind_verifier_delay"] = delay_msec
+    sim.dfs["dev/bpf_bind_verifier_delay"] = delay_msec
     start = time.time()
     cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
                (sim['ifname'], obj)
-- 
cgit v1.2.3


From ab1d0cc004d706523dcad7cdad97a2b94eecf169 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 25 Apr 2019 15:59:52 +0200
Subject: netdevsim: change debugfs tree topology

With the model where dev is represented by devlink and ports are
represented by devlink ports, make debugfs file names independent
on netdev names. Change the topology to the one illustrated
by the following example:

$ ls /sys/kernel/debug/netdevsim/
netdevsim1
$ ls /sys/kernel/debug/netdevsim/netdevsim1/
bpf_bind_accept  bpf_bind_verifier_delay  bpf_bound_progs  ports
$ ls /sys/kernel/debug/netdevsim/netdevsim1/ports/
0  1
$ ls /sys/kernel/debug/netdevsim/netdevsim1/ports/0/
bpf_map_accept  bpf_offloaded_id  bpf_tc_accept  bpf_tc_non_bound_accept  bpf_xdpdrv_accept  bpf_xdpoffload_accept  dev  ipsec
$ ls /sys/kernel/debug/netdevsim/netdevsim1/ports/0/dev -l
lrwxrwxrwx 1 root root 0 Apr 13 15:58 /sys/kernel/debug/netdevsim/netdevsim1/ports/0/dev -> ../../../netdevsim1

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c                 | 10 +++++++---
 drivers/net/netdevsim/netdev.c              | 15 +++------------
 drivers/net/netdevsim/netdevsim.h           |  1 +
 tools/testing/selftests/bpf/test_offload.py |  4 +++-
 tools/testing/selftests/net/rtnetlink.sh    |  2 +-
 5 files changed, 15 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 98a56d8bdcec..14946d162e53 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -27,17 +27,21 @@ static struct dentry *nsim_dev_ddir;
 
 static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
 {
-	char dev_ddir_name[10];
+	char dev_ddir_name[16];
 
-	sprintf(dev_ddir_name, "%u", nsim_dev->nsim_bus_dev->dev.id);
+	sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id);
 	nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir);
 	if (IS_ERR_OR_NULL(nsim_dev->ddir))
 		return PTR_ERR_OR_ZERO(nsim_dev->ddir) ?: -EINVAL;
+	nsim_dev->ports_ddir = debugfs_create_dir("ports", nsim_dev->ddir);
+	if (IS_ERR_OR_NULL(nsim_dev->ports_ddir))
+		return PTR_ERR_OR_ZERO(nsim_dev->ports_ddir) ?: -EINVAL;
 	return 0;
 }
 
 static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev)
 {
+	debugfs_remove_recursive(nsim_dev->ports_ddir);
 	debugfs_remove_recursive(nsim_dev->ddir);
 }
 
@@ -273,7 +277,7 @@ void nsim_dev_destroy(struct nsim_dev *nsim_dev)
 
 int nsim_dev_init(void)
 {
-	nsim_dev_ddir = debugfs_create_dir(DRV_NAME "_dev", NULL);
+	nsim_dev_ddir = debugfs_create_dir(DRV_NAME, NULL);
 	if (IS_ERR_OR_NULL(nsim_dev_ddir))
 		return -ENOMEM;
 	return 0;
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 9b4310e20129..eb823bd0dc39 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -25,8 +25,6 @@
 
 #include "netdevsim.h"
 
-static struct dentry *nsim_ddir;
-
 static int nsim_get_port_parent_id(struct net_device *dev,
 				   struct netdev_phys_item_id *ppid)
 {
@@ -42,11 +40,11 @@ static int nsim_init(struct net_device *dev)
 	char dev_link_name[32];
 	int err;
 
-	ns->ddir = debugfs_create_dir(netdev_name(dev), nsim_ddir);
+	ns->ddir = debugfs_create_dir("0", ns->nsim_dev->ports_ddir);
 	if (IS_ERR_OR_NULL(ns->ddir))
 		return -ENOMEM;
 
-	sprintf(dev_link_name, "../../" DRV_NAME "_dev/%u",
+	sprintf(dev_link_name, "../../../" DRV_NAME "%u",
 		ns->nsim_dev->nsim_bus_dev->dev.id);
 	debugfs_create_symlink("dev", ns->ddir, dev_link_name);
 
@@ -403,13 +401,9 @@ static int __init nsim_module_init(void)
 {
 	int err;
 
-	nsim_ddir = debugfs_create_dir(DRV_NAME, NULL);
-	if (IS_ERR_OR_NULL(nsim_ddir))
-		return -ENOMEM;
-
 	err = nsim_dev_init();
 	if (err)
-		goto err_debugfs_destroy;
+		return err;
 
 	err = nsim_bus_init();
 	if (err)
@@ -425,8 +419,6 @@ err_bus_exit:
 	nsim_bus_exit();
 err_dev_exit:
 	nsim_dev_exit();
-err_debugfs_destroy:
-	debugfs_remove_recursive(nsim_ddir);
 	return err;
 }
 
@@ -435,7 +427,6 @@ static void __exit nsim_module_exit(void)
 	rtnl_link_unregister(&nsim_link_ops);
 	nsim_bus_exit();
 	nsim_dev_exit();
-	debugfs_remove_recursive(nsim_ddir);
 }
 
 module_init(nsim_module_init);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 17639c7c9032..e951b1ccc3f2 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -134,6 +134,7 @@ struct nsim_dev {
 	struct nsim_bus_dev *nsim_bus_dev;
 	struct nsim_fib_data *fib_data;
 	struct dentry *ddir;
+	struct dentry *ports_ddir;
 	struct bpf_offload_dev *bpf_dev;
 	bool bpf_bind_accept;
 	u32 bpf_bind_verifier_delay;
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 91dfe876a707..5f2e4f9e70e4 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -306,6 +306,8 @@ class DebugfsDir:
 
         _, out = cmd('ls ' + path)
         for f in out.split():
+            if f == "ports":
+                continue
             p = os.path.join(path, f)
             if os.path.isfile(p):
                 _, out = cmd('cat %s/%s' % (path, f))
@@ -334,7 +336,7 @@ class NetdevSim:
 
         self.ns = ""
 
-        self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
+        self.dfs_dir = '/sys/kernel/debug/netdevsim/netdevsim0/ports/0/'
         self.dev_dir = self.dfs_dir + '/dev/'
         self.dfs_refresh()
 
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index b447803f3f8a..311f82bcbe8d 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -697,7 +697,7 @@ kci_test_ipsec_offload()
 	srcip=192.168.123.3
 	dstip=192.168.123.4
 	dev=simx1
-	sysfsd=/sys/kernel/debug/netdevsim/$dev
+	sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
 	sysfsf=$sysfsd/ipsec
 
 	# setup netdevsim since dummydev doesn't have offload support
-- 
cgit v1.2.3


From e05b2d141fef22cfac1928cf0eb6890e5dae4216 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 25 Apr 2019 15:59:55 +0200
Subject: netdevsim: move netdev creation/destruction to dev probe

Remove the existing way to create netdevsim over rtnetlink and move the
netdev creation/destruction to dev probe, so for every probed port,
a netdevsim-netdev instance is created.

Adjust selftests to work with new interface.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/bpf.c                 |  13 +-
 drivers/net/netdevsim/bus.c                 |  25 ++--
 drivers/net/netdevsim/dev.c                 |  13 +-
 drivers/net/netdevsim/ipsec.c               |   3 +-
 drivers/net/netdevsim/netdev.c              | 138 ++++++-----------
 drivers/net/netdevsim/netdevsim.h           |  11 +-
 tools/testing/selftests/bpf/test_offload.py | 223 ++++++++++++++++++----------
 tools/testing/selftests/net/rtnetlink.sh    |   9 +-
 8 files changed, 238 insertions(+), 197 deletions(-)

(limited to 'tools')

diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 89980b223adc..2b74425822ab 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -612,6 +612,7 @@ void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev)
 
 int nsim_bpf_init(struct netdevsim *ns)
 {
+	struct dentry *ddir = ns->nsim_dev_port->ddir;
 	int err;
 
 	err = bpf_offload_dev_netdev_register(ns->nsim_dev->bpf_dev,
@@ -619,23 +620,23 @@ int nsim_bpf_init(struct netdevsim *ns)
 	if (err)
 		return err;
 
-	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
+	debugfs_create_u32("bpf_offloaded_id", 0400, ddir,
 			   &ns->bpf_offloaded_id);
 
 	ns->bpf_tc_accept = true;
-	debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir,
+	debugfs_create_bool("bpf_tc_accept", 0600, ddir,
 			    &ns->bpf_tc_accept);
-	debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ns->ddir,
+	debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ddir,
 			    &ns->bpf_tc_non_bound_accept);
 	ns->bpf_xdpdrv_accept = true;
-	debugfs_create_bool("bpf_xdpdrv_accept", 0600, ns->ddir,
+	debugfs_create_bool("bpf_xdpdrv_accept", 0600, ddir,
 			    &ns->bpf_xdpdrv_accept);
 	ns->bpf_xdpoffload_accept = true;
-	debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir,
+	debugfs_create_bool("bpf_xdpoffload_accept", 0600, ddir,
 			    &ns->bpf_xdpoffload_accept);
 
 	ns->bpf_map_accept = true;
-	debugfs_create_bool("bpf_map_accept", 0600, ns->ddir,
+	debugfs_create_bool("bpf_map_accept", 0600, ddir,
 			    &ns->bpf_map_accept);
 
 	return 0;
diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index 549c399f29da..ae482347b67b 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -153,6 +153,9 @@ static struct device_type nsim_bus_dev_type = {
 	.release = nsim_bus_dev_release,
 };
 
+static struct nsim_bus_dev *
+nsim_bus_dev_new(unsigned int id, unsigned int port_count);
+
 static ssize_t
 new_device_store(struct bus_type *bus, const char *buf, size_t count)
 {
@@ -188,6 +191,8 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count)
 }
 static BUS_ATTR_WO(new_device);
 
+static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev);
+
 static ssize_t
 del_device_store(struct bus_type *bus, const char *buf, size_t count)
 {
@@ -261,7 +266,8 @@ static struct bus_type nsim_bus = {
 	.num_vf		= nsim_num_vf,
 };
 
-struct nsim_bus_dev *nsim_bus_dev_new(unsigned int id, unsigned int port_count)
+static struct nsim_bus_dev *
+nsim_bus_dev_new(unsigned int id, unsigned int port_count)
 {
 	struct nsim_bus_dev *nsim_bus_dev;
 	int err;
@@ -270,8 +276,7 @@ struct nsim_bus_dev *nsim_bus_dev_new(unsigned int id, unsigned int port_count)
 	if (!nsim_bus_dev)
 		return ERR_PTR(-ENOMEM);
 
-	err = ida_alloc_range(&nsim_bus_dev_ids,
-			      id == ~0 ? 0 : id, id, GFP_KERNEL);
+	err = ida_alloc_range(&nsim_bus_dev_ids, id, id, GFP_KERNEL);
 	if (err < 0)
 		goto err_nsim_bus_dev_free;
 	nsim_bus_dev->dev.id = err;
@@ -291,19 +296,7 @@ err_nsim_bus_dev_free:
 	return ERR_PTR(err);
 }
 
-struct nsim_bus_dev *nsim_bus_dev_new_with_ns(struct netdevsim *ns)
-{
-	struct nsim_bus_dev *nsim_bus_dev;
-
-	dev_hold(ns->netdev);
-	rtnl_unlock();
-	nsim_bus_dev = nsim_bus_dev_new(~0, 0);
-	rtnl_lock();
-	dev_put(ns->netdev);
-	return nsim_bus_dev;
-}
-
-void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev)
+static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev)
 {
 	device_unregister(&nsim_bus_dev->dev);
 	ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 2fa1b2061370..b509b941d5ca 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -278,7 +278,7 @@ err_devlink_free:
 	return ERR_PTR(err);
 }
 
-void nsim_dev_destroy(struct nsim_dev *nsim_dev)
+static void nsim_dev_destroy(struct nsim_dev *nsim_dev)
 {
 	struct devlink *devlink = priv_to_devlink(nsim_dev);
 
@@ -317,10 +317,19 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev,
 	if (err)
 		goto err_dl_port_unregister;
 
+	nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port);
+	if (IS_ERR(nsim_dev_port->ns)) {
+		err = PTR_ERR(nsim_dev_port->ns);
+		goto err_port_debugfs_exit;
+	}
+
+	devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev);
 	list_add(&nsim_dev_port->list, &nsim_dev->port_list);
 
 	return 0;
 
+err_port_debugfs_exit:
+	nsim_dev_port_debugfs_exit(nsim_dev_port);
 err_dl_port_unregister:
 	devlink_port_unregister(devlink_port);
 err_port_free:
@@ -333,6 +342,8 @@ static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port)
 	struct devlink_port *devlink_port = &nsim_dev_port->devlink_port;
 
 	list_del(&nsim_dev_port->list);
+	devlink_port_type_clear(devlink_port);
+	nsim_destroy(nsim_dev_port->ns);
 	nsim_dev_port_debugfs_exit(nsim_dev_port);
 	devlink_port_unregister(devlink_port);
 	kfree(nsim_dev_port);
diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c
index 76e11d889bb6..e27fc1a4516d 100644
--- a/drivers/net/netdevsim/ipsec.c
+++ b/drivers/net/netdevsim/ipsec.c
@@ -283,7 +283,8 @@ void nsim_ipsec_init(struct netdevsim *ns)
 	ns->netdev->features |= NSIM_ESP_FEATURES;
 	ns->netdev->hw_enc_features |= NSIM_ESP_FEATURES;
 
-	ns->ipsec.pfile = debugfs_create_file("ipsec", 0400, ns->ddir, ns,
+	ns->ipsec.pfile = debugfs_create_file("ipsec", 0400,
+					      ns->nsim_dev_port->ddir, ns,
 					      &ipsec_dbg_fops);
 }
 
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 99169fe521f2..040c390d0c01 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -25,59 +25,6 @@
 
 #include "netdevsim.h"
 
-static int nsim_get_port_parent_id(struct net_device *dev,
-				   struct netdev_phys_item_id *ppid)
-{
-	struct netdevsim *ns = netdev_priv(dev);
-
-	memcpy(ppid, &ns->nsim_dev->switch_id, sizeof(*ppid));
-	return 0;
-}
-
-static int nsim_init(struct net_device *dev)
-{
-	struct netdevsim *ns = netdev_priv(dev);
-	char dev_link_name[32];
-	int err;
-
-	ns->ddir = debugfs_create_dir("0", ns->nsim_dev->ports_ddir);
-	if (IS_ERR_OR_NULL(ns->ddir))
-		return -ENOMEM;
-
-	sprintf(dev_link_name, "../../../" DRV_NAME "%u",
-		ns->nsim_dev->nsim_bus_dev->dev.id);
-	debugfs_create_symlink("dev", ns->ddir, dev_link_name);
-
-	err = nsim_bpf_init(ns);
-	if (err)
-		goto err_debugfs_destroy;
-
-	nsim_ipsec_init(ns);
-
-	return 0;
-
-err_debugfs_destroy:
-	debugfs_remove_recursive(ns->ddir);
-	return err;
-}
-
-static void nsim_uninit(struct net_device *dev)
-{
-	struct netdevsim *ns = netdev_priv(dev);
-
-	nsim_ipsec_teardown(ns);
-	debugfs_remove_recursive(ns->ddir);
-	nsim_bpf_uninit(ns);
-}
-
-static void nsim_free(struct net_device *dev)
-{
-	struct netdevsim *ns = netdev_priv(dev);
-
-	nsim_bus_dev_del(ns->nsim_bus_dev);
-	/* netdev and vf state will be freed out of device_release() */
-}
-
 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct netdevsim *ns = netdev_priv(dev);
@@ -299,8 +246,6 @@ nsim_set_features(struct net_device *dev, netdev_features_t features)
 }
 
 static const struct net_device_ops nsim_netdev_ops = {
-	.ndo_init		= nsim_init,
-	.ndo_uninit		= nsim_uninit,
 	.ndo_start_xmit		= nsim_start_xmit,
 	.ndo_set_rx_mode	= nsim_set_rx_mode,
 	.ndo_set_mac_address	= eth_mac_addr,
@@ -318,7 +263,6 @@ static const struct net_device_ops nsim_netdev_ops = {
 	.ndo_setup_tc		= nsim_setup_tc,
 	.ndo_set_features	= nsim_set_features,
 	.ndo_bpf		= nsim_bpf,
-	.ndo_get_port_parent_id	= nsim_get_port_parent_id,
 };
 
 static void nsim_setup(struct net_device *dev)
@@ -326,10 +270,6 @@ static void nsim_setup(struct net_device *dev)
 	ether_setup(dev);
 	eth_hw_addr_random(dev);
 
-	dev->netdev_ops = &nsim_netdev_ops;
-	dev->needs_free_netdev = true;
-	dev->priv_destructor = nsim_free;
-
 	dev->tx_queue_len = 0;
 	dev->flags |= IFF_NOARP;
 	dev->flags &= ~IFF_MULTICAST;
@@ -344,50 +284,70 @@ static void nsim_setup(struct net_device *dev)
 	dev->max_mtu = ETH_MAX_MTU;
 }
 
-static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
-			 struct netlink_ext_ack *extack)
-{
-	if (tb[IFLA_ADDRESS]) {
-		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
-			return -EINVAL;
-		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
-			return -EADDRNOTAVAIL;
-	}
-	return 0;
-}
-
-static int nsim_newlink(struct net *src_net, struct net_device *dev,
-			struct nlattr *tb[], struct nlattr *data[],
-			struct netlink_ext_ack *extack)
+struct netdevsim *
+nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
 {
-	struct netdevsim *ns = netdev_priv(dev);
+	struct net_device *dev;
+	struct netdevsim *ns;
 	int err;
 
-	ns->netdev = dev;
-	ns->nsim_bus_dev = nsim_bus_dev_new_with_ns(ns);
-	if (IS_ERR(ns->nsim_bus_dev))
-		return PTR_ERR(ns->nsim_bus_dev);
+	dev = alloc_netdev(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
 
+	ns = netdev_priv(dev);
+	ns->netdev = dev;
+	ns->nsim_dev = nsim_dev;
+	ns->nsim_dev_port = nsim_dev_port;
+	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
+	dev->netdev_ops = &nsim_netdev_ops;
 
-	ns->nsim_dev = dev_get_drvdata(&ns->nsim_bus_dev->dev);
+	rtnl_lock();
+	err = nsim_bpf_init(ns);
+	if (err)
+		goto err_free_netdev;
+
+	nsim_ipsec_init(ns);
 
 	err = register_netdevice(dev);
 	if (err)
-		goto err_dev_del;
-	return 0;
+		goto err_ipsec_teardown;
+	rtnl_unlock();
 
-err_dev_del:
-	nsim_bus_dev_del(ns->nsim_bus_dev);
-	return err;
+	return ns;
+
+err_ipsec_teardown:
+	nsim_ipsec_teardown(ns);
+	nsim_bpf_uninit(ns);
+	rtnl_unlock();
+err_free_netdev:
+	free_netdev(dev);
+	return ERR_PTR(err);
+}
+
+void nsim_destroy(struct netdevsim *ns)
+{
+	struct net_device *dev = ns->netdev;
+
+	rtnl_lock();
+	unregister_netdevice(dev);
+	nsim_ipsec_teardown(ns);
+	nsim_bpf_uninit(ns);
+	rtnl_unlock();
+	free_netdev(dev);
+}
+
+static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
+			 struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG_MOD(extack, "Please use: echo \"[ID] [PORT_COUNT]\" > /sys/bus/netdevsim/new_device");
+	return -EOPNOTSUPP;
 }
 
 static struct rtnl_link_ops nsim_link_ops __read_mostly = {
 	.kind		= DRV_NAME,
-	.priv_size	= sizeof(struct netdevsim),
-	.setup		= nsim_setup,
 	.validate	= nsim_validate,
-	.newlink	= nsim_newlink,
 };
 
 static int __init nsim_module_init(void)
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 6b60589cab91..3f398797c2bc 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -51,6 +51,7 @@ struct nsim_ipsec {
 struct netdevsim {
 	struct net_device *netdev;
 	struct nsim_dev *nsim_dev;
+	struct nsim_dev_port *nsim_dev_port;
 
 	u64 tx_packets;
 	u64 tx_bytes;
@@ -58,8 +59,6 @@ struct netdevsim {
 
 	struct nsim_bus_dev *nsim_bus_dev;
 
-	struct dentry *ddir;
-
 	struct bpf_prog	*bpf_offloaded;
 	u32 bpf_offloaded_id;
 
@@ -75,6 +74,10 @@ struct netdevsim {
 	struct nsim_ipsec ipsec;
 };
 
+struct netdevsim *
+nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port);
+void nsim_destroy(struct netdevsim *ns);
+
 #ifdef CONFIG_BPF_SYSCALL
 int nsim_bpf_dev_init(struct nsim_dev *nsim_dev);
 void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev);
@@ -136,6 +139,7 @@ struct nsim_dev_port {
 	struct devlink_port devlink_port;
 	unsigned int port_index;
 	struct dentry *ddir;
+	struct netdevsim *ns;
 };
 
 struct nsim_dev {
@@ -212,8 +216,5 @@ struct nsim_bus_dev {
 	struct nsim_vf_config *vfconfigs;
 };
 
-struct nsim_bus_dev *nsim_bus_dev_new(unsigned int id, unsigned int port_count);
-struct nsim_bus_dev *nsim_bus_dev_new_with_ns(struct netdevsim *ns);
-void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev);
 int nsim_bus_init(void);
 void nsim_bus_exit(void);
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 5f2e4f9e70e4..425f9ed27c3b 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python3
 
 # Copyright (C) 2017 Netronome Systems, Inc.
+# Copyright (c) 2019 Mellanox Technologies. All rights reserved
 #
 # This software is licensed under the GNU General License Version 2,
 # June 1991 as shown in the file COPYING in the top-level directory of this
@@ -15,10 +16,12 @@
 
 from datetime import datetime
 import argparse
+import errno
 import json
 import os
 import pprint
 import random
+import re
 import string
 import struct
 import subprocess
@@ -323,42 +326,112 @@ class DebugfsDir:
 
         return dfs
 
-class NetdevSim:
+class NetdevSimDev:
     """
-    Class for netdevsim netdevice and its attributes.
+    Class for netdevsim bus device and its attributes.
     """
 
-    def __init__(self, link=None):
-        self.link = link
+    def __init__(self, port_count=1):
+        addr = 0
+        while True:
+            try:
+                with open("/sys/bus/netdevsim/new_device", "w") as f:
+                    f.write("%u %u" % (addr, port_count))
+            except OSError as e:
+                if e.errno == errno.ENOSPC:
+                    addr += 1
+                    continue
+                raise e
+            break
+        self.addr = addr
+
+        # As probe of netdevsim device might happen from a workqueue,
+        # so wait here until all netdevs appear.
+        self.wait_for_netdevs(port_count)
+
+        ret, out = cmd("udevadm settle", fail=False)
+        if ret:
+            raise Exception("udevadm settle failed")
+        ifnames = self.get_ifnames()
 
-        self.dev = self._netdevsim_create()
         devs.append(self)
+        self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr
+
+        self.nsims = []
+        for port_index in range(port_count):
+            self.nsims.append(NetdevSim(self, port_index, ifnames[port_index]))
+
+    def get_ifnames(self):
+        ifnames = []
+        listdir = os.listdir("/sys/bus/netdevsim/devices/netdevsim%u/net/" % self.addr)
+        for ifname in listdir:
+            ifnames.append(ifname)
+        ifnames.sort()
+        return ifnames
+
+    def wait_for_netdevs(self, port_count):
+        timeout = 5
+        timeout_start = time.time()
+
+        while True:
+            try:
+                ifnames = self.get_ifnames()
+            except FileNotFoundError as e:
+                ifnames = []
+            if len(ifnames) == port_count:
+                break
+            if time.time() < timeout_start + timeout:
+                continue
+            raise Exception("netdevices did not appear within timeout")
 
-        self.ns = ""
+    def dfs_num_bound_progs(self):
+        path = os.path.join(self.dfs_dir, "bpf_bound_progs")
+        _, progs = cmd('ls %s' % (path))
+        return len(progs.split())
 
-        self.dfs_dir = '/sys/kernel/debug/netdevsim/netdevsim0/ports/0/'
-        self.dev_dir = self.dfs_dir + '/dev/'
-        self.dfs_refresh()
+    def dfs_get_bound_progs(self, expected):
+        progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs"))
+        if expected is not None:
+            if len(progs) != expected:
+                fail(True, "%d BPF programs bound, expected %d" %
+                     (len(progs), expected))
+        return progs
 
-    def __getitem__(self, key):
-        return self.dev[key]
+    def remove(self):
+        with open("/sys/bus/netdevsim/del_device", "w") as f:
+            f.write("%u" % self.addr)
+        devs.remove(self)
+
+    def remove_nsim(self, nsim):
+        self.nsims.remove(nsim)
+        with open("/sys/bus/netdevsim/devices/netdevsim%u/del_port" % self.addr ,"w") as f:
+            f.write("%u" % nsim.port_index)
+
+class NetdevSim:
+    """
+    Class for netdevsim netdevice and its attributes.
+    """
 
-    def _netdevsim_create(self):
-        link = "" if self.link is None else "link " + self.link.dev['ifname']
-        _, old  = ip("link show")
-        ip("link add sim%d {link} type netdevsim".format(link=link))
-        _, new  = ip("link show")
+    def __init__(self, nsimdev, port_index, ifname):
+        # In case udev renamed the netdev to according to new schema,
+        # check if the name matches the port_index.
+        nsimnamere = re.compile("eni\d+np(\d+)")
+        match = nsimnamere.match(ifname)
+        if match and int(match.groups()[0]) != port_index + 1:
+            raise Exception("netdevice name mismatches the expected one")
 
-        for dev in new:
-            f = filter(lambda x: x["ifname"] == dev["ifname"], old)
-            if len(list(f)) == 0:
-                return dev
+        self.nsimdev = nsimdev
+        self.port_index = port_index
+        self.ns = ""
+        self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
+        self.dfs_refresh()
+        _, [self.dev] = ip("link show dev %s" % ifname)
 
-        raise Exception("failed to create netdevsim device")
+    def __getitem__(self, key):
+        return self.dev[key]
 
     def remove(self):
-        devs.remove(self)
-        ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns)
+        self.nsimdev.remove_nsim(self)
 
     def dfs_refresh(self):
         self.dfs = DebugfsDir(self.dfs_dir)
@@ -369,22 +442,9 @@ class NetdevSim:
         _, data = cmd('cat %s' % (path))
         return data.strip()
 
-    def dfs_num_bound_progs(self):
-        path = os.path.join(self.dev_dir, "bpf_bound_progs")
-        _, progs = cmd('ls %s' % (path))
-        return len(progs.split())
-
-    def dfs_get_bound_progs(self, expected):
-        progs = DebugfsDir(os.path.join(self.dev_dir, "bpf_bound_progs"))
-        if expected is not None:
-            if len(progs) != expected:
-                fail(True, "%d BPF programs bound, expected %d" %
-                     (len(progs), expected))
-        return progs
-
     def wait_for_flush(self, bound=0, total=0, n_retry=20):
         for i in range(n_retry):
-            nbound = self.dfs_num_bound_progs()
+            nbound = self.nsimdev.dfs_num_bound_progs()
             nprogs = len(bpftool_prog_list())
             if nbound == bound and nprogs == total:
                 return
@@ -614,7 +674,7 @@ def test_spurios_extack(sim, obj, skip_hw, needle):
                             include_stderr=True)
     check_no_extack(res, needle)
 
-def test_multi_prog(sim, obj, modename, modeid):
+def test_multi_prog(simdev, sim, obj, modename, modeid):
     start_test("Test multi-attachment XDP - %s + offload..." %
                (modename or "default", ))
     sim.set_xdp(obj, "offload")
@@ -670,11 +730,12 @@ def test_multi_prog(sim, obj, modename, modeid):
     check_multi_basic(two_xdps)
 
     start_test("Test multi-attachment XDP - device remove...")
-    sim.remove()
+    simdev.remove()
 
-    sim = NetdevSim()
+    simdev = NetdevSimDev()
+    sim, = simdev.nsims
     sim.set_ethtool_tc_offloads(True)
-    return sim
+    return [simdev, sim]
 
 # Parse command line
 parser = argparse.ArgumentParser()
@@ -731,12 +792,14 @@ try:
     bytecode = bpf_bytecode("1,6 0 0 4294967295,")
 
     start_test("Test destruction of generic XDP...")
-    sim = NetdevSim()
+    simdev = NetdevSimDev()
+    sim, = simdev.nsims
     sim.set_xdp(obj, "generic")
-    sim.remove()
+    simdev.remove()
     bpftool_prog_list_wait(expected=0)
 
-    sim = NetdevSim()
+    simdev = NetdevSimDev()
+    sim, = simdev.nsims
     sim.tc_add_ingress()
 
     start_test("Test TC non-offloaded...")
@@ -746,7 +809,7 @@ try:
     start_test("Test TC non-offloaded isn't getting bound...")
     ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
     fail(ret != 0, "Software TC filter did not load")
-    sim.dfs_get_bound_progs(expected=0)
+    simdev.dfs_get_bound_progs(expected=0)
 
     sim.tc_flush_filters()
 
@@ -763,7 +826,7 @@ try:
     start_test("Test TC offload by default...")
     ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
     fail(ret != 0, "Software TC filter did not load")
-    sim.dfs_get_bound_progs(expected=0)
+    simdev.dfs_get_bound_progs(expected=0)
     ingress = sim.tc_show_ingress(expected=1)
     fltr = ingress[0]
     fail(not fltr["in_hw"], "Filter not offloaded by default")
@@ -773,7 +836,7 @@ try:
     start_test("Test TC cBPF bytcode tries offload by default...")
     ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False)
     fail(ret != 0, "Software TC filter did not load")
-    sim.dfs_get_bound_progs(expected=0)
+    simdev.dfs_get_bound_progs(expected=0)
     ingress = sim.tc_show_ingress(expected=1)
     fltr = ingress[0]
     fail(not fltr["in_hw"], "Bytecode not offloaded by default")
@@ -841,7 +904,7 @@ try:
     check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
 
     start_test("Test TC offload basics...")
-    dfs = sim.dfs_get_bound_progs(expected=1)
+    dfs = simdev.dfs_get_bound_progs(expected=1)
     progs = bpftool_prog_list(expected=1)
     ingress = sim.tc_show_ingress(expected=1)
 
@@ -876,18 +939,20 @@ try:
 
     start_test("Test destroying device gets rid of TC filters...")
     sim.cls_bpf_add_filter(obj, skip_sw=True)
-    sim.remove()
+    simdev.remove()
     bpftool_prog_list_wait(expected=0)
 
-    sim = NetdevSim()
+    simdev = NetdevSimDev()
+    sim, = simdev.nsims
     sim.set_ethtool_tc_offloads(True)
 
     start_test("Test destroying device gets rid of XDP...")
     sim.set_xdp(obj, "offload")
-    sim.remove()
+    simdev.remove()
     bpftool_prog_list_wait(expected=0)
 
-    sim = NetdevSim()
+    simdev = NetdevSimDev()
+    sim, = simdev.nsims
     sim.set_ethtool_tc_offloads(True)
 
     start_test("Test XDP prog reporting...")
@@ -973,7 +1038,7 @@ try:
     check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
 
     start_test("Test XDP offload is device bound...")
-    dfs = sim.dfs_get_bound_progs(expected=1)
+    dfs = simdev.dfs_get_bound_progs(expected=1)
     dprog = dfs[0]
 
     fail(prog["id"] != link_xdp["id"], "Program IDs don't match")
@@ -992,7 +1057,8 @@ try:
     bpftool_prog_list_wait(expected=0)
 
     start_test("Test attempt to use a program for a wrong device...")
-    sim2 = NetdevSim()
+    simdev2 = NetdevSimDev()
+    sim2, = simdev2.nsims
     sim2.set_xdp(obj, "offload")
     pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
 
@@ -1000,7 +1066,7 @@ try:
                               fail=False, include_stderr=True)
     fail(ret == 0, "Pinned program loaded for a different device accepted")
     check_extack_nsim(err, "program bound to different dev.", args)
-    sim2.remove()
+    simdev2.remove()
     ret, _, err = sim.set_xdp(pinned, "offload",
                               fail=False, include_stderr=True)
     fail(ret == 0, "Pinned program loaded for a removed device accepted")
@@ -1008,9 +1074,9 @@ try:
     rm(pin_file)
     bpftool_prog_list_wait(expected=0)
 
-    sim = test_multi_prog(sim, obj, "", 1)
-    sim = test_multi_prog(sim, obj, "drv", 1)
-    sim = test_multi_prog(sim, obj, "generic", 2)
+    simdev, sim = test_multi_prog(simdev, sim, obj, "", 1)
+    simdev, sim = test_multi_prog(simdev, sim, obj, "drv", 1)
+    simdev, sim = test_multi_prog(simdev, sim, obj, "generic", 2)
 
     start_test("Test mixing of TC and XDP...")
     sim.tc_add_ingress()
@@ -1063,9 +1129,9 @@ try:
                (sim['ifname'], obj)
     tc_proc = cmd(cmd_line, background=True, fail=False)
     # Wait for the verifier to start
-    while sim.dfs_num_bound_progs() <= 2:
+    while simdev.dfs_num_bound_progs() <= 2:
         pass
-    sim.remove()
+    simdev.remove()
     end = time.time()
     ret, _ = cmd_result(tc_proc, fail=False)
     time_diff = end - start
@@ -1080,7 +1146,8 @@ try:
     clean_up()
     bpftool_prog_list_wait(expected=0)
 
-    sim = NetdevSim()
+    simdev = NetdevSimDev()
+    sim, = simdev.nsims
     map_obj = bpf_obj("sample_map_ret0.o")
     start_test("Test loading program with maps...")
     sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
@@ -1102,7 +1169,7 @@ try:
 
     prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog")
     map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2)
-    sim.remove()
+    simdev.remove()
 
     start_test("Test bpftool bound info reporting (removed dev)...")
     check_dev_info_removed(prog_file=prog_file, map_file=map_file)
@@ -1111,7 +1178,8 @@ try:
     clean_up()
     bpftool_prog_list_wait(expected=0)
 
-    sim = NetdevSim()
+    simdev = NetdevSimDev()
+    sim, = simdev.nsims
 
     start_test("Test map update (no flags)...")
     sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
@@ -1192,27 +1260,29 @@ try:
     start_test("Test map remove...")
     sim.unset_xdp("offload")
     bpftool_map_list_wait(expected=0)
-    sim.remove()
+    simdev.remove()
 
-    sim = NetdevSim()
+    simdev = NetdevSimDev()
+    sim, = simdev.nsims
     sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
-    sim.remove()
+    simdev.remove()
     bpftool_map_list_wait(expected=0)
 
     start_test("Test map creation fail path...")
-    sim = NetdevSim()
+    simdev = NetdevSimDev()
+    sim, = simdev.nsims
     sim.dfs["bpf_map_accept"] = "N"
     ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
     fail(ret == 0,
          "netdevsim didn't refuse to create a map with offload disabled")
 
-    sim.remove()
+    simdev.remove()
 
     start_test("Test multi-dev ASIC program reuse...")
-    simA = NetdevSim()
-    simB1 = NetdevSim()
-    simB2 = NetdevSim(link=simB1)
-    simB3 = NetdevSim(link=simB1)
+    simdevA = NetdevSimDev()
+    simA, = simdevA.nsims
+    simdevB = NetdevSimDev(3)
+    simB1, simB2, simB3 = simdevB.nsims
     sims = (simA, simB1, simB2, simB3)
     simB = (simB1, simB2, simB3)
 
@@ -1224,13 +1294,13 @@ try:
     progB = bpf_pinned("/sys/fs/bpf/nsimB")
 
     simA.set_xdp(progA, "offload", JSON=False)
-    for d in simB:
+    for d in simdevB.nsims:
         d.set_xdp(progB, "offload", JSON=False)
 
     start_test("Test multi-dev ASIC cross-dev replace...")
     ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False)
     fail(ret == 0, "cross-ASIC program allowed")
-    for d in simB:
+    for d in simdevB.nsims:
         ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False)
         fail(ret == 0, "cross-ASIC program allowed")
 
@@ -1242,7 +1312,7 @@ try:
                                fail=False, include_stderr=True)
     fail(ret == 0, "cross-ASIC program allowed")
     check_extack_nsim(err, "program bound to different dev.", args)
-    for d in simB:
+    for d in simdevB.nsims:
         ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
                                 fail=False, include_stderr=True)
         fail(ret == 0, "cross-ASIC program allowed")
@@ -1279,7 +1349,7 @@ try:
     start_test("Test multi-dev ASIC cross-dev destruction...")
     bpftool_prog_list_wait(expected=2)
 
-    simA.remove()
+    simdevA.remove()
     bpftool_prog_list_wait(expected=1)
 
     ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
@@ -1297,6 +1367,7 @@ try:
     fail(ifnameB != simB3['ifname'], "program not bound to remaining device")
 
     simB3.remove()
+    simdevB.remove()
     bpftool_prog_list_wait(expected=0)
 
     start_test("Test multi-dev ASIC cross-dev destruction - orphaned...")
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 311f82bcbe8d..b25c9fe019d2 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -696,9 +696,9 @@ kci_test_ipsec_offload()
 	algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
 	srcip=192.168.123.3
 	dstip=192.168.123.4
-	dev=simx1
 	sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
 	sysfsf=$sysfsd/ipsec
+	sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
 
 	# setup netdevsim since dummydev doesn't have offload support
 	modprobe netdevsim
@@ -708,7 +708,11 @@ kci_test_ipsec_offload()
 		return 1
 	fi
 
-	ip link add $dev type netdevsim
+	echo "0" > /sys/bus/netdevsim/new_device
+	while [ ! -d $sysfsnet ] ; do :; done
+	udevadm settle
+	dev=`ls $sysfsnet`
+
 	ip addr add $srcip dev $dev
 	ip link set $dev up
 	if [ ! -d $sysfsd ] ; then
@@ -781,7 +785,6 @@ EOF
 	fi
 
 	# clean up any leftovers
-	ip link del $dev
 	rmmod netdevsim
 
 	if [ $ret -ne 0 ]; then
-- 
cgit v1.2.3


From 4635b0ae4d26f87cf68dbab6740955dd1ad67cf4 Mon Sep 17 00:00:00 2001
From: Matt Mullins <mmullins@fb.com>
Date: Fri, 26 Apr 2019 11:49:50 -0700
Subject: tools: sync bpf.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, and fixes up the

	error: enumeration value ‘BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE’ not handled in switch [-Werror=switch-enum]

build errors it would otherwise cause in libbpf.

Signed-off-by: Matt Mullins <mmullins@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 10 +++++++++-
 tools/lib/bpf/libbpf.c         |  1 +
 tools/lib/bpf/libbpf_probes.c  |  1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 704bb69514a2..f7fa7a34a62d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -168,6 +168,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_REUSEPORT,
 	BPF_PROG_TYPE_FLOW_DISSECTOR,
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
+	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 };
 
 enum bpf_attach_type {
@@ -1737,12 +1738,19 @@ union bpf_attr {
  * 		error if an eBPF program tries to set a callback that is not
  * 		supported in the current kernel.
  *
- * 		The supported callback values that *argval* can combine are:
+ * 		*argval* is a flag array which can combine these flags:
  *
  * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
  * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
  * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
  *
+ * 		Therefore, this function can be used to clear a callback flag by
+ * 		setting the appropriate bit to zero. e.g. to disable the RTO
+ * 		callback:
+ *
+ * 		**bpf_sock_ops_cb_flags_set(bpf_sock,**
+ * 			**bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)**
+ *
  * 		Here are some examples of where one could call such eBPF
  * 		program:
  *
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 9052061ba7fc..11a65db4b93f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2136,6 +2136,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_UNSPEC:
 	case BPF_PROG_TYPE_TRACEPOINT:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 	case BPF_PROG_TYPE_PERF_EVENT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 		return false;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 0f25541632e3..80ee922f290c 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -93,6 +93,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_CGROUP_DEVICE:
 	case BPF_PROG_TYPE_SK_MSG:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
 	case BPF_PROG_TYPE_LIRC_MODE2:
 	case BPF_PROG_TYPE_SK_REUSEPORT:
-- 
cgit v1.2.3


From e950e843367d7990b9d7ea964e3c33876d477c4b Mon Sep 17 00:00:00 2001
From: Matt Mullins <mmullins@fb.com>
Date: Fri, 26 Apr 2019 11:49:51 -0700
Subject: selftests: bpf: test writable buffers in raw tps

This tests that:
  * a BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE cannot be attached if it
    uses either:
    * a variable offset to the tracepoint buffer, or
    * an offset beyond the size of the tracepoint buffer
  * a tracer can modify the buffer provided when attached to a writable
    tracepoint in bpf_prog_test_run

Signed-off-by: Matt Mullins <mmullins@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/trace/events/bpf_test_run.h                | 50 ++++++++++++++
 net/bpf/test_run.c                                 |  4 ++
 .../raw_tp_writable_reject_nbd_invalid.c           | 42 ++++++++++++
 .../bpf/prog_tests/raw_tp_writable_test_run.c      | 80 ++++++++++++++++++++++
 .../selftests/bpf/verifier/raw_tp_writable.c       | 34 +++++++++
 5 files changed, 210 insertions(+)
 create mode 100644 include/trace/events/bpf_test_run.h
 create mode 100644 tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
 create mode 100644 tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
 create mode 100644 tools/testing/selftests/bpf/verifier/raw_tp_writable.c

(limited to 'tools')

diff --git a/include/trace/events/bpf_test_run.h b/include/trace/events/bpf_test_run.h
new file mode 100644
index 000000000000..265447e3f71a
--- /dev/null
+++ b/include/trace/events/bpf_test_run.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bpf_test_run
+
+#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BPF_TEST_RUN_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(bpf_test_finish,
+
+	TP_PROTO(int *err),
+
+	TP_ARGS(err),
+
+	TP_STRUCT__entry(
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->err = *err;
+	),
+
+	TP_printk("bpf_test_finish with err=%d", __entry->err)
+);
+
+#ifdef DEFINE_EVENT_WRITABLE
+#undef BPF_TEST_RUN_DEFINE_EVENT
+#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size)	\
+	DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto),		\
+			      PARAMS(args), size)
+#else
+#undef BPF_TEST_RUN_DEFINE_EVENT
+#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size)	\
+	DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
+#endif
+
+BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish,
+
+	TP_PROTO(int *err),
+
+	TP_ARGS(err),
+
+	sizeof(int)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 8606e5aef0b6..6c4694ae4241 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -13,6 +13,9 @@
 #include <net/sock.h>
 #include <net/tcp.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/bpf_test_run.h>
+
 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 			u32 *retval, u32 *time)
 {
@@ -100,6 +103,7 @@ static int bpf_test_finish(const union bpf_attr *kattr,
 	if (err != -ENOSPC)
 		err = 0;
 out:
+	trace_bpf_test_finish(&err);
 	return err;
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
new file mode 100644
index 000000000000..9807336a3016
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <linux/nbd.h>
+
+void test_raw_tp_writable_reject_nbd_invalid(void)
+{
+	__u32 duration = 0;
+	char error[4096];
+	int bpf_fd = -1, tp_fd = -1;
+
+	const struct bpf_insn program[] = {
+		/* r6 is our tp buffer */
+		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+		/* one byte beyond the end of the nbd_request struct */
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6,
+			    sizeof(struct nbd_request)),
+		BPF_EXIT_INSN(),
+	};
+
+	struct bpf_load_program_attr load_attr = {
+		.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+		.license = "GPL v2",
+		.insns = program,
+		.insns_cnt = sizeof(program) / sizeof(struct bpf_insn),
+		.log_level = 2,
+	};
+
+	bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+	if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load",
+		  "failed: %d errno %d\n", bpf_fd, errno))
+		return;
+
+	tp_fd = bpf_raw_tracepoint_open("nbd_send_request", bpf_fd);
+	if (CHECK(tp_fd >= 0, "bpf_raw_tracepoint_writable open",
+		  "erroneously succeeded\n"))
+		goto out_bpffd;
+
+	close(tp_fd);
+out_bpffd:
+	close(bpf_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
new file mode 100644
index 000000000000..5c45424cac5f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <linux/nbd.h>
+
+void test_raw_tp_writable_test_run(void)
+{
+	__u32 duration = 0;
+	char error[4096];
+
+	const struct bpf_insn trace_program[] = {
+		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+		BPF_MOV64_IMM(BPF_REG_0, 42),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+
+	struct bpf_load_program_attr load_attr = {
+		.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+		.license = "GPL v2",
+		.insns = trace_program,
+		.insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
+		.log_level = 2,
+	};
+
+	int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+	if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded",
+		  "failed: %d errno %d\n", bpf_fd, errno))
+		return;
+
+	const struct bpf_insn skb_program[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+
+	struct bpf_load_program_attr skb_load_attr = {
+		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+		.license = "GPL v2",
+		.insns = skb_program,
+		.insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
+	};
+
+	int filter_fd =
+		bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
+	if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
+		  filter_fd, errno))
+		goto out_bpffd;
+
+	int tp_fd = bpf_raw_tracepoint_open("bpf_test_finish", bpf_fd);
+	if (CHECK(tp_fd < 0, "bpf_raw_tracepoint_writable opened",
+		  "failed: %d errno %d\n", tp_fd, errno))
+		goto out_filterfd;
+
+	char test_skb[128] = {
+		0,
+	};
+
+	__u32 prog_ret;
+	int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
+				    0, &prog_ret, 0);
+	CHECK(err != 42, "test_run",
+	      "tracepoint did not modify return value\n");
+	CHECK(prog_ret != 0, "test_run_ret",
+	      "socket_filter did not return 0\n");
+
+	close(tp_fd);
+
+	err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0,
+				&prog_ret, 0);
+	CHECK(err != 0, "test_run_notrace",
+	      "test_run failed with %d errno %d\n", err, errno);
+	CHECK(prog_ret != 0, "test_run_ret_notrace",
+	      "socket_filter did not return 0\n");
+
+out_filterfd:
+	close(filter_fd);
+out_bpffd:
+	close(bpf_fd);
+}
diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
new file mode 100644
index 000000000000..95b5d70a1dc1
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
@@ -0,0 +1,34 @@
+{
+	"raw_tracepoint_writable: reject variable offset",
+	.insns = {
+		/* r6 is our tp buffer */
+		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+
+		BPF_LD_MAP_FD(BPF_REG_1, 0),
+		/* move the key (== 0) to r10-8 */
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+		BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+		/* lookup in the map */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_map_lookup_elem),
+
+		/* exit clean if null */
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+		BPF_EXIT_INSN(),
+
+		/* shift the buffer pointer to a variable location */
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+		BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0),
+		/* clobber whatever's there */
+		BPF_MOV64_IMM(BPF_REG_7, 4242),
+		BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0),
+
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 1, },
+	.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+	.errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)",
+},
-- 
cgit v1.2.3


From 948d930e3d531e81dc6a2c864bda25618dfe7ff0 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 26 Apr 2019 16:39:42 -0700
Subject: bpf: Sync bpf.h to tools

This patch sync the bpf.h to tools/.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 44 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f7fa7a34a62d..72336bac7573 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -133,6 +133,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
 	BPF_MAP_TYPE_QUEUE,
 	BPF_MAP_TYPE_STACK,
+	BPF_MAP_TYPE_SK_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -2630,6 +2631,42 @@ union bpf_attr {
  *		was provided.
  *
  *		**-ERANGE** if resulting value was out of range.
+ *
+ * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ *	Description
+ *		Get a bpf-local-storage from a sk.
+ *
+ *		Logically, it could be thought of getting the value from
+ *		a *map* with *sk* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem(map, &sk)** except this
+ *		helper enforces the key must be a **bpf_fullsock()**
+ *		and the map must be a BPF_MAP_TYPE_SK_STORAGE also.
+ *
+ *		Underneath, the value is stored locally at *sk* instead of
+ *		the map.  The *map* is used as the bpf-local-storage **type**.
+ *		The bpf-local-storage **type** (i.e. the *map*) is searched
+ *		against all bpf-local-storages residing at sk.
+ *
+ *		An optional *flags* (BPF_SK_STORAGE_GET_F_CREATE) can be
+ *		used such that a new bpf-local-storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with BPF_SK_STORAGE_GET_F_CREATE to specify
+ *		the initial value of a bpf-local-storage.  If *value* is
+ *		NULL, the new bpf-local-storage will be zero initialized.
+ *	Return
+ *		A bpf-local-storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf-local-storage.
+ *
+ * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ *	Description
+ *		Delete a bpf-local-storage from a sk.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf-local-storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2738,7 +2775,9 @@ union bpf_attr {
 	FN(sysctl_get_new_value),	\
 	FN(sysctl_set_new_value),	\
 	FN(strtol),			\
-	FN(strtoul),
+	FN(strtoul),			\
+	FN(sk_storage_get),		\
+	FN(sk_storage_delete),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2814,6 +2853,9 @@ enum bpf_func_id {
 /* BPF_FUNC_sysctl_get_name flags. */
 #define BPF_F_SYSCTL_BASE_NAME		(1ULL << 0)
 
+/* BPF_FUNC_sk_storage_get flags */
+#define BPF_SK_STORAGE_GET_F_CREATE	(1ULL << 0)
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
-- 
cgit v1.2.3


From a19f89f3667c950ad13c1560e4abd8aa8526b6b1 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 26 Apr 2019 16:39:44 -0700
Subject: bpf: Support BPF_MAP_TYPE_SK_STORAGE in bpf map probing

This patch supports probing for the new BPF_MAP_TYPE_SK_STORAGE.
BPF_MAP_TYPE_SK_STORAGE enforces BTF usage, so the new probe
requires to create and load a BTF also.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/map.c       |  1 +
 tools/lib/bpf/libbpf_probes.c | 74 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e6dcb3653a77..e951d45c0131 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -46,6 +46,7 @@ const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",
 	[BPF_MAP_TYPE_QUEUE]			= "queue",
 	[BPF_MAP_TYPE_STACK]			= "stack",
+	[BPF_MAP_TYPE_SK_STORAGE]		= "sk_storage",
 };
 
 const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 80ee922f290c..a2c64a9ce1a6 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -9,6 +9,7 @@
 #include <net/if.h>
 #include <sys/utsname.h>
 
+#include <linux/btf.h>
 #include <linux/filter.h>
 #include <linux/kernel.h>
 
@@ -131,11 +132,65 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
 	return errno != EINVAL && errno != EOPNOTSUPP;
 }
 
+static int load_btf(void)
+{
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+#define BTF_TYPE_ENC(name, info, size_or_type) \
+	(name), (info), (size_or_type)
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+	((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+	BTF_INT_ENC(encoding, bits_offset, bits)
+#define BTF_MEMBER_ENC(name, type, bits_offset) \
+	(name), (type), (bits_offset)
+
+	const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l";
+	/* struct bpf_spin_lock {
+	 *   int val;
+	 * };
+	 * struct val {
+	 *   int cnt;
+	 *   struct bpf_spin_lock l;
+	 * };
+	 */
+	__u32 btf_raw_types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+		/* struct bpf_spin_lock */                      /* [2] */
+		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),
+		BTF_MEMBER_ENC(15, 1, 0), /* int val; */
+		/* struct val */                                /* [3] */
+		BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */
+		BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */
+	};
+	struct btf_header btf_hdr = {
+		.magic = BTF_MAGIC,
+		.version = BTF_VERSION,
+		.hdr_len = sizeof(struct btf_header),
+		.type_len = sizeof(btf_raw_types),
+		.str_off = sizeof(btf_raw_types),
+		.str_len = sizeof(btf_str_sec),
+	};
+	__u8 raw_btf[sizeof(struct btf_header) + sizeof(btf_raw_types) +
+		     sizeof(btf_str_sec)];
+
+	memcpy(raw_btf, &btf_hdr, sizeof(btf_hdr));
+	memcpy(raw_btf + sizeof(btf_hdr), btf_raw_types, sizeof(btf_raw_types));
+	memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types),
+	       btf_str_sec, sizeof(btf_str_sec));
+
+	return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0);
+}
+
 bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 {
 	int key_size, value_size, max_entries, map_flags;
+	__u32 btf_key_type_id = 0, btf_value_type_id = 0;
 	struct bpf_create_map_attr attr = {};
-	int fd = -1, fd_inner;
+	int fd = -1, btf_fd = -1, fd_inner;
 
 	key_size	= sizeof(__u32);
 	value_size	= sizeof(__u32);
@@ -161,6 +216,16 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 	case BPF_MAP_TYPE_STACK:
 		key_size	= 0;
 		break;
+	case BPF_MAP_TYPE_SK_STORAGE:
+		btf_key_type_id = 1;
+		btf_value_type_id = 3;
+		value_size = 8;
+		max_entries = 0;
+		map_flags = BPF_F_NO_PREALLOC;
+		btf_fd = load_btf();
+		if (btf_fd < 0)
+			return false;
+		break;
 	case BPF_MAP_TYPE_UNSPEC:
 	case BPF_MAP_TYPE_HASH:
 	case BPF_MAP_TYPE_ARRAY:
@@ -206,11 +271,18 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 		attr.max_entries = max_entries;
 		attr.map_flags = map_flags;
 		attr.map_ifindex = ifindex;
+		if (btf_fd >= 0) {
+			attr.btf_fd = btf_fd;
+			attr.btf_key_type_id = btf_key_type_id;
+			attr.btf_value_type_id = btf_value_type_id;
+		}
 
 		fd = bpf_create_map_xattr(&attr);
 	}
 	if (fd >= 0)
 		close(fd);
+	if (btf_fd >= 0)
+		close(btf_fd);
 
 	return fd >= 0;
 }
-- 
cgit v1.2.3


From 3f4d4c74101d60b88d23289bd4f5f6126c7235fc Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 26 Apr 2019 16:39:46 -0700
Subject: bpf: Refactor BTF encoding macro to test_btf.h

Refactor common BTF encoding macros for other tests to use.
The libbpf may reuse some of them in the future  which requires
some more thoughts before publishing as a libbpf API.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_btf.c | 63 +------------------------------
 tools/testing/selftests/bpf/test_btf.h | 69 ++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 62 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/test_btf.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index f8eb7987b794..42c1ce988945 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -24,6 +24,7 @@
 
 #include "bpf_rlimit.h"
 #include "bpf_util.h"
+#include "test_btf.h"
 
 #define MAX_INSNS	512
 #define MAX_SUBPROGS	16
@@ -58,68 +59,6 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
 	return vfprintf(stderr, format, args);
 }
 
-#define BTF_INFO_ENC(kind, kind_flag, vlen)			\
-	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
-
-#define BTF_TYPE_ENC(name, info, size_or_type)	\
-	(name), (info), (size_or_type)
-
-#define BTF_INT_ENC(encoding, bits_offset, nr_bits)	\
-	((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
-#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz)	\
-	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz),	\
-	BTF_INT_ENC(encoding, bits_offset, bits)
-
-#define BTF_FWD_ENC(name, kind_flag) \
-	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FWD, kind_flag, 0), 0)
-
-#define BTF_ARRAY_ENC(type, index_type, nr_elems)	\
-	(type), (index_type), (nr_elems)
-#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \
-	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \
-	BTF_ARRAY_ENC(type, index_type, nr_elems)
-
-#define BTF_STRUCT_ENC(name, nr_elems, sz)	\
-	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, nr_elems), sz)
-
-#define BTF_UNION_ENC(name, nr_elems, sz)	\
-	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz)
-
-#define BTF_VAR_ENC(name, type, linkage)	\
-	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), type), (linkage)
-#define BTF_VAR_SECINFO_ENC(type, offset, size)	\
-	(type), (offset), (size)
-
-#define BTF_MEMBER_ENC(name, type, bits_offset)	\
-	(name), (type), (bits_offset)
-#define BTF_ENUM_ENC(name, val) (name), (val)
-#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \
-	((bitfield_size) << 24 | (bits_offset))
-
-#define BTF_TYPEDEF_ENC(name, type) \
-	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type)
-
-#define BTF_PTR_ENC(type) \
-	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
-
-#define BTF_CONST_ENC(type) \
-	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type)
-
-#define BTF_VOLATILE_ENC(type) \
-	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), type)
-
-#define BTF_RESTRICT_ENC(type) \
-	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), type)
-
-#define BTF_FUNC_PROTO_ENC(ret_type, nargs) \
-	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type)
-
-#define BTF_FUNC_PROTO_ARG_ENC(name, type) \
-	(name), (type)
-
-#define BTF_FUNC_ENC(name, func_proto) \
-	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto)
-
 #define BTF_END_RAW 0xdeadbeef
 #define NAME_TBD 0xdeadb33f
 
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
new file mode 100644
index 000000000000..2023725f1962
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef _TEST_BTF_H
+#define _TEST_BTF_H
+
+#define BTF_INFO_ENC(kind, kind_flag, vlen)			\
+	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+
+#define BTF_TYPE_ENC(name, info, size_or_type)	\
+	(name), (info), (size_or_type)
+
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits)	\
+	((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz)	\
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz),	\
+	BTF_INT_ENC(encoding, bits_offset, bits)
+
+#define BTF_FWD_ENC(name, kind_flag) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FWD, kind_flag, 0), 0)
+
+#define BTF_ARRAY_ENC(type, index_type, nr_elems)	\
+	(type), (index_type), (nr_elems)
+#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \
+	BTF_ARRAY_ENC(type, index_type, nr_elems)
+
+#define BTF_STRUCT_ENC(name, nr_elems, sz)	\
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, nr_elems), sz)
+
+#define BTF_UNION_ENC(name, nr_elems, sz)	\
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz)
+
+#define BTF_VAR_ENC(name, type, linkage)	\
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), type), (linkage)
+#define BTF_VAR_SECINFO_ENC(type, offset, size)	\
+	(type), (offset), (size)
+
+#define BTF_MEMBER_ENC(name, type, bits_offset)	\
+	(name), (type), (bits_offset)
+#define BTF_ENUM_ENC(name, val) (name), (val)
+#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \
+	((bitfield_size) << 24 | (bits_offset))
+
+#define BTF_TYPEDEF_ENC(name, type) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type)
+
+#define BTF_PTR_ENC(type) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
+
+#define BTF_CONST_ENC(type) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type)
+
+#define BTF_VOLATILE_ENC(type) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), type)
+
+#define BTF_RESTRICT_ENC(type) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), type)
+
+#define BTF_FUNC_PROTO_ENC(ret_type, nargs) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type)
+
+#define BTF_FUNC_PROTO_ARG_ENC(name, type) \
+	(name), (type)
+
+#define BTF_FUNC_ENC(name, func_proto) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto)
+
+#endif /* _TEST_BTF_H */
-- 
cgit v1.2.3


From 7a9bb9762d3302bb407c7bdb0b5f754e5aa595a5 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 26 Apr 2019 16:39:49 -0700
Subject: bpf: Add verifier tests for the bpf_sk_storage

This patch adds verifier tests for the bpf_sk_storage:
1. ARG_PTR_TO_MAP_VALUE_OR_NULL
2. Map and helper compatibility (e.g. disallow bpf_map_loookup_elem)

It also takes this chance to remove the unused struct btf_raw_data
and uses the BTF encoding macros from "test_btf.h".

Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c |  55 ++++++++-----
 tools/testing/selftests/bpf/verifier/sock.c | 116 ++++++++++++++++++++++++++++
 2 files changed, 152 insertions(+), 19 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index ed9e894afef3..ccd896b98cac 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -47,12 +47,13 @@
 #include "bpf_rlimit.h"
 #include "bpf_rand.h"
 #include "bpf_util.h"
+#include "test_btf.h"
 #include "../../../include/linux/filter.h"
 
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	17
+#define MAX_NR_MAPS	18
 #define MAX_TEST_RUNS	8
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
@@ -85,6 +86,7 @@ struct bpf_test {
 	int fixup_map_array_ro[MAX_FIXUPS];
 	int fixup_map_array_wo[MAX_FIXUPS];
 	int fixup_map_array_small[MAX_FIXUPS];
+	int fixup_sk_storage_map[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
 	uint32_t retval, retval_unpriv, insn_processed;
@@ -497,24 +499,6 @@ static int create_cgroup_storage(bool percpu)
 	return fd;
 }
 
-#define BTF_INFO_ENC(kind, kind_flag, vlen) \
-	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
-#define BTF_TYPE_ENC(name, info, size_or_type) \
-	(name), (info), (size_or_type)
-#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
-	((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
-#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
-	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
-	BTF_INT_ENC(encoding, bits_offset, bits)
-#define BTF_MEMBER_ENC(name, type, bits_offset) \
-	(name), (type), (bits_offset)
-
-struct btf_raw_data {
-	__u32 raw_types[64];
-	const char *str_sec;
-	__u32 str_sec_size;
-};
-
 /* struct bpf_spin_lock {
  *   int val;
  * };
@@ -589,6 +573,31 @@ static int create_map_spin_lock(void)
 	return fd;
 }
 
+static int create_sk_storage_map(void)
+{
+	struct bpf_create_map_attr attr = {
+		.name = "test_map",
+		.map_type = BPF_MAP_TYPE_SK_STORAGE,
+		.key_size = 4,
+		.value_size = 8,
+		.max_entries = 0,
+		.map_flags = BPF_F_NO_PREALLOC,
+		.btf_key_type_id = 1,
+		.btf_value_type_id = 3,
+	};
+	int fd, btf_fd;
+
+	btf_fd = load_btf();
+	if (btf_fd < 0)
+		return -1;
+	attr.btf_fd = btf_fd;
+	fd = bpf_create_map_xattr(&attr);
+	close(attr.btf_fd);
+	if (fd < 0)
+		printf("Failed to create sk_storage_map\n");
+	return fd;
+}
+
 static char bpf_vlog[UINT_MAX >> 8];
 
 static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
@@ -611,6 +620,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	int *fixup_map_array_ro = test->fixup_map_array_ro;
 	int *fixup_map_array_wo = test->fixup_map_array_wo;
 	int *fixup_map_array_small = test->fixup_map_array_small;
+	int *fixup_sk_storage_map = test->fixup_sk_storage_map;
 
 	if (test->fill_helper) {
 		test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -765,6 +775,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 			fixup_map_array_small++;
 		} while (*fixup_map_array_small);
 	}
+	if (*fixup_sk_storage_map) {
+		map_fds[17] = create_sk_storage_map();
+		do {
+			prog[*fixup_sk_storage_map].imm = map_fds[17];
+			fixup_sk_storage_map++;
+		} while (*fixup_sk_storage_map);
+	}
 }
 
 static int set_admin(bool admin)
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 416436231fab..b31cd2cf50d0 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -382,3 +382,119 @@
 	.result = REJECT,
 	.errstr = "reference has not been acquired before",
 },
+{
+	"sk_storage_get(map, skb->sk, NULL, 0): value == NULL",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_sk_storage_map = { 11 },
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+},
+{
+	"sk_storage_get(map, skb->sk, 1, 1): value == 1",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_4, 1),
+	BPF_MOV64_IMM(BPF_REG_3, 1),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_sk_storage_map = { 11 },
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "R3 type=inv expected=fp",
+},
+{
+	"sk_storage_get(map, skb->sk, &stack_value, 1): stack_value",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_4, 1),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_sk_storage_map = { 14 },
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+},
+{
+	"sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_4, 1),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_sk_storage_map = { 14 },
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "invalid indirect read from stack",
+},
+{
+	"bpf_map_lookup_elem(smap, &key)",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_sk_storage_map = { 3 },
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem",
+},
-- 
cgit v1.2.3


From 51a0e301a563bf7266854e0698cdf3dc25116f7b Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 26 Apr 2019 16:39:52 -0700
Subject: bpf: Add BPF_MAP_TYPE_SK_STORAGE test to test_maps

This patch adds BPF_MAP_TYPE_SK_STORAGE test to test_maps.
The src file is rather long, so it is put into another dir map_tests/
and compile like the current prog_tests/ does.  Other existing
tests in test_maps can also be re-factored into map_tests/ in the
future.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile               |  25 +-
 .../selftests/bpf/map_tests/sk_storage_map.c       | 629 +++++++++++++++++++++
 tools/testing/selftests/bpf/test_maps.c            |  18 +-
 tools/testing/selftests/bpf/test_maps.h            |  17 +
 4 files changed, 679 insertions(+), 10 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/map_tests/sk_storage_map.c
 create mode 100644 tools/testing/selftests/bpf/test_maps.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f9d83ba7843e..66f2dca1dee1 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -74,6 +74,8 @@ all: $(TEST_CUSTOM_PROGS)
 $(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c
 	$(CC) -o $@ $< -Wl,--build-id
 
+$(OUTPUT)/test_maps: map_tests/*.c
+
 BPFOBJ := $(OUTPUT)/libbpf.a
 
 $(TEST_GEN_PROGS): $(BPFOBJ)
@@ -232,6 +234,27 @@ $(PROG_TESTS_H): $(PROG_TESTS_DIR) $(PROG_TESTS_FILES)
 		  echo '#endif' \
 		 ) > $(PROG_TESTS_H))
 
+TEST_MAPS_CFLAGS := -I. -I$(OUTPUT)
+MAP_TESTS_DIR = $(OUTPUT)/map_tests
+$(MAP_TESTS_DIR):
+	mkdir -p $@
+MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h
+test_maps.c: $(MAP_TESTS_H)
+$(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS)
+MAP_TESTS_FILES := $(wildcard map_tests/*.c)
+$(MAP_TESTS_H): $(MAP_TESTS_DIR) $(MAP_TESTS_FILES)
+	$(shell ( cd map_tests/; \
+		  echo '/* Generated header, do not edit */'; \
+		  echo '#ifdef DECLARE'; \
+		  ls *.c 2> /dev/null | \
+			sed -e 's@\([^\.]*\)\.c@extern void test_\1(void);@'; \
+		  echo '#endif'; \
+		  echo '#ifdef CALL'; \
+		  ls *.c 2> /dev/null | \
+			sed -e 's@\([^\.]*\)\.c@test_\1();@'; \
+		  echo '#endif' \
+		 ) > $(MAP_TESTS_H))
+
 VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h
 test_verifier.c: $(VERIFIER_TESTS_H)
 $(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS)
@@ -251,4 +274,4 @@ $(OUTPUT)/verifier/tests.h: $(VERIFIER_TESTS_DIR) $(VERIFIER_TEST_FILES)
 		 ) > $(VERIFIER_TESTS_H))
 
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \
-	$(VERIFIER_TESTS_H) $(PROG_TESTS_H)
+	$(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H)
diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
new file mode 100644
index 000000000000..e569edc679d8
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
@@ -0,0 +1,629 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook  */
+#include <linux/compiler.h>
+#include <linux/err.h>
+
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <linux/btf.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_btf.h>
+#include <test_maps.h>
+
+static struct bpf_create_map_attr xattr = {
+	.name = "sk_storage_map",
+	.map_type = BPF_MAP_TYPE_SK_STORAGE,
+	.map_flags = BPF_F_NO_PREALLOC,
+	.max_entries = 0,
+	.key_size = 4,
+	.value_size = 8,
+	.btf_key_type_id = 1,
+	.btf_value_type_id = 3,
+	.btf_fd = -1,
+};
+
+static unsigned int nr_sk_threads_done;
+static unsigned int nr_sk_threads_err;
+static unsigned int nr_sk_per_thread = 4096;
+static unsigned int nr_sk_threads = 4;
+static int sk_storage_map = -1;
+static unsigned int stop;
+static int runtime_s = 5;
+
+static bool is_stopped(void)
+{
+	return READ_ONCE(stop);
+}
+
+static unsigned int threads_err(void)
+{
+	return READ_ONCE(nr_sk_threads_err);
+}
+
+static void notify_thread_err(void)
+{
+	__sync_add_and_fetch(&nr_sk_threads_err, 1);
+}
+
+static bool wait_for_threads_err(void)
+{
+	while (!is_stopped() && !threads_err())
+		usleep(500);
+
+	return !is_stopped();
+}
+
+static unsigned int threads_done(void)
+{
+	return READ_ONCE(nr_sk_threads_done);
+}
+
+static void notify_thread_done(void)
+{
+	__sync_add_and_fetch(&nr_sk_threads_done, 1);
+}
+
+static void notify_thread_redo(void)
+{
+	__sync_sub_and_fetch(&nr_sk_threads_done, 1);
+}
+
+static bool wait_for_threads_done(void)
+{
+	while (threads_done() != nr_sk_threads && !is_stopped() &&
+	       !threads_err())
+		usleep(50);
+
+	return !is_stopped() && !threads_err();
+}
+
+static bool wait_for_threads_redo(void)
+{
+	while (threads_done() && !is_stopped() && !threads_err())
+		usleep(50);
+
+	return !is_stopped() && !threads_err();
+}
+
+static bool wait_for_map(void)
+{
+	while (READ_ONCE(sk_storage_map) == -1 && !is_stopped())
+		usleep(50);
+
+	return !is_stopped();
+}
+
+static bool wait_for_map_close(void)
+{
+	while (READ_ONCE(sk_storage_map) != -1 && !is_stopped())
+		;
+
+	return !is_stopped();
+}
+
+static int load_btf(void)
+{
+	const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l";
+	__u32 btf_raw_types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+		/* struct bpf_spin_lock */                      /* [2] */
+		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),
+		BTF_MEMBER_ENC(15, 1, 0), /* int val; */
+		/* struct val */                                /* [3] */
+		BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */
+		BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */
+	};
+	struct btf_header btf_hdr = {
+		.magic = BTF_MAGIC,
+		.version = BTF_VERSION,
+		.hdr_len = sizeof(struct btf_header),
+		.type_len = sizeof(btf_raw_types),
+		.str_off = sizeof(btf_raw_types),
+		.str_len = sizeof(btf_str_sec),
+	};
+	__u8 raw_btf[sizeof(struct btf_header) + sizeof(btf_raw_types) +
+		     sizeof(btf_str_sec)];
+
+	memcpy(raw_btf, &btf_hdr, sizeof(btf_hdr));
+	memcpy(raw_btf + sizeof(btf_hdr), btf_raw_types, sizeof(btf_raw_types));
+	memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types),
+	       btf_str_sec, sizeof(btf_str_sec));
+
+	return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0);
+}
+
+static int create_sk_storage_map(void)
+{
+	int btf_fd, map_fd;
+
+	btf_fd = load_btf();
+	CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n",
+	      btf_fd, errno);
+	xattr.btf_fd = btf_fd;
+
+	map_fd = bpf_create_map_xattr(&xattr);
+	xattr.btf_fd = -1;
+	close(btf_fd);
+	CHECK(map_fd == -1,
+	      "bpf_create_map_xattr()", "errno:%d\n", errno);
+
+	return map_fd;
+}
+
+static void *insert_close_thread(void *arg)
+{
+	struct {
+		int cnt;
+		int lock;
+	} value = { .cnt = 0xeB9F, .lock = 0, };
+	int i, map_fd, err, *sk_fds;
+
+	sk_fds = malloc(sizeof(*sk_fds) * nr_sk_per_thread);
+	if (!sk_fds) {
+		notify_thread_err();
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0; i < nr_sk_per_thread; i++)
+		sk_fds[i] = -1;
+
+	while (!is_stopped()) {
+		if (!wait_for_map())
+			goto close_all;
+
+		map_fd = READ_ONCE(sk_storage_map);
+		for (i = 0; i < nr_sk_per_thread && !is_stopped(); i++) {
+			sk_fds[i] = socket(AF_INET6, SOCK_STREAM, 0);
+			if (sk_fds[i] == -1) {
+				err = -errno;
+				fprintf(stderr, "socket(): errno:%d\n", errno);
+				goto errout;
+			}
+			err = bpf_map_update_elem(map_fd, &sk_fds[i], &value,
+						  BPF_NOEXIST);
+			if (err) {
+				err = -errno;
+				fprintf(stderr,
+					"bpf_map_update_elem(): errno:%d\n",
+					errno);
+				goto errout;
+			}
+		}
+
+		notify_thread_done();
+		wait_for_map_close();
+
+close_all:
+		for (i = 0; i < nr_sk_per_thread; i++) {
+			close(sk_fds[i]);
+			sk_fds[i] = -1;
+		}
+
+		notify_thread_redo();
+	}
+
+	free(sk_fds);
+	return NULL;
+
+errout:
+	for (i = 0; i < nr_sk_per_thread && sk_fds[i] != -1; i++)
+		close(sk_fds[i]);
+	free(sk_fds);
+	notify_thread_err();
+	return ERR_PTR(err);
+}
+
+static int do_sk_storage_map_stress_free(void)
+{
+	int i, map_fd = -1, err = 0, nr_threads_created = 0;
+	pthread_t *sk_thread_ids;
+	void *thread_ret;
+
+	sk_thread_ids = malloc(sizeof(pthread_t) * nr_sk_threads);
+	if (!sk_thread_ids) {
+		fprintf(stderr, "malloc(sk_threads): NULL\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_sk_threads; i++) {
+		err = pthread_create(&sk_thread_ids[i], NULL,
+				     insert_close_thread, NULL);
+		if (err) {
+			err = -errno;
+			goto done;
+		}
+		nr_threads_created++;
+	}
+
+	while (!is_stopped()) {
+		map_fd = create_sk_storage_map();
+		WRITE_ONCE(sk_storage_map, map_fd);
+
+		if (!wait_for_threads_done())
+			break;
+
+		WRITE_ONCE(sk_storage_map, -1);
+		close(map_fd);
+		map_fd = -1;
+
+		if (!wait_for_threads_redo())
+			break;
+	}
+
+done:
+	WRITE_ONCE(stop, 1);
+	for (i = 0; i < nr_threads_created; i++) {
+		pthread_join(sk_thread_ids[i], &thread_ret);
+		if (IS_ERR(thread_ret) && !err) {
+			err = PTR_ERR(thread_ret);
+			fprintf(stderr, "threads#%u: err:%d\n", i, err);
+		}
+	}
+	free(sk_thread_ids);
+
+	if (map_fd != -1)
+		close(map_fd);
+
+	return err;
+}
+
+static void *update_thread(void *arg)
+{
+	struct {
+		int cnt;
+		int lock;
+	} value = { .cnt = 0xeB9F, .lock = 0, };
+	int map_fd = READ_ONCE(sk_storage_map);
+	int sk_fd = *(int *)arg;
+	int err = 0; /* Suppress compiler false alarm */
+
+	while (!is_stopped()) {
+		err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+		if (err && errno != EAGAIN) {
+			err = -errno;
+			fprintf(stderr, "bpf_map_update_elem: %d %d\n",
+				err, errno);
+			break;
+		}
+	}
+
+	if (!is_stopped()) {
+		notify_thread_err();
+		return ERR_PTR(err);
+	}
+
+	return NULL;
+}
+
+static void *delete_thread(void *arg)
+{
+	int map_fd = READ_ONCE(sk_storage_map);
+	int sk_fd = *(int *)arg;
+	int err = 0; /* Suppress compiler false alarm */
+
+	while (!is_stopped()) {
+		err = bpf_map_delete_elem(map_fd, &sk_fd);
+		if (err && errno != ENOENT) {
+			err = -errno;
+			fprintf(stderr, "bpf_map_delete_elem: %d %d\n",
+				err, errno);
+			break;
+		}
+	}
+
+	if (!is_stopped()) {
+		notify_thread_err();
+		return ERR_PTR(err);
+	}
+
+	return NULL;
+}
+
+static int do_sk_storage_map_stress_change(void)
+{
+	int i, sk_fd, map_fd = -1, err = 0, nr_threads_created = 0;
+	pthread_t *sk_thread_ids;
+	void *thread_ret;
+
+	sk_thread_ids = malloc(sizeof(pthread_t) * nr_sk_threads);
+	if (!sk_thread_ids) {
+		fprintf(stderr, "malloc(sk_threads): NULL\n");
+		return -ENOMEM;
+	}
+
+	sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (sk_fd == -1) {
+		err = -errno;
+		goto done;
+	}
+
+	map_fd = create_sk_storage_map();
+	WRITE_ONCE(sk_storage_map, map_fd);
+
+	for (i = 0; i < nr_sk_threads; i++) {
+		if (i & 0x1)
+			err = pthread_create(&sk_thread_ids[i], NULL,
+					     update_thread, &sk_fd);
+		else
+			err = pthread_create(&sk_thread_ids[i], NULL,
+					     delete_thread, &sk_fd);
+		if (err) {
+			err = -errno;
+			goto done;
+		}
+		nr_threads_created++;
+	}
+
+	wait_for_threads_err();
+
+done:
+	WRITE_ONCE(stop, 1);
+	for (i = 0; i < nr_threads_created; i++) {
+		pthread_join(sk_thread_ids[i], &thread_ret);
+		if (IS_ERR(thread_ret) && !err) {
+			err = PTR_ERR(thread_ret);
+			fprintf(stderr, "threads#%u: err:%d\n", i, err);
+		}
+	}
+	free(sk_thread_ids);
+
+	if (sk_fd != -1)
+		close(sk_fd);
+	close(map_fd);
+
+	return err;
+}
+
+static void stop_handler(int signum)
+{
+	if (signum != SIGALRM)
+		printf("stopping...\n");
+	WRITE_ONCE(stop, 1);
+}
+
+#define BPF_SK_STORAGE_MAP_TEST_NR_THREADS "BPF_SK_STORAGE_MAP_TEST_NR_THREADS"
+#define BPF_SK_STORAGE_MAP_TEST_SK_PER_THREAD "BPF_SK_STORAGE_MAP_TEST_SK_PER_THREAD"
+#define BPF_SK_STORAGE_MAP_TEST_RUNTIME_S "BPF_SK_STORAGE_MAP_TEST_RUNTIME_S"
+#define BPF_SK_STORAGE_MAP_TEST_NAME "BPF_SK_STORAGE_MAP_TEST_NAME"
+
+static void test_sk_storage_map_stress_free(void)
+{
+	struct rlimit rlim_old, rlim_new = {};
+	int err;
+
+	getrlimit(RLIMIT_NOFILE, &rlim_old);
+
+	signal(SIGTERM, stop_handler);
+	signal(SIGINT, stop_handler);
+	if (runtime_s > 0) {
+		signal(SIGALRM, stop_handler);
+		alarm(runtime_s);
+	}
+
+	if (rlim_old.rlim_cur < nr_sk_threads * nr_sk_per_thread) {
+		rlim_new.rlim_cur = nr_sk_threads * nr_sk_per_thread + 128;
+		rlim_new.rlim_max = rlim_new.rlim_cur + 128;
+		err = setrlimit(RLIMIT_NOFILE, &rlim_new);
+		CHECK(err, "setrlimit(RLIMIT_NOFILE)", "rlim_new:%lu errno:%d",
+		      rlim_new.rlim_cur, errno);
+	}
+
+	err = do_sk_storage_map_stress_free();
+
+	signal(SIGTERM, SIG_DFL);
+	signal(SIGINT, SIG_DFL);
+	if (runtime_s > 0) {
+		signal(SIGALRM, SIG_DFL);
+		alarm(0);
+	}
+
+	if (rlim_new.rlim_cur)
+		setrlimit(RLIMIT_NOFILE, &rlim_old);
+
+	CHECK(err, "test_sk_storage_map_stress_free", "err:%d\n", err);
+}
+
+static void test_sk_storage_map_stress_change(void)
+{
+	int err;
+
+	signal(SIGTERM, stop_handler);
+	signal(SIGINT, stop_handler);
+	if (runtime_s > 0) {
+		signal(SIGALRM, stop_handler);
+		alarm(runtime_s);
+	}
+
+	err = do_sk_storage_map_stress_change();
+
+	signal(SIGTERM, SIG_DFL);
+	signal(SIGINT, SIG_DFL);
+	if (runtime_s > 0) {
+		signal(SIGALRM, SIG_DFL);
+		alarm(0);
+	}
+
+	CHECK(err, "test_sk_storage_map_stress_change", "err:%d\n", err);
+}
+
+static void test_sk_storage_map_basic(void)
+{
+	struct {
+		int cnt;
+		int lock;
+	} value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value;
+	struct bpf_create_map_attr bad_xattr;
+	int btf_fd, map_fd, sk_fd, err;
+
+	btf_fd = load_btf();
+	CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n",
+	      btf_fd, errno);
+	xattr.btf_fd = btf_fd;
+
+	sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	CHECK(sk_fd == -1, "socket()", "sk_fd:%d errno:%d\n",
+	      sk_fd, errno);
+
+	map_fd = bpf_create_map_xattr(&xattr);
+	CHECK(map_fd == -1, "bpf_create_map_xattr(good_xattr)",
+	      "map_fd:%d errno:%d\n", map_fd, errno);
+
+	/* Add new elem */
+	memcpy(&lookup_value, &value, sizeof(value));
+	err = bpf_map_update_elem(map_fd, &sk_fd, &value,
+				  BPF_NOEXIST | BPF_F_LOCK);
+	CHECK(err, "bpf_map_update_elem(BPF_NOEXIST|BPF_F_LOCK)",
+	      "err:%d errno:%d\n", err, errno);
+	err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
+					BPF_F_LOCK);
+	CHECK(err || lookup_value.cnt != value.cnt,
+	      "bpf_map_lookup_elem_flags(BPF_F_LOCK)",
+	      "err:%d errno:%d cnt:%x(%x)\n",
+	      err, errno, lookup_value.cnt, value.cnt);
+
+	/* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */
+	value.cnt += 1;
+	err = bpf_map_update_elem(map_fd, &sk_fd, &value,
+				  BPF_EXIST | BPF_F_LOCK);
+	CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)",
+	      "err:%d errno:%d\n", err, errno);
+	err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
+					BPF_F_LOCK);
+	CHECK(err || lookup_value.cnt != value.cnt,
+	      "bpf_map_lookup_elem_flags(BPF_F_LOCK)",
+	      "err:%d errno:%d cnt:%x(%x)\n",
+	      err, errno, lookup_value.cnt, value.cnt);
+
+	/* Bump the cnt and update with BPF_EXIST */
+	value.cnt += 1;
+	err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST);
+	CHECK(err, "bpf_map_update_elem(BPF_EXIST)",
+	      "err:%d errno:%d\n", err, errno);
+	err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
+					BPF_F_LOCK);
+	CHECK(err || lookup_value.cnt != value.cnt,
+	      "bpf_map_lookup_elem_flags(BPF_F_LOCK)",
+	      "err:%d errno:%d cnt:%x(%x)\n",
+	      err, errno, lookup_value.cnt, value.cnt);
+
+	/* Update with BPF_NOEXIST */
+	value.cnt += 1;
+	err = bpf_map_update_elem(map_fd, &sk_fd, &value,
+				  BPF_NOEXIST | BPF_F_LOCK);
+	CHECK(!err || errno != EEXIST,
+	      "bpf_map_update_elem(BPF_NOEXIST|BPF_F_LOCK)",
+	      "err:%d errno:%d\n", err, errno);
+	err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_NOEXIST);
+	CHECK(!err || errno != EEXIST, "bpf_map_update_elem(BPF_NOEXIST)",
+	      "err:%d errno:%d\n", err, errno);
+	value.cnt -= 1;
+	err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
+					BPF_F_LOCK);
+	CHECK(err || lookup_value.cnt != value.cnt,
+	      "bpf_map_lookup_elem_flags(BPF_F_LOCK)",
+	      "err:%d errno:%d cnt:%x(%x)\n",
+	      err, errno, lookup_value.cnt, value.cnt);
+
+	/* Bump the cnt again and update with map_flags == 0 */
+	value.cnt += 1;
+	err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+	CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n",
+	      err, errno);
+	err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
+					BPF_F_LOCK);
+	CHECK(err || lookup_value.cnt != value.cnt,
+	      "bpf_map_lookup_elem_flags(BPF_F_LOCK)",
+	      "err:%d errno:%d cnt:%x(%x)\n",
+	      err, errno, lookup_value.cnt, value.cnt);
+
+	/* Test delete elem */
+	err = bpf_map_delete_elem(map_fd, &sk_fd);
+	CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n",
+	      err, errno);
+	err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
+					BPF_F_LOCK);
+	CHECK(!err || errno != ENOENT,
+	      "bpf_map_lookup_elem_flags(BPF_F_LOCK)",
+	      "err:%d errno:%d\n", err, errno);
+	err = bpf_map_delete_elem(map_fd, &sk_fd);
+	CHECK(!err || errno != ENOENT, "bpf_map_delete_elem()",
+	      "err:%d errno:%d\n", err, errno);
+
+	memcpy(&bad_xattr, &xattr, sizeof(xattr));
+	bad_xattr.btf_key_type_id = 0;
+	err = bpf_create_map_xattr(&bad_xattr);
+	CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+	      "err:%d errno:%d\n", err, errno);
+
+	memcpy(&bad_xattr, &xattr, sizeof(xattr));
+	bad_xattr.btf_key_type_id = 3;
+	err = bpf_create_map_xattr(&bad_xattr);
+	CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+	      "err:%d errno:%d\n", err, errno);
+
+	memcpy(&bad_xattr, &xattr, sizeof(xattr));
+	bad_xattr.max_entries = 1;
+	err = bpf_create_map_xattr(&bad_xattr);
+	CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+	      "err:%d errno:%d\n", err, errno);
+
+	memcpy(&bad_xattr, &xattr, sizeof(xattr));
+	bad_xattr.map_flags = 0;
+	err = bpf_create_map_xattr(&bad_xattr);
+	CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+	      "err:%d errno:%d\n", err, errno);
+
+	xattr.btf_fd = -1;
+	close(btf_fd);
+	close(map_fd);
+	close(sk_fd);
+}
+
+void test_sk_storage_map(void)
+{
+	const char *test_name, *env_opt;
+	bool test_ran = false;
+
+	test_name = getenv(BPF_SK_STORAGE_MAP_TEST_NAME);
+
+	env_opt = getenv(BPF_SK_STORAGE_MAP_TEST_NR_THREADS);
+	if (env_opt)
+		nr_sk_threads = atoi(env_opt);
+
+	env_opt = getenv(BPF_SK_STORAGE_MAP_TEST_SK_PER_THREAD);
+	if (env_opt)
+		nr_sk_per_thread = atoi(env_opt);
+
+	env_opt = getenv(BPF_SK_STORAGE_MAP_TEST_RUNTIME_S);
+	if (env_opt)
+		runtime_s = atoi(env_opt);
+
+	if (!test_name || !strcmp(test_name, "basic")) {
+		test_sk_storage_map_basic();
+		test_ran = true;
+	}
+	if (!test_name || !strcmp(test_name, "stress_free")) {
+		test_sk_storage_map_stress_free();
+		test_ran = true;
+	}
+	if (!test_name || !strcmp(test_name, "stress_change")) {
+		test_sk_storage_map_stress_change();
+		test_ran = true;
+	}
+
+	if (test_ran)
+		printf("%s:PASS\n", __func__);
+	else
+		CHECK(1, "Invalid test_name", "%s\n", test_name);
+}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 3c627771f965..246f745cb006 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -27,6 +27,7 @@
 
 #include "bpf_util.h"
 #include "bpf_rlimit.h"
+#include "test_maps.h"
 
 #ifndef ENOTSUPP
 #define ENOTSUPP 524
@@ -36,15 +37,6 @@ static int skips;
 
 static int map_flags;
 
-#define CHECK(condition, tag, format...) ({				\
-	int __ret = !!(condition);					\
-	if (__ret) {							\
-		printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);	\
-		printf(format);						\
-		exit(-1);						\
-	}								\
-})
-
 static void test_hashmap(unsigned int task, void *data)
 {
 	long long key, next_key, first_key, value;
@@ -1703,6 +1695,10 @@ static void run_all_tests(void)
 	test_map_in_map();
 }
 
+#define DECLARE
+#include <map_tests/tests.h>
+#undef DECLARE
+
 int main(void)
 {
 	srand(time(NULL));
@@ -1713,6 +1709,10 @@ int main(void)
 	map_flags = BPF_F_NO_PREALLOC;
 	run_all_tests();
 
+#define CALL
+#include <map_tests/tests.h>
+#undef CALL
+
 	printf("test_maps: OK, %d SKIPPED\n", skips);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h
new file mode 100644
index 000000000000..77d8587ac4ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_maps.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TEST_MAPS_H
+#define _TEST_MAPS_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define CHECK(condition, tag, format...) ({				\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);	\
+		printf(format);						\
+		exit(-1);						\
+	}								\
+})
+
+#endif
-- 
cgit v1.2.3


From 263d0b35334112dd999afb4246646a2ea9da800d Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 26 Apr 2019 16:39:54 -0700
Subject: bpf: Add ene-to-end test for bpf_sk_storage_* helpers

This patch rides on an existing BPF_PROG_TYPE_CGROUP_SKB test
(test_sock_fields.c) to do a TCP end-to-end test on the new
bpf_sk_storage_* helpers.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/bpf_helpers.h          |   5 +
 .../selftests/bpf/progs/test_sock_fields_kern.c    |  53 ++++++++++
 tools/testing/selftests/bpf/test_sock_fields.c     | 115 ++++++++++++++++++---
 3 files changed, 157 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 59e221586cf7..6e80b66d7fb1 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -211,6 +211,11 @@ static int (*bpf_strtol)(const char *buf, unsigned long long buf_len,
 static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len,
 			  unsigned long long flags, unsigned long *res) =
 	(void *) BPF_FUNC_strtoul;
+static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk,
+				   void *value, __u64 flags) =
+	(void *) BPF_FUNC_sk_storage_get;
+static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) =
+	(void *)BPF_FUNC_sk_storage_delete;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
index 37328f148538..1c39e4ccb7f1 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
@@ -55,6 +55,31 @@ struct bpf_map_def SEC("maps") linum_map = {
 	.max_entries = __NR_BPF_LINUM_ARRAY_IDX,
 };
 
+struct bpf_spinlock_cnt {
+	struct bpf_spin_lock lock;
+	__u32 cnt;
+};
+
+struct bpf_map_def SEC("maps") sk_pkt_out_cnt = {
+	.type = BPF_MAP_TYPE_SK_STORAGE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct bpf_spinlock_cnt),
+	.max_entries = 0,
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+
+BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt, int, struct bpf_spinlock_cnt);
+
+struct bpf_map_def SEC("maps") sk_pkt_out_cnt10 = {
+	.type = BPF_MAP_TYPE_SK_STORAGE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct bpf_spinlock_cnt),
+	.max_entries = 0,
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+
+BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt10, int, struct bpf_spinlock_cnt);
+
 static bool is_loopback6(__u32 *a6)
 {
 	return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
@@ -120,7 +145,9 @@ static void tpcpy(struct bpf_tcp_sock *dst,
 SEC("cgroup_skb/egress")
 int egress_read_sock_fields(struct __sk_buff *skb)
 {
+	struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
 	__u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
+	struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
 	struct sockaddr_in6 *srv_sa6, *cli_sa6;
 	struct bpf_tcp_sock *tp, *tp_ret;
 	struct bpf_sock *sk, *sk_ret;
@@ -161,6 +188,32 @@ int egress_read_sock_fields(struct __sk_buff *skb)
 	skcpy(sk_ret, sk);
 	tpcpy(tp_ret, tp);
 
+	if (result_idx == EGRESS_SRV_IDX) {
+		/* The userspace has created it for srv sk */
+		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0);
+		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk,
+						   0, 0);
+	} else {
+		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
+						 &cli_cnt_init,
+						 BPF_SK_STORAGE_GET_F_CREATE);
+		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
+						   sk, &cli_cnt_init,
+						   BPF_SK_STORAGE_GET_F_CREATE);
+	}
+
+	if (!pkt_out_cnt || !pkt_out_cnt10)
+		RETURN;
+
+	/* Even both cnt and cnt10 have lock defined in their BTF,
+	 * intentionally one cnt takes lock while one does not
+	 * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
+	 */
+	pkt_out_cnt->cnt += 1;
+	bpf_spin_lock(&pkt_out_cnt10->lock);
+	pkt_out_cnt10->cnt += 10;
+	bpf_spin_unlock(&pkt_out_cnt10->lock);
+
 	RETURN;
 }
 
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
index dcae7f664dce..e089477fa0a3 100644
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/test_sock_fields.c
@@ -35,6 +35,11 @@ enum bpf_linum_array_idx {
 	__NR_BPF_LINUM_ARRAY_IDX,
 };
 
+struct bpf_spinlock_cnt {
+	struct bpf_spin_lock lock;
+	__u32 cnt;
+};
+
 #define CHECK(condition, tag, format...) ({				\
 	int __ret = !!(condition);					\
 	if (__ret) {							\
@@ -50,6 +55,8 @@ enum bpf_linum_array_idx {
 #define DATA_LEN sizeof(DATA)
 
 static struct sockaddr_in6 srv_sa6, cli_sa6;
+static int sk_pkt_out_cnt10_fd;
+static int sk_pkt_out_cnt_fd;
 static int linum_map_fd;
 static int addr_map_fd;
 static int tp_map_fd;
@@ -220,28 +227,90 @@ static void check_result(void)
 	      "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
 	      ingress_linum);
 
-	CHECK(srv_tp.data_segs_out != 1 ||
+	CHECK(srv_tp.data_segs_out != 2 ||
 	      srv_tp.data_segs_in ||
 	      srv_tp.snd_cwnd != 10 ||
 	      srv_tp.total_retrans ||
-	      srv_tp.bytes_acked != DATA_LEN,
+	      srv_tp.bytes_acked != 2 * DATA_LEN,
 	      "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
 	      egress_linum);
 
 	CHECK(cli_tp.data_segs_out ||
-	      cli_tp.data_segs_in != 1 ||
+	      cli_tp.data_segs_in != 2 ||
 	      cli_tp.snd_cwnd != 10 ||
 	      cli_tp.total_retrans ||
-	      cli_tp.bytes_received != DATA_LEN,
+	      cli_tp.bytes_received != 2 * DATA_LEN,
 	      "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
 	      egress_linum);
 }
 
+static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
+{
+	struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
+	int err;
+
+	pkt_out_cnt.cnt = ~0;
+	pkt_out_cnt10.cnt = ~0;
+	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
+	if (!err)
+		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
+					  &pkt_out_cnt10);
+
+	/* The bpf prog only counts for fullsock and
+	 * passive conneciton did not become fullsock until 3WHS
+	 * had been finished.
+	 * The bpf prog only counted two data packet out but we
+	 * specially init accept_fd's pkt_out_cnt by 2 in
+	 * init_sk_storage().  Hence, 4 here.
+	 */
+	CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40,
+	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
+	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
+	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
+
+	pkt_out_cnt.cnt = ~0;
+	pkt_out_cnt10.cnt = ~0;
+	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
+	if (!err)
+		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
+					  &pkt_out_cnt10);
+	/* Active connection is fullsock from the beginning.
+	 * 1 SYN and 1 ACK during 3WHS
+	 * 2 Acks on data packet.
+	 *
+	 * The bpf_prog initialized it to 0xeB9F.
+	 */
+	CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 ||
+	      pkt_out_cnt10.cnt != 0xeB9F + 40,
+	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
+	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
+	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
+}
+
+static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
+{
+	struct bpf_spinlock_cnt scnt = {};
+	int err;
+
+	scnt.cnt = pkt_out_cnt;
+	err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
+				  BPF_NOEXIST);
+	CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
+	      "err:%d errno:%d", err, errno);
+
+	scnt.cnt *= 10;
+	err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
+				  BPF_NOEXIST);
+	CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
+	      "err:%d errno:%d", err, errno);
+}
+
 static void test(void)
 {
 	int listen_fd, cli_fd, accept_fd, epfd, err;
 	struct epoll_event ev;
 	socklen_t addrlen;
+	int i;
 
 	addrlen = sizeof(struct sockaddr_in6);
 	ev.events = EPOLLIN;
@@ -308,24 +377,30 @@ static void test(void)
 	      accept_fd, errno);
 	close(listen_fd);
 
-	/* Send some data from accept_fd to cli_fd */
-	err = send(accept_fd, DATA, DATA_LEN, 0);
-	CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
-	      err, errno);
-
-	/* Have some timeout in recv(cli_fd). Just in case. */
 	ev.data.fd = cli_fd;
 	err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev);
 	CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d",
 	      err, errno);
 
-	err = epoll_wait(epfd, &ev, 1, 1000);
-	CHECK(err != 1 || ev.data.fd != cli_fd,
-	      "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
-	      err, errno, ev.data.fd, cli_fd);
+	init_sk_storage(accept_fd, 2);
 
-	err = recv(cli_fd, NULL, 0, MSG_TRUNC);
-	CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
+	for (i = 0; i < 2; i++) {
+		/* Send some data from accept_fd to cli_fd */
+		err = send(accept_fd, DATA, DATA_LEN, 0);
+		CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
+		      err, errno);
+
+		/* Have some timeout in recv(cli_fd). Just in case. */
+		err = epoll_wait(epfd, &ev, 1, 1000);
+		CHECK(err != 1 || ev.data.fd != cli_fd,
+		      "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
+		      err, errno, ev.data.fd, cli_fd);
+
+		err = recv(cli_fd, NULL, 0, MSG_TRUNC);
+		CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
+	}
+
+	check_sk_pkt_out_cnt(accept_fd, cli_fd);
 
 	close(epfd);
 	close(accept_fd);
@@ -395,6 +470,14 @@ int main(int argc, char **argv)
 	CHECK(!map, "cannot find linum_map", "(null)");
 	linum_map_fd = bpf_map__fd(map);
 
+	map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt");
+	CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)");
+	sk_pkt_out_cnt_fd = bpf_map__fd(map);
+
+	map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10");
+	CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)");
+	sk_pkt_out_cnt10_fd = bpf_map__fd(map);
+
 	test();
 
 	bpf_object__close(obj);
-- 
cgit v1.2.3


From 711aef1bbf88212a21f7103e88f397b47a528805 Mon Sep 17 00:00:00 2001
From: Wang YanQing <udknight@gmail.com>
Date: Sat, 27 Apr 2019 16:28:26 +0800
Subject: bpf, x32: Fix bug for BPF_JMP | {BPF_JSGT, BPF_JSLE, BPF_JSLT,
 BPF_JSGE}

The current method to compare 64-bit numbers for conditional jump is:

1) Compare the high 32-bit first.

2) If the high 32-bit isn't the same, then goto step 4.

3) Compare the low 32-bit.

4) Check the desired condition.

This method is right for unsigned comparison, but it is buggy for signed
comparison, because it does signed comparison for low 32-bit too.

There is only one sign bit in 64-bit number, that is the MSB in the 64-bit
number, it is wrong to treat low 32-bit as signed number and do the signed
comparison for it.

This patch fixes the bug and adds a testcase in selftests/bpf for such bug.

Signed-off-by: Wang YanQing <udknight@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 arch/x86/net/bpf_jit_comp32.c              | 217 ++++++++++++++++++++++-------
 tools/testing/selftests/bpf/verifier/jit.c |  19 +++
 2 files changed, 185 insertions(+), 51 deletions(-)

(limited to 'tools')

diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 0d9cdffce6ac..8097b88d744f 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -117,6 +117,8 @@ static bool is_simm32(s64 value)
 #define IA32_JLE 0x7E
 #define IA32_JG  0x7F
 
+#define COND_JMP_OPCODE_INVALID	(0xFF)
+
 /*
  * Map eBPF registers to IA32 32bit registers or stack scratch space.
  *
@@ -1613,6 +1615,75 @@ static inline void emit_push_r64(const u8 src[], u8 **pprog)
 	*pprog = prog;
 }
 
+static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
+{
+	u8 jmp_cond;
+
+	/* Convert BPF opcode to x86 */
+	switch (op) {
+	case BPF_JEQ:
+		jmp_cond = IA32_JE;
+		break;
+	case BPF_JSET:
+	case BPF_JNE:
+		jmp_cond = IA32_JNE;
+		break;
+	case BPF_JGT:
+		/* GT is unsigned '>', JA in x86 */
+		jmp_cond = IA32_JA;
+		break;
+	case BPF_JLT:
+		/* LT is unsigned '<', JB in x86 */
+		jmp_cond = IA32_JB;
+		break;
+	case BPF_JGE:
+		/* GE is unsigned '>=', JAE in x86 */
+		jmp_cond = IA32_JAE;
+		break;
+	case BPF_JLE:
+		/* LE is unsigned '<=', JBE in x86 */
+		jmp_cond = IA32_JBE;
+		break;
+	case BPF_JSGT:
+		if (!is_cmp_lo)
+			/* Signed '>', GT in x86 */
+			jmp_cond = IA32_JG;
+		else
+			/* GT is unsigned '>', JA in x86 */
+			jmp_cond = IA32_JA;
+		break;
+	case BPF_JSLT:
+		if (!is_cmp_lo)
+			/* Signed '<', LT in x86 */
+			jmp_cond = IA32_JL;
+		else
+			/* LT is unsigned '<', JB in x86 */
+			jmp_cond = IA32_JB;
+		break;
+	case BPF_JSGE:
+		if (!is_cmp_lo)
+			/* Signed '>=', GE in x86 */
+			jmp_cond = IA32_JGE;
+		else
+			/* GE is unsigned '>=', JAE in x86 */
+			jmp_cond = IA32_JAE;
+		break;
+	case BPF_JSLE:
+		if (!is_cmp_lo)
+			/* Signed '<=', LE in x86 */
+			jmp_cond = IA32_JLE;
+		else
+			/* LE is unsigned '<=', JBE in x86 */
+			jmp_cond = IA32_JBE;
+		break;
+	default: /* to silence GCC warning */
+		jmp_cond = COND_JMP_OPCODE_INVALID;
+		break;
+	}
+
+	return jmp_cond;
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		  int oldproglen, struct jit_context *ctx)
 {
@@ -2069,10 +2140,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		case BPF_JMP | BPF_JLT | BPF_X:
 		case BPF_JMP | BPF_JGE | BPF_X:
 		case BPF_JMP | BPF_JLE | BPF_X:
-		case BPF_JMP | BPF_JSGT | BPF_X:
-		case BPF_JMP | BPF_JSLE | BPF_X:
-		case BPF_JMP | BPF_JSLT | BPF_X:
-		case BPF_JMP | BPF_JSGE | BPF_X:
 		case BPF_JMP32 | BPF_JEQ | BPF_X:
 		case BPF_JMP32 | BPF_JNE | BPF_X:
 		case BPF_JMP32 | BPF_JGT | BPF_X:
@@ -2118,6 +2185,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
 			goto emit_cond_jmp;
 		}
+		case BPF_JMP | BPF_JSGT | BPF_X:
+		case BPF_JMP | BPF_JSLE | BPF_X:
+		case BPF_JMP | BPF_JSLT | BPF_X:
+		case BPF_JMP | BPF_JSGE | BPF_X: {
+			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
+			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
+
+			if (dstk) {
+				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+				      STACK_VAR(dst_lo));
+				EMIT3(0x8B,
+				      add_2reg(0x40, IA32_EBP,
+					       IA32_EDX),
+				      STACK_VAR(dst_hi));
+			}
+
+			if (sstk) {
+				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+				      STACK_VAR(src_lo));
+				EMIT3(0x8B,
+				      add_2reg(0x40, IA32_EBP,
+					       IA32_EBX),
+				      STACK_VAR(src_hi));
+			}
+
+			/* cmp dreg_hi,sreg_hi */
+			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+			EMIT2(IA32_JNE, 10);
+			/* cmp dreg_lo,sreg_lo */
+			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
+			goto emit_cond_jmp_signed;
+		}
 		case BPF_JMP | BPF_JSET | BPF_X:
 		case BPF_JMP32 | BPF_JSET | BPF_X: {
 			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
@@ -2194,10 +2295,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		case BPF_JMP | BPF_JLT | BPF_K:
 		case BPF_JMP | BPF_JGE | BPF_K:
 		case BPF_JMP | BPF_JLE | BPF_K:
-		case BPF_JMP | BPF_JSGT | BPF_K:
-		case BPF_JMP | BPF_JSLE | BPF_K:
-		case BPF_JMP | BPF_JSLT | BPF_K:
-		case BPF_JMP | BPF_JSGE | BPF_K:
 		case BPF_JMP32 | BPF_JEQ | BPF_K:
 		case BPF_JMP32 | BPF_JNE | BPF_K:
 		case BPF_JMP32 | BPF_JGT | BPF_K:
@@ -2238,50 +2335,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			/* cmp dreg_lo,sreg_lo */
 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
 
-emit_cond_jmp:		/* Convert BPF opcode to x86 */
-			switch (BPF_OP(code)) {
-			case BPF_JEQ:
-				jmp_cond = IA32_JE;
-				break;
-			case BPF_JSET:
-			case BPF_JNE:
-				jmp_cond = IA32_JNE;
-				break;
-			case BPF_JGT:
-				/* GT is unsigned '>', JA in x86 */
-				jmp_cond = IA32_JA;
-				break;
-			case BPF_JLT:
-				/* LT is unsigned '<', JB in x86 */
-				jmp_cond = IA32_JB;
-				break;
-			case BPF_JGE:
-				/* GE is unsigned '>=', JAE in x86 */
-				jmp_cond = IA32_JAE;
-				break;
-			case BPF_JLE:
-				/* LE is unsigned '<=', JBE in x86 */
-				jmp_cond = IA32_JBE;
-				break;
-			case BPF_JSGT:
-				/* Signed '>', GT in x86 */
-				jmp_cond = IA32_JG;
-				break;
-			case BPF_JSLT:
-				/* Signed '<', LT in x86 */
-				jmp_cond = IA32_JL;
-				break;
-			case BPF_JSGE:
-				/* Signed '>=', GE in x86 */
-				jmp_cond = IA32_JGE;
-				break;
-			case BPF_JSLE:
-				/* Signed '<=', LE in x86 */
-				jmp_cond = IA32_JLE;
-				break;
-			default: /* to silence GCC warning */
+emit_cond_jmp:		jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
+			if (jmp_cond == COND_JMP_OPCODE_INVALID)
 				return -EFAULT;
-			}
 			jmp_offset = addrs[i + insn->off] - addrs[i];
 			if (is_imm8(jmp_offset)) {
 				EMIT2(jmp_cond, jmp_offset);
@@ -2291,7 +2347,66 @@ emit_cond_jmp:		/* Convert BPF opcode to x86 */
 				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
 				return -EFAULT;
 			}
+			break;
+		}
+		case BPF_JMP | BPF_JSGT | BPF_K:
+		case BPF_JMP | BPF_JSLE | BPF_K:
+		case BPF_JMP | BPF_JSLT | BPF_K:
+		case BPF_JMP | BPF_JSGE | BPF_K: {
+			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+			u8 sreg_lo = IA32_ECX;
+			u8 sreg_hi = IA32_EBX;
+			u32 hi;
+
+			if (dstk) {
+				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+				      STACK_VAR(dst_lo));
+				EMIT3(0x8B,
+				      add_2reg(0x40, IA32_EBP,
+					       IA32_EDX),
+				      STACK_VAR(dst_hi));
+			}
+
+			/* mov ecx,imm32 */
+			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+			hi = imm32 & (1 << 31) ? (u32)~0 : 0;
+			/* mov ebx,imm32 */
+			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+			/* cmp dreg_hi,sreg_hi */
+			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+			EMIT2(IA32_JNE, 10);
+			/* cmp dreg_lo,sreg_lo */
+			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
+
+			/*
+			 * For simplicity of branch offset computation,
+			 * let's use fixed jump coding here.
+			 */
+emit_cond_jmp_signed:	/* Check the condition for low 32-bit comparison */
+			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true);
+			if (jmp_cond == COND_JMP_OPCODE_INVALID)
+				return -EFAULT;
+			jmp_offset = addrs[i + insn->off] - addrs[i] + 8;
+			if (is_simm32(jmp_offset)) {
+				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+			} else {
+				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+				return -EFAULT;
+			}
+			EMIT2(0xEB, 6);
 
+			/* Check the condition for high 32-bit comparison */
+			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
+			if (jmp_cond == COND_JMP_OPCODE_INVALID)
+				return -EFAULT;
+			jmp_offset = addrs[i + insn->off] - addrs[i];
+			if (is_simm32(jmp_offset)) {
+				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+			} else {
+				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+				return -EFAULT;
+			}
 			break;
 		}
 		case BPF_JMP | BPF_JA:
diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c
index be488b4495a3..c33adf344fae 100644
--- a/tools/testing/selftests/bpf/verifier/jit.c
+++ b/tools/testing/selftests/bpf/verifier/jit.c
@@ -86,3 +86,22 @@
 	.result = ACCEPT,
 	.retval = 2,
 },
+{
+	"jit: jsgt, jslt",
+	.insns = {
+	BPF_LD_IMM64(BPF_REG_1, 0x80000000ULL),
+	BPF_LD_IMM64(BPF_REG_2, 0x0ULL),
+	BPF_JMP_REG(BPF_JSGT, BPF_REG_1, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+
+	BPF_JMP_REG(BPF_JSLT, BPF_REG_2, BPF_REG_1, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 2,
+},
-- 
cgit v1.2.3


From 6cea33701eb024bc6c920ab83940ee22afd29139 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 29 Apr 2019 16:59:38 -0700
Subject: selftests/bpf: set RLIMIT_MEMLOCK properly for test_libbpf_open.c

Test test_libbpf.sh failed on my development server with failure
  -bash-4.4$ sudo ./test_libbpf.sh
  [0] libbpf: Error in bpf_object__probe_name():Operation not permitted(1).
      Couldn't load basic 'r0 = 0' BPF program.
  test_libbpf: failed at file test_l4lb.o
  selftests: test_libbpf [FAILED]
  -bash-4.4$

The reason is because my machine has 64KB locked memory by default which
is not enough for this program to get locked memory.
Similar to other bpf selftests, let us increase RLIMIT_MEMLOCK
to infinity, which fixed the issue.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_libbpf_open.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c
index 65cbd30704b5..9e9db202d218 100644
--- a/tools/testing/selftests/bpf/test_libbpf_open.c
+++ b/tools/testing/selftests/bpf/test_libbpf_open.c
@@ -11,6 +11,8 @@ static const char *__doc__ =
 #include <bpf/libbpf.h>
 #include <getopt.h>
 
+#include "bpf_rlimit.h"
+
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
 	{"debug",	no_argument,		NULL, 'D' },
-- 
cgit v1.2.3


From 0e6741f092979535d159d5a851f12c88bfb7cb9a Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Tue, 30 Apr 2019 14:45:35 +0200
Subject: libbpf: fix invalid munmap call
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When unmapping the AF_XDP memory regions used for the rings, an
invalid address was passed to the munmap() calls. Instead of passing
the beginning of the memory region, the descriptor region was passed
to munmap.

When the userspace application tried to tear down an AF_XDP socket,
the operation failed and the application would still have a reference
to socket it wished to get rid of.

Reported-by: William Tu <u9012063@gmail.com>
Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Tested-by: William Tu <u9012063@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/xsk.c | 77 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 40 insertions(+), 37 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 557ef8d1250d..78b4a4bd4a25 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -248,8 +248,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
 	return 0;
 
 out_mmap:
-	munmap(umem->fill,
-	       off.fr.desc + umem->config.fill_size * sizeof(__u64));
+	munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
 out_socket:
 	close(umem->fd);
 out_umem_alloc:
@@ -524,11 +523,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
 		       const struct xsk_socket_config *usr_config)
 {
+	void *rx_map = NULL, *tx_map = NULL;
 	struct sockaddr_xdp sxdp = {};
 	struct xdp_mmap_offsets off;
 	struct xsk_socket *xsk;
 	socklen_t optlen;
-	void *map;
 	int err;
 
 	if (!umem || !xsk_ptr || !rx || !tx)
@@ -594,40 +593,40 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 	}
 
 	if (rx) {
-		map = xsk_mmap(NULL, off.rx.desc +
-			       xsk->config.rx_size * sizeof(struct xdp_desc),
-			       PROT_READ | PROT_WRITE,
-			       MAP_SHARED | MAP_POPULATE,
-			       xsk->fd, XDP_PGOFF_RX_RING);
-		if (map == MAP_FAILED) {
+		rx_map = xsk_mmap(NULL, off.rx.desc +
+				  xsk->config.rx_size * sizeof(struct xdp_desc),
+				  PROT_READ | PROT_WRITE,
+				  MAP_SHARED | MAP_POPULATE,
+				  xsk->fd, XDP_PGOFF_RX_RING);
+		if (rx_map == MAP_FAILED) {
 			err = -errno;
 			goto out_socket;
 		}
 
 		rx->mask = xsk->config.rx_size - 1;
 		rx->size = xsk->config.rx_size;
-		rx->producer = map + off.rx.producer;
-		rx->consumer = map + off.rx.consumer;
-		rx->ring = map + off.rx.desc;
+		rx->producer = rx_map + off.rx.producer;
+		rx->consumer = rx_map + off.rx.consumer;
+		rx->ring = rx_map + off.rx.desc;
 	}
 	xsk->rx = rx;
 
 	if (tx) {
-		map = xsk_mmap(NULL, off.tx.desc +
-			       xsk->config.tx_size * sizeof(struct xdp_desc),
-			       PROT_READ | PROT_WRITE,
-			       MAP_SHARED | MAP_POPULATE,
-			       xsk->fd, XDP_PGOFF_TX_RING);
-		if (map == MAP_FAILED) {
+		tx_map = xsk_mmap(NULL, off.tx.desc +
+				  xsk->config.tx_size * sizeof(struct xdp_desc),
+				  PROT_READ | PROT_WRITE,
+				  MAP_SHARED | MAP_POPULATE,
+				  xsk->fd, XDP_PGOFF_TX_RING);
+		if (tx_map == MAP_FAILED) {
 			err = -errno;
 			goto out_mmap_rx;
 		}
 
 		tx->mask = xsk->config.tx_size - 1;
 		tx->size = xsk->config.tx_size;
-		tx->producer = map + off.tx.producer;
-		tx->consumer = map + off.tx.consumer;
-		tx->ring = map + off.tx.desc;
+		tx->producer = tx_map + off.tx.producer;
+		tx->consumer = tx_map + off.tx.consumer;
+		tx->ring = tx_map + off.tx.desc;
 		tx->cached_cons = xsk->config.tx_size;
 	}
 	xsk->tx = tx;
@@ -654,13 +653,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 
 out_mmap_tx:
 	if (tx)
-		munmap(xsk->tx,
-		       off.tx.desc +
+		munmap(tx_map, off.tx.desc +
 		       xsk->config.tx_size * sizeof(struct xdp_desc));
 out_mmap_rx:
 	if (rx)
-		munmap(xsk->rx,
-		       off.rx.desc +
+		munmap(rx_map, off.rx.desc +
 		       xsk->config.rx_size * sizeof(struct xdp_desc));
 out_socket:
 	if (--umem->refcount)
@@ -685,10 +682,12 @@ int xsk_umem__delete(struct xsk_umem *umem)
 	optlen = sizeof(off);
 	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 	if (!err) {
-		munmap(umem->fill->ring,
-		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
-		munmap(umem->comp->ring,
-		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
+		(void)munmap(umem->fill->ring - off.fr.desc,
+			     off.fr.desc +
+			     umem->config.fill_size * sizeof(__u64));
+		(void)munmap(umem->comp->ring - off.cr.desc,
+			     off.cr.desc +
+			     umem->config.comp_size * sizeof(__u64));
 	}
 
 	close(umem->fd);
@@ -699,6 +698,7 @@ int xsk_umem__delete(struct xsk_umem *umem)
 
 void xsk_socket__delete(struct xsk_socket *xsk)
 {
+	size_t desc_sz = sizeof(struct xdp_desc);
 	struct xdp_mmap_offsets off;
 	socklen_t optlen;
 	int err;
@@ -711,14 +711,17 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	optlen = sizeof(off);
 	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 	if (!err) {
-		if (xsk->rx)
-			munmap(xsk->rx->ring,
-			       off.rx.desc +
-			       xsk->config.rx_size * sizeof(struct xdp_desc));
-		if (xsk->tx)
-			munmap(xsk->tx->ring,
-			       off.tx.desc +
-			       xsk->config.tx_size * sizeof(struct xdp_desc));
+		if (xsk->rx) {
+			(void)munmap(xsk->rx->ring - off.rx.desc,
+				     off.rx.desc +
+				     xsk->config.rx_size * desc_sz);
+		}
+		if (xsk->tx) {
+			(void)munmap(xsk->tx->ring - off.tx.desc,
+				     off.tx.desc +
+				     xsk->config.tx_size * desc_sz);
+		}
+
 	}
 
 	xsk->umem->refcount--;
-- 
cgit v1.2.3


From 5750902a6e9bc6adb77da8257c0e34db2bfdebb2 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Tue, 30 Apr 2019 14:45:36 +0200
Subject: libbpf: proper XSKMAP cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bpf_map_update_elem() function, when used on an XSKMAP, will fail
if not a valid AF_XDP socket is passed as value. Therefore, this is
function cannot be used to clear the XSKMAP. Instead, the
bpf_map_delete_elem() function should be used for that.

This patch also simplifies the code by breaking up
xsk_update_bpf_maps() into three smaller functions.

Reported-by: William Tu <u9012063@gmail.com>
Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Tested-by: William Tu <u9012063@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/xsk.c | 115 +++++++++++++++++++++++++++-------------------------
 1 file changed, 60 insertions(+), 55 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 78b4a4bd4a25..525f1cc163b5 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -387,21 +387,17 @@ static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
 {
 	close(xsk->qidconf_map_fd);
 	close(xsk->xsks_map_fd);
+	xsk->qidconf_map_fd = -1;
+	xsk->xsks_map_fd = -1;
 }
 
-static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
-			       int xsks_value)
+static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 {
-	bool qidconf_map_updated = false, xsks_map_updated = false;
+	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
+	__u32 map_len = sizeof(struct bpf_map_info);
 	struct bpf_prog_info prog_info = {};
-	__u32 prog_len = sizeof(prog_info);
 	struct bpf_map_info map_info;
-	__u32 map_len = sizeof(map_info);
-	__u32 *map_ids;
-	int reset_value = 0;
-	__u32 num_maps;
-	unsigned int i;
-	int err;
+	int fd, err;
 
 	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
 	if (err)
@@ -422,66 +418,71 @@ static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
 		goto out_map_ids;
 
 	for (i = 0; i < prog_info.nr_map_ids; i++) {
-		int fd;
+		if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1)
+			break;
 
 		fd = bpf_map_get_fd_by_id(map_ids[i]);
-		if (fd < 0) {
-			err = -errno;
-			goto out_maps;
-		}
+		if (fd < 0)
+			continue;
 
 		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
-		if (err)
-			goto out_maps;
+		if (err) {
+			close(fd);
+			continue;
+		}
 
 		if (!strcmp(map_info.name, "qidconf_map")) {
-			err = bpf_map_update_elem(fd, &xsk->queue_id,
-						  &qidconf_value, 0);
-			if (err)
-				goto out_maps;
-			qidconf_map_updated = true;
 			xsk->qidconf_map_fd = fd;
-		} else if (!strcmp(map_info.name, "xsks_map")) {
-			err = bpf_map_update_elem(fd, &xsk->queue_id,
-						  &xsks_value, 0);
-			if (err)
-				goto out_maps;
-			xsks_map_updated = true;
+			continue;
+		}
+
+		if (!strcmp(map_info.name, "xsks_map")) {
 			xsk->xsks_map_fd = fd;
+			continue;
 		}
 
-		if (qidconf_map_updated && xsks_map_updated)
-			break;
+		close(fd);
 	}
 
-	if (!(qidconf_map_updated && xsks_map_updated)) {
+	err = 0;
+	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
 		err = -ENOENT;
-		goto out_maps;
+		xsk_delete_bpf_maps(xsk);
 	}
 
-	err = 0;
-	goto out_success;
-
-out_maps:
-	if (qidconf_map_updated)
-		(void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id,
-					  &reset_value, 0);
-	if (xsks_map_updated)
-		(void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
-					  &reset_value, 0);
-out_success:
-	if (qidconf_map_updated)
-		close(xsk->qidconf_map_fd);
-	if (xsks_map_updated)
-		close(xsk->xsks_map_fd);
 out_map_ids:
 	free(map_ids);
 	return err;
 }
 
+static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
+{
+	int qid = false;
+
+	(void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
+	(void)bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
+}
+
+static int xsk_set_bpf_maps(struct xsk_socket *xsk)
+{
+	int qid = true, fd = xsk->fd, err;
+
+	err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
+	if (err)
+		goto out;
+
+	err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0);
+	if (err)
+		goto out;
+
+	return 0;
+out:
+	xsk_clear_bpf_maps(xsk);
+	return err;
+}
+
 static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 {
-	bool prog_attached = false;
 	__u32 prog_id = 0;
 	int err;
 
@@ -491,7 +492,6 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 		return err;
 
 	if (!prog_id) {
-		prog_attached = true;
 		err = xsk_create_bpf_maps(xsk);
 		if (err)
 			return err;
@@ -501,20 +501,21 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 			goto out_maps;
 	} else {
 		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+		err = xsk_lookup_bpf_maps(xsk);
+		if (err)
+			goto out_load;
 	}
 
-	err = xsk_update_bpf_maps(xsk, true, xsk->fd);
+	err = xsk_set_bpf_maps(xsk);
 	if (err)
 		goto out_load;
 
 	return 0;
 
 out_load:
-	if (prog_attached)
-		close(xsk->prog_fd);
+	close(xsk->prog_fd);
 out_maps:
-	if (prog_attached)
-		xsk_delete_bpf_maps(xsk);
+	xsk_delete_bpf_maps(xsk);
 	return err;
 }
 
@@ -642,6 +643,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 		goto out_mmap_tx;
 	}
 
+	xsk->qidconf_map_fd = -1;
+	xsk->xsks_map_fd = -1;
+
 	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
 		err = xsk_setup_xdp_prog(xsk);
 		if (err)
@@ -706,7 +710,8 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	if (!xsk)
 		return;
 
-	(void)xsk_update_bpf_maps(xsk, 0, 0);
+	xsk_clear_bpf_maps(xsk);
+	xsk_delete_bpf_maps(xsk);
 
 	optlen = sizeof(off);
 	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
-- 
cgit v1.2.3


From a7d006714724de4334c5e3548701b33f7b12ca96 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 1 May 2019 22:45:59 +0900
Subject: bpftool: exclude bash-completion/bpftool from .gitignore pattern

tools/bpf/bpftool/.gitignore has the "bpftool" pattern, which is
intended to ignore the following build artifact:

  tools/bpf/bpftool/bpftool

However, the .gitignore entry is effective not only for the current
directory, but also for any sub-directories.

So, from the point of .gitignore grammar, the following check-in file
is also considered to be ignored:

  tools/bpf/bpftool/bash-completion/bpftool

As the manual gitignore(5) says "Files already tracked by Git are not
affected", this is not a problem as far as Git is concerned.

However, Git is not the only program that parses .gitignore because
.gitignore is useful to distinguish build artifacts from source files.

For example, tar(1) supports the --exclude-vcs-ignore option. As of
writing, this option does not work perfectly, but it intends to create
a tarball excluding files specified by .gitignore.

So, I believe it is better to fix this issue.

You can fix it by prefixing the pattern with a slash; the leading slash
means the specified pattern is relative to the current directory.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/.gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 67167e44b726..8248b8dd89d4 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,5 +1,5 @@
 *.d
-bpftool
+/bpftool
 bpftool*.8
 bpf-helpers.*
 FEATURE-DUMP.bpftool
-- 
cgit v1.2.3


From ca31ca8247e2d3807ff5fa1d1760616a2292001c Mon Sep 17 00:00:00 2001
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Date: Thu, 2 May 2019 08:56:50 -0700
Subject: tools/bpf: fix perf build error with uClibc (seen on ARC)

When build perf for ARC recently, there was a build failure due to lack
of __NR_bpf.

| Auto-detecting system features:
|
| ...                     get_cpuid: [ OFF ]
| ...                           bpf: [ on  ]
|
| #  error __NR_bpf not defined. libbpf does not support your arch.
    ^~~~~
| bpf.c: In function 'sys_bpf':
| bpf.c:66:17: error: '__NR_bpf' undeclared (first use in this function)
|  return syscall(__NR_bpf, cmd, attr, size);
|                 ^~~~~~~~
|                 sys_bpf

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 955191c64b64..c4a48086dc9a 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -46,6 +46,8 @@
 #  define __NR_bpf 349
 # elif defined(__s390__)
 #  define __NR_bpf 351
+# elif defined(__arc__)
+#  define __NR_bpf 280
 # else
 #  error __NR_bpf not defined. libbpf does not support your arch.
 # endif
-- 
cgit v1.2.3


From 7080da8909844602b650c8959ecd7814d2829ea5 Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Thu, 2 May 2019 11:33:38 -0700
Subject: libbpf: add libbpf_util.h to header install.

The libbpf_util.h is used by xsk.h, so add it to
the install headers.

Reported-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index c6c06bc6683c..f91639bf5650 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -230,6 +230,7 @@ install_headers:
 		$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
 		$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
 		$(call do_install,btf.h,$(prefix)/include/bpf,644); \
+		$(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \
 		$(call do_install,xsk.h,$(prefix)/include/bpf,644);
 
 install_pkgconfig: $(PC_FILE)
-- 
cgit v1.2.3


From ad11340994d55b103d2e4853d32782fd10cf687f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 5 May 2019 09:48:07 +0300
Subject: selftests: Add loopback test

Add selftest for loopback feature

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/loopback.sh | 94 ++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/loopback.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/net/forwarding/loopback.sh
new file mode 100755
index 000000000000..6e4626ae71b0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/loopback.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="loopback_test"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+	tc qdisc add dev $h1 clsact
+}
+
+h1_destroy()
+{
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2
+}
+
+loopback_test()
+{
+	RET=0
+
+	tc filter add dev $h1 ingress protocol arp pref 1 handle 101 flower \
+		skip_hw arp_op reply arp_tip 192.0.2.1 action drop
+
+	$MZ $h1 -c 1 -t arp -q
+
+	tc_check_packets "dev $h1 ingress" 101 1
+	check_fail $? "Matched on a filter without loopback setup"
+
+	ethtool -K $h1 loopback on
+	check_err $? "Failed to enable loopback"
+
+	setup_wait_dev $h1
+
+	$MZ $h1 -c 1 -t arp -q
+
+	tc_check_packets "dev $h1 ingress" 101 1
+	check_err $? "Did not match on filter with loopback"
+
+	ethtool -K $h1 loopback off
+	check_err $? "Failed to disable loopback"
+
+	$MZ $h1 -c 1 -t arp -q
+
+	tc_check_packets "dev $h1 ingress" 101 2
+	check_fail $? "Matched on a filter after loopback was removed"
+
+	tc filter del dev $h1 ingress protocol arp pref 1 handle 101 flower
+
+	log_test "loopback"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From d24ed99b3b270c6de8f47c25d709b5f6ef7d3807 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Mon, 6 May 2019 11:24:43 +0200
Subject: libbpf: remove unnecessary cast-to-void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The patches with fixes tags added a cast-to-void in the places when
the return value of a function was ignored.

This is not common practice in the kernel, and is therefore removed in
this patch.

Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Fixes: 5750902a6e9b ("libbpf: proper XSKMAP cleanup")
Fixes: 0e6741f09297 ("libbpf: fix invalid munmap call")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/xsk.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 525f1cc163b5..a3d1a302bc9c 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -459,8 +459,8 @@ static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
 {
 	int qid = false;
 
-	(void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
-	(void)bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
+	bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
+	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
 }
 
 static int xsk_set_bpf_maps(struct xsk_socket *xsk)
@@ -686,12 +686,10 @@ int xsk_umem__delete(struct xsk_umem *umem)
 	optlen = sizeof(off);
 	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 	if (!err) {
-		(void)munmap(umem->fill->ring - off.fr.desc,
-			     off.fr.desc +
-			     umem->config.fill_size * sizeof(__u64));
-		(void)munmap(umem->comp->ring - off.cr.desc,
-			     off.cr.desc +
-			     umem->config.comp_size * sizeof(__u64));
+		munmap(umem->fill->ring - off.fr.desc,
+		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
+		munmap(umem->comp->ring - off.cr.desc,
+		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
 	}
 
 	close(umem->fd);
@@ -717,14 +715,12 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 	if (!err) {
 		if (xsk->rx) {
-			(void)munmap(xsk->rx->ring - off.rx.desc,
-				     off.rx.desc +
-				     xsk->config.rx_size * desc_sz);
+			munmap(xsk->rx->ring - off.rx.desc,
+			       off.rx.desc + xsk->config.rx_size * desc_sz);
 		}
 		if (xsk->tx) {
-			(void)munmap(xsk->tx->ring - off.tx.desc,
-				     off.tx.desc +
-				     xsk->config.tx_size * desc_sz);
+			munmap(xsk->tx->ring - off.tx.desc,
+			       off.tx.desc + xsk->config.tx_size * desc_sz);
 		}
 
 	}
-- 
cgit v1.2.3