From 52875a04f4b26e7ef30a288ea096f7cfec0e93cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:20 -0800 Subject: bpf: verifier: remove dead code Instead of overwriting dead code with jmp -1 instructions remove it completely for root. Adjust verifier state and line info appropriately. v2: - adjust func_info (Alexei); - make sure first instruction retains line info (Alexei). v4: (Yonghong) - remove unnecessary if (!insn to remove) checks; - always keep last line info if first live instruction lacks one. v5: (Martin Lau) - improve and clarify comments. Signed-off-by: Jakub Kicinski Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index ad106d845b22..be9af6b4a9e4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -778,6 +778,7 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); void bpf_clear_redirect_map(struct bpf_map *map); -- cgit v1.2.3 From 9e4c24e7ee7dfd3898269519103e823892b730d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:23 -0800 Subject: bpf: verifier: record original instruction index The communication between the verifier and advanced JITs is based on instruction indexes. We have to keep them stable throughout the optimizations otherwise referring to a particular instruction gets messy quickly. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 573cca00a0e6..f3ae00ee5516 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -187,6 +187,7 @@ struct bpf_insn_aux_data { int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ u8 alu_state; /* used in combination with alu_limit */ + unsigned int orig_idx; /* original instruction index */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f39bca188a5c..f2c49b4235df 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7371,7 +7371,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, { struct bpf_verifier_env *env; struct bpf_verifier_log *log; - int ret = -EINVAL; + int i, len, ret = -EINVAL; bool is_priv; /* no program is valid */ @@ -7386,12 +7386,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, return -ENOMEM; log = &env->log; + len = (*prog)->len; env->insn_aux_data = - vzalloc(array_size(sizeof(struct bpf_insn_aux_data), - (*prog)->len)); + vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); ret = -ENOMEM; if (!env->insn_aux_data) goto err_free_env; + for (i = 0; i < len; i++) + env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; -- cgit v1.2.3 From 08ca90afba255d05dc3253caa44056e7aecbe8c5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:24 -0800 Subject: bpf: notify offload JITs about optimizations Let offload JITs know when instructions are replaced and optimized out, so they can update their state appropriately. The optimizations are best effort, if JIT returns an error from any callback verifier will stop notifying it as state may now be out of sync, but the verifier continues making progress. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 +++++++ include/linux/bpf_verifier.h | 5 +++++ kernel/bpf/offload.c | 35 +++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 6 ++++++ 4 files changed, 53 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e734f163bd0b..3851529062ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -268,9 +268,15 @@ struct bpf_verifier_ops { }; struct bpf_prog_offload_ops { + /* verifier basic callbacks */ int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); + /* verifier optimization callbacks (called after .finalize) */ + int (*replace_insn)(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn); + int (*remove_insns)(struct bpf_verifier_env *env, u32 off, u32 cnt); + /* program management callbacks */ int (*prepare)(struct bpf_prog *prog); int (*translate)(struct bpf_prog *prog); void (*destroy)(struct bpf_prog *prog); @@ -283,6 +289,7 @@ struct bpf_prog_offload { void *dev_priv; struct list_head offloads; bool dev_state; + bool opt_failed; void *jited_image; u32 jited_len; }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f3ae00ee5516..0620e418dde5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -266,5 +266,10 @@ int bpf_prog_offload_verifier_prep(struct bpf_prog *prog); int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int bpf_prog_offload_finalize(struct bpf_verifier_env *env); +void +bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn); +void +bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 54cf2b9c44a4..39dba8c90331 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -173,6 +173,41 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env) return ret; } +void +bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn) +{ + const struct bpf_prog_offload_ops *ops; + struct bpf_prog_offload *offload; + int ret = -EOPNOTSUPP; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + ops = offload->offdev->ops; + if (!offload->opt_failed && ops->replace_insn) + ret = ops->replace_insn(env, off, insn); + offload->opt_failed |= ret; + } + up_read(&bpf_devs_lock); +} + +void +bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct bpf_prog_offload *offload; + int ret = -EOPNOTSUPP; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + if (!offload->opt_failed && offload->offdev->ops->remove_insns) + ret = offload->offdev->ops->remove_insns(env, off, cnt); + offload->opt_failed |= ret; + } + up_read(&bpf_devs_lock); +} + static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_prog_offload *offload = prog->aux->offload; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f2c49b4235df..8cfe39ef770f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6558,6 +6558,9 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) unsigned int orig_prog_len = env->prog->len; int err; + if (bpf_prog_is_dev_bound(env->prog->aux)) + bpf_prog_offload_remove_insns(env, off, cnt); + err = bpf_remove_insns(env->prog, off, cnt); if (err) return err; @@ -6632,6 +6635,9 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) else continue; + if (bpf_prog_is_dev_bound(env->prog->aux)) + bpf_prog_offload_replace_insn(env, i, &ja); + memcpy(insn, &ja, sizeof(ja)); } } -- cgit v1.2.3 From d9ff286a0f59fa7843549e49bd240393dd7d8b87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Jan 2019 09:22:27 -0800 Subject: bpf: allow BPF programs access skb_shared_info->gso_segs field This adds the ability to read gso_segs from a BPF program. v3: Use BPF_REG_AX instead of BPF_REG_TMP for the temporary register, as suggested by Martin. v2: refined Eddie Hao patch to address Alexei feedback. Signed-off-by: Eric Dumazet Cc: Eddie Hao Cc: Martin KaFai Lau Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 21 +++++++++++++++++ tools/include/uapi/linux/bpf.h | 1 + tools/testing/selftests/bpf/test_verifier.c | 36 +++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 91c43884f295..2940a9854f6d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2540,6 +2540,7 @@ struct __sk_buff { __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); __u64 tstamp; __u32 wire_len; + __u32 gso_segs; }; struct bpf_tunnel_key { diff --git a/net/core/filter.c b/net/core/filter.c index 2b3b436ef545..8e587dd1da20 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6700,6 +6700,27 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, target_size)); break; + case offsetof(struct __sk_buff, gso_segs): + /* si->dst_reg = skb_shinfo(SKB); */ +#ifdef NET_SKBUFF_DATA_USES_OFFSET + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, head)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, end)); + *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); +#else + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, end)); +#endif + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), + si->dst_reg, si->dst_reg, + bpf_target_off(struct skb_shared_info, + gso_segs, 2, + target_size)); + break; case offsetof(struct __sk_buff, wire_len): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 91c43884f295..2940a9854f6d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2540,6 +2540,7 @@ struct __sk_buff { __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); __u64 tstamp; __u32 wire_len; + __u32 gso_segs; }; struct bpf_tunnel_key { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 4f67afeec8d9..e4fef6ca8071 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -5663,6 +5663,42 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, + { + "read gso_segs from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, gso_segs)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "write gso_segs from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, gso_segs)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .result_unpriv = REJECT, + .errstr = "invalid bpf_context access off=164 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "read gso_segs from CLS", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, gso_segs)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "multiple registers share map_lookup_elem result", .insns = { -- cgit v1.2.3 From 1d0dc06930a917eaca4156193c6c49f798b95ce7 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Thu, 24 Jan 2019 19:59:37 +0100 Subject: net: xsk: track AF_XDP sockets on a per-netns list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track each AF_XDP socket in a per-netns list. This will be used later by the sock_diag interface for querying sockets from userspace. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- include/net/net_namespace.h | 4 ++++ include/net/netns/xdp.h | 13 +++++++++++++ net/xdp/xsk.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 include/net/netns/xdp.h (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 99d4148e0f90..a68ced28d8f4 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -160,6 +161,9 @@ struct net { #endif #if IS_ENABLED(CONFIG_CAN) struct netns_can can; +#endif +#ifdef CONFIG_XDP_SOCKETS + struct netns_xdp xdp; #endif struct sock *diag_nlsk; atomic_t fnhe_genid; diff --git a/include/net/netns/xdp.h b/include/net/netns/xdp.h new file mode 100644 index 000000000000..e5734261ba0a --- /dev/null +++ b/include/net/netns/xdp.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NETNS_XDP_H__ +#define __NETNS_XDP_H__ + +#include +#include + +struct netns_xdp { + struct mutex lock; + struct hlist_head list; +}; + +#endif /* __NETNS_XDP_H__ */ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index a03268454a27..80ca48cefc42 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -350,6 +350,10 @@ static int xsk_release(struct socket *sock) net = sock_net(sk); + mutex_lock(&net->xdp.lock); + sk_del_node_init_rcu(sk); + mutex_unlock(&net->xdp.lock); + local_bh_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); local_bh_enable(); @@ -746,6 +750,10 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, mutex_init(&xs->mutex); spin_lock_init(&xs->tx_completion_lock); + mutex_lock(&net->xdp.lock); + sk_add_node_rcu(sk, &net->xdp.list); + mutex_unlock(&net->xdp.lock); + local_bh_disable(); sock_prot_inuse_add(net, &xsk_proto, 1); local_bh_enable(); @@ -759,6 +767,23 @@ static const struct net_proto_family xsk_family_ops = { .owner = THIS_MODULE, }; +static int __net_init xsk_net_init(struct net *net) +{ + mutex_init(&net->xdp.lock); + INIT_HLIST_HEAD(&net->xdp.list); + return 0; +} + +static void __net_exit xsk_net_exit(struct net *net) +{ + WARN_ON_ONCE(!hlist_empty(&net->xdp.list)); +} + +static struct pernet_operations xsk_net_ops = { + .init = xsk_net_init, + .exit = xsk_net_exit, +}; + static int __init xsk_init(void) { int err; @@ -771,8 +796,13 @@ static int __init xsk_init(void) if (err) goto out_proto; + err = register_pernet_subsys(&xsk_net_ops); + if (err) + goto out_sk; return 0; +out_sk: + sock_unregister(PF_XDP); out_proto: proto_unregister(&xsk_proto); out: -- cgit v1.2.3 From 50e74c0131a5b3a3e387798a5705158c04fb3bd0 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Thu, 24 Jan 2019 19:59:38 +0100 Subject: xsk: add id to umem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds an id to the umem structure. The id uniquely identifies a umem instance, and will be exposed to user-space via the socket monitoring interface. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- include/net/xdp_sock.h | 1 + net/xdp/xdp_umem.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 13acb9803a6d..61cf7dbb6782 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -42,6 +42,7 @@ struct xdp_umem { struct work_struct work; struct page **pgs; u32 npgs; + int id; struct net_device *dev; struct xdp_umem_fq_reuse *fq_reuse; u16 queue_id; diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index a264cf2accd0..eabdb0f59031 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -13,12 +13,15 @@ #include #include #include +#include #include "xdp_umem.h" #include "xsk_queue.h" #define XDP_UMEM_MIN_CHUNK_SIZE 2048 +static DEFINE_IDA(umem_ida); + void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) { unsigned long flags; @@ -183,6 +186,8 @@ static void xdp_umem_release(struct xdp_umem *umem) xdp_umem_clear_dev(umem); + ida_simple_remove(&umem_ida, umem->id); + if (umem->fq) { xskq_destroy(umem->fq); umem->fq = NULL; @@ -389,8 +394,16 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) if (!umem) return ERR_PTR(-ENOMEM); + err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL); + if (err < 0) { + kfree(umem); + return ERR_PTR(err); + } + umem->id = err; + err = xdp_umem_reg(umem, mr); if (err) { + ida_simple_remove(&umem_ida, umem->id); kfree(umem); return ERR_PTR(err); } -- cgit v1.2.3 From a36b38aa2af61146ea80980a01cf6e952ab021c1 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Thu, 24 Jan 2019 19:59:39 +0100 Subject: xsk: add sock_diag interface for AF_XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the sock_diag interface for querying sockets from user space. Tools like iproute2 ss(8) can use this interface to list open AF_XDP sockets. The user-space ABI is defined in linux/xdp_diag.h and includes netlink request and response structs. The request can query sockets and the response contains socket information about the rings, umems, inode and more. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- include/uapi/linux/xdp_diag.h | 72 ++++++++++++++++ net/xdp/Kconfig | 8 ++ net/xdp/Makefile | 1 + net/xdp/xsk.c | 6 +- net/xdp/xsk.h | 12 +++ net/xdp/xsk_diag.c | 191 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 285 insertions(+), 5 deletions(-) create mode 100644 include/uapi/linux/xdp_diag.h create mode 100644 net/xdp/xsk.h create mode 100644 net/xdp/xsk_diag.c (limited to 'include') diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h new file mode 100644 index 000000000000..78b2591a7782 --- /dev/null +++ b/include/uapi/linux/xdp_diag.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * xdp_diag: interface for query/monitor XDP sockets + * Copyright(c) 2019 Intel Corporation. + */ + +#ifndef _LINUX_XDP_DIAG_H +#define _LINUX_XDP_DIAG_H + +#include + +struct xdp_diag_req { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u16 pad; + __u32 xdiag_ino; + __u32 xdiag_show; + __u32 xdiag_cookie[2]; +}; + +struct xdp_diag_msg { + __u8 xdiag_family; + __u8 xdiag_type; + __u16 pad; + __u32 xdiag_ino; + __u32 xdiag_cookie[2]; +}; + +#define XDP_SHOW_INFO (1 << 0) /* Basic information */ +#define XDP_SHOW_RING_CFG (1 << 1) +#define XDP_SHOW_UMEM (1 << 2) +#define XDP_SHOW_MEMINFO (1 << 3) + +enum { + XDP_DIAG_NONE, + XDP_DIAG_INFO, + XDP_DIAG_UID, + XDP_DIAG_RX_RING, + XDP_DIAG_TX_RING, + XDP_DIAG_UMEM, + XDP_DIAG_UMEM_FILL_RING, + XDP_DIAG_UMEM_COMPLETION_RING, + XDP_DIAG_MEMINFO, + __XDP_DIAG_MAX, +}; + +#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1) + +struct xdp_diag_info { + __u32 ifindex; + __u32 queue_id; +}; + +struct xdp_diag_ring { + __u32 entries; /*num descs */ +}; + +#define XDP_DU_F_ZEROCOPY (1 << 0) + +struct xdp_diag_umem { + __u64 size; + __u32 id; + __u32 num_pages; + __u32 chunk_size; + __u32 headroom; + __u32 ifindex; + __u32 queue_id; + __u32 flags; + __u32 refs; +}; + +#endif /* _LINUX_XDP_DIAG_H */ diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig index 90e4a7152854..0255b33cff4b 100644 --- a/net/xdp/Kconfig +++ b/net/xdp/Kconfig @@ -5,3 +5,11 @@ config XDP_SOCKETS help XDP sockets allows a channel between XDP programs and userspace applications. + +config XDP_SOCKETS_DIAG + tristate "XDP sockets: monitoring interface" + depends on XDP_SOCKETS + default n + help + Support for PF_XDP sockets monitoring interface used by the ss tool. + If unsure, say Y. diff --git a/net/xdp/Makefile b/net/xdp/Makefile index 04f073146256..59dbfdf93dca 100644 --- a/net/xdp/Makefile +++ b/net/xdp/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o +obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 80ca48cefc42..949d3bbccb2f 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -27,14 +27,10 @@ #include "xsk_queue.h" #include "xdp_umem.h" +#include "xsk.h" #define TX_BATCH_SIZE 16 -static struct xdp_sock *xdp_sk(struct sock *sk) -{ - return (struct xdp_sock *)sk; -} - bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) { return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) && diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h new file mode 100644 index 000000000000..ba8120610426 --- /dev/null +++ b/net/xdp/xsk.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2019 Intel Corporation. */ + +#ifndef XSK_H_ +#define XSK_H_ + +static inline struct xdp_sock *xdp_sk(struct sock *sk) +{ + return (struct xdp_sock *)sk; +} + +#endif /* XSK_H_ */ diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c new file mode 100644 index 000000000000..661d007c3b28 --- /dev/null +++ b/net/xdp/xsk_diag.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 +/* XDP sockets monitoring support + * + * Copyright(c) 2019 Intel Corporation. + * + * Author: Björn Töpel + */ + +#include +#include +#include +#include + +#include "xsk_queue.h" +#include "xsk.h" + +static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb) +{ + struct xdp_diag_info di = {}; + + di.ifindex = xs->dev ? xs->dev->ifindex : 0; + di.queue_id = xs->queue_id; + return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di); +} + +static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type, + struct sk_buff *nlskb) +{ + struct xdp_diag_ring dr = {}; + + dr.entries = queue->nentries; + return nla_put(nlskb, nl_type, sizeof(dr), &dr); +} + +static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs, + struct sk_buff *nlskb) +{ + int err = 0; + + if (xs->rx) + err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb); + if (!err && xs->tx) + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb); + return err; +} + +static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) +{ + struct xdp_umem *umem = xs->umem; + struct xdp_diag_umem du = {}; + int err; + + if (!umem) + return 0; + + du.id = umem->id; + du.size = umem->size; + du.num_pages = umem->npgs; + du.chunk_size = (__u32)(~umem->chunk_mask + 1); + du.headroom = umem->headroom; + du.ifindex = umem->dev ? umem->dev->ifindex : 0; + du.queue_id = umem->queue_id; + du.flags = 0; + if (umem->zc) + du.flags |= XDP_DU_F_ZEROCOPY; + du.refs = refcount_read(&umem->users); + + err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); + + if (!err && umem->fq) + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb); + if (!err && umem->cq) { + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING, + nlskb); + } + return err; +} + +static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, + struct xdp_diag_req *req, + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags, int sk_ino) +{ + struct xdp_sock *xs = xdp_sk(sk); + struct xdp_diag_msg *msg; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg), + flags); + if (!nlh) + return -EMSGSIZE; + + msg = nlmsg_data(nlh); + memset(msg, 0, sizeof(*msg)); + msg->xdiag_family = AF_XDP; + msg->xdiag_type = sk->sk_type; + msg->xdiag_ino = sk_ino; + sock_diag_save_cookie(sk, msg->xdiag_cookie); + + if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb)) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_INFO) && + nla_put_u32(nlskb, XDP_DIAG_UID, + from_kuid_munged(user_ns, sock_i_uid(sk)))) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_RING_CFG) && + xsk_diag_put_rings_cfg(xs, nlskb)) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_UMEM) && + xsk_diag_put_umem(xs, nlskb)) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_MEMINFO) && + sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) + goto out_nlmsg_trim; + + nlmsg_end(nlskb, nlh); + return 0; + +out_nlmsg_trim: + nlmsg_cancel(nlskb, nlh); + return -EMSGSIZE; +} + +static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb) +{ + struct xdp_diag_req *req = nlmsg_data(cb->nlh); + struct net *net = sock_net(nlskb->sk); + int num = 0, s_num = cb->args[0]; + struct sock *sk; + + mutex_lock(&net->xdp.lock); + + sk_for_each(sk, &net->xdp.list) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num++ < s_num) + continue; + + if (xsk_diag_fill(sk, nlskb, req, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + num--; + break; + } + } + + mutex_unlock(&net->xdp.lock); + cb->args[0] = num; + return nlskb->len; +} + +static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr) +{ + struct netlink_dump_control c = { .dump = xsk_diag_dump }; + int hdrlen = sizeof(struct xdp_diag_req); + struct net *net = sock_net(nlskb->sk); + + if (nlmsg_len(hdr) < hdrlen) + return -EINVAL; + + if (!(hdr->nlmsg_flags & NLM_F_DUMP)) + return -EOPNOTSUPP; + + return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c); +} + +static const struct sock_diag_handler xsk_diag_handler = { + .family = AF_XDP, + .dump = xsk_diag_handler_dump, +}; + +static int __init xsk_diag_init(void) +{ + return sock_diag_register(&xsk_diag_handler); +} + +static void __exit xsk_diag_exit(void) +{ + sock_diag_unregister(&xsk_diag_handler); +} + +module_init(xsk_diag_init); +module_exit(xsk_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP); -- cgit v1.2.3 From d405c7407a5468d4fc11724d76063e0647d80106 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:25:59 -0500 Subject: bpf: allocate 0x06 to new eBPF instruction class JMP32 The new eBPF instruction class JMP32 uses the reserved class number 0x6. Kernel BPF ISA documentation updated accordingly. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- Documentation/networking/filter.txt | 15 ++++++++------- include/uapi/linux/bpf.h | 1 + tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 2196b824e96c..01603bc2eff1 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -865,7 +865,7 @@ Three LSB bits store instruction class which is one of: BPF_STX 0x03 BPF_STX 0x03 BPF_ALU 0x04 BPF_ALU 0x04 BPF_JMP 0x05 BPF_JMP 0x05 - BPF_RET 0x06 [ class 6 unused, for future if needed ] + BPF_RET 0x06 BPF_JMP32 0x06 BPF_MISC 0x07 BPF_ALU64 0x07 When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ... @@ -902,9 +902,9 @@ If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of: BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */ BPF_END 0xd0 /* eBPF only: endianness conversion */ -If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of: +If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of: - BPF_JA 0x00 + BPF_JA 0x00 /* BPF_JMP only */ BPF_JEQ 0x10 BPF_JGT 0x20 BPF_JGE 0x30 @@ -912,8 +912,8 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of: BPF_JNE 0x50 /* eBPF only: jump != */ BPF_JSGT 0x60 /* eBPF only: signed '>' */ BPF_JSGE 0x70 /* eBPF only: signed '>=' */ - BPF_CALL 0x80 /* eBPF only: function call */ - BPF_EXIT 0x90 /* eBPF only: function return */ + BPF_CALL 0x80 /* eBPF BPF_JMP only: function call */ + BPF_EXIT 0x90 /* eBPF BPF_JMP only: function return */ BPF_JLT 0xa0 /* eBPF only: unsigned '<' */ BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */ BPF_JSLT 0xc0 /* eBPF only: signed '<' */ @@ -936,8 +936,9 @@ Classic BPF wastes the whole BPF_RET class to represent a single 'ret' operation. Classic BPF_RET | BPF_K means copy imm32 into return register and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT in eBPF means function exit only. The eBPF program needs to store return -value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is currently -unused and reserved for future use. +value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as +BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide +operands for the comparisons instead. For load and store instructions the 8-bit 'code' field is divided as: diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2940a9854f6d..60b99b730a41 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -14,6 +14,7 @@ /* Extended instruction set based on top of classic BPF */ /* instruction classes */ +#define BPF_JMP32 0x06 /* jmp mode in word width */ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2940a9854f6d..60b99b730a41 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -14,6 +14,7 @@ /* Extended instruction set based on top of classic BPF */ /* instruction classes */ +#define BPF_JMP32 0x06 /* jmp mode in word width */ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -- cgit v1.2.3 From a7b76c8857692b0fce063b94ed83da11c396d341 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:26:05 -0500 Subject: bpf: JIT blinds support JMP32 This patch adds JIT blinds support for JMP32. Like BPF_JMP_REG/IMM, JMP32 version are needed for building raw bpf insn. They are added to both include/linux/filter.h and tools/include/linux/filter.h. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 20 ++++++++++++++++++++ kernel/bpf/core.c | 21 +++++++++++++++++++++ tools/include/linux/filter.h | 20 ++++++++++++++++++++ 3 files changed, 61 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index be9af6b4a9e4..e4b473f85b46 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -277,6 +277,26 @@ struct sock_reuseport; .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bba11c2565ee..a7bcb23bee84 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -949,6 +949,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); break; + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + /* Accommodate for extra offset in case of a backjump. */ + off = from->off; + if (off < 0) + off -= 2; + *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX, + off); + break; + case BPF_LD | BPF_IMM | BPF_DW: *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index af55acf73e75..cce0b02c0e28 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -199,6 +199,16 @@ .off = OFF, \ .imm = 0 }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ @@ -209,6 +219,16 @@ .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ -- cgit v1.2.3 From c8aa703822bf811269975cf7251b5eaad4c38e9c Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 28 Jan 2019 08:53:53 -0800 Subject: net/flow_dissector: move bpf case into __skb_flow_bpf_dissect This way, we can reuse it for flow dissector in BPF_PROG_TEST_RUN. No functional changes. Signed-off-by: Stanislav Fomichev Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/skbuff.h | 5 +++ net/core/flow_dissector.c | 92 +++++++++++++++++++++++++++-------------------- 2 files changed, 59 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 93f56fddd92a..be762fc34ff3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1221,6 +1221,11 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) } #endif +struct bpf_flow_keys; +bool __skb_flow_bpf_dissect(struct bpf_prog *prog, + const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + struct bpf_flow_keys *flow_keys); bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9f2840510e63..bb1a54747d64 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -683,6 +683,46 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, } } +bool __skb_flow_bpf_dissect(struct bpf_prog *prog, + const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + struct bpf_flow_keys *flow_keys) +{ + struct bpf_skb_data_end cb_saved; + struct bpf_skb_data_end *cb; + u32 result; + + /* Note that even though the const qualifier is discarded + * throughout the execution of the BPF program, all changes(the + * control block) are reverted after the BPF program returns. + * Therefore, __skb_flow_dissect does not alter the skb. + */ + + cb = (struct bpf_skb_data_end *)skb->cb; + + /* Save Control Block */ + memcpy(&cb_saved, cb, sizeof(cb_saved)); + memset(cb, 0, sizeof(*cb)); + + /* Pass parameters to the BPF program */ + memset(flow_keys, 0, sizeof(*flow_keys)); + cb->qdisc_cb.flow_keys = flow_keys; + flow_keys->nhoff = skb_network_offset(skb); + flow_keys->thoff = flow_keys->nhoff; + + bpf_compute_data_pointers((struct sk_buff *)skb); + result = BPF_PROG_RUN(prog, skb); + + /* Restore state */ + memcpy(cb, &cb_saved, sizeof(cb_saved)); + + flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len); + flow_keys->thoff = clamp_t(u16, flow_keys->thoff, + flow_keys->nhoff, skb->len); + + return result == BPF_OK; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified @@ -714,7 +754,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_vlan *key_vlan; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; - struct bpf_prog *attached = NULL; int num_hdrs = 0; u8 ip_proto = 0; bool ret; @@ -754,53 +793,30 @@ bool __skb_flow_dissect(const struct sk_buff *skb, FLOW_DISSECTOR_KEY_BASIC, target_container); - rcu_read_lock(); if (skb) { + struct bpf_flow_keys flow_keys; + struct bpf_prog *attached = NULL; + + rcu_read_lock(); + if (skb->dev) attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog); else if (skb->sk) attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog); else WARN_ON_ONCE(1); - } - if (attached) { - /* Note that even though the const qualifier is discarded - * throughout the execution of the BPF program, all changes(the - * control block) are reverted after the BPF program returns. - * Therefore, __skb_flow_dissect does not alter the skb. - */ - struct bpf_flow_keys flow_keys = {}; - struct bpf_skb_data_end cb_saved; - struct bpf_skb_data_end *cb; - u32 result; - - cb = (struct bpf_skb_data_end *)skb->cb; - - /* Save Control Block */ - memcpy(&cb_saved, cb, sizeof(cb_saved)); - memset(cb, 0, sizeof(cb_saved)); - /* Pass parameters to the BPF program */ - cb->qdisc_cb.flow_keys = &flow_keys; - flow_keys.nhoff = nhoff; - flow_keys.thoff = nhoff; - - bpf_compute_data_pointers((struct sk_buff *)skb); - result = BPF_PROG_RUN(attached, skb); - - /* Restore state */ - memcpy(cb, &cb_saved, sizeof(cb_saved)); - - flow_keys.nhoff = clamp_t(u16, flow_keys.nhoff, 0, skb->len); - flow_keys.thoff = clamp_t(u16, flow_keys.thoff, - flow_keys.nhoff, skb->len); - - __skb_flow_bpf_to_target(&flow_keys, flow_dissector, - target_container); + if (attached) { + ret = __skb_flow_bpf_dissect(attached, skb, + flow_dissector, + &flow_keys); + __skb_flow_bpf_to_target(&flow_keys, flow_dissector, + target_container); + rcu_read_unlock(); + return ret; + } rcu_read_unlock(); - return result == BPF_OK; } - rcu_read_unlock(); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { -- cgit v1.2.3 From b7a1848e8398b8396c990279e6a10272d818577e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 28 Jan 2019 08:53:54 -0800 Subject: bpf: add BPF_PROG_TEST_RUN support for flow dissector The input is packet data, the output is struct bpf_flow_key. This should make it easy to test flow dissector programs without elaborate setup. Signed-off-by: Stanislav Fomichev Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 3 ++ net/bpf/test_run.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++ net/core/filter.c | 1 + 3 files changed, 86 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3851529062ec..0394f1f9213b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -404,6 +404,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); /* an array of programs to be executed under rcu_lock. * diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index fa2644d276ef..2c5172b33209 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -240,3 +240,85 @@ out: kfree(data); return ret; } + +int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + struct bpf_flow_keys flow_keys; + u64 time_start, time_spent = 0; + struct bpf_skb_data_end *cb; + u32 retval, duration; + struct sk_buff *skb; + struct sock *sk; + void *data; + int ret; + u32 i; + + if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) + return -EINVAL; + + data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (IS_ERR(data)) + return PTR_ERR(data); + + sk = kzalloc(sizeof(*sk), GFP_USER); + if (!sk) { + kfree(data); + return -ENOMEM; + } + sock_net_set(sk, current->nsproxy->net_ns); + sock_init_data(NULL, sk); + + skb = build_skb(data, 0); + if (!skb) { + kfree(data); + kfree(sk); + return -ENOMEM; + } + skb->sk = sk; + + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); + __skb_put(skb, size); + skb->protocol = eth_type_trans(skb, + current->nsproxy->net_ns->loopback_dev); + skb_reset_network_header(skb); + + cb = (struct bpf_skb_data_end *)skb->cb; + cb->qdisc_cb.flow_keys = &flow_keys; + + if (!repeat) + repeat = 1; + + time_start = ktime_get_ns(); + for (i = 0; i < repeat; i++) { + preempt_disable(); + rcu_read_lock(); + retval = __skb_flow_bpf_dissect(prog, skb, + &flow_keys_dissector, + &flow_keys); + rcu_read_unlock(); + preempt_enable(); + + if (need_resched()) { + if (signal_pending(current)) + break; + time_spent += ktime_get_ns() - time_start; + cond_resched(); + time_start = ktime_get_ns(); + } + } + time_spent += ktime_get_ns() - time_start; + do_div(time_spent, repeat); + duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + + ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), + retval, duration); + + kfree_skb(skb); + kfree(sk); + return ret; +} diff --git a/net/core/filter.c b/net/core/filter.c index 8e587dd1da20..8ce421796ac6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7711,6 +7711,7 @@ const struct bpf_verifier_ops flow_dissector_verifier_ops = { }; const struct bpf_prog_ops flow_dissector_prog_ops = { + .test_run = bpf_prog_test_run_flow_dissector, }; int sk_detach_filter(struct sock *sk) -- cgit v1.2.3