summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bpf/test_run.c82
-rw-r--r--net/core/filter.c22
-rw-r--r--net/core/flow_dissector.c92
-rw-r--r--net/xdp/Kconfig8
-rw-r--r--net/xdp/Makefile1
-rw-r--r--net/xdp/xdp_umem.c13
-rw-r--r--net/xdp/xsk.c36
-rw-r--r--net/xdp/xsk.h12
-rw-r--r--net/xdp/xsk_diag.c191
9 files changed, 414 insertions, 43 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index fa2644d276ef..2c5172b33209 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -240,3 +240,85 @@ out:
kfree(data);
return ret;
}
+
+int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ u32 size = kattr->test.data_size_in;
+ u32 repeat = kattr->test.repeat;
+ struct bpf_flow_keys flow_keys;
+ u64 time_start, time_spent = 0;
+ struct bpf_skb_data_end *cb;
+ u32 retval, duration;
+ struct sk_buff *skb;
+ struct sock *sk;
+ void *data;
+ int ret;
+ u32 i;
+
+ if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
+ return -EINVAL;
+
+ data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ sk = kzalloc(sizeof(*sk), GFP_USER);
+ if (!sk) {
+ kfree(data);
+ return -ENOMEM;
+ }
+ sock_net_set(sk, current->nsproxy->net_ns);
+ sock_init_data(NULL, sk);
+
+ skb = build_skb(data, 0);
+ if (!skb) {
+ kfree(data);
+ kfree(sk);
+ return -ENOMEM;
+ }
+ skb->sk = sk;
+
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+ __skb_put(skb, size);
+ skb->protocol = eth_type_trans(skb,
+ current->nsproxy->net_ns->loopback_dev);
+ skb_reset_network_header(skb);
+
+ cb = (struct bpf_skb_data_end *)skb->cb;
+ cb->qdisc_cb.flow_keys = &flow_keys;
+
+ if (!repeat)
+ repeat = 1;
+
+ time_start = ktime_get_ns();
+ for (i = 0; i < repeat; i++) {
+ preempt_disable();
+ rcu_read_lock();
+ retval = __skb_flow_bpf_dissect(prog, skb,
+ &flow_keys_dissector,
+ &flow_keys);
+ rcu_read_unlock();
+ preempt_enable();
+
+ if (need_resched()) {
+ if (signal_pending(current))
+ break;
+ time_spent += ktime_get_ns() - time_start;
+ cond_resched();
+ time_start = ktime_get_ns();
+ }
+ }
+ time_spent += ktime_get_ns() - time_start;
+ do_div(time_spent, repeat);
+ duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+
+ ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
+ retval, duration);
+
+ kfree_skb(skb);
+ kfree(sk);
+ return ret;
+}
diff --git a/net/core/filter.c b/net/core/filter.c
index 7559d6835ecb..41984ad4b9b4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6708,6 +6708,27 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
target_size));
break;
+ case offsetof(struct __sk_buff, gso_segs):
+ /* si->dst_reg = skb_shinfo(SKB); */
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, head));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+ BPF_REG_AX, si->src_reg,
+ offsetof(struct sk_buff, end));
+ *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+#else
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, end));
+#endif
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
+ si->dst_reg, si->dst_reg,
+ bpf_target_off(struct skb_shared_info,
+ gso_segs, 2,
+ target_size));
+ break;
case offsetof(struct __sk_buff, wire_len):
BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
@@ -7698,6 +7719,7 @@ const struct bpf_verifier_ops flow_dissector_verifier_ops = {
};
const struct bpf_prog_ops flow_dissector_prog_ops = {
+ .test_run = bpf_prog_test_run_flow_dissector,
};
int sk_detach_filter(struct sock *sk)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 9f2840510e63..bb1a54747d64 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -683,6 +683,46 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
}
}
+bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
+ const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ struct bpf_flow_keys *flow_keys)
+{
+ struct bpf_skb_data_end cb_saved;
+ struct bpf_skb_data_end *cb;
+ u32 result;
+
+ /* Note that even though the const qualifier is discarded
+ * throughout the execution of the BPF program, all changes(the
+ * control block) are reverted after the BPF program returns.
+ * Therefore, __skb_flow_dissect does not alter the skb.
+ */
+
+ cb = (struct bpf_skb_data_end *)skb->cb;
+
+ /* Save Control Block */
+ memcpy(&cb_saved, cb, sizeof(cb_saved));
+ memset(cb, 0, sizeof(*cb));
+
+ /* Pass parameters to the BPF program */
+ memset(flow_keys, 0, sizeof(*flow_keys));
+ cb->qdisc_cb.flow_keys = flow_keys;
+ flow_keys->nhoff = skb_network_offset(skb);
+ flow_keys->thoff = flow_keys->nhoff;
+
+ bpf_compute_data_pointers((struct sk_buff *)skb);
+ result = BPF_PROG_RUN(prog, skb);
+
+ /* Restore state */
+ memcpy(cb, &cb_saved, sizeof(cb_saved));
+
+ flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len);
+ flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
+ flow_keys->nhoff, skb->len);
+
+ return result == BPF_OK;
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -714,7 +754,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_vlan *key_vlan;
enum flow_dissect_ret fdret;
enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
- struct bpf_prog *attached = NULL;
int num_hdrs = 0;
u8 ip_proto = 0;
bool ret;
@@ -754,53 +793,30 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
- rcu_read_lock();
if (skb) {
+ struct bpf_flow_keys flow_keys;
+ struct bpf_prog *attached = NULL;
+
+ rcu_read_lock();
+
if (skb->dev)
attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog);
else if (skb->sk)
attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog);
else
WARN_ON_ONCE(1);
- }
- if (attached) {
- /* Note that even though the const qualifier is discarded
- * throughout the execution of the BPF program, all changes(the
- * control block) are reverted after the BPF program returns.
- * Therefore, __skb_flow_dissect does not alter the skb.
- */
- struct bpf_flow_keys flow_keys = {};
- struct bpf_skb_data_end cb_saved;
- struct bpf_skb_data_end *cb;
- u32 result;
-
- cb = (struct bpf_skb_data_end *)skb->cb;
-
- /* Save Control Block */
- memcpy(&cb_saved, cb, sizeof(cb_saved));
- memset(cb, 0, sizeof(cb_saved));
- /* Pass parameters to the BPF program */
- cb->qdisc_cb.flow_keys = &flow_keys;
- flow_keys.nhoff = nhoff;
- flow_keys.thoff = nhoff;
-
- bpf_compute_data_pointers((struct sk_buff *)skb);
- result = BPF_PROG_RUN(attached, skb);
-
- /* Restore state */
- memcpy(cb, &cb_saved, sizeof(cb_saved));
-
- flow_keys.nhoff = clamp_t(u16, flow_keys.nhoff, 0, skb->len);
- flow_keys.thoff = clamp_t(u16, flow_keys.thoff,
- flow_keys.nhoff, skb->len);
-
- __skb_flow_bpf_to_target(&flow_keys, flow_dissector,
- target_container);
+ if (attached) {
+ ret = __skb_flow_bpf_dissect(attached, skb,
+ flow_dissector,
+ &flow_keys);
+ __skb_flow_bpf_to_target(&flow_keys, flow_dissector,
+ target_container);
+ rcu_read_unlock();
+ return ret;
+ }
rcu_read_unlock();
- return result == BPF_OK;
}
- rcu_read_unlock();
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig
index 90e4a7152854..0255b33cff4b 100644
--- a/net/xdp/Kconfig
+++ b/net/xdp/Kconfig
@@ -5,3 +5,11 @@ config XDP_SOCKETS
help
XDP sockets allows a channel between XDP programs and
userspace applications.
+
+config XDP_SOCKETS_DIAG
+ tristate "XDP sockets: monitoring interface"
+ depends on XDP_SOCKETS
+ default n
+ help
+ Support for PF_XDP sockets monitoring interface used by the ss tool.
+ If unsure, say Y.
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
index 04f073146256..59dbfdf93dca 100644
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
@@ -1 +1,2 @@
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
+obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index d74dfdc106b5..5ab236c5c9a5 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -13,12 +13,15 @@
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
+#include <linux/idr.h>
#include "xdp_umem.h"
#include "xsk_queue.h"
#define XDP_UMEM_MIN_CHUNK_SIZE 2048
+static DEFINE_IDA(umem_ida);
+
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
unsigned long flags;
@@ -194,6 +197,8 @@ static void xdp_umem_release(struct xdp_umem *umem)
xdp_umem_clear_dev(umem);
+ ida_simple_remove(&umem_ida, umem->id);
+
if (umem->fq) {
xskq_destroy(umem->fq);
umem->fq = NULL;
@@ -400,8 +405,16 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
if (!umem)
return ERR_PTR(-ENOMEM);
+ err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
+ if (err < 0) {
+ kfree(umem);
+ return ERR_PTR(err);
+ }
+ umem->id = err;
+
err = xdp_umem_reg(umem, mr);
if (err) {
+ ida_simple_remove(&umem_ida, umem->id);
kfree(umem);
return ERR_PTR(err);
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a03268454a27..949d3bbccb2f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -27,14 +27,10 @@
#include "xsk_queue.h"
#include "xdp_umem.h"
+#include "xsk.h"
#define TX_BATCH_SIZE 16
-static struct xdp_sock *xdp_sk(struct sock *sk)
-{
- return (struct xdp_sock *)sk;
-}
-
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
@@ -350,6 +346,10 @@ static int xsk_release(struct socket *sock)
net = sock_net(sk);
+ mutex_lock(&net->xdp.lock);
+ sk_del_node_init_rcu(sk);
+ mutex_unlock(&net->xdp.lock);
+
local_bh_disable();
sock_prot_inuse_add(net, sk->sk_prot, -1);
local_bh_enable();
@@ -746,6 +746,10 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
mutex_init(&xs->mutex);
spin_lock_init(&xs->tx_completion_lock);
+ mutex_lock(&net->xdp.lock);
+ sk_add_node_rcu(sk, &net->xdp.list);
+ mutex_unlock(&net->xdp.lock);
+
local_bh_disable();
sock_prot_inuse_add(net, &xsk_proto, 1);
local_bh_enable();
@@ -759,6 +763,23 @@ static const struct net_proto_family xsk_family_ops = {
.owner = THIS_MODULE,
};
+static int __net_init xsk_net_init(struct net *net)
+{
+ mutex_init(&net->xdp.lock);
+ INIT_HLIST_HEAD(&net->xdp.list);
+ return 0;
+}
+
+static void __net_exit xsk_net_exit(struct net *net)
+{
+ WARN_ON_ONCE(!hlist_empty(&net->xdp.list));
+}
+
+static struct pernet_operations xsk_net_ops = {
+ .init = xsk_net_init,
+ .exit = xsk_net_exit,
+};
+
static int __init xsk_init(void)
{
int err;
@@ -771,8 +792,13 @@ static int __init xsk_init(void)
if (err)
goto out_proto;
+ err = register_pernet_subsys(&xsk_net_ops);
+ if (err)
+ goto out_sk;
return 0;
+out_sk:
+ sock_unregister(PF_XDP);
out_proto:
proto_unregister(&xsk_proto);
out:
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
new file mode 100644
index 000000000000..ba8120610426
--- /dev/null
+++ b/net/xdp/xsk.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. */
+
+#ifndef XSK_H_
+#define XSK_H_
+
+static inline struct xdp_sock *xdp_sk(struct sock *sk)
+{
+ return (struct xdp_sock *)sk;
+}
+
+#endif /* XSK_H_ */
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
new file mode 100644
index 000000000000..661d007c3b28
--- /dev/null
+++ b/net/xdp/xsk_diag.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XDP sockets monitoring support
+ *
+ * Copyright(c) 2019 Intel Corporation.
+ *
+ * Author: Björn Töpel <bjorn.topel@intel.com>
+ */
+
+#include <linux/module.h>
+#include <net/xdp_sock.h>
+#include <linux/xdp_diag.h>
+#include <linux/sock_diag.h>
+
+#include "xsk_queue.h"
+#include "xsk.h"
+
+static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+ struct xdp_diag_info di = {};
+
+ di.ifindex = xs->dev ? xs->dev->ifindex : 0;
+ di.queue_id = xs->queue_id;
+ return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di);
+}
+
+static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type,
+ struct sk_buff *nlskb)
+{
+ struct xdp_diag_ring dr = {};
+
+ dr.entries = queue->nentries;
+ return nla_put(nlskb, nl_type, sizeof(dr), &dr);
+}
+
+static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
+ struct sk_buff *nlskb)
+{
+ int err = 0;
+
+ if (xs->rx)
+ err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb);
+ if (!err && xs->tx)
+ err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb);
+ return err;
+}
+
+static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+ struct xdp_umem *umem = xs->umem;
+ struct xdp_diag_umem du = {};
+ int err;
+
+ if (!umem)
+ return 0;
+
+ du.id = umem->id;
+ du.size = umem->size;
+ du.num_pages = umem->npgs;
+ du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+ du.headroom = umem->headroom;
+ du.ifindex = umem->dev ? umem->dev->ifindex : 0;
+ du.queue_id = umem->queue_id;
+ du.flags = 0;
+ if (umem->zc)
+ du.flags |= XDP_DU_F_ZEROCOPY;
+ du.refs = refcount_read(&umem->users);
+
+ err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
+
+ if (!err && umem->fq)
+ err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
+ if (!err && umem->cq) {
+ err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
+ nlskb);
+ }
+ return err;
+}
+
+static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
+ struct xdp_diag_req *req,
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u32 flags, int sk_ino)
+{
+ struct xdp_sock *xs = xdp_sk(sk);
+ struct xdp_diag_msg *msg;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
+ flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ msg = nlmsg_data(nlh);
+ memset(msg, 0, sizeof(*msg));
+ msg->xdiag_family = AF_XDP;
+ msg->xdiag_type = sk->sk_type;
+ msg->xdiag_ino = sk_ino;
+ sock_diag_save_cookie(sk, msg->xdiag_cookie);
+
+ if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
+ goto out_nlmsg_trim;
+
+ if ((req->xdiag_show & XDP_SHOW_INFO) &&
+ nla_put_u32(nlskb, XDP_DIAG_UID,
+ from_kuid_munged(user_ns, sock_i_uid(sk))))
+ goto out_nlmsg_trim;
+
+ if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
+ xsk_diag_put_rings_cfg(xs, nlskb))
+ goto out_nlmsg_trim;
+
+ if ((req->xdiag_show & XDP_SHOW_UMEM) &&
+ xsk_diag_put_umem(xs, nlskb))
+ goto out_nlmsg_trim;
+
+ if ((req->xdiag_show & XDP_SHOW_MEMINFO) &&
+ sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
+ goto out_nlmsg_trim;
+
+ nlmsg_end(nlskb, nlh);
+ return 0;
+
+out_nlmsg_trim:
+ nlmsg_cancel(nlskb, nlh);
+ return -EMSGSIZE;
+}
+
+static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb)
+{
+ struct xdp_diag_req *req = nlmsg_data(cb->nlh);
+ struct net *net = sock_net(nlskb->sk);
+ int num = 0, s_num = cb->args[0];
+ struct sock *sk;
+
+ mutex_lock(&net->xdp.lock);
+
+ sk_for_each(sk, &net->xdp.list) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (num++ < s_num)
+ continue;
+
+ if (xsk_diag_fill(sk, nlskb, req,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ sock_i_ino(sk)) < 0) {
+ num--;
+ break;
+ }
+ }
+
+ mutex_unlock(&net->xdp.lock);
+ cb->args[0] = num;
+ return nlskb->len;
+}
+
+static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
+{
+ struct netlink_dump_control c = { .dump = xsk_diag_dump };
+ int hdrlen = sizeof(struct xdp_diag_req);
+ struct net *net = sock_net(nlskb->sk);
+
+ if (nlmsg_len(hdr) < hdrlen)
+ return -EINVAL;
+
+ if (!(hdr->nlmsg_flags & NLM_F_DUMP))
+ return -EOPNOTSUPP;
+
+ return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c);
+}
+
+static const struct sock_diag_handler xsk_diag_handler = {
+ .family = AF_XDP,
+ .dump = xsk_diag_handler_dump,
+};
+
+static int __init xsk_diag_init(void)
+{
+ return sock_diag_register(&xsk_diag_handler);
+}
+
+static void __exit xsk_diag_exit(void)
+{
+ sock_diag_unregister(&xsk_diag_handler);
+}
+
+module_init(xsk_diag_init);
+module_exit(xsk_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);