summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c9
-rw-r--r--net/core/filter.c228
-rw-r--r--net/ipv4/inet_hashtables.c60
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/udp.c97
-rw-r--r--net/ipv6/inet6_hashtables.c66
-rw-r--r--net/ipv6/route.c7
-rw-r--r--net/ipv6/udp.c97
-rw-r--r--net/netlink/af_netlink.c7
9 files changed, 472 insertions, 103 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 19f1abc26fcd..316349f6cea5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5449,6 +5449,8 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
for (i = 0; i < new->aux->used_map_cnt; i++) {
if (dev_map_can_have_prog(new->aux->used_maps[i]))
return -EINVAL;
+ if (cpu_map_prog_allowed(new->aux->used_maps[i]))
+ return -EINVAL;
}
}
@@ -8880,6 +8882,13 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
return -EINVAL;
}
+ if (prog->expected_attach_type == BPF_XDP_CPUMAP) {
+ NL_SET_ERR_MSG(extack,
+ "BPF_XDP_CPUMAP programs can not be attached to a device");
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
/* prog->aux->id may be 0 for orphaned device-bound progs */
if (prog->aux->id && prog->aux->id == prog_id) {
bpf_prog_put(prog);
diff --git a/net/core/filter.c b/net/core/filter.c
index 2bf6624796d8..3fa16b8c0d61 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9252,61 +9252,205 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
-#endif /* CONFIG_INET */
-DEFINE_BPF_DISPATCHER(xdp)
+DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
+EXPORT_SYMBOL(bpf_sk_lookup_enabled);
-void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
+BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
+ struct sock *, sk, u64, flags)
{
- bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
+ if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
+ BPF_SK_LOOKUP_F_NO_REUSEPORT)))
+ return -EINVAL;
+ if (unlikely(sk && sk_is_refcounted(sk)))
+ return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
+ if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
+ return -ESOCKTNOSUPPORT; /* reject connected sockets */
+
+ /* Check if socket is suitable for packet L3/L4 protocol */
+ if (sk && sk->sk_protocol != ctx->protocol)
+ return -EPROTOTYPE;
+ if (sk && sk->sk_family != ctx->family &&
+ (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
+ return -EAFNOSUPPORT;
+
+ if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
+ return -EEXIST;
+
+ /* Select socket as lookup result */
+ ctx->selected_sk = sk;
+ ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
+ return 0;
}
-/* Define a list of socket types which can be the argument for
- * skc_to_*_sock() helpers. All these sockets should have
- * sock_common as the first argument in its memory layout.
- */
-#define BTF_SOCK_TYPE_xxx \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock")
-
-enum {
-#define BTF_SOCK_TYPE(name, str) name,
-BTF_SOCK_TYPE_xxx
-#undef BTF_SOCK_TYPE
-MAX_BTF_SOCK_TYPE,
+static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
+ .func = bpf_sk_lookup_assign,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL,
+ .arg3_type = ARG_ANYTHING,
};
-static int btf_sock_ids[MAX_BTF_SOCK_TYPE];
+static const struct bpf_func_proto *
+sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_perf_event_output:
+ return &bpf_event_output_data_proto;
+ case BPF_FUNC_sk_assign:
+ return &bpf_sk_lookup_assign_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
-#ifdef CONFIG_BPF_SYSCALL
-static const char *bpf_sock_types[] = {
-#define BTF_SOCK_TYPE(name, str) str,
-BTF_SOCK_TYPE_xxx
-#undef BTF_SOCK_TYPE
-};
+static bool sk_lookup_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (type != BPF_READ)
+ return false;
+
+ switch (off) {
+ case offsetof(struct bpf_sk_lookup, sk):
+ info->reg_type = PTR_TO_SOCKET_OR_NULL;
+ return size == sizeof(__u64);
-void init_btf_sock_ids(struct btf *btf)
+ case bpf_ctx_range(struct bpf_sk_lookup, family):
+ case bpf_ctx_range(struct bpf_sk_lookup, protocol):
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
+ case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+ case bpf_ctx_range(struct bpf_sk_lookup, local_port):
+ bpf_ctx_record_field_size(info, sizeof(__u32));
+ return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
+
+ default:
+ return false;
+ }
+}
+
+static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
{
- int i, btf_id;
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sk_lookup, sk):
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, selected_sk));
+ break;
- for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
- btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
- BTF_KIND_STRUCT);
- if (btf_id > 0)
- btf_sock_ids[i] = btf_id;
+ case offsetof(struct bpf_sk_lookup, family):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ family, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, protocol):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ protocol, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, remote_ip4):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ v4.saddr, 4, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, local_ip4):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ v4.daddr, 4, target_size));
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sk_lookup,
+ remote_ip6[0], remote_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+ int off = si->off;
+
+ off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
+ off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, v6.saddr));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+ }
+ case bpf_ctx_range_till(struct bpf_sk_lookup,
+ local_ip6[0], local_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+ int off = si->off;
+
+ off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
+ off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, v6.daddr));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
}
+ case offsetof(struct bpf_sk_lookup, remote_port):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ sport, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, local_port):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ dport, 2, target_size));
+ break;
+ }
+
+ return insn - insn_buf;
}
+
+const struct bpf_prog_ops sk_lookup_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_lookup_verifier_ops = {
+ .get_func_proto = sk_lookup_func_proto,
+ .is_valid_access = sk_lookup_is_valid_access,
+ .convert_ctx_access = sk_lookup_convert_ctx_access,
+};
+
+#endif /* CONFIG_INET */
+
+DEFINE_BPF_DISPATCHER(xdp)
+
+void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
+{
+ bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
+}
+
+#ifdef CONFIG_DEBUG_INFO_BTF
+BTF_ID_LIST_GLOBAL(btf_sock_ids)
+#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+#else
+u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
#endif
static bool check_arg_btf_id(u32 btf_id, u32 arg)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2bbaaf0c7176..4eb4cd8d20dd 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -246,6 +246,21 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 phash;
+
+ if (sk->sk_reuseport) {
+ phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
+ }
+ return reuse_sk;
+}
+
/*
* Here are some nice properties to exploit here. The BSD API
* does not allow a listening sock to specify the remote port nor the
@@ -265,21 +280,17 @@ static struct sock *inet_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
- u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr,
dif, sdif, exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
result = sk;
hiscore = score;
}
@@ -288,6 +299,29 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
+static inline struct sock *inet_lookup_run_bpf(struct net *net,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (hashinfo != &tcp_hashinfo)
+ return NULL; /* only TCP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -299,6 +333,14 @@ struct sock *__inet_lookup_listener(struct net *net,
struct sock *result = NULL;
unsigned int hash2;
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ goto done;
+ }
+
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd81b6e04efb..daa39d33702b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -76,6 +76,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>
+#include <linux/btf_ids.h>
#include <crypto/hash.h>
#include <linux/scatterlist.h>
@@ -2946,7 +2947,7 @@ static void bpf_iter_fini_tcp(void *priv_data)
bpf_iter_fini_seq_net(priv_data);
}
-static const struct bpf_iter_reg tcp_reg_info = {
+static struct bpf_iter_reg tcp_reg_info = {
.target = "tcp",
.seq_ops = &bpf_iter_tcp_seq_ops,
.init_seq_private = bpf_iter_init_tcp,
@@ -2961,6 +2962,7 @@ static const struct bpf_iter_reg tcp_reg_info = {
static void __init bpf_iter_register(void)
{
+ tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON];
if (bpf_iter_reg_target(&tcp_reg_info))
pr_warn("Warning: could not register bpf iterator tcp\n");
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d4be4471c424..bb95cddcb040 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -106,6 +106,7 @@
#include <net/xfrm.h>
#include <trace/events/udp.h>
#include <linux/static_key.h>
+#include <linux/btf_ids.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>
#include "udp_impl.h"
@@ -408,6 +409,25 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
udp_ehash_secret + net_hash_mix(net));
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 hash;
+
+ if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
+ hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ /* Fall back to scoring if group has connections */
+ if (reuseport_has_conns(sk, false))
+ return NULL;
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
@@ -418,7 +438,6 @@ static struct sock *udp4_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
- u32 hash = 0;
result = NULL;
badness = 0;
@@ -426,15 +445,11 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- if (sk->sk_reuseport &&
- sk->sk_state != TCP_ESTABLISHED) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result && !reuseport_has_conns(sk, false))
- return result;
- }
+ result = lookup_reuseport(net, sk, skb,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
badness = score;
result = sk;
}
@@ -442,6 +457,29 @@ static struct sock *udp4_lib_lookup2(struct net *net,
return result;
}
+static inline struct sock *udp4_lookup_run_bpf(struct net *net,
+ struct udp_table *udptable,
+ struct sk_buff *skb,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (udptable != &udp_table)
+ return NULL; /* only UDP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
@@ -449,27 +487,45 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
- struct sock *result;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
+ struct sock *result, *sk;
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
+ /* Lookup connected or non-wildcard socket */
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
hslot2, skb);
- if (!result) {
- hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
-
- result = udp4_lib_lookup2(net, saddr, sport,
- htonl(INADDR_ANY), hnum, dif, sdif,
- hslot2, skb);
+ if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
+ goto done;
+
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ sk = udp4_lookup_run_bpf(net, udptable, skb,
+ saddr, sport, daddr, hnum);
+ if (sk) {
+ result = sk;
+ goto done;
+ }
}
+
+ /* Got non-wildcard socket or error on first lookup */
+ if (result)
+ goto done;
+
+ /* Lookup wildcard sockets */
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp4_lib_lookup2(net, saddr, sport,
+ htonl(INADDR_ANY), hnum, dif, sdif,
+ hslot2, skb);
+done:
if (IS_ERR(result))
return NULL;
return result;
@@ -3153,7 +3209,7 @@ static void bpf_iter_fini_udp(void *priv_data)
bpf_iter_fini_seq_net(priv_data);
}
-static const struct bpf_iter_reg udp_reg_info = {
+static struct bpf_iter_reg udp_reg_info = {
.target = "udp",
.seq_ops = &bpf_iter_udp_seq_ops,
.init_seq_private = bpf_iter_init_udp,
@@ -3168,6 +3224,7 @@ static const struct bpf_iter_reg udp_reg_info = {
static void __init bpf_iter_register(void)
{
+ udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP];
if (bpf_iter_reg_target(&udp_reg_info))
pr_warn("Warning: could not register bpf iterator udp\n");
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index fbe9d4295eac..2d3add9e6116 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -21,6 +21,8 @@
#include <net/ip.h>
#include <net/sock_reuseport.h>
+extern struct inet_hashinfo tcp_hashinfo;
+
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
@@ -111,6 +113,23 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 phash;
+
+ if (sk->sk_reuseport) {
+ phash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *inet6_lhash2_lookup(struct net *net,
struct inet_listen_hashbucket *ilb2,
@@ -123,21 +142,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
- u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr, dif, sdif,
exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
result = sk;
hiscore = score;
}
@@ -146,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
+static inline struct sock *inet6_lookup_run_bpf(struct net *net,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (hashinfo != &tcp_hashinfo)
+ return NULL; /* only TCP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -157,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net,
struct sock *result = NULL;
unsigned int hash2;
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ goto done;
+ }
+
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 427b81cbc164..33f5efbad0a9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -61,6 +61,7 @@
#include <net/l3mdev.h>
#include <net/ip.h>
#include <linux/uaccess.h>
+#include <linux/btf_ids.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
@@ -6423,7 +6424,10 @@ void __init ip6_route_init_special_entries(void)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
-static const struct bpf_iter_reg ipv6_route_reg_info = {
+BTF_ID_LIST(btf_fib6_info_id)
+BTF_ID(struct, fib6_info)
+
+static struct bpf_iter_reg ipv6_route_reg_info = {
.target = "ipv6_route",
.seq_ops = &ipv6_route_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
@@ -6438,6 +6442,7 @@ static const struct bpf_iter_reg ipv6_route_reg_info = {
static int __init bpf_iter_register(void)
{
+ ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
return bpf_iter_reg_target(&ipv6_route_reg_info);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5aff0856a05b..7c1143feb2bf 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -141,6 +141,27 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 hash;
+
+ if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
+ hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ /* Fall back to scoring if group has connections */
+ if (reuseport_has_conns(sk, false))
+ return NULL;
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -150,7 +171,6 @@ static struct sock *udp6_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
- u32 hash = 0;
result = NULL;
badness = -1;
@@ -158,16 +178,11 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- if (sk->sk_reuseport &&
- sk->sk_state != TCP_ESTABLISHED) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
-
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result && !reuseport_has_conns(sk, false))
- return result;
- }
+ result = lookup_reuseport(net, sk, skb,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
result = sk;
badness = score;
}
@@ -175,6 +190,31 @@ static struct sock *udp6_lib_lookup2(struct net *net,
return result;
}
+static inline struct sock *udp6_lookup_run_bpf(struct net *net,
+ struct udp_table *udptable,
+ struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (udptable != &udp_table)
+ return NULL; /* only UDP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -185,25 +225,42 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
- struct sock *result;
+ struct sock *result, *sk;
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
+ /* Lookup connected or non-wildcard sockets */
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
hslot2, skb);
- if (!result) {
- hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
- slot2 = hash2 & udptable->mask;
+ if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
+ goto done;
+
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ sk = udp6_lookup_run_bpf(net, udptable, skb,
+ saddr, sport, daddr, hnum);
+ if (sk) {
+ result = sk;
+ goto done;
+ }
+ }
- hslot2 = &udptable->hash2[slot2];
+ /* Got non-wildcard socket or error on first lookup */
+ if (result)
+ goto done;
- result = udp6_lib_lookup2(net, saddr, sport,
- &in6addr_any, hnum, dif, sdif,
- hslot2, skb);
- }
+ /* Lookup wildcard sockets */
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp6_lib_lookup2(net, saddr, sport,
+ &in6addr_any, hnum, dif, sdif,
+ hslot2, skb);
+done:
if (IS_ERR(result))
return NULL;
return result;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4f2c3b14ddbf..3cd58f0c2de4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -60,6 +60,7 @@
#include <linux/genetlink.h>
#include <linux/net_namespace.h>
#include <linux/nospec.h>
+#include <linux/btf_ids.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -2803,7 +2804,10 @@ static const struct rhashtable_params netlink_rhashtable_params = {
};
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
-static const struct bpf_iter_reg netlink_reg_info = {
+BTF_ID_LIST(btf_netlink_sock_id)
+BTF_ID(struct, netlink_sock)
+
+static struct bpf_iter_reg netlink_reg_info = {
.target = "netlink",
.seq_ops = &netlink_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
@@ -2818,6 +2822,7 @@ static const struct bpf_iter_reg netlink_reg_info = {
static int __init bpf_iter_register(void)
{
+ netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id;
return bpf_iter_reg_target(&netlink_reg_info);
}
#endif