diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/core/dev.c | 9 | ||||
-rw-r--r-- | net/core/filter.c | 228 | ||||
-rw-r--r-- | net/ipv4/inet_hashtables.c | 60 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 4 | ||||
-rw-r--r-- | net/ipv4/udp.c | 97 | ||||
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 66 | ||||
-rw-r--r-- | net/ipv6/route.c | 7 | ||||
-rw-r--r-- | net/ipv6/udp.c | 97 | ||||
-rw-r--r-- | net/netlink/af_netlink.c | 7 |
9 files changed, 472 insertions, 103 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 19f1abc26fcd..316349f6cea5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5449,6 +5449,8 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) for (i = 0; i < new->aux->used_map_cnt; i++) { if (dev_map_can_have_prog(new->aux->used_maps[i])) return -EINVAL; + if (cpu_map_prog_allowed(new->aux->used_maps[i])) + return -EINVAL; } } @@ -8880,6 +8882,13 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, return -EINVAL; } + if (prog->expected_attach_type == BPF_XDP_CPUMAP) { + NL_SET_ERR_MSG(extack, + "BPF_XDP_CPUMAP programs can not be attached to a device"); + bpf_prog_put(prog); + return -EINVAL; + } + /* prog->aux->id may be 0 for orphaned device-bound progs */ if (prog->aux->id && prog->aux->id == prog_id) { bpf_prog_put(prog); diff --git a/net/core/filter.c b/net/core/filter.c index 2bf6624796d8..3fa16b8c0d61 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9252,61 +9252,205 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = { const struct bpf_prog_ops sk_reuseport_prog_ops = { }; -#endif /* CONFIG_INET */ -DEFINE_BPF_DISPATCHER(xdp) +DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled); +EXPORT_SYMBOL(bpf_sk_lookup_enabled); -void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) +BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx, + struct sock *, sk, u64, flags) { - bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); + if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE | + BPF_SK_LOOKUP_F_NO_REUSEPORT))) + return -EINVAL; + if (unlikely(sk && sk_is_refcounted(sk))) + return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */ + if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED)) + return -ESOCKTNOSUPPORT; /* reject connected sockets */ + + /* Check if socket is suitable for packet L3/L4 protocol */ + if (sk && sk->sk_protocol != ctx->protocol) + return -EPROTOTYPE; + if (sk && sk->sk_family != ctx->family && + (sk->sk_family == AF_INET || ipv6_only_sock(sk))) + return -EAFNOSUPPORT; + + if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE)) + return -EEXIST; + + /* Select socket as lookup result */ + ctx->selected_sk = sk; + ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT; + return 0; } -/* Define a list of socket types which can be the argument for - * skc_to_*_sock() helpers. All these sockets should have - * sock_common as the first argument in its memory layout. - */ -#define BTF_SOCK_TYPE_xxx \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock") - -enum { -#define BTF_SOCK_TYPE(name, str) name, -BTF_SOCK_TYPE_xxx -#undef BTF_SOCK_TYPE -MAX_BTF_SOCK_TYPE, +static const struct bpf_func_proto bpf_sk_lookup_assign_proto = { + .func = bpf_sk_lookup_assign, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL, + .arg3_type = ARG_ANYTHING, }; -static int btf_sock_ids[MAX_BTF_SOCK_TYPE]; +static const struct bpf_func_proto * +sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_event_output_data_proto; + case BPF_FUNC_sk_assign: + return &bpf_sk_lookup_assign_proto; + case BPF_FUNC_sk_release: + return &bpf_sk_release_proto; + default: + return bpf_base_func_proto(func_id); + } +} -#ifdef CONFIG_BPF_SYSCALL -static const char *bpf_sock_types[] = { -#define BTF_SOCK_TYPE(name, str) str, -BTF_SOCK_TYPE_xxx -#undef BTF_SOCK_TYPE -}; +static bool sk_lookup_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= sizeof(struct bpf_sk_lookup)) + return false; + if (off % size != 0) + return false; + if (type != BPF_READ) + return false; + + switch (off) { + case offsetof(struct bpf_sk_lookup, sk): + info->reg_type = PTR_TO_SOCKET_OR_NULL; + return size == sizeof(__u64); -void init_btf_sock_ids(struct btf *btf) + case bpf_ctx_range(struct bpf_sk_lookup, family): + case bpf_ctx_range(struct bpf_sk_lookup, protocol): + case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4): + case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): + case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): + case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): + case bpf_ctx_range(struct bpf_sk_lookup, remote_port): + case bpf_ctx_range(struct bpf_sk_lookup, local_port): + bpf_ctx_record_field_size(info, sizeof(__u32)); + return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); + + default: + return false; + } +} + +static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) { - int i, btf_id; + struct bpf_insn *insn = insn_buf; + + switch (si->off) { + case offsetof(struct bpf_sk_lookup, sk): + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, + offsetof(struct bpf_sk_lookup_kern, selected_sk)); + break; - for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) { - btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i], - BTF_KIND_STRUCT); - if (btf_id > 0) - btf_sock_ids[i] = btf_id; + case offsetof(struct bpf_sk_lookup, family): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + family, 2, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, protocol): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + protocol, 2, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, remote_ip4): + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + v4.saddr, 4, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, local_ip4): + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + v4.daddr, 4, target_size)); + break; + + case bpf_ctx_range_till(struct bpf_sk_lookup, + remote_ip6[0], remote_ip6[3]): { +#if IS_ENABLED(CONFIG_IPV6) + int off = si->off; + + off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]); + off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, + offsetof(struct bpf_sk_lookup_kern, v6.saddr)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + } + case bpf_ctx_range_till(struct bpf_sk_lookup, + local_ip6[0], local_ip6[3]): { +#if IS_ENABLED(CONFIG_IPV6) + int off = si->off; + + off -= offsetof(struct bpf_sk_lookup, local_ip6[0]); + off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, + offsetof(struct bpf_sk_lookup_kern, v6.daddr)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; } + case offsetof(struct bpf_sk_lookup, remote_port): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + sport, 2, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, local_port): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + dport, 2, target_size)); + break; + } + + return insn - insn_buf; } + +const struct bpf_prog_ops sk_lookup_prog_ops = { +}; + +const struct bpf_verifier_ops sk_lookup_verifier_ops = { + .get_func_proto = sk_lookup_func_proto, + .is_valid_access = sk_lookup_is_valid_access, + .convert_ctx_access = sk_lookup_convert_ctx_access, +}; + +#endif /* CONFIG_INET */ + +DEFINE_BPF_DISPATCHER(xdp) + +void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) +{ + bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); +} + +#ifdef CONFIG_DEBUG_INFO_BTF +BTF_ID_LIST_GLOBAL(btf_sock_ids) +#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type) +BTF_SOCK_TYPE_xxx +#undef BTF_SOCK_TYPE +#else +u32 btf_sock_ids[MAX_BTF_SOCK_TYPE]; #endif static bool check_arg_btf_id(u32 btf_id, u32 arg) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2bbaaf0c7176..4eb4cd8d20dd 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -246,6 +246,21 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum) +{ + struct sock *reuse_sk = NULL; + u32 phash; + + if (sk->sk_reuseport) { + phash = inet_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, phash, skb, doff); + } + return reuse_sk; +} + /* * Here are some nice properties to exploit here. The BSD API * does not allow a listening sock to specify the remote port nor the @@ -265,21 +280,17 @@ static struct sock *inet_lhash2_lookup(struct net *net, struct inet_connection_sock *icsk; struct sock *sk, *result = NULL; int score, hiscore = 0; - u32 phash = 0; inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { sk = (struct sock *)icsk; score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { - if (sk->sk_reuseport) { - phash = inet_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, phash, - skb, doff); - if (result) - return result; - } + result = lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, hnum); + if (result) + return result; + result = sk; hiscore = score; } @@ -288,6 +299,29 @@ static struct sock *inet_lhash2_lookup(struct net *net, return result; } +static inline struct sock *inet_lookup_run_bpf(struct net *net, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, + __be32 daddr, u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (hashinfo != &tcp_hashinfo) + return NULL; /* only TCP is supported */ + + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, + saddr, sport, daddr, hnum, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -299,6 +333,14 @@ struct sock *__inet_lookup_listener(struct net *net, struct sock *result = NULL; unsigned int hash2; + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + result = inet_lookup_run_bpf(net, hashinfo, skb, doff, + saddr, sport, daddr, hnum); + if (result) + goto done; + } + hash2 = ipv4_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cd81b6e04efb..daa39d33702b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -76,6 +76,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/inetdevice.h> +#include <linux/btf_ids.h> #include <crypto/hash.h> #include <linux/scatterlist.h> @@ -2946,7 +2947,7 @@ static void bpf_iter_fini_tcp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static const struct bpf_iter_reg tcp_reg_info = { +static struct bpf_iter_reg tcp_reg_info = { .target = "tcp", .seq_ops = &bpf_iter_tcp_seq_ops, .init_seq_private = bpf_iter_init_tcp, @@ -2961,6 +2962,7 @@ static const struct bpf_iter_reg tcp_reg_info = { static void __init bpf_iter_register(void) { + tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON]; if (bpf_iter_reg_target(&tcp_reg_info)) pr_warn("Warning: could not register bpf iterator tcp\n"); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d4be4471c424..bb95cddcb040 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -106,6 +106,7 @@ #include <net/xfrm.h> #include <trace/events/udp.h> #include <linux/static_key.h> +#include <linux/btf_ids.h> #include <trace/events/skb.h> #include <net/busy_poll.h> #include "udp_impl.h" @@ -408,6 +409,25 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, udp_ehash_secret + net_hash_mix(net)); } +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum) +{ + struct sock *reuse_sk = NULL; + u32 hash; + + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { + hash = udp_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + /* Fall back to scoring if group has connections */ + if (reuseport_has_conns(sk, false)) + return NULL; + } + return reuse_sk; +} + /* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, @@ -418,7 +438,6 @@ static struct sock *udp4_lib_lookup2(struct net *net, { struct sock *sk, *result; int score, badness; - u32 hash = 0; result = NULL; badness = 0; @@ -426,15 +445,11 @@ static struct sock *udp4_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport && - sk->sk_state != TCP_ESTABLISHED) { - hash = udp_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; - } + result = lookup_reuseport(net, sk, skb, + saddr, sport, daddr, hnum); + if (result) + return result; + badness = score; result = sk; } @@ -442,6 +457,29 @@ static struct sock *udp4_lib_lookup2(struct net *net, return result; } +static inline struct sock *udp4_lookup_run_bpf(struct net *net, + struct udp_table *udptable, + struct sk_buff *skb, + __be32 saddr, __be16 sport, + __be32 daddr, u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (udptable != &udp_table) + return NULL; /* only UDP is supported */ + + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, + saddr, sport, daddr, hnum, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ @@ -449,27 +487,45 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *udptable, struct sk_buff *skb) { - struct sock *result; unsigned short hnum = ntohs(dport); unsigned int hash2, slot2; struct udp_hslot *hslot2; + struct sock *result, *sk; hash2 = ipv4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; + /* Lookup connected or non-wildcard socket */ result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, sdif, hslot2, skb); - if (!result) { - hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); - slot2 = hash2 & udptable->mask; - hslot2 = &udptable->hash2[slot2]; - - result = udp4_lib_lookup2(net, saddr, sport, - htonl(INADDR_ANY), hnum, dif, sdif, - hslot2, skb); + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) + goto done; + + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + sk = udp4_lookup_run_bpf(net, udptable, skb, + saddr, sport, daddr, hnum); + if (sk) { + result = sk; + goto done; + } } + + /* Got non-wildcard socket or error on first lookup */ + if (result) + goto done; + + /* Lookup wildcard sockets */ + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + + result = udp4_lib_lookup2(net, saddr, sport, + htonl(INADDR_ANY), hnum, dif, sdif, + hslot2, skb); +done: if (IS_ERR(result)) return NULL; return result; @@ -3153,7 +3209,7 @@ static void bpf_iter_fini_udp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static const struct bpf_iter_reg udp_reg_info = { +static struct bpf_iter_reg udp_reg_info = { .target = "udp", .seq_ops = &bpf_iter_udp_seq_ops, .init_seq_private = bpf_iter_init_udp, @@ -3168,6 +3224,7 @@ static const struct bpf_iter_reg udp_reg_info = { static void __init bpf_iter_register(void) { + udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP]; if (bpf_iter_reg_target(&udp_reg_info)) pr_warn("Warning: could not register bpf iterator udp\n"); } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index fbe9d4295eac..2d3add9e6116 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -21,6 +21,8 @@ #include <net/ip.h> #include <net/sock_reuseport.h> +extern struct inet_hashinfo tcp_hashinfo; + u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) @@ -111,6 +113,23 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum) +{ + struct sock *reuse_sk = NULL; + u32 phash; + + if (sk->sk_reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, phash, skb, doff); + } + return reuse_sk; +} + /* called with rcu_read_lock() */ static struct sock *inet6_lhash2_lookup(struct net *net, struct inet_listen_hashbucket *ilb2, @@ -123,21 +142,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net, struct inet_connection_sock *icsk; struct sock *sk, *result = NULL; int score, hiscore = 0; - u32 phash = 0; inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { sk = (struct sock *)icsk; score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { - if (sk->sk_reuseport) { - phash = inet6_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, phash, - skb, doff); - if (result) - return result; - } + result = lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, hnum); + if (result) + return result; + result = sk; hiscore = score; } @@ -146,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net, return result; } +static inline struct sock *inet6_lookup_run_bpf(struct net *net, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, + const u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (hashinfo != &tcp_hashinfo) + return NULL; /* only TCP is supported */ + + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, + saddr, sport, daddr, hnum, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -157,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net, struct sock *result = NULL; unsigned int hash2; + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + result = inet6_lookup_run_bpf(net, hashinfo, skb, doff, + saddr, sport, daddr, hnum); + if (result) + goto done; + } + hash2 = ipv6_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 427b81cbc164..33f5efbad0a9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -61,6 +61,7 @@ #include <net/l3mdev.h> #include <net/ip.h> #include <linux/uaccess.h> +#include <linux/btf_ids.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> @@ -6423,7 +6424,10 @@ void __init ip6_route_init_special_entries(void) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt) -static const struct bpf_iter_reg ipv6_route_reg_info = { +BTF_ID_LIST(btf_fib6_info_id) +BTF_ID(struct, fib6_info) + +static struct bpf_iter_reg ipv6_route_reg_info = { .target = "ipv6_route", .seq_ops = &ipv6_route_seq_ops, .init_seq_private = bpf_iter_init_seq_net, @@ -6438,6 +6442,7 @@ static const struct bpf_iter_reg ipv6_route_reg_info = { static int __init bpf_iter_register(void) { + ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id; return bpf_iter_reg_target(&ipv6_route_reg_info); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5aff0856a05b..7c1143feb2bf 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -141,6 +141,27 @@ static int compute_score(struct sock *sk, struct net *net, return score; } +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + unsigned int hnum) +{ + struct sock *reuse_sk = NULL; + u32 hash; + + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { + hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + /* Fall back to scoring if group has connections */ + if (reuseport_has_conns(sk, false)) + return NULL; + } + return reuse_sk; +} + /* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -150,7 +171,6 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; int score, badness; - u32 hash = 0; result = NULL; badness = -1; @@ -158,16 +178,11 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport && - sk->sk_state != TCP_ESTABLISHED) { - hash = udp6_ehashfn(net, daddr, hnum, - saddr, sport); - - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; - } + result = lookup_reuseport(net, sk, skb, + saddr, sport, daddr, hnum); + if (result) + return result; + result = sk; badness = score; } @@ -175,6 +190,31 @@ static struct sock *udp6_lib_lookup2(struct net *net, return result; } +static inline struct sock *udp6_lookup_run_bpf(struct net *net, + struct udp_table *udptable, + struct sk_buff *skb, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (udptable != &udp_table) + return NULL; /* only UDP is supported */ + + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, + saddr, sport, daddr, hnum, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + /* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -185,25 +225,42 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2; struct udp_hslot *hslot2; - struct sock *result; + struct sock *result, *sk; hash2 = ipv6_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; + /* Lookup connected or non-wildcard sockets */ result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, sdif, hslot2, skb); - if (!result) { - hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); - slot2 = hash2 & udptable->mask; + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) + goto done; + + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + sk = udp6_lookup_run_bpf(net, udptable, skb, + saddr, sport, daddr, hnum); + if (sk) { + result = sk; + goto done; + } + } - hslot2 = &udptable->hash2[slot2]; + /* Got non-wildcard socket or error on first lookup */ + if (result) + goto done; - result = udp6_lib_lookup2(net, saddr, sport, - &in6addr_any, hnum, dif, sdif, - hslot2, skb); - } + /* Lookup wildcard sockets */ + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + + result = udp6_lib_lookup2(net, saddr, sport, + &in6addr_any, hnum, dif, sdif, + hslot2, skb); +done: if (IS_ERR(result)) return NULL; return result; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4f2c3b14ddbf..3cd58f0c2de4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -60,6 +60,7 @@ #include <linux/genetlink.h> #include <linux/net_namespace.h> #include <linux/nospec.h> +#include <linux/btf_ids.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -2803,7 +2804,10 @@ static const struct rhashtable_params netlink_rhashtable_params = { }; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) -static const struct bpf_iter_reg netlink_reg_info = { +BTF_ID_LIST(btf_netlink_sock_id) +BTF_ID(struct, netlink_sock) + +static struct bpf_iter_reg netlink_reg_info = { .target = "netlink", .seq_ops = &netlink_seq_ops, .init_seq_private = bpf_iter_init_seq_net, @@ -2818,6 +2822,7 @@ static const struct bpf_iter_reg netlink_reg_info = { static int __init bpf_iter_register(void) { + netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id; return bpf_iter_reg_target(&netlink_reg_info); } #endif |