From 8934ce2fd08171e8605f7fada91ee7619fe17ab8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 28 Mar 2018 12:49:15 -0700 Subject: bpf: sockmap redirect ingress support Add support for the BPF_F_INGRESS flag in sk_msg redirect helper. To do this add a scatterlist ring for receiving socks to check before calling into regular recvmsg call path. Additionally, because the poll wakeup logic only checked the skb recv queue we need to add a hook in TCP stack (similar to write side) so that we have a way to wake up polling socks when a scatterlist is redirected to that sock. After this all that is needed is for the redirect helper to push the scatterlist into the psock receive queue. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 709311132d4c..b8ff435fa96e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1085,6 +1085,7 @@ struct proto { #endif bool (*stream_memory_free)(const struct sock *sk); + bool (*stream_memory_read)(const struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); -- cgit v1.2.3 From 3679d585bbc07a1ac4448d5b478b492cad3587ce Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:04 -0700 Subject: net: Introduce __inet_bind() and __inet6_bind Refactor `bind()` code to make it ready to be called from BPF helper function `bpf_bind()` (will be added soon). Implementation of `inet_bind()` and `inet6_bind()` is separated into `__inet_bind()` and `__inet6_bind()` correspondingly. These function can be used from both `sk_prot->bind` and `bpf_bind()` contexts. New functions have two additional arguments. `force_bind_address_no_port` forces binding to IP only w/o checking `inet_sock.bind_address_no_port` field. It'll allow to bind local end of a connection to desired IP in `bpf_bind()` w/o changing `bind_address_no_port` field of a socket. It's useful since `bpf_bind()` can return an error and we'd need to restore original value of `bind_address_no_port` in that case if we changed this before calling to the helper. `with_lock` specifies whether to lock socket when working with `struct sk` or not. The argument is set to `true` for `sk_prot->bind`, i.e. old behavior is preserved. But it will be set to `false` for `bpf_bind()` use-case. The reason is all call-sites, where `bpf_bind()` will be called, already hold that socket lock. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/net/inet_common.h | 2 ++ include/net/ipv6.h | 2 ++ net/ipv4/af_inet.c | 39 ++++++++++++++++++++++++--------------- net/ipv6/af_inet6.c | 37 ++++++++++++++++++++++++------------- 4 files changed, 52 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 500f81375200..384b90c62c0b 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -32,6 +32,8 @@ int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); void inet_sock_destruct(struct sock *sk); int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); +int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock); int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 50a6f0ddb878..2e5fedc56e59 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1066,6 +1066,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); int inet6_release(struct socket *sock); +int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2dec266507dc..e203a39d6988 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -432,30 +432,37 @@ EXPORT_SYMBOL(inet_release); int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct sock *sk = sock->sk; - struct inet_sock *inet = inet_sk(sk); - struct net *net = sock_net(sk); - unsigned short snum; - int chk_addr_ret; - u32 tb_id = RT_TABLE_LOCAL; int err; /* If the socket has its own bind function then use it. (RAW) */ if (sk->sk_prot->bind) { - err = sk->sk_prot->bind(sk, uaddr, addr_len); - goto out; + return sk->sk_prot->bind(sk, uaddr, addr_len); } - err = -EINVAL; if (addr_len < sizeof(struct sockaddr_in)) - goto out; + return -EINVAL; /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr); if (err) - goto out; + return err; + + return __inet_bind(sk, uaddr, addr_len, false, true); +} +EXPORT_SYMBOL(inet_bind); + +int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock) +{ + struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; + struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); + unsigned short snum; + int chk_addr_ret; + u32 tb_id = RT_TABLE_LOCAL; + int err; if (addr->sin_family != AF_INET) { /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) @@ -499,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * would be illegal to use them (multicast/broadcast) in * which case the sending device address is used. */ - lock_sock(sk); + if (with_lock) + lock_sock(sk); /* Check these errors (active socket, double bind). */ err = -EINVAL; @@ -511,7 +519,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. */ - if ((snum || !inet->bind_address_no_port) && + if ((snum || !(inet->bind_address_no_port || + force_bind_address_no_port)) && sk->sk_prot->get_port(sk, snum)) { inet->inet_saddr = inet->inet_rcv_saddr = 0; err = -EADDRINUSE; @@ -528,11 +537,11 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); err = 0; out_release_sock: - release_sock(sk); + if (with_lock) + release_sock(sk); out: return err; } -EXPORT_SYMBOL(inet_bind); int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index fa24e3f06ac6..13110bee5c14 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -277,15 +277,7 @@ out_rcu_unlock: /* bind for INET6 API */ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct net *net = sock_net(sk); - __be32 v4addr = 0; - unsigned short snum; - bool saved_ipv6only; - int addr_type = 0; int err = 0; /* If the socket has its own bind function then use it. */ @@ -302,11 +294,28 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (err) return err; + return __inet6_bind(sk, uaddr, addr_len, false, true); +} +EXPORT_SYMBOL(inet6_bind); + +int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock) +{ + struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); + __be32 v4addr = 0; + unsigned short snum; + bool saved_ipv6only; + int addr_type = 0; + int err = 0; + if (addr->sin6_family != AF_INET6) return -EAFNOSUPPORT; addr_type = ipv6_addr_type(&addr->sin6_addr); - if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) + if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM) return -EINVAL; snum = ntohs(addr->sin6_port); @@ -314,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; - lock_sock(sk); + if (with_lock) + lock_sock(sk); /* Check these errors (active socket, double bind). */ if (sk->sk_state != TCP_CLOSE || inet->inet_num) { @@ -402,7 +412,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) sk->sk_ipv6only = 1; /* Make sure we are allowed to bind here. */ - if ((snum || !inet->bind_address_no_port) && + if ((snum || !(inet->bind_address_no_port || + force_bind_address_no_port)) && sk->sk_prot->get_port(sk, snum)) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); @@ -418,13 +429,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_dport = 0; inet->inet_daddr = 0; out: - release_sock(sk); + if (with_lock) + release_sock(sk); return err; out_unlock: rcu_read_unlock(); goto out; } -EXPORT_SYMBOL(inet6_bind); int inet6_release(struct socket *sock) { -- cgit v1.2.3 From d74bad4e74ee373787a9ae24197c17b7cdc428d5 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:05 -0700 Subject: bpf: Hooks for sys_connect == The problem == See description of the problem in the initial patch of this patch set. == The solution == The patch provides much more reliable in-kernel solution for the 2nd part of the problem: making outgoing connecttion from desired IP. It adds new attach types `BPF_CGROUP_INET4_CONNECT` and `BPF_CGROUP_INET6_CONNECT` for program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that can be used to override both source and destination of a connection at connect(2) time. Local end of connection can be bound to desired IP using newly introduced BPF-helper `bpf_bind()`. It allows to bind to only IP though, and doesn't support binding to port, i.e. leverages `IP_BIND_ADDRESS_NO_PORT` socket option. There are two reasons for this: * looking for a free port is expensive and can affect performance significantly; * there is no use-case for port. As for remote end (`struct sockaddr *` passed by user), both parts of it can be overridden, remote IP and remote port. It's useful if an application inside cgroup wants to connect to another application inside same cgroup or to itself, but knows nothing about IP assigned to the cgroup. Support is added for IPv4 and IPv6, for TCP and UDP. IPv4 and IPv6 have separate attach types for same reason as sys_bind hooks, i.e. to prevent reading from / writing to e.g. user_ip6 fields when user passes sockaddr_in since it'd be out-of-bound. == Implementation notes == The patch introduces new field in `struct proto`: `pre_connect` that is a pointer to a function with same signature as `connect` but is called before it. The reason is in some cases BPF hooks should be called way before control is passed to `sk->sk_prot->connect`. Specifically `inet_dgram_connect` autobinds socket before calling `sk->sk_prot->connect` and there is no way to call `bpf_bind()` from hooks from e.g. `ip4_datagram_connect` or `ip6_datagram_connect` since it'd cause double-bind. On the other hand `proto.pre_connect` provides a flexible way to add BPF hooks for connect only for necessary `proto` and call them at desired time before `connect`. Since `bpf_bind()` is allowed to bind only to IP and autobind in `inet_dgram_connect` binds only port there is no chance of double-bind. bpf_bind() sets `force_bind_address_no_port` to bind to only IP despite of value of `bind_address_no_port` socket field. bpf_bind() sets `with_lock` to `false` when calling to __inet_bind() and __inet6_bind() since all call-sites, where bpf_bind() is called, already hold socket lock. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 31 +++++++++++++++++++++++++ include/net/addrconf.h | 7 ++++++ include/net/sock.h | 3 +++ include/net/udp.h | 1 + include/uapi/linux/bpf.h | 12 +++++++++- kernel/bpf/syscall.c | 8 +++++++ net/core/filter.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/af_inet.c | 13 +++++++++++ net/ipv4/tcp_ipv4.c | 16 +++++++++++++ net/ipv4/udp.c | 14 ++++++++++++ net/ipv6/af_inet6.c | 5 ++++ net/ipv6/tcp_ipv6.c | 16 +++++++++++++ net/ipv6/udp.c | 20 ++++++++++++++++ 13 files changed, 202 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 67dc4a6471ad..c6ab295e6dcb 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -116,12 +116,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, __ret; \ }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) { \ + lock_sock(sk); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \ + release_sock(sk); \ + } \ + __ret; \ +}) + #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND) #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND) +#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \ + sk->sk_prot->pre_connect) + +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT) + #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ ({ \ int __ret = 0; \ @@ -151,11 +177,16 @@ struct cgroup_bpf {}; static inline void cgroup_bpf_put(struct cgroup *cgrp) {} static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } +#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 132e5b95167a..378d601258be 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -231,6 +231,13 @@ struct ipv6_stub { }; extern const struct ipv6_stub *ipv6_stub __read_mostly; +/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */ +struct ipv6_bpf_stub { + int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock); +}; +extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; + /* * identify MLD packets for MLD filter exceptions */ diff --git a/include/net/sock.h b/include/net/sock.h index b8ff435fa96e..49bd2c1796b0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1026,6 +1026,9 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size) struct proto { void (*close)(struct sock *sk, long timeout); + int (*pre_connect)(struct sock *sk, + struct sockaddr *uaddr, + int addr_len); int (*connect)(struct sock *sk, struct sockaddr *uaddr, int addr_len); diff --git a/include/net/udp.h b/include/net/udp.h index 850a8e581cce..0676b272f6ac 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -273,6 +273,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); int udp_init_sock(struct sock *sk); +int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ce3e69e3c793..77afaf1ba556 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -150,6 +150,8 @@ enum bpf_attach_type { BPF_SK_MSG_VERDICT, BPF_CGROUP_INET4_BIND, BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET6_CONNECT, __MAX_BPF_ATTACH_TYPE }; @@ -744,6 +746,13 @@ union bpf_attr { * @flags: reserved for future use * Return: SK_PASS * + * int bpf_bind(ctx, addr, addr_len) + * Bind socket to address. Only binding to IP is supported, no port can be + * set in addr. + * @ctx: pointer to context of type bpf_sock_addr + * @addr: pointer to struct sockaddr to bind socket to + * @addr_len: length of sockaddr structure + * Return: 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -809,7 +818,8 @@ union bpf_attr { FN(msg_redirect_map), \ FN(msg_apply_bytes), \ FN(msg_cork_bytes), \ - FN(msg_pull_data), + FN(msg_pull_data), \ + FN(bind), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2cad66a4cacb..cf1b29bc0ab8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1180,6 +1180,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, switch (expected_attach_type) { case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET6_BIND: + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: return 0; default: return -EINVAL; @@ -1491,6 +1493,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) break; case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET6_BIND: + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; break; case BPF_CGROUP_SOCK_OPS: @@ -1557,6 +1561,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) break; case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET6_BIND: + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; break; case BPF_CGROUP_SOCK_OPS: @@ -1610,6 +1616,8 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET6_BIND: + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: break; diff --git a/net/core/filter.c b/net/core/filter.c index c08e5b121558..bdb9cadd4d27 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -3656,6 +3657,52 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { .arg2_type = ARG_ANYTHING, }; +const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; +EXPORT_SYMBOL_GPL(ipv6_bpf_stub); + +BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, + int, addr_len) +{ +#ifdef CONFIG_INET + struct sock *sk = ctx->sk; + int err; + + /* Binding to port can be expensive so it's prohibited in the helper. + * Only binding to IP is supported. + */ + err = -EINVAL; + if (addr->sa_family == AF_INET) { + if (addr_len < sizeof(struct sockaddr_in)) + return err; + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + return err; + return __inet_bind(sk, addr, addr_len, true, false); +#if IS_ENABLED(CONFIG_IPV6) + } else if (addr->sa_family == AF_INET6) { + if (addr_len < SIN6_LEN_RFC2133) + return err; + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + return err; + /* ipv6_bpf_stub cannot be NULL, since it's called from + * bpf_cgroup_inet6_connect hook and ipv6 is already loaded + */ + return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false); +#endif /* CONFIG_IPV6 */ + } +#endif /* CONFIG_INET */ + + return -EAFNOSUPPORT; +} + +static const struct bpf_func_proto bpf_bind_proto = { + .func = bpf_bind, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -3707,6 +3754,14 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) */ case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_bind: + switch (prog->expected_attach_type) { + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + return &bpf_bind_proto; + default: + return NULL; + } default: return bpf_base_func_proto(func_id); } @@ -4213,6 +4268,7 @@ static bool sock_addr_is_valid_access(int off, int size, case bpf_ctx_range(struct bpf_sock_addr, user_ip4): switch (prog->expected_attach_type) { case BPF_CGROUP_INET4_BIND: + case BPF_CGROUP_INET4_CONNECT: break; default: return false; @@ -4221,6 +4277,7 @@ static bool sock_addr_is_valid_access(int off, int size, case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): switch (prog->expected_attach_type) { case BPF_CGROUP_INET6_BIND: + case BPF_CGROUP_INET6_CONNECT: break; default: return false; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e203a39d6988..488fe26ac8e5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -547,12 +547,19 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; + int err; if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) return sk->sk_prot->disconnect(sk, flags); + if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { + err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); + if (err) + return err; + } + if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, uaddr, addr_len); @@ -633,6 +640,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_state != TCP_CLOSE) goto out; + if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { + err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); + if (err) + goto out; + } + err = sk->sk_prot->connect(sk, uaddr, addr_len); if (err < 0) goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2c6aec2643e8..3c11d992d784 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) } EXPORT_SYMBOL_GPL(tcp_twsk_unique); +static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + /* This check is replicated from tcp_v4_connect() and intended to + * prevent BPF program called below from accessing bytes that are out + * of the bound specified by user in addr_len. + */ + if (addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + sock_owned_by_me(sk); + + return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr); +} + /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -2409,6 +2424,7 @@ struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, .close = tcp_close, + .pre_connect = tcp_v4_pre_connect, .connect = tcp_v4_connect, .disconnect = tcp_disconnect, .accept = inet_csk_accept, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 908fc02fb4f8..9c6c77fec963 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1658,6 +1658,19 @@ csum_copy_err: goto try_again; } +int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + /* This check is replicated from __ip4_datagram_connect() and + * intended to prevent BPF program called below from accessing bytes + * that are out of the bound specified by user in addr_len. + */ + if (addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); +} +EXPORT_SYMBOL(udp_pre_connect); + int __udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -2530,6 +2543,7 @@ struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, .close = udp_lib_close, + .pre_connect = udp_pre_connect, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 13110bee5c14..566cec0e0a44 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -887,6 +887,10 @@ static const struct ipv6_stub ipv6_stub_impl = { .nd_tbl = &nd_tbl, }; +static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { + .inet6_bind = __inet6_bind, +}; + static int __init inet6_init(void) { struct list_head *r; @@ -1043,6 +1047,7 @@ static int __init inet6_init(void) /* ensure that ipv6 stubs are visible only after ipv6 is ready */ wmb(); ipv6_stub = &ipv6_stub_impl; + ipv6_bpf_stub = &ipv6_bpf_stub_impl; out: return err; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5425d7b100ee..6469b741cf5a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) ipv6_hdr(skb)->saddr.s6_addr32); } +static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + /* This check is replicated from tcp_v6_connect() and intended to + * prevent BPF program called below from accessing bytes that are out + * of the bound specified by user in addr_len. + */ + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + + sock_owned_by_me(sk); + + return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); +} + static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -1925,6 +1940,7 @@ struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, .close = tcp_close, + .pre_connect = tcp_v6_pre_connect, .connect = tcp_v6_connect, .disconnect = tcp_disconnect, .accept = inet_csk_accept, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ad30f5e31969..6861ed479469 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -957,6 +957,25 @@ static void udp_v6_flush_pending_frames(struct sock *sk) } } +static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + /* The following checks are replicated from __ip6_datagram_connect() + * and intended to prevent BPF program called below from accessing + * bytes that are out of the bound specified by user in addr_len. + */ + if (uaddr->sa_family == AF_INET) { + if (__ipv6_only_sock(sk)) + return -EAFNOSUPPORT; + return udp_pre_connect(sk, uaddr, addr_len); + } + + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); +} + /** * udp6_hwcsum_outgoing - handle outgoing HW checksumming * @sk: socket we are sending on @@ -1512,6 +1531,7 @@ struct proto udpv6_prot = { .name = "UDPv6", .owner = THIS_MODULE, .close = udp_lib_close, + .pre_connect = udpv6_pre_connect, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, -- cgit v1.2.3