diff options
author | David S. Miller <davem@davemloft.net> | 2020-07-31 16:55:32 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-07-31 16:55:32 -0700 |
commit | d9790bc2fea58739506a97e260c706bcbfed0d44 (patch) | |
tree | df6aca9d2a30b4e08dddfce8a99773b16fdfa997 | |
parent | 77aec5e1c4935e37c8cbb818a69f2f7d3433ede5 (diff) | |
parent | 00587187ad3016382bd8d3fad0f3bba0a518ab40 (diff) | |
download | lwn-d9790bc2fea58739506a97e260c706bcbfed0d44.tar.gz lwn-d9790bc2fea58739506a97e260c706bcbfed0d44.zip |
Merge branch 'mptcp-syncookies'
Florian Westphal says:
====================
mptcp: add syncookie support
Changes in v2:
- first patch renames req->ts_cookie to req->syncookie instead of
removing ts_cookie member.
- patch to add 'want_cookie' arg to init_req() functions has been dropped.
All users of that arg were changed to check 'req->syncookie' instead.
v1 cover letter:
When syn-cookies are used the SYN?ACK never contains a MPTCP option,
because the code path that creates a request socket based on a valid
cookie ACK lacks the needed changes to construct MPTCP request sockets.
After this series, if SYN carries MP_CAPABLE option, the option is not
cleared anymore and request socket will be reconstructed using the
MP_CAPABLE option data that is re-sent with the ACK.
This means that no additional state gets encoded into the syn cookie or
the TCP timestamp.
There are two caveats for SYN-Cookies with MPTCP:
1. When syn-cookies are used, the server-generated key is not stored.
The drawback is that the next connection request that comes in before
the cookie-ACK has a small chance that it will generate the same local_key.
If this happens, the cookie ACK that comes in second will (re)compute the
token hash and then detects that this is already in use.
Unlike normal case, where the server will pick a new key value and then
re-tries, we can't do that because we already committed to the key value
(it was sent to peer already).
Im this case, MPTCP cannot be used and late TCP fallback happens.
2). SYN packets with a MP_JOIN requests cannot be handled without storing
state. This is because the SYN contains a nonce value that is needed to
verify the HMAC of the MP_JOIN ACK that completes the three-way
handshake. Also, a local nonce is generated and used in the cookie
SYN/ACK.
There are only 2 ways to solve this:
a) Do not support JOINs when cookies are in effect.
b) Store the nonces somewhere.
The approach chosen here is b).
Patch 8 adds a fixed-size (1024 entries) state table to store the
information required to validate the MP_JOIN ACK and re-build the
request socket.
State gets stored when syn-cookies are active and the token in the JOIN
request referred to an established MPTCP connection that can also accept
a new subflow.
State is restored if the ACK cookie is valid, an MP_JOIN option is present
and the state slot contains valid data from a previous SYN.
After the request socket has been re-build, normal HMAC check is done just
as without syn cookies.
Largely identical to last RFC, except patch #8 which follows Paolos
suggestion to use a private table storage area rather than keeping
request sockets around. This also means I dropped the patch to remove
const qualifier from sk_listener pointers.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/crypto/chelsio/chtls/chtls_cm.c | 2 | ||||
-rw-r--r-- | include/net/mptcp.h | 11 | ||||
-rw-r--r-- | include/net/request_sock.h | 2 | ||||
-rw-r--r-- | include/net/tcp.h | 2 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 44 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 2 | ||||
-rw-r--r-- | net/ipv6/syncookies.c | 5 | ||||
-rw-r--r-- | net/mptcp/Makefile | 1 | ||||
-rw-r--r-- | net/mptcp/ctrl.c | 1 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 21 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 116 | ||||
-rw-r--r-- | net/mptcp/syncookies.c | 132 | ||||
-rw-r--r-- | net/mptcp/token.c | 38 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_connect.sh | 47 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_join.sh | 66 |
16 files changed, 453 insertions, 43 deletions
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index f924c335a195..05520dccd906 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -1348,7 +1348,7 @@ static void chtls_pass_accept_request(struct sock *sk, oreq->rsk_rcv_wnd = 0; oreq->rsk_window_clamp = 0; - oreq->cookie_ts = 0; + oreq->syncookie = 0; oreq->mss = 0; oreq->ts_recent = 0; diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 02158c257bd4..3525d2822abe 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -58,6 +58,7 @@ struct mptcp_out_options { }; #ifdef CONFIG_MPTCP +extern struct request_sock_ops mptcp_subflow_request_sock_ops; void mptcp_init(void); @@ -130,6 +131,9 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, } void mptcp_seq_show(struct seq_file *seq); +int mptcp_subflow_init_cookie_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb); #else static inline void mptcp_init(void) @@ -199,6 +203,13 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, static inline void mptcp_space(const struct sock *ssk, int *s, int *fs) { } static inline void mptcp_seq_show(struct seq_file *seq) { } + +static inline int mptcp_subflow_init_cookie_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb) +{ + return 0; /* TCP fallback */ +} #endif /* CONFIG_MPTCP */ #if IS_ENABLED(CONFIG_MPTCP_IPV6) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index cf8b33213bbc..b2eb8b4ba697 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -54,7 +54,7 @@ struct request_sock { struct request_sock *dl_next; u16 mss; u8 num_retrans; /* number of retransmits */ - u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */ + u8 syncookie:1; /* syncookie: encode tcpopts in timestamp */ u8 num_timeout:7; /* number of timeouts */ u32 ts_recent; struct timer_list rsk_timer; diff --git a/include/net/tcp.h b/include/net/tcp.h index e0c35d56091f..dbf5c791a6eb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -469,6 +469,8 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); +struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES /* Syncookies use a monotonic timer which increments every 60 seconds. diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9a4f6b16c9bc..11b20474be83 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -212,6 +212,12 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, refcount_set(&req->rsk_refcnt, 1); tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); + + if (tcp_rsk(req)->drop_req) { + refcount_set(&req->rsk_refcnt, 2); + return child; + } + if (inet_csk_reqsk_queue_add(sk, req, child)) return child; @@ -276,6 +282,39 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, } EXPORT_SYMBOL(cookie_ecn_ok); +struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk, + struct sk_buff *skb) +{ + struct tcp_request_sock *treq; + struct request_sock *req; + +#ifdef CONFIG_MPTCP + if (sk_is_mptcp(sk)) + ops = &mptcp_subflow_request_sock_ops; +#endif + + req = inet_reqsk_alloc(ops, sk, false); + if (!req) + return NULL; + +#if IS_ENABLED(CONFIG_MPTCP) + treq = tcp_rsk(req); + treq->is_mptcp = sk_is_mptcp(sk); + if (treq->is_mptcp) { + int err = mptcp_subflow_init_cookie_req(req, sk, skb); + + if (err) { + reqsk_free(req); + return NULL; + } + } +#endif + + return req; +} +EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc); + /* On input, sk is a listener. * Output is listener if incoming packet would not create a child * NULL if memory could not be allocated. @@ -326,7 +365,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */ + req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb); if (!req) goto out; @@ -350,9 +389,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack = 0; treq->tfo_listener = false; - if (IS_ENABLED(CONFIG_MPTCP)) - treq->is_mptcp = 0; - if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a018bafd7bdf..739da25b0c23 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6519,7 +6519,6 @@ static void tcp_openreq_init(struct request_sock *req, struct inet_request_sock *ireq = inet_rsk(req); req->rsk_rcv_wnd = 0; /* So that tcp_send_synack() knows! */ - req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; tcp_rsk(req)->snt_synack = 0; @@ -6674,6 +6673,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!req) goto drop; + req->syncookie = want_cookie; tcp_rsk(req)->af_specific = af_ops; tcp_rsk(req)->ts_off = 0; #if IS_ENABLED(CONFIG_MPTCP) @@ -6701,9 +6701,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, af_ops->init_req(req, sk, skb); - if (IS_ENABLED(CONFIG_MPTCP) && want_cookie) - tcp_rsk(req)->is_mptcp = 0; - if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; @@ -6739,7 +6736,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (want_cookie) { isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; if (!tmp_opt.tstamp_ok) inet_rsk(req)->ecn_ok = 0; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d8f16f6a9b02..85ff417bda7f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3393,7 +3393,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, memset(&opts, 0, sizeof(opts)); now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES - if (unlikely(req->cookie_ts)) + if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) skb->skb_mstamp_ns = cookie_init_timestamp(req, now); else #endif diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 13235a012388..e796a64be308 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -170,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false); + req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb); if (!req) goto out; @@ -178,9 +178,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->tfo_listener = false; - if (IS_ENABLED(CONFIG_MPTCP)) - treq->is_mptcp = 0; - if (security_inet_conn_request(sk, skb, req)) goto out_free; diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index 2360cbd27d59..a611968be4d7 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ mib.o pm_netlink.o +obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o mptcp_crypto_test-objs := crypto_test.o diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 8e39585d37f3..54b888f94009 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -112,6 +112,7 @@ static struct pernet_operations mptcp_pernet_ops = { void __init mptcp_init(void) { + mptcp_join_cookie_init(); mptcp_proto_init(); if (register_pernet_subsys(&mptcp_pernet_ops) < 0) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index beb34b8a5363..60b27d44c184 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -400,6 +400,7 @@ void mptcp_token_destroy_request(struct request_sock *req); int mptcp_token_new_connect(struct sock *sk); void mptcp_token_accept(struct mptcp_subflow_request_sock *r, struct mptcp_sock *msk); +bool mptcp_token_exists(u32 token); struct mptcp_sock *mptcp_token_get_sock(u32 token); struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, long *s_num); @@ -505,4 +506,24 @@ static inline bool subflow_simultaneous_connect(struct sock *sk) !subflow->conn_finished; } +#ifdef CONFIG_SYN_COOKIES +void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb); +bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb); +void __init mptcp_join_cookie_init(void); +#else +static inline void +subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb) {} +static inline bool +mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb) +{ + return false; +} + +static inline void mptcp_join_cookie_init(void) {} +#endif + #endif /* __MPTCP_PROTOCOL_H */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1c8482bc2ce5..a4cc4591bd4e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -91,17 +91,9 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req, return msk; } -static void subflow_init_req(struct request_sock *req, - const struct sock *sk_listener, - struct sk_buff *skb) +static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener) { - struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - struct mptcp_options_received mp_opt; - - pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); - - mptcp_get_options(skb, &mp_opt); subflow_req->mp_capable = 0; subflow_req->mp_join = 0; @@ -113,9 +105,29 @@ static void subflow_init_req(struct request_sock *req, * TCP option space. */ if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) - return; + return -EINVAL; #endif + return 0; +} + +static void subflow_init_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb) +{ + struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); + struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); + struct mptcp_options_received mp_opt; + int ret; + + pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); + + ret = __subflow_init_req(req, sk_listener); + if (ret) + return; + + mptcp_get_options(skb, &mp_opt); + if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); @@ -126,13 +138,33 @@ static void subflow_init_req(struct request_sock *req, } if (mp_opt.mp_capable && listener->request_mptcp) { - int err; + int err, retries = 4; + + subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; +again: + do { + get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key)); + } while (subflow_req->local_key == 0); + + if (unlikely(req->syncookie)) { + mptcp_crypto_key_sha(subflow_req->local_key, + &subflow_req->token, + &subflow_req->idsn); + if (mptcp_token_exists(subflow_req->token)) { + if (retries-- > 0) + goto again; + } else { + subflow_req->mp_capable = 1; + } + return; + } err = mptcp_token_new_request(req); if (err == 0) subflow_req->mp_capable = 1; + else if (retries-- > 0) + goto again; - subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; } else if (mp_opt.mp_join && listener->request_mptcp) { subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; subflow_req->mp_join = 1; @@ -141,11 +173,60 @@ static void subflow_init_req(struct request_sock *req, subflow_req->token = mp_opt.token; subflow_req->remote_nonce = mp_opt.nonce; subflow_req->msk = subflow_token_join_request(req, skb); + + if (unlikely(req->syncookie) && subflow_req->msk) { + if (mptcp_can_accept_new_subflow(subflow_req->msk)) + subflow_init_req_cookie_join_save(subflow_req, skb); + } + pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token, subflow_req->remote_nonce, subflow_req->msk); } } +int mptcp_subflow_init_cookie_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb) +{ + struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); + struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); + struct mptcp_options_received mp_opt; + int err; + + err = __subflow_init_req(req, sk_listener); + if (err) + return err; + + mptcp_get_options(skb, &mp_opt); + + if (mp_opt.mp_capable && mp_opt.mp_join) + return -EINVAL; + + if (mp_opt.mp_capable && listener->request_mptcp) { + if (mp_opt.sndr_key == 0) + return -EINVAL; + + subflow_req->local_key = mp_opt.rcvr_key; + err = mptcp_token_new_request(req); + if (err) + return err; + + subflow_req->mp_capable = 1; + subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; + } else if (mp_opt.mp_join && listener->request_mptcp) { + if (!mptcp_token_join_cookie_init_state(subflow_req, skb)) + return -EINVAL; + + if (mptcp_can_accept_new_subflow(subflow_req->msk)) + subflow_req->mp_join = 1; + + subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req); + static void subflow_v4_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) @@ -265,7 +346,8 @@ do_reset: tcp_done(sk); } -static struct request_sock_ops subflow_request_sock_ops; +struct request_sock_ops mptcp_subflow_request_sock_ops; +EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops); static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops; static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -278,7 +360,7 @@ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; - return tcp_conn_request(&subflow_request_sock_ops, + return tcp_conn_request(&mptcp_subflow_request_sock_ops, &subflow_request_sock_ipv4_ops, sk, skb); drop: @@ -303,7 +385,7 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) goto drop; - return tcp_conn_request(&subflow_request_sock_ops, + return tcp_conn_request(&mptcp_subflow_request_sock_ops, &subflow_request_sock_ipv6_ops, sk, skb); drop: @@ -1292,8 +1374,8 @@ static int subflow_ops_init(struct request_sock_ops *subflow_ops) void __init mptcp_subflow_init(void) { - subflow_request_sock_ops = tcp_request_sock_ops; - if (subflow_ops_init(&subflow_request_sock_ops) != 0) + mptcp_subflow_request_sock_ops = tcp_request_sock_ops; + if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0) panic("MPTCP: failed to init subflow request sock ops\n"); subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c new file mode 100644 index 000000000000..6eb992789b50 --- /dev/null +++ b/net/mptcp/syncookies.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/skbuff.h> + +#include "protocol.h" + +/* Syncookies do not work for JOIN requests. + * + * Unlike MP_CAPABLE, where the ACK cookie contains the needed MPTCP + * options to reconstruct the initial syn state, MP_JOIN does not contain + * the token to obtain the mptcp socket nor the server-generated nonce + * that was used in the cookie SYN/ACK response. + * + * Keep a small best effort state table to store the syn/synack data, + * indexed by skb hash. + * + * A MP_JOIN SYN packet handled by syn cookies is only stored if the 32bit + * token matches a known mptcp connection that can still accept more subflows. + * + * There is no timeout handling -- state is only re-constructed + * when the TCP ACK passed the cookie validation check. + */ + +struct join_entry { + u32 token; + u32 remote_nonce; + u32 local_nonce; + u8 join_id; + u8 local_id; + u8 backup; + u8 valid; +}; + +#define COOKIE_JOIN_SLOTS 1024 + +static struct join_entry join_entries[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp; +static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp; + +static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net) +{ + u32 i = skb_get_hash(skb) ^ net_hash_mix(net); + + return i % ARRAY_SIZE(join_entries); +} + +static void mptcp_join_store_state(struct join_entry *entry, + const struct mptcp_subflow_request_sock *subflow_req) +{ + entry->token = subflow_req->token; + entry->remote_nonce = subflow_req->remote_nonce; + entry->local_nonce = subflow_req->local_nonce; + entry->backup = subflow_req->backup; + entry->join_id = subflow_req->remote_id; + entry->local_id = subflow_req->local_id; + entry->valid = 1; +} + +void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb) +{ + struct net *net = read_pnet(&subflow_req->sk.req.ireq_net); + u32 i = mptcp_join_entry_hash(skb, net); + + /* No use in waiting if other cpu is already using this slot -- + * would overwrite the data that got stored. + */ + spin_lock_bh(&join_entry_locks[i]); + mptcp_join_store_state(&join_entries[i], subflow_req); + spin_unlock_bh(&join_entry_locks[i]); +} + +/* Called for a cookie-ack with MP_JOIN option present. + * Look up the saved state based on skb hash & check token matches msk + * in same netns. + * + * Caller will check msk can still accept another subflow. The hmac + * present in the cookie ACK mptcp option space will be checked later. + */ +bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb) +{ + struct net *net = read_pnet(&subflow_req->sk.req.ireq_net); + u32 i = mptcp_join_entry_hash(skb, net); + struct mptcp_sock *msk; + struct join_entry *e; + + e = &join_entries[i]; + + spin_lock_bh(&join_entry_locks[i]); + + if (e->valid == 0) { + spin_unlock_bh(&join_entry_locks[i]); + return false; + } + + e->valid = 0; + + msk = mptcp_token_get_sock(e->token); + if (!msk) { + spin_unlock_bh(&join_entry_locks[i]); + return false; + } + + /* If this fails, the token got re-used in the mean time by another + * mptcp socket in a different netns, i.e. entry is outdated. + */ + if (!net_eq(sock_net((struct sock *)msk), net)) + goto err_put; + + subflow_req->remote_nonce = e->remote_nonce; + subflow_req->local_nonce = e->local_nonce; + subflow_req->backup = e->backup; + subflow_req->remote_id = e->join_id; + subflow_req->token = e->token; + subflow_req->msk = msk; + spin_unlock_bh(&join_entry_locks[i]); + return true; + +err_put: + spin_unlock_bh(&join_entry_locks[i]); + sock_put((struct sock *)msk); + return false; +} + +void __init mptcp_join_cookie_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(join_entry_locks); i++) + spin_lock_init(&join_entry_locks[i]); + + BUILD_BUG_ON(ARRAY_SIZE(join_entry_locks) != ARRAY_SIZE(join_entries)); +} diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 97cfc45bcc4f..8b47c4bb1c6b 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -109,14 +109,12 @@ static void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn) int mptcp_token_new_request(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - int retries = TOKEN_MAX_RETRIES; struct token_bucket *bucket; u32 token; -again: - mptcp_crypto_key_gen_sha(&subflow_req->local_key, - &subflow_req->token, - &subflow_req->idsn); + mptcp_crypto_key_sha(subflow_req->local_key, + &subflow_req->token, + &subflow_req->idsn); pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n", req, subflow_req->local_key, subflow_req->token, subflow_req->idsn); @@ -126,9 +124,7 @@ again: spin_lock_bh(&bucket->lock); if (__token_bucket_busy(bucket, token)) { spin_unlock_bh(&bucket->lock); - if (!--retries) - return -EBUSY; - goto again; + return -EBUSY; } hlist_nulls_add_head_rcu(&subflow_req->token_node, &bucket->req_chain); @@ -208,6 +204,32 @@ void mptcp_token_accept(struct mptcp_subflow_request_sock *req, spin_unlock_bh(&bucket->lock); } +bool mptcp_token_exists(u32 token) +{ + struct hlist_nulls_node *pos; + struct token_bucket *bucket; + struct mptcp_sock *msk; + struct sock *sk; + + rcu_read_lock(); + bucket = token_bucket(token); + +again: + sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { + msk = mptcp_sk(sk); + if (READ_ONCE(msk->token) == token) + goto found; + } + if (get_nulls_value(pos) != (token & token_mask)) + goto again; + + rcu_read_unlock(); + return false; +found: + rcu_read_unlock(); + return true; +} + /** * mptcp_token_get_sock - retrieve mptcp connection sock using its token * @token: token of the mptcp connection to retrieve diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index c0589e071f20..57d75b7f6220 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -196,6 +196,9 @@ ip -net "$ns4" link set ns4eth3 up ip -net "$ns4" route add default via 10.0.3.2 ip -net "$ns4" route add default via dead:beef:3::2 +# use TCP syn cookies, even if no flooding was detected. +ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 + set_ethtool_flags() { local ns="$1" local dev="$2" @@ -407,6 +410,11 @@ do_transfer() sleep 1 fi + local stat_synrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done) + local stat_ackrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done) + local stat_cookietx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) + local stat_cookierx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) + ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & local spid=$! @@ -450,6 +458,45 @@ do_transfer() check_transfer $cin $sout "file received by server" rets=$? + local stat_synrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done) + local stat_ackrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done) + + local stat_cookietx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) + local stat_cookierx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) + + expect_synrx=$((stat_synrx_last_l)) + expect_ackrx=$((stat_ackrx_last_l)) + + cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies) + cookies=${cookies##*=} + + if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then + expect_synrx=$((stat_synrx_last_l+1)) + expect_ackrx=$((stat_ackrx_last_l+1)) + fi + if [ $cookies -eq 2 ];then + if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then + echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: did not advance" + fi + if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then + echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: did not advance" + fi + else + if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then + echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: changed" + fi + if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then + echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: changed" + fi + fi + + if [ $expect_synrx -ne $stat_synrx_now_l ] ;then + echo "${listener_ns} SYNRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" + fi + if [ $expect_ackrx -ne $stat_ackrx_now_l ] ;then + echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" + fi + if [ $retc -eq 0 ] && [ $rets -eq 0 ];then echo "$duration [ OK ]" cat "$capout" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index dd42c2f692d0..f39c1129ce5f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -72,6 +72,15 @@ reset() init } +reset_with_cookies() +{ + reset + + for netns in "$ns1" "$ns2";do + ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2 + done +} + for arg in "$@"; do if [ "$arg" = "-c" ]; then capture=1 @@ -138,7 +147,7 @@ do_transfer() capuser="-Z $SUDO_USER" fi - capfile="mp_join-${listener_ns}.pcap" + capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}") echo "Capturing traffic for test $TEST_COUNT into $capfile" ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & @@ -227,7 +236,7 @@ chk_join_nr() local count local dump_stats - printf "%-36s %s" "$msg" "syn" + printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn" count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$syn_nr" ]; then @@ -354,4 +363,57 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 +# single subflow, syncookies +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow with syn cookies" 1 1 1 + +# multiple subflows with syn cookies +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "multiple subflows with syn cookies" 2 2 2 + +# multiple subflows limited by server +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "subflows limited by server w cookies" 2 2 1 + +# test signal address with cookies +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "signal address with syn cookies" 1 1 1 + +# test cookie with subflow and signal +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "subflow and signal w cookies" 2 2 2 + +# accept and use add_addr with additional subflows +reset_with_cookies +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "subflows and signal w. cookies" 3 3 3 + exit $ret |