diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/options.c | 24 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 265 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 14 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 19 |
4 files changed, 186 insertions, 136 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 444a38681e93..89a4225ed321 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -567,15 +567,15 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, } static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in_addr *addr) + struct in_addr *addr, u16 port) { u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[7]; msg[0] = addr_id; memcpy(&msg[1], &addr->s_addr, 4); - msg[5] = 0; - msg[6] = 0; + msg[5] = port >> 8; + msg[6] = port & 0xFF; mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac); @@ -584,15 +584,15 @@ static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, #if IS_ENABLED(CONFIG_MPTCP_IPV6) static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in6_addr *addr) + struct in6_addr *addr, u16 port) { u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19]; msg[0] = addr_id; memcpy(&msg[1], &addr->s6_addr, 16); - msg[17] = 0; - msg[18] = 0; + msg[17] = port >> 8; + msg[18] = port & 0xFF; mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac); @@ -646,7 +646,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * opts->ahmac = add_addr_generate_hmac(msk->local_key, msk->remote_key, opts->addr_id, - &opts->addr); + &opts->addr, + opts->port); } } #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -657,7 +658,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * opts->ahmac = add_addr6_generate_hmac(msk->local_key, msk->remote_key, opts->addr_id, - &opts->addr6); + &opts->addr6, + opts->port); } } #endif @@ -962,12 +964,14 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) hmac = add_addr_generate_hmac(msk->remote_key, msk->local_key, - mp_opt->addr_id, &mp_opt->addr); + mp_opt->addr_id, &mp_opt->addr, + mp_opt->port); #if IS_ENABLED(CONFIG_MPTCP_IPV6) else hmac = add_addr6_generate_hmac(msk->remote_key, msk->local_key, - mp_opt->addr_id, &mp_opt->addr6); + mp_opt->addr_id, &mp_opt->addr6, + mp_opt->port); #endif pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n", diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c5d5e68940ea..4bde960e19dc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -11,7 +11,6 @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> -#include <linux/igmp.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -20,7 +19,6 @@ #include <net/tcp_states.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/transp_v6.h> -#include <net/addrconf.h> #endif #include <net/mptcp.h> #include <net/xfrm.h> @@ -1061,6 +1059,12 @@ out: } } +static void __mptcp_clean_una_wakeup(struct sock *sk) +{ + __mptcp_clean_una(sk); + mptcp_write_space(sk); +} + static void mptcp_enter_memory_pressure(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -1189,6 +1193,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size, */ while (skbs->qlen > 1) { skb = __skb_dequeue_tail(skbs); + *total_ts -= skb->truesize; __kfree_skb(skb); } return skbs->qlen > 0; @@ -1444,7 +1449,7 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk, release_sock(ssk); } -static void mptcp_push_pending(struct sock *sk, unsigned int flags) +static void __mptcp_push_pending(struct sock *sk, unsigned int flags) { struct sock *prev_ssk = NULL, *ssk = NULL; struct mptcp_sock *msk = mptcp_sk(sk); @@ -1696,14 +1701,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) wait_for_memory: mptcp_set_nospace(sk); - mptcp_push_pending(sk, msg->msg_flags); + __mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) goto out; } if (copied) - mptcp_push_pending(sk, msg->msg_flags); + __mptcp_push_pending(sk, msg->msg_flags); out: release_sock(sk); @@ -2115,6 +2120,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) return backup; } +static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) +{ + if (msk->subflow) { + iput(SOCK_INODE(msk->subflow)); + msk->subflow = NULL; + } +} + /* subflow sockets can be either outgoing (connect) or incoming * (accept). * @@ -2126,6 +2139,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { + struct mptcp_sock *msk = mptcp_sk(sk); + list_del(&subflow->node); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); @@ -2154,6 +2169,18 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, release_sock(ssk); sock_put(ssk); + + if (ssk == msk->last_snd) + msk->last_snd = NULL; + + if (ssk == msk->ack_hint) + msk->ack_hint = NULL; + + if (ssk == msk->first) + msk->first = NULL; + + if (msk->subflow && ssk == msk->subflow->sk) + mptcp_dispose_initial_subflow(msk); } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2238,14 +2265,58 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) mptcp_close_wake_up(sk); } -static void mptcp_worker(struct work_struct *work) +static void __mptcp_retrans(struct sock *sk) { - struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); - struct sock *ssk, *sk = &msk->sk.icsk_inet.sk; + struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; size_t copied = 0; - int state, ret; + struct sock *ssk; + int ret; + + __mptcp_clean_una_wakeup(sk); + dfrag = mptcp_rtx_head(sk); + if (!dfrag) + return; + + ssk = mptcp_subflow_get_retrans(msk); + if (!ssk) + goto reset_timer; + + lock_sock(ssk); + + /* limit retransmission to the bytes already sent on some subflows */ + info.sent = 0; + info.limit = dfrag->already_sent; + while (info.sent < dfrag->already_sent) { + if (!mptcp_alloc_tx_skb(sk, ssk)) + break; + + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); + if (ret <= 0) + break; + + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); + copied += ret; + info.sent += ret; + } + if (copied) + tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, + info.size_goal); + + mptcp_set_timeout(sk, ssk); + release_sock(ssk); + +reset_timer: + if (!mptcp_timer_pending(sk)) + mptcp_reset_timer(sk); +} + +static void mptcp_worker(struct work_struct *work) +{ + struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); + struct sock *sk = &msk->sk.icsk_inet.sk; + int state; lock_sock(sk); state = sk->sk_state; @@ -2280,45 +2351,8 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(msk); - if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) - goto unlock; - - __mptcp_clean_una(sk); - dfrag = mptcp_rtx_head(sk); - if (!dfrag) - goto unlock; - - ssk = mptcp_subflow_get_retrans(msk); - if (!ssk) - goto reset_unlock; - - lock_sock(ssk); - - /* limit retransmission to the bytes already sent on some subflows */ - info.sent = 0; - info.limit = dfrag->already_sent; - while (info.sent < dfrag->already_sent) { - if (!mptcp_alloc_tx_skb(sk, ssk)) - break; - - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); - if (ret <= 0) - break; - - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); - copied += ret; - info.sent += ret; - } - if (copied) - tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, - info.size_goal); - - mptcp_set_timeout(sk, ssk); - release_sock(ssk); - -reset_unlock: - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) + __mptcp_retrans(sk); unlock: release_sock(sk); @@ -2523,12 +2557,6 @@ static void __mptcp_destroy_sock(struct sock *sk) might_sleep(); - /* dispose the ancillatory tcp socket, if any */ - if (msk->subflow) { - iput(SOCK_INODE(msk->subflow)); - msk->subflow = NULL; - } - /* be sure to always acquire the join list lock, to sync vs * mptcp_finish_join(). */ @@ -2553,6 +2581,7 @@ static void __mptcp_destroy_sock(struct sock *sk) sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); sk_refcnt_debug_release(sk); + mptcp_dispose_initial_subflow(msk); sock_put(sk); } @@ -2847,6 +2876,48 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, return ret; } +static bool mptcp_unsupported(int level, int optname) +{ + if (level == SOL_IP) { + switch (optname) { + case IP_ADD_MEMBERSHIP: + case IP_ADD_SOURCE_MEMBERSHIP: + case IP_DROP_MEMBERSHIP: + case IP_DROP_SOURCE_MEMBERSHIP: + case IP_BLOCK_SOURCE: + case IP_UNBLOCK_SOURCE: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; + } + if (level == SOL_IPV6) { + switch (optname) { + case IPV6_ADDRFORM: + case IPV6_ADD_MEMBERSHIP: + case IPV6_DROP_MEMBERSHIP: + case IPV6_JOIN_ANYCAST: + case IPV6_LEAVE_ANYCAST: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; + } + return false; +} + static int mptcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -2855,6 +2926,9 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, pr_debug("msk=%p", msk); + if (mptcp_unsupported(level, optname)) + return -ENOPROTOOPT; + if (level == SOL_SOCKET) return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); @@ -2934,13 +3008,14 @@ static void mptcp_release_cb(struct sock *sk) { unsigned long flags, nflags; - /* push_pending may touch wmem_reserved, do it before the later - * cleanup - */ - if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) - __mptcp_clean_una(sk); - if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) { - /* mptcp_push_pending() acquires the subflow socket lock + for (;;) { + flags = 0; + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) + flags |= BIT(MPTCP_PUSH_PENDING); + if (!flags) + break; + + /* the following actions acquire the subflow socket lock * * 1) can't be invoked in atomic scope * 2) must avoid ABBA deadlock with msk socket spinlock: the RX @@ -2949,13 +3024,21 @@ static void mptcp_release_cb(struct sock *sk) */ spin_unlock_bh(&sk->sk_lock.slock); - mptcp_push_pending(sk, 0); + if (flags & BIT(MPTCP_PUSH_PENDING)) + __mptcp_push_pending(sk, 0); + + cond_resched(); spin_lock_bh(&sk->sk_lock.slock); } + + if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) + __mptcp_clean_una_wakeup(sk); if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) __mptcp_error_report(sk); - /* clear any wmem reservation and errors */ + /* push_pending may touch wmem_reserved, ensure we do the cleanup + * later + */ __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); @@ -3285,6 +3368,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, /* PM/worker can now acquire the first subflow socket * lock without racing with listener queue cleanup, * we can notify it, if needed. + * + * Even if remote has reset the initial subflow by now + * the refcnt is still at least one. */ subflow = mptcp_subflow_ctx(msk->first); list_add(&subflow->node, &msk->conn_list); @@ -3376,34 +3462,10 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, return mask; } -static int mptcp_release(struct socket *sock) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = sock->sk; - struct mptcp_sock *msk; - - if (!sk) - return 0; - - lock_sock(sk); - - msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ip_mc_drop_socket(ssk); - } - - release_sock(sk); - - return inet_release(sock); -} - static const struct proto_ops mptcp_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, - .release = mptcp_release, + .release = inet_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, @@ -3495,35 +3557,10 @@ void __init mptcp_proto_init(void) } #if IS_ENABLED(CONFIG_MPTCP_IPV6) -static int mptcp6_release(struct socket *sock) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk; - struct sock *sk = sock->sk; - - if (!sk) - return 0; - - lock_sock(sk); - - msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ip_mc_drop_socket(ssk); - ipv6_sock_mc_close(ssk); - ipv6_sock_ac_close(ssk); - } - - release_sock(sk); - return inet6_release(sock); -} - static const struct proto_ops mptcp_v6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, - .release = mptcp6_release, + .release = inet6_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 91827d949766..e21a5bc36cf0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -52,14 +52,15 @@ #define TCPOLEN_MPTCP_DSS_MAP64 14 #define TCPOLEN_MPTCP_DSS_CHECKSUM 2 #define TCPOLEN_MPTCP_ADD_ADDR 16 -#define TCPOLEN_MPTCP_ADD_ADDR_PORT 20 +#define TCPOLEN_MPTCP_ADD_ADDR_PORT 18 #define TCPOLEN_MPTCP_ADD_ADDR_BASE 8 -#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 12 +#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10 #define TCPOLEN_MPTCP_ADD_ADDR6 28 -#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 32 +#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20 -#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24 -#define TCPOLEN_MPTCP_PORT_LEN 4 +#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22 +#define TCPOLEN_MPTCP_PORT_LEN 2 +#define TCPOLEN_MPTCP_PORT_ALIGN 2 #define TCPOLEN_MPTCP_RM_ADDR_BASE 4 #define TCPOLEN_MPTCP_PRIO 3 #define TCPOLEN_MPTCP_PRIO_ALIGN 4 @@ -701,8 +702,9 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; if (!echo) len += MPTCPOPT_THMAC_LEN; + /* account for 2 trailing 'nop' options */ if (port) - len += TCPOLEN_MPTCP_PORT_LEN; + len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN; return len; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e1fbcab257e6..d17d39ccdf34 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -477,6 +477,11 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) goto drop; + if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { + __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); + return 0; + } + return tcp_conn_request(&mptcp_subflow_request_sock_ops, &subflow_request_sock_ipv6_ops, sk, skb); @@ -687,11 +692,6 @@ create_child: /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; - if (!mptcp_finish_join(child)) - goto dispose_child; - - SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX); - tcp_rsk(req)->drop_req = true; if (subflow_use_different_sport(owner, sk)) { pr_debug("ack inet_sport=%d %d", @@ -699,10 +699,16 @@ create_child: ntohs(inet_sk((struct sock *)owner)->inet_sport)); if (!mptcp_pm_sport_in_anno_list(owner, sk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX); - goto out; + goto dispose_child; } SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX); } + + if (!mptcp_finish_join(child)) + goto dispose_child; + + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX); + tcp_rsk(req)->drop_req = true; } } @@ -1297,6 +1303,7 @@ failed_unlink: spin_lock_bh(&msk->join_list_lock); list_del(&subflow->node); spin_unlock_bh(&msk->join_list_lock); + sock_put(mptcp_subflow_tcp_sock(subflow)); failed: subflow->disposable = 1; |