summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/options.c24
-rw-r--r--net/mptcp/protocol.c265
-rw-r--r--net/mptcp/protocol.h14
-rw-r--r--net/mptcp/subflow.c19
4 files changed, 186 insertions, 136 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 444a38681e93..89a4225ed321 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -567,15 +567,15 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
}
static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
- struct in_addr *addr)
+ struct in_addr *addr, u16 port)
{
u8 hmac[SHA256_DIGEST_SIZE];
u8 msg[7];
msg[0] = addr_id;
memcpy(&msg[1], &addr->s_addr, 4);
- msg[5] = 0;
- msg[6] = 0;
+ msg[5] = port >> 8;
+ msg[6] = port & 0xFF;
mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
@@ -584,15 +584,15 @@ static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
- struct in6_addr *addr)
+ struct in6_addr *addr, u16 port)
{
u8 hmac[SHA256_DIGEST_SIZE];
u8 msg[19];
msg[0] = addr_id;
memcpy(&msg[1], &addr->s6_addr, 16);
- msg[17] = 0;
- msg[18] = 0;
+ msg[17] = port >> 8;
+ msg[18] = port & 0xFF;
mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
@@ -646,7 +646,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
opts->ahmac = add_addr_generate_hmac(msk->local_key,
msk->remote_key,
opts->addr_id,
- &opts->addr);
+ &opts->addr,
+ opts->port);
}
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -657,7 +658,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
opts->ahmac = add_addr6_generate_hmac(msk->local_key,
msk->remote_key,
opts->addr_id,
- &opts->addr6);
+ &opts->addr6,
+ opts->port);
}
}
#endif
@@ -962,12 +964,14 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
if (mp_opt->family == MPTCP_ADDR_IPVERSION_4)
hmac = add_addr_generate_hmac(msk->remote_key,
msk->local_key,
- mp_opt->addr_id, &mp_opt->addr);
+ mp_opt->addr_id, &mp_opt->addr,
+ mp_opt->port);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
else
hmac = add_addr6_generate_hmac(msk->remote_key,
msk->local_key,
- mp_opt->addr_id, &mp_opt->addr6);
+ mp_opt->addr_id, &mp_opt->addr6,
+ mp_opt->port);
#endif
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c5d5e68940ea..4bde960e19dc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -11,7 +11,6 @@
#include <linux/netdevice.h>
#include <linux/sched/signal.h>
#include <linux/atomic.h>
-#include <linux/igmp.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -20,7 +19,6 @@
#include <net/tcp_states.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/transp_v6.h>
-#include <net/addrconf.h>
#endif
#include <net/mptcp.h>
#include <net/xfrm.h>
@@ -1061,6 +1059,12 @@ out:
}
}
+static void __mptcp_clean_una_wakeup(struct sock *sk)
+{
+ __mptcp_clean_una(sk);
+ mptcp_write_space(sk);
+}
+
static void mptcp_enter_memory_pressure(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -1189,6 +1193,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size,
*/
while (skbs->qlen > 1) {
skb = __skb_dequeue_tail(skbs);
+ *total_ts -= skb->truesize;
__kfree_skb(skb);
}
return skbs->qlen > 0;
@@ -1444,7 +1449,7 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk,
release_sock(ssk);
}
-static void mptcp_push_pending(struct sock *sk, unsigned int flags)
+static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
struct sock *prev_ssk = NULL, *ssk = NULL;
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1696,14 +1701,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
wait_for_memory:
mptcp_set_nospace(sk);
- mptcp_push_pending(sk, msg->msg_flags);
+ __mptcp_push_pending(sk, msg->msg_flags);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
goto out;
}
if (copied)
- mptcp_push_pending(sk, msg->msg_flags);
+ __mptcp_push_pending(sk, msg->msg_flags);
out:
release_sock(sk);
@@ -2115,6 +2120,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
return backup;
}
+static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
+{
+ if (msk->subflow) {
+ iput(SOCK_INODE(msk->subflow));
+ msk->subflow = NULL;
+ }
+}
+
/* subflow sockets can be either outgoing (connect) or incoming
* (accept).
*
@@ -2126,6 +2139,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
list_del(&subflow->node);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
@@ -2154,6 +2169,18 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
release_sock(ssk);
sock_put(ssk);
+
+ if (ssk == msk->last_snd)
+ msk->last_snd = NULL;
+
+ if (ssk == msk->ack_hint)
+ msk->ack_hint = NULL;
+
+ if (ssk == msk->first)
+ msk->first = NULL;
+
+ if (msk->subflow && ssk == msk->subflow->sk)
+ mptcp_dispose_initial_subflow(msk);
}
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2238,14 +2265,58 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
mptcp_close_wake_up(sk);
}
-static void mptcp_worker(struct work_struct *work)
+static void __mptcp_retrans(struct sock *sk)
{
- struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
- struct sock *ssk, *sk = &msk->sk.icsk_inet.sk;
+ struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_sendmsg_info info = {};
struct mptcp_data_frag *dfrag;
size_t copied = 0;
- int state, ret;
+ struct sock *ssk;
+ int ret;
+
+ __mptcp_clean_una_wakeup(sk);
+ dfrag = mptcp_rtx_head(sk);
+ if (!dfrag)
+ return;
+
+ ssk = mptcp_subflow_get_retrans(msk);
+ if (!ssk)
+ goto reset_timer;
+
+ lock_sock(ssk);
+
+ /* limit retransmission to the bytes already sent on some subflows */
+ info.sent = 0;
+ info.limit = dfrag->already_sent;
+ while (info.sent < dfrag->already_sent) {
+ if (!mptcp_alloc_tx_skb(sk, ssk))
+ break;
+
+ ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ if (ret <= 0)
+ break;
+
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
+ copied += ret;
+ info.sent += ret;
+ }
+ if (copied)
+ tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
+ info.size_goal);
+
+ mptcp_set_timeout(sk, ssk);
+ release_sock(ssk);
+
+reset_timer:
+ if (!mptcp_timer_pending(sk))
+ mptcp_reset_timer(sk);
+}
+
+static void mptcp_worker(struct work_struct *work)
+{
+ struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
+ struct sock *sk = &msk->sk.icsk_inet.sk;
+ int state;
lock_sock(sk);
state = sk->sk_state;
@@ -2280,45 +2351,8 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);
- if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
- goto unlock;
-
- __mptcp_clean_una(sk);
- dfrag = mptcp_rtx_head(sk);
- if (!dfrag)
- goto unlock;
-
- ssk = mptcp_subflow_get_retrans(msk);
- if (!ssk)
- goto reset_unlock;
-
- lock_sock(ssk);
-
- /* limit retransmission to the bytes already sent on some subflows */
- info.sent = 0;
- info.limit = dfrag->already_sent;
- while (info.sent < dfrag->already_sent) {
- if (!mptcp_alloc_tx_skb(sk, ssk))
- break;
-
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
- if (ret <= 0)
- break;
-
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
- copied += ret;
- info.sent += ret;
- }
- if (copied)
- tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
- info.size_goal);
-
- mptcp_set_timeout(sk, ssk);
- release_sock(ssk);
-
-reset_unlock:
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
+ __mptcp_retrans(sk);
unlock:
release_sock(sk);
@@ -2523,12 +2557,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
might_sleep();
- /* dispose the ancillatory tcp socket, if any */
- if (msk->subflow) {
- iput(SOCK_INODE(msk->subflow));
- msk->subflow = NULL;
- }
-
/* be sure to always acquire the join list lock, to sync vs
* mptcp_finish_join().
*/
@@ -2553,6 +2581,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
sk_refcnt_debug_release(sk);
+ mptcp_dispose_initial_subflow(msk);
sock_put(sk);
}
@@ -2847,6 +2876,48 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
return ret;
}
+static bool mptcp_unsupported(int level, int optname)
+{
+ if (level == SOL_IP) {
+ switch (optname) {
+ case IP_ADD_MEMBERSHIP:
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case IP_DROP_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE:
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ case MCAST_MSFILTER:
+ return true;
+ }
+ return false;
+ }
+ if (level == SOL_IPV6) {
+ switch (optname) {
+ case IPV6_ADDRFORM:
+ case IPV6_ADD_MEMBERSHIP:
+ case IPV6_DROP_MEMBERSHIP:
+ case IPV6_JOIN_ANYCAST:
+ case IPV6_LEAVE_ANYCAST:
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ case MCAST_MSFILTER:
+ return true;
+ }
+ return false;
+ }
+ return false;
+}
+
static int mptcp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -2855,6 +2926,9 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
pr_debug("msk=%p", msk);
+ if (mptcp_unsupported(level, optname))
+ return -ENOPROTOOPT;
+
if (level == SOL_SOCKET)
return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
@@ -2934,13 +3008,14 @@ static void mptcp_release_cb(struct sock *sk)
{
unsigned long flags, nflags;
- /* push_pending may touch wmem_reserved, do it before the later
- * cleanup
- */
- if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
- __mptcp_clean_una(sk);
- if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) {
- /* mptcp_push_pending() acquires the subflow socket lock
+ for (;;) {
+ flags = 0;
+ if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
+ flags |= BIT(MPTCP_PUSH_PENDING);
+ if (!flags)
+ break;
+
+ /* the following actions acquire the subflow socket lock
*
* 1) can't be invoked in atomic scope
* 2) must avoid ABBA deadlock with msk socket spinlock: the RX
@@ -2949,13 +3024,21 @@ static void mptcp_release_cb(struct sock *sk)
*/
spin_unlock_bh(&sk->sk_lock.slock);
- mptcp_push_pending(sk, 0);
+ if (flags & BIT(MPTCP_PUSH_PENDING))
+ __mptcp_push_pending(sk, 0);
+
+ cond_resched();
spin_lock_bh(&sk->sk_lock.slock);
}
+
+ if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
+ __mptcp_clean_una_wakeup(sk);
if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
__mptcp_error_report(sk);
- /* clear any wmem reservation and errors */
+ /* push_pending may touch wmem_reserved, ensure we do the cleanup
+ * later
+ */
__mptcp_update_wmem(sk);
__mptcp_update_rmem(sk);
@@ -3285,6 +3368,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* PM/worker can now acquire the first subflow socket
* lock without racing with listener queue cleanup,
* we can notify it, if needed.
+ *
+ * Even if remote has reset the initial subflow by now
+ * the refcnt is still at least one.
*/
subflow = mptcp_subflow_ctx(msk->first);
list_add(&subflow->node, &msk->conn_list);
@@ -3376,34 +3462,10 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
return mask;
}
-static int mptcp_release(struct socket *sock)
-{
- struct mptcp_subflow_context *subflow;
- struct sock *sk = sock->sk;
- struct mptcp_sock *msk;
-
- if (!sk)
- return 0;
-
- lock_sock(sk);
-
- msk = mptcp_sk(sk);
-
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
- ip_mc_drop_socket(ssk);
- }
-
- release_sock(sk);
-
- return inet_release(sock);
-}
-
static const struct proto_ops mptcp_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
- .release = mptcp_release,
+ .release = inet_release,
.bind = mptcp_bind,
.connect = mptcp_stream_connect,
.socketpair = sock_no_socketpair,
@@ -3495,35 +3557,10 @@ void __init mptcp_proto_init(void)
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-static int mptcp6_release(struct socket *sock)
-{
- struct mptcp_subflow_context *subflow;
- struct mptcp_sock *msk;
- struct sock *sk = sock->sk;
-
- if (!sk)
- return 0;
-
- lock_sock(sk);
-
- msk = mptcp_sk(sk);
-
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
- ip_mc_drop_socket(ssk);
- ipv6_sock_mc_close(ssk);
- ipv6_sock_ac_close(ssk);
- }
-
- release_sock(sk);
- return inet6_release(sock);
-}
-
static const struct proto_ops mptcp_v6_stream_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
- .release = mptcp6_release,
+ .release = inet6_release,
.bind = mptcp_bind,
.connect = mptcp_stream_connect,
.socketpair = sock_no_socketpair,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 91827d949766..e21a5bc36cf0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -52,14 +52,15 @@
#define TCPOLEN_MPTCP_DSS_MAP64 14
#define TCPOLEN_MPTCP_DSS_CHECKSUM 2
#define TCPOLEN_MPTCP_ADD_ADDR 16
-#define TCPOLEN_MPTCP_ADD_ADDR_PORT 20
+#define TCPOLEN_MPTCP_ADD_ADDR_PORT 18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE 8
-#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 12
+#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10
#define TCPOLEN_MPTCP_ADD_ADDR6 28
-#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 32
+#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20
-#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24
-#define TCPOLEN_MPTCP_PORT_LEN 4
+#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22
+#define TCPOLEN_MPTCP_PORT_LEN 2
+#define TCPOLEN_MPTCP_PORT_ALIGN 2
#define TCPOLEN_MPTCP_RM_ADDR_BASE 4
#define TCPOLEN_MPTCP_PRIO 3
#define TCPOLEN_MPTCP_PRIO_ALIGN 4
@@ -701,8 +702,9 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
if (!echo)
len += MPTCPOPT_THMAC_LEN;
+ /* account for 2 trailing 'nop' options */
if (port)
- len += TCPOLEN_MPTCP_PORT_LEN;
+ len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN;
return len;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e1fbcab257e6..d17d39ccdf34 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -477,6 +477,11 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
goto drop;
+ if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
+ __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
+ return 0;
+ }
+
return tcp_conn_request(&mptcp_subflow_request_sock_ops,
&subflow_request_sock_ipv6_ops, sk, skb);
@@ -687,11 +692,6 @@ create_child:
/* move the msk reference ownership to the subflow */
subflow_req->msk = NULL;
ctx->conn = (struct sock *)owner;
- if (!mptcp_finish_join(child))
- goto dispose_child;
-
- SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
- tcp_rsk(req)->drop_req = true;
if (subflow_use_different_sport(owner, sk)) {
pr_debug("ack inet_sport=%d %d",
@@ -699,10 +699,16 @@ create_child:
ntohs(inet_sk((struct sock *)owner)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
- goto out;
+ goto dispose_child;
}
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
}
+
+ if (!mptcp_finish_join(child))
+ goto dispose_child;
+
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
+ tcp_rsk(req)->drop_req = true;
}
}
@@ -1297,6 +1303,7 @@ failed_unlink:
spin_lock_bh(&msk->join_list_lock);
list_del(&subflow->node);
spin_unlock_bh(&msk->join_list_lock);
+ sock_put(mptcp_subflow_tcp_sock(subflow));
failed:
subflow->disposable = 1;