summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-10-28 15:55:48 -0700
committerJakub Kicinski <kuba@kernel.org>2024-10-28 15:55:48 -0700
commit825199bf2017e4549586f038ede9acec68de883d (patch)
tree797f92f5cfd258a8955d207c9c8e9a18e2840ef0
parentfbb26ecc550200afcc15b121218fe7ec29bae219 (diff)
parent46a3282b87b1f9a88534eba59ecf852b2a21289c (diff)
downloadlwn-825199bf2017e4549586f038ede9acec68de883d.tar.gz
lwn-825199bf2017e4549586f038ede9acec68de883d.zip
Merge branch 'mptcp-various-small-improvements'
Matthieu Baerts says: ==================== mptcp: various small improvements The following patches are not related to each other. - Patch 1: Avoid sending advertisements on stale subflows, reducing risks on loosing them. - Patch 2: Annotate data-races around subflow->fully_established, using READ/WRITE_ONCE(). - Patch 3: A small clean-up on the PM side, avoiding a bit of duplicated code. - Patch 4: Use "Middlebox interference" MP_TCPRST code in reaction to a packet received without MPTCP options in the middle of a connection. ==================== Link: https://patch.msgid.link/20241021-net-next-mptcp-misc-6-13-v1-0-1ef02746504a@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/mptcp/diag.c2
-rw-r--r--net/mptcp/options.c4
-rw-r--r--net/mptcp/pm.c3
-rw-r--r--net/mptcp/pm_netlink.c14
-rw-r--r--net/mptcp/protocol.c8
-rw-r--r--net/mptcp/protocol.h6
-rw-r--r--net/mptcp/subflow.c16
7 files changed, 33 insertions, 20 deletions
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index 2d3efb405437..02205f7994d7 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -47,7 +47,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_REM;
if (sf->request_bkup)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_LOC;
- if (sf->fully_established)
+ if (READ_ONCE(sf->fully_established))
flags |= MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED;
if (sf->conn_finished)
flags |= MPTCP_SUBFLOW_FLAG_CONNECTED;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 370c3836b771..1603b3702e22 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -461,7 +461,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
return false;
/* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
- if (subflow->fully_established || snd_data_fin_enable ||
+ if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable ||
subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
sk->sk_state != TCP_ESTABLISHED)
return false;
@@ -930,7 +930,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* here we can process OoO, in-window pkts, only in-sequence 4th ack
* will make the subflow fully established
*/
- if (likely(subflow->fully_established)) {
+ if (likely(READ_ONCE(subflow->fully_established))) {
/* on passive sockets, check for 3rd ack retransmission
* note that msk is always set by subflow_syn_recv_sock()
* for mp_join subflows
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 620264c75dc2..16c336c51940 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -154,6 +154,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
pr_debug("msk=%p\n", msk);
+
+ if (msk->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
}
void mptcp_pm_subflow_established(struct mptcp_sock *msk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index db586a5b3866..618289aac0ab 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -781,7 +781,7 @@ bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk,
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
{
- struct mptcp_subflow_context *subflow;
+ struct mptcp_subflow_context *subflow, *alt = NULL;
msk_owned_by_me(msk);
lockdep_assert_held(&msk->pm.lock);
@@ -792,10 +792,18 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
mptcp_for_each_subflow(msk, subflow) {
if (__mptcp_subflow_active(subflow)) {
- mptcp_pm_send_ack(msk, subflow, false, false);
- break;
+ if (!subflow->stale) {
+ mptcp_pm_send_ack(msk, subflow, false, false);
+ return;
+ }
+
+ if (!alt)
+ alt = subflow;
}
}
+
+ if (alt)
+ mptcp_pm_send_ack(msk, alt, false, false);
}
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1f5c63eb21f0..e978e05ec8d1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3139,8 +3139,7 @@ cleanup:
sock_hold(sk);
pr_debug("msk=%p state=%d\n", sk, sk->sk_state);
- if (msk->token)
- mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
+ mptcp_pm_connection_closed(msk);
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
@@ -3206,8 +3205,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_stop_rtx_timer(sk);
mptcp_stop_tout_timer(sk);
- if (msk->token)
- mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
+ mptcp_pm_connection_closed(msk);
/* msk->subflow is still intact, the following will not free the first
* subflow
@@ -3511,7 +3509,7 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
struct tcp_sock *tp = tcp_sk(ssk);
unsigned long timeout;
- if (mptcp_subflow_ctx(ssk)->fully_established)
+ if (READ_ONCE(mptcp_subflow_ctx(ssk)->fully_established))
return;
/* reschedule with a timeout above RTT, as we must look only for drop */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 568a72702b08..a93e661ef5c4 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -513,7 +513,6 @@ struct mptcp_subflow_context {
request_bkup : 1,
mp_capable : 1, /* remote is MPTCP capable */
mp_join : 1, /* remote is JOINing */
- fully_established : 1, /* path validated */
pm_notified : 1, /* PM hook called for established status */
conn_finished : 1,
map_valid : 1,
@@ -532,10 +531,11 @@ struct mptcp_subflow_context {
is_mptfo : 1, /* subflow is doing TFO */
close_event_done : 1, /* has done the post-closed part */
mpc_drop : 1, /* the MPC option has been dropped in a rtx */
- __unused : 8;
+ __unused : 9;
bool data_avail;
bool scheduled;
bool pm_listener; /* a listener managed by the kernel PM? */
+ bool fully_established; /* path validated */
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -780,7 +780,7 @@ static inline bool __tcp_can_send(const struct sock *ssk)
static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
- if (subflow->request_join && !subflow->fully_established)
+ if (subflow->request_join && !READ_ONCE(subflow->fully_established))
return false;
return __tcp_can_send(mptcp_subflow_tcp_sock(subflow));
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6170f2fff71e..07352b15f145 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -800,7 +800,7 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
const struct mptcp_options_received *mp_opt)
{
subflow_set_remote_key(msk, subflow, mp_opt);
- subflow->fully_established = 1;
+ WRITE_ONCE(subflow->fully_established, true);
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
@@ -971,7 +971,8 @@ enum mapping_status {
MAPPING_EMPTY,
MAPPING_DATA_FIN,
MAPPING_DUMMY,
- MAPPING_BAD_CSUM
+ MAPPING_BAD_CSUM,
+ MAPPING_NODSS
};
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
@@ -1128,8 +1129,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
return MAPPING_EMPTY;
}
+ /* If the required DSS has likely been dropped by a middlebox */
if (!subflow->map_valid)
- return MAPPING_INVALID;
+ return MAPPING_NODSS;
goto validate_seq;
}
@@ -1343,7 +1345,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
- status == MAPPING_BAD_CSUM))
+ status == MAPPING_BAD_CSUM || status == MAPPING_NODSS))
goto fallback;
if (status != MAPPING_OK)
@@ -1396,7 +1398,9 @@ fallback:
* subflow_error_report() will introduce the appropriate barriers
*/
subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
+ subflow->reset_reason = status == MAPPING_NODSS ?
+ MPTCP_RST_EMIDDLEBOX :
+ MPTCP_RST_EMPTCP;
reset:
WRITE_ONCE(ssk->sk_err, EBADMSG);
@@ -2062,7 +2066,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
- new_ctx->fully_established = 1;
+ WRITE_ONCE(new_ctx->fully_established, true);
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
new_ctx->request_bkup = subflow_req->request_bkup;