summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcelo Ricardo Leitner <marcelo.leitner@gmail.com>2016-03-19 12:17:20 -0300
committerDavid S. Miller <davem@davemloft.net>2016-03-20 16:31:12 -0400
commit3822a5ff4bc32043fa9c7b6d6f125bcdca6da39c (patch)
tree681827ec62422bf2f212913f9a696db89eaa72a6
parent31b055ef0c6116a27e9a787304ecf87a77d34764 (diff)
downloadlwn-3822a5ff4bc32043fa9c7b6d6f125bcdca6da39c.tar.gz
lwn-3822a5ff4bc32043fa9c7b6d6f125bcdca6da39c.zip
sctp: align MTU to a word
SCTP is a protocol that is aligned to a word (4 bytes). Thus using bare MTU can sometimes return values that are not aligned, like for loopback, which is 65536 but ipv4_mtu() limits that to 65535. This mis-alignment will cause the last non-aligned bytes to never be used and can cause issues with congestion control. So it's better to just consider a lower MTU and keep congestion control calcs saner as they are based on PMTU. Same applies to icmp frag needed messages, which is also fixed by this patch. One other effect of this is the inability to send MTU-sized packet without queueing or fragmentation and without hitting Nagle. As the check performed at sctp_packet_can_append_data(): if (chunk->skb->len + q->out_qlen >= transport->pathmtu - packet->overhead) /* Enough data queued to fill a packet */ return SCTP_XMIT_OK; with the above example of MTU, if there are no other messages queued, one cannot send a packet that just fits one packet (65532 bytes) and without causing DATA chunk fragmentation or a delay. v2: - Added WORD_TRUNC macro Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/sctp/sctp.h8
-rw-r--r--net/sctp/associola.c3
-rw-r--r--net/sctp/input.c3
-rw-r--r--net/sctp/transport.c4
4 files changed, 11 insertions, 7 deletions
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 835aa2ed9870..ad2136caa7d6 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -82,6 +82,11 @@
#define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT
#endif
+/* Round an int up to the next multiple of 4. */
+#define WORD_ROUND(s) (((s)+3)&~3)
+/* Truncate to the previous multiple of 4. */
+#define WORD_TRUNC(s) ((s)&~3)
+
/*
* Function declarations.
*/
@@ -475,9 +480,6 @@ for (pos = chunk->subh.fwdtsn_hdr->skip;\
(void *)pos <= (void *)chunk->subh.fwdtsn_hdr->skip + end - sizeof(struct sctp_fwdtsn_skip);\
pos++)
-/* Round an int up to the next multiple of 4. */
-#define WORD_ROUND(s) (((s)+3)&~3)
-
/* External references. */
extern struct proto sctp_prot;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a19b3e607703..e1849f3714ad 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1406,7 +1406,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
list_for_each_entry(t, &asoc->peer.transport_addr_list,
transports) {
if (t->pmtu_pending && t->dst) {
- sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst));
+ sctp_transport_update_pmtu(sk, t,
+ WORD_TRUNC(dst_mtu(t->dst)));
t->pmtu_pending = 0;
}
if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/input.c b/net/sctp/input.c
index db76f1ab4ac2..00b8445364e3 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -606,7 +606,8 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
/* PMTU discovery (RFC1191) */
if (ICMP_FRAG_NEEDED == code) {
- sctp_icmp_frag_needed(sk, asoc, transport, info);
+ sctp_icmp_frag_needed(sk, asoc, transport,
+ WORD_TRUNC(info));
goto out_unlock;
} else {
if (ICMP_PROT_UNREACH == code) {
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index d517153891a6..9b6b48c7524e 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -226,7 +226,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
}
if (transport->dst) {
- transport->pathmtu = dst_mtu(transport->dst);
+ transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
} else
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
@@ -280,7 +280,7 @@ void sctp_transport_route(struct sctp_transport *transport,
return;
}
if (transport->dst) {
- transport->pathmtu = dst_mtu(transport->dst);
+ transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
/* Initialize sk->sk_rcv_saddr, if the transport is the
* association's active path for getsockname().