From 3acb50c18d8d6650f10919464ade4dcdaf41d62f Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:42 -0300 Subject: sctp: delay as much as possible skb_linearize This patch is a preparation for the GSO one. In order to successfully handle GSO packets on rx path we must not call skb_linearize, otherwise it defeats any gain GSO may have had. This patch thus delays as much as possible the call to skb_linearize, leaving it to sctp_inq_pop() moment. For that the sanity checks performed now know how to deal with fragments. One positive side-effect of this is that if the socket is backlogged it will have the chance of doing it on backlog processing instead of during softirq. With this move, it's evident that a check for non-linearity in sctp_inq_pop was ineffective and is now removed. Note that a similar check is performed a bit below this one. Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/input.c | 45 +++++++++++++++++++++++++-------------------- net/sctp/inqueue.c | 29 ++++++++++++++++++----------- 2 files changed, 43 insertions(+), 31 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/input.c b/net/sctp/input.c index a701527a9480..5cff2546c3dd 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -112,7 +112,6 @@ int sctp_rcv(struct sk_buff *skb) struct sctp_ep_common *rcvr; struct sctp_transport *transport = NULL; struct sctp_chunk *chunk; - struct sctphdr *sh; union sctp_addr src; union sctp_addr dest; int family; @@ -124,15 +123,18 @@ int sctp_rcv(struct sk_buff *skb) __SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS); - if (skb_linearize(skb)) + /* If packet is too small to contain a single chunk, let's not + * waste time on it anymore. + */ + if (skb->len < sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + + skb_transport_offset(skb)) goto discard_it; - sh = sctp_hdr(skb); + if (!pskb_may_pull(skb, sizeof(struct sctphdr))) + goto discard_it; - /* Pull up the IP and SCTP headers. */ + /* Pull up the IP header. */ __skb_pull(skb, skb_transport_offset(skb)); - if (skb->len < sizeof(struct sctphdr)) - goto discard_it; skb->csum_valid = 0; /* Previous value not applicable */ if (skb_csum_unnecessary(skb)) @@ -141,11 +143,7 @@ int sctp_rcv(struct sk_buff *skb) goto discard_it; skb->csum_valid = 1; - skb_pull(skb, sizeof(struct sctphdr)); - - /* Make sure we at least have chunk headers worth of data left. */ - if (skb->len < sizeof(struct sctp_chunkhdr)) - goto discard_it; + __skb_pull(skb, sizeof(struct sctphdr)); family = ipver2af(ip_hdr(skb)->version); af = sctp_get_af_specific(family); @@ -230,7 +228,7 @@ int sctp_rcv(struct sk_buff *skb) chunk->rcvr = rcvr; /* Remember the SCTP header. */ - chunk->sctp_hdr = sh; + chunk->sctp_hdr = sctp_hdr(skb); /* Set the source and destination addresses of the incoming chunk. */ sctp_init_addrs(chunk, &src, &dest); @@ -660,19 +658,23 @@ out_unlock: */ static int sctp_rcv_ootb(struct sk_buff *skb) { - sctp_chunkhdr_t *ch; - __u8 *ch_end; - - ch = (sctp_chunkhdr_t *) skb->data; + sctp_chunkhdr_t *ch, _ch; + int ch_end, offset = 0; /* Scan through all the chunks in the packet. */ do { + /* Make sure we have at least the header there */ + if (offset + sizeof(sctp_chunkhdr_t) > skb->len) + break; + + ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch); + /* Break out if chunk length is less then minimal. */ if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); - if (ch_end > skb_tail_pointer(skb)) + ch_end = offset + WORD_ROUND(ntohs(ch->length)); + if (ch_end > skb->len) break; /* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the @@ -697,8 +699,8 @@ static int sctp_rcv_ootb(struct sk_buff *skb) if (SCTP_CID_INIT == ch->type && (void *)ch != skb->data) goto discard; - ch = (sctp_chunkhdr_t *) ch_end; - } while (ch_end < skb_tail_pointer(skb)); + offset = ch_end; + } while (ch_end < skb->len); return 0; @@ -1173,6 +1175,9 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, { sctp_chunkhdr_t *ch; + if (skb_linearize(skb)) + return NULL; + ch = (sctp_chunkhdr_t *) skb->data; /* The code below will attempt to walk the chunk and extract diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 9d87bba0ff1d..5ba08ceda3ab 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -130,7 +130,8 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) * at this time. */ - if ((chunk = queue->in_progress)) { + chunk = queue->in_progress; + if (chunk) { /* There is a packet that we have been working on. * Any post processing work to do before we move on? */ @@ -152,15 +153,29 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) if (!chunk) { struct list_head *entry; +next_chunk: /* Is the queue empty? */ if (list_empty(&queue->in_chunk_list)) return NULL; entry = queue->in_chunk_list.next; - chunk = queue->in_progress = - list_entry(entry, struct sctp_chunk, list); + chunk = list_entry(entry, struct sctp_chunk, list); list_del_init(entry); + /* Linearize if it's not GSO */ + if (skb_is_nonlinear(chunk->skb)) { + if (skb_linearize(chunk->skb)) { + __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); + sctp_chunk_free(chunk); + goto next_chunk; + } + + /* Update sctp_hdr as it probably changed */ + chunk->sctp_hdr = sctp_hdr(chunk->skb); + } + + queue->in_progress = chunk; + /* This is the first chunk in the packet. */ chunk->singleton = 1; ch = (sctp_chunkhdr_t *) chunk->skb->data; @@ -172,14 +187,6 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) chunk->chunk_hdr = ch; chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); - /* In the unlikely case of an IP reassembly, the skb could be - * non-linear. If so, update chunk_end so that it doesn't go past - * the skb->tail. - */ - if (unlikely(skb_is_nonlinear(chunk->skb))) { - if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) - chunk->chunk_end = skb_tail_pointer(chunk->skb); - } skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ -- cgit v1.2.3 From 90017accff61ae89283ad9a51f9ac46ca01633fb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:43 -0300 Subject: sctp: Add GSO support SCTP has this pecualiarity that its packets cannot be just segmented to (P)MTU. Its chunks must be contained in IP segments, padding respected. So we can't just generate a big skb, set gso_size to the fragmentation point and deliver it to IP layer. This patch takes a different approach. SCTP will now build a skb as it would be if it was received using GRO. That is, there will be a cover skb with protocol headers and children ones containing the actual segments, already segmented to a way that respects SCTP RFCs. With that, we can tell skb_segment() to just split based on frag_list, trusting its sizes are already in accordance. This way SCTP can benefit from GSO and instead of passing several packets through the stack, it can pass a single large packet. v2: - Added support for receiving GSO frames, as requested by Dave Miller. - Clear skb->cb if packet is GSO (otherwise it's not used by SCTP) - Added heuristics similar to what we have in TCP for not generating single GSO packets that fills cwnd. v3: - consider sctphdr size in skb_gso_transport_seglen() - rebased due to 5c7cdf339af5 ("gso: Remove arbitrary checks for unsupported GSO") Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 7 +- include/linux/netdevice.h | 1 + include/linux/skbuff.h | 2 + include/net/sctp/sctp.h | 4 + include/net/sctp/structs.h | 5 + net/core/ethtool.c | 1 + net/core/skbuff.c | 3 + net/sctp/Makefile | 3 +- net/sctp/input.c | 12 +- net/sctp/inqueue.c | 51 +++++- net/sctp/offload.c | 98 +++++++++++ net/sctp/output.c | 363 +++++++++++++++++++++++++++------------- net/sctp/protocol.c | 3 + net/sctp/socket.c | 2 + 14 files changed, 429 insertions(+), 126 deletions(-) create mode 100644 net/sctp/offload.c (limited to 'net/sctp') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index aa7b2400f98c..9c6c8ef2e9e7 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -53,8 +53,9 @@ enum { * headers in software. */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ + NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_TUNNEL_REMCSUM_BIT, + NETIF_F_GSO_SCTP_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ @@ -128,6 +129,7 @@ enum { #define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) #define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) +#define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) @@ -166,7 +168,8 @@ enum { NETIF_F_FSO) /* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO) +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \ + NETIF_F_GSO_SCTP) /* * If one device supports one of these features, then enable them diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f45929ce8157..fa6df2699532 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4012,6 +4012,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aa3f9d7e8d5c..dc0fca747c5e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -487,6 +487,8 @@ enum { SKB_GSO_PARTIAL = 1 << 13, SKB_GSO_TUNNEL_REMCSUM = 1 << 14, + + SKB_GSO_SCTP = 1 << 15, }; #if BITS_PER_LONG > 32 diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index b392ac8382f2..632e205ca54b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -186,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net); int sctp_remaddr_proc_init(struct net *net); void sctp_remaddr_proc_exit(struct net *net); +/* + * sctp/offload.c + */ +int sctp_offload_init(void); /* * Module global variables diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 16b013a6191c..83c5ec58b93a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -566,6 +566,9 @@ struct sctp_chunk { /* This points to the sk_buff containing the actual data. */ struct sk_buff *skb; + /* In case of GSO packets, this will store the head one */ + struct sk_buff *head_skb; + /* These are the SCTP headers by reverse order in a packet. * Note that some of these may happen more than once. In that * case, we point at the "current" one, whatever that means @@ -696,6 +699,8 @@ struct sctp_packet { size_t overhead; /* This is the total size of all chunks INCLUDING padding. */ size_t size; + /* This is the maximum size this packet may have */ + size_t max_size; /* The packet is destined for this transport address. * The function we finally use to pass down to the next lower diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f4034817d255..977489820eb9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -89,6 +89,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", + [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5ca562b56ec3..b6e0f95bef36 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #ifdef CONFIG_NET_CLS_ACT #include @@ -4383,6 +4384,8 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) thlen += inner_tcp_hdrlen(skb); } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { thlen = tcp_hdrlen(skb); + } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) { + thlen = sizeof(struct sctphdr); } /* UFO sets gso_size to the size of the fragmentation * payload, i.e. the size of the L4 (UDP) header is already diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 0fca5824ad0e..6c4f7496cec6 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -11,7 +11,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ transport.o chunk.o sm_make_chunk.o ulpevent.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ - output.o input.o debug.o ssnmap.o auth.o + output.o input.o debug.o ssnmap.o auth.o \ + offload.o sctp_probe-y := probe.o diff --git a/net/sctp/input.c b/net/sctp/input.c index 5cff2546c3dd..6f8e676d285e 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -139,7 +139,9 @@ int sctp_rcv(struct sk_buff *skb) skb->csum_valid = 0; /* Previous value not applicable */ if (skb_csum_unnecessary(skb)) __skb_decr_checksum_unnecessary(skb); - else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0) + else if (!sctp_checksum_disable && + !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) && + sctp_rcv_checksum(net, skb) < 0) goto discard_it; skb->csum_valid = 1; @@ -1175,6 +1177,14 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, { sctp_chunkhdr_t *ch; + /* We do not allow GSO frames here as we need to linearize and + * then cannot guarantee frame boundaries. This shouldn't be an + * issue as packets hitting this are mostly INIT or INIT-ACK and + * those cannot be on GSO-style anyway. + */ + if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) + return NULL; + if (skb_linearize(skb)) return NULL; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 5ba08ceda3ab..edabbbdfca54 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -138,6 +138,17 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) if (chunk->singleton || chunk->end_of_packet || chunk->pdiscard) { + if (chunk->head_skb == chunk->skb) { + chunk->skb = skb_shinfo(chunk->skb)->frag_list; + goto new_skb; + } + if (chunk->skb->next) { + chunk->skb = chunk->skb->next; + goto new_skb; + } + + if (chunk->head_skb) + chunk->skb = chunk->head_skb; sctp_chunk_free(chunk); chunk = queue->in_progress = NULL; } else { @@ -155,15 +166,15 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) next_chunk: /* Is the queue empty? */ - if (list_empty(&queue->in_chunk_list)) + entry = sctp_list_dequeue(&queue->in_chunk_list); + if (!entry) return NULL; - entry = queue->in_chunk_list.next; chunk = list_entry(entry, struct sctp_chunk, list); - list_del_init(entry); /* Linearize if it's not GSO */ - if (skb_is_nonlinear(chunk->skb)) { + if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP && + skb_is_nonlinear(chunk->skb)) { if (skb_linearize(chunk->skb)) { __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); sctp_chunk_free(chunk); @@ -174,15 +185,39 @@ next_chunk: chunk->sctp_hdr = sctp_hdr(chunk->skb); } + if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) { + /* GSO-marked skbs but without frags, handle + * them normally + */ + if (skb_shinfo(chunk->skb)->frag_list) + chunk->head_skb = chunk->skb; + + /* skbs with "cover letter" */ + if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len) + chunk->skb = skb_shinfo(chunk->skb)->frag_list; + + if (WARN_ON(!chunk->skb)) { + __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); + sctp_chunk_free(chunk); + goto next_chunk; + } + } + + if (chunk->asoc) + sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb); + queue->in_progress = chunk; +new_skb: /* This is the first chunk in the packet. */ - chunk->singleton = 1; ch = (sctp_chunkhdr_t *) chunk->skb->data; + chunk->singleton = 1; chunk->data_accepted = 0; - - if (chunk->asoc) - sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb); + chunk->pdiscard = 0; + chunk->auth = 0; + chunk->has_asconf = 0; + chunk->end_of_packet = 0; + chunk->ecn_ce_done = 0; } chunk->chunk_hdr = ch; diff --git a/net/sctp/offload.c b/net/sctp/offload.c new file mode 100644 index 000000000000..a37887b373a7 --- /dev/null +++ b/net/sctp/offload.c @@ -0,0 +1,98 @@ +/* + * sctp_offload - GRO/GSO Offloading for SCTP + * + * Copyright (C) 2015, Marcelo Ricardo Leitner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static __le32 sctp_gso_make_checksum(struct sk_buff *skb) +{ + skb->ip_summed = CHECKSUM_NONE; + return sctp_compute_cksum(skb, skb_transport_offset(skb)); +} + +static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct sctphdr *sh; + + sh = sctp_hdr(skb); + if (!pskb_may_pull(skb, sizeof(*sh))) + goto out; + + __skb_pull(skb, sizeof(*sh)); + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + struct skb_shared_info *pinfo = skb_shinfo(skb); + struct sk_buff *frag_iter; + + pinfo->gso_segs = 0; + if (skb->len != skb->data_len) { + /* Means we have chunks in here too */ + pinfo->gso_segs++; + } + + skb_walk_frags(skb, frag_iter) + pinfo->gso_segs++; + + segs = NULL; + goto out; + } + + segs = skb_segment(skb, features | NETIF_F_HW_CSUM); + if (IS_ERR(segs)) + goto out; + + /* All that is left is update SCTP CRC if necessary */ + if (!(features & NETIF_F_SCTP_CRC)) { + for (skb = segs; skb; skb = skb->next) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + sh = sctp_hdr(skb); + sh->checksum = sctp_gso_make_checksum(skb); + } + } + } + +out: + return segs; +} + +static const struct net_offload sctp_offload = { + .callbacks = { + .gso_segment = sctp_gso_segment, + }, +}; + +int __init sctp_offload_init(void) +{ + return inet_add_offload(&sctp_offload, IPPROTO_SCTP); +} diff --git a/net/sctp/output.c b/net/sctp/output.c index 9844fe573029..60499a69179d 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -84,18 +84,42 @@ static void sctp_packet_reset(struct sctp_packet *packet) struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, __u32 vtag, int ecn_capable) { - struct sctp_chunk *chunk = NULL; + struct sctp_transport *tp = packet->transport; + struct sctp_association *asoc = tp->asoc; pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); packet->vtag = vtag; + if (asoc && tp->dst) { + struct sock *sk = asoc->base.sk; + + rcu_read_lock(); + if (__sk_dst_get(sk) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); + } + + if (sk_can_gso(sk)) { + struct net_device *dev = tp->dst->dev; + + packet->max_size = dev->gso_max_size; + } else { + packet->max_size = asoc->pathmtu; + } + rcu_read_unlock(); + + } else { + packet->max_size = tp->pathmtu; + } + if (ecn_capable && sctp_packet_empty(packet)) { - chunk = sctp_get_ecne_prepend(packet->transport->asoc); + struct sctp_chunk *chunk; /* If there a is a prepend chunk stick it on the list before * any other chunks get appended. */ + chunk = sctp_get_ecne_prepend(asoc); if (chunk) sctp_packet_append_chunk(packet, chunk); } @@ -381,12 +405,15 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; struct sctphdr *sh; - struct sk_buff *nskb; + struct sk_buff *nskb = NULL, *head = NULL; struct sctp_chunk *chunk, *tmp; struct sock *sk; int err = 0; int padding; /* How much padding do we need? */ + int pkt_size; __u8 has_data = 0; + int gso = 0; + int pktcount = 0; struct dst_entry *dst; unsigned char *auth = NULL; /* pointer to auth in skb data */ @@ -400,18 +427,37 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); sk = chunk->skb->sk; - /* Allocate the new skb. */ - nskb = alloc_skb(packet->size + MAX_HEADER, gfp); - if (!nskb) + /* Allocate the head skb, or main one if not in GSO */ + if (packet->size > tp->pathmtu && !packet->ipfragok) { + if (sk_can_gso(sk)) { + gso = 1; + pkt_size = packet->overhead; + } else { + /* If this happens, we trash this packet and try + * to build a new one, hopefully correct this + * time. Application may notice this error. + */ + pr_err_once("Trying to GSO but underlying device doesn't support it."); + goto nomem; + } + } else { + pkt_size = packet->size; + } + head = alloc_skb(pkt_size + MAX_HEADER, gfp); + if (!head) goto nomem; + if (gso) { + NAPI_GRO_CB(head)->last = head; + skb_shinfo(head)->gso_type = sk->sk_gso_type; + } /* Make sure the outbound skb has enough header room reserved. */ - skb_reserve(nskb, packet->overhead + MAX_HEADER); + skb_reserve(head, packet->overhead + MAX_HEADER); /* Set the owning socket so that we know where to get the * destination IP address. */ - sctp_packet_set_owner_w(nskb, sk); + sctp_packet_set_owner_w(head, sk); if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); @@ -422,11 +468,11 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) dst = dst_clone(tp->dst); if (!dst) goto no_route; - skb_dst_set(nskb, dst); + skb_dst_set(head, dst); /* Build the SCTP header. */ - sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); - skb_reset_transport_header(nskb); + sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr)); + skb_reset_transport_header(head); sh->source = htons(packet->source_port); sh->dest = htons(packet->destination_port); @@ -441,90 +487,133 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sh->vtag = htonl(packet->vtag); sh->checksum = 0; - /** - * 6.10 Bundling - * - * An endpoint bundles chunks by simply including multiple - * chunks in one outbound SCTP packet. ... - */ - - /** - * 3.2 Chunk Field Descriptions - * - * The total length of a chunk (including Type, Length and - * Value fields) MUST be a multiple of 4 bytes. If the length - * of the chunk is not a multiple of 4 bytes, the sender MUST - * pad the chunk with all zero bytes and this padding is not - * included in the chunk length field. The sender should - * never pad with more than 3 bytes. - * - * [This whole comment explains WORD_ROUND() below.] - */ - pr_debug("***sctp_transmit_packet***\n"); - list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { - list_del_init(&chunk->list); - if (sctp_chunk_is_data(chunk)) { - /* 6.3.1 C4) When data is in flight and when allowed - * by rule C5, a new RTT measurement MUST be made each - * round trip. Furthermore, new RTT measurements - * SHOULD be made no more than once per round-trip - * for a given destination transport address. - */ + do { + /* Set up convenience variables... */ + chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); + pktcount++; - if (!chunk->resent && !tp->rto_pending) { - chunk->rtt_in_progress = 1; - tp->rto_pending = 1; + /* Calculate packet size, so it fits in PMTU. Leave + * other chunks for the next packets. + */ + if (gso) { + pkt_size = packet->overhead; + list_for_each_entry(chunk, &packet->chunk_list, list) { + int padded = WORD_ROUND(chunk->skb->len); + + if (pkt_size + padded > tp->pathmtu) + break; + pkt_size += padded; } - has_data = 1; - } + /* Allocate a new skb. */ + nskb = alloc_skb(pkt_size + MAX_HEADER, gfp); + if (!nskb) + goto nomem; - padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; - if (padding) - memset(skb_put(chunk->skb, padding), 0, padding); + /* Make sure the outbound skb has enough header + * room reserved. + */ + skb_reserve(nskb, packet->overhead + MAX_HEADER); + } else { + nskb = head; + } - /* if this is the auth chunk that we are adding, - * store pointer where it will be added and put - * the auth into the packet. + /** + * 3.2 Chunk Field Descriptions + * + * The total length of a chunk (including Type, Length and + * Value fields) MUST be a multiple of 4 bytes. If the length + * of the chunk is not a multiple of 4 bytes, the sender MUST + * pad the chunk with all zero bytes and this padding is not + * included in the chunk length field. The sender should + * never pad with more than 3 bytes. + * + * [This whole comment explains WORD_ROUND() below.] */ - if (chunk == packet->auth) - auth = skb_tail_pointer(nskb); - memcpy(skb_put(nskb, chunk->skb->len), + pkt_size -= packet->overhead; + list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { + list_del_init(&chunk->list); + if (sctp_chunk_is_data(chunk)) { + /* 6.3.1 C4) When data is in flight and when allowed + * by rule C5, a new RTT measurement MUST be made each + * round trip. Furthermore, new RTT measurements + * SHOULD be made no more than once per round-trip + * for a given destination transport address. + */ + + if (!chunk->resent && !tp->rto_pending) { + chunk->rtt_in_progress = 1; + tp->rto_pending = 1; + } + + has_data = 1; + } + + padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; + if (padding) + memset(skb_put(chunk->skb, padding), 0, padding); + + /* if this is the auth chunk that we are adding, + * store pointer where it will be added and put + * the auth into the packet. + */ + if (chunk == packet->auth) + auth = skb_tail_pointer(nskb); + + memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, chunk->skb->len); - pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, " - "rtt_in_progress:%d\n", chunk, - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), - chunk->has_tsn ? "TSN" : "No TSN", - chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, - ntohs(chunk->chunk_hdr->length), chunk->skb->len, - chunk->rtt_in_progress); - - /* - * If this is a control chunk, this is our last - * reference. Free data chunks after they've been - * acknowledged or have failed. - */ - if (!sctp_chunk_is_data(chunk)) - sctp_chunk_free(chunk); - } + pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n", + chunk, + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), + chunk->has_tsn ? "TSN" : "No TSN", + chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, + ntohs(chunk->chunk_hdr->length), chunk->skb->len, + chunk->rtt_in_progress); + + /* If this is a control chunk, this is our last + * reference. Free data chunks after they've been + * acknowledged or have failed. + * Re-queue auth chunks if needed. + */ + pkt_size -= WORD_ROUND(chunk->skb->len); - /* SCTP-AUTH, Section 6.2 - * The sender MUST calculate the MAC as described in RFC2104 [2] - * using the hash function H as described by the MAC Identifier and - * the shared association key K based on the endpoint pair shared key - * described by the shared key identifier. The 'data' used for the - * computation of the AUTH-chunk is given by the AUTH chunk with its - * HMAC field set to zero (as shown in Figure 6) followed by all - * chunks that are placed after the AUTH chunk in the SCTP packet. - */ - if (auth) - sctp_auth_calculate_hmac(asoc, nskb, - (struct sctp_auth_chunk *)auth, - gfp); + if (chunk == packet->auth && !list_empty(&packet->chunk_list)) + list_add(&chunk->list, &packet->chunk_list); + else if (!sctp_chunk_is_data(chunk)) + sctp_chunk_free(chunk); + + if (!pkt_size) + break; + } + + /* SCTP-AUTH, Section 6.2 + * The sender MUST calculate the MAC as described in RFC2104 [2] + * using the hash function H as described by the MAC Identifier and + * the shared association key K based on the endpoint pair shared key + * described by the shared key identifier. The 'data' used for the + * computation of the AUTH-chunk is given by the AUTH chunk with its + * HMAC field set to zero (as shown in Figure 6) followed by all + * chunks that are placed after the AUTH chunk in the SCTP packet. + */ + if (auth) + sctp_auth_calculate_hmac(asoc, nskb, + (struct sctp_auth_chunk *)auth, + gfp); + + if (!gso) + break; + + if (skb_gro_receive(&head, nskb)) + goto nomem; + nskb = NULL; + if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= + sk->sk_gso_max_segs)) + goto nomem; + } while (!list_empty(&packet->chunk_list)); /* 2) Calculate the Adler-32 checksum of the whole packet, * including the SCTP common header and all the @@ -532,16 +621,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * * Note: Adler-32 is no longer applicable, as has been replaced * by CRC32-C as described in . + * + * If it's a GSO packet, it's postponed to sctp_skb_segment. */ - if (!sctp_checksum_disable) { - if (!(dst->dev->features & NETIF_F_SCTP_CRC) || - (dst_xfrm(dst) != NULL) || packet->ipfragok) { - sh->checksum = sctp_compute_cksum(nskb, 0); + if (!sctp_checksum_disable || gso) { + if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) || + dst_xfrm(dst) || packet->ipfragok)) { + sh->checksum = sctp_compute_cksum(head, 0); } else { /* no need to seed pseudo checksum for SCTP */ - nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->csum_start = skb_transport_header(nskb) - nskb->head; - nskb->csum_offset = offsetof(struct sctphdr, checksum); + head->ip_summed = CHECKSUM_PARTIAL; + head->csum_start = skb_transport_header(head) - head->head; + head->csum_offset = offsetof(struct sctphdr, checksum); } } @@ -557,7 +648,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * Note: The works for IPv6 layer checks this bit too later * in transmission. See IP6_ECN_flow_xmit(). */ - tp->af_specific->ecn_capable(nskb->sk); + tp->af_specific->ecn_capable(sk); /* Set up the IP options. */ /* BUG: not implemented @@ -566,7 +657,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) /* Dump that on IP! */ if (asoc) { - asoc->stats.opackets++; + asoc->stats.opackets += pktcount; if (asoc->peer.last_sent_to != tp) /* Considering the multiple CPU scenario, this is a * "correcter" place for last_sent_to. --xguo @@ -589,16 +680,36 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) } } - pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); + pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len); + + if (gso) { + /* Cleanup our debris for IP stacks */ + memset(head->cb, 0, max(sizeof(struct inet_skb_parm), + sizeof(struct inet6_skb_parm))); - nskb->ignore_df = packet->ipfragok; - tp->af_specific->sctp_xmit(nskb, tp); + skb_shinfo(head)->gso_segs = pktcount; + skb_shinfo(head)->gso_size = GSO_BY_FRAGS; + + /* We have to refresh this in case we are xmiting to + * more than one transport at a time + */ + rcu_read_lock(); + if (__sk_dst_get(sk) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); + } + rcu_read_unlock(); + } + head->ignore_df = packet->ipfragok; + tp->af_specific->sctp_xmit(head, tp); out: sctp_packet_reset(packet); return err; no_route: - kfree_skb(nskb); + kfree_skb(head); + if (nskb != head) + kfree_skb(nskb); if (asoc) IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); @@ -751,39 +862,63 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, struct sctp_chunk *chunk, u16 chunk_len) { - size_t psize; - size_t pmtu; - int too_big; + size_t psize, pmtu; sctp_xmit_t retval = SCTP_XMIT_OK; psize = packet->size; - pmtu = ((packet->transport->asoc) ? - (packet->transport->asoc->pathmtu) : - (packet->transport->pathmtu)); - - too_big = (psize + chunk_len > pmtu); + if (packet->transport->asoc) + pmtu = packet->transport->asoc->pathmtu; + else + pmtu = packet->transport->pathmtu; /* Decide if we need to fragment or resubmit later. */ - if (too_big) { - /* It's OK to fragmet at IP level if any one of the following + if (psize + chunk_len > pmtu) { + /* It's OK to fragment at IP level if any one of the following * is true: - * 1. The packet is empty (meaning this chunk is greater - * the MTU) - * 2. The chunk we are adding is a control chunk - * 3. The packet doesn't have any data in it yet and data - * requires authentication. + * 1. The packet is empty (meaning this chunk is greater + * the MTU) + * 2. The packet doesn't have any data in it yet and data + * requires authentication. */ - if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || + if (sctp_packet_empty(packet) || (!packet->has_data && chunk->auth)) { /* We no longer do re-fragmentation. * Just fragment at the IP layer, if we * actually hit this condition */ packet->ipfragok = 1; - } else { - retval = SCTP_XMIT_PMTU_FULL; + goto out; } + + /* It is also okay to fragment if the chunk we are + * adding is a control chunk, but only if current packet + * is not a GSO one otherwise it causes fragmentation of + * a large frame. So in this case we allow the + * fragmentation by forcing it to be in a new packet. + */ + if (!sctp_chunk_is_data(chunk) && packet->has_data) + retval = SCTP_XMIT_PMTU_FULL; + + if (psize + chunk_len > packet->max_size) + /* Hit GSO/PMTU limit, gotta flush */ + retval = SCTP_XMIT_PMTU_FULL; + + if (!packet->transport->burst_limited && + psize + chunk_len > (packet->transport->cwnd >> 1)) + /* Do not allow a single GSO packet to use more + * than half of cwnd. + */ + retval = SCTP_XMIT_PMTU_FULL; + + if (packet->transport->burst_limited && + psize + chunk_len > (packet->transport->burst_limited >> 1)) + /* Do not allow a single GSO packet to use more + * than half of original cwnd. + */ + retval = SCTP_XMIT_PMTU_FULL; + /* Otherwise it will fit in the GSO packet */ } +out: return retval; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d3d50daa248b..40022ee885d7 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1516,6 +1516,9 @@ static __init int sctp_init(void) if (status) goto err_v6_add_protocol; + if (sctp_offload_init() < 0) + pr_crit("%s: Cannot add SCTP protocol offload\n", __func__); + out: return status; err_v6_add_protocol: diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 67154b848aa9..712fb2339baa 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4003,6 +4003,8 @@ static int sctp_init_sock(struct sock *sk) return -ESOCKTNOSUPPORT; } + sk->sk_gso_type = SKB_GSO_SCTP; + /* Initialize default send parameters. These parameters can be * modified with the SCTP_DEFAULT_SEND_PARAM socket option. */ -- cgit v1.2.3 From 942b3235bf77e5600a05d6e85f0415bdeb8068bb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:44 -0300 Subject: sctp: improve debug message to also log curr pkt and new chunk size This is useful for debugging packet sizes. Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index 60499a69179d..90d2e125c2f5 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -182,7 +182,8 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, sctp_xmit_t retval; int error = 0; - pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); + pr_debug("%s: packet:%p size:%lu chunk:%p size:%d\n", __func__, + packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1); switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { case SCTP_XMIT_PMTU_FULL: -- cgit v1.2.3 From 3b55a537d028ccc3e423e74a2037476318918341 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Jun 2016 22:53:26 -0700 Subject: sctp: Fix warning in sctp_packet_transmit_chunk() size_t objects should be printed with %Z printf format. Reported-by: kbuild test robot Signed-off-by: David S. Miller --- net/sctp/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index 90d2e125c2f5..1541a91d6d9d 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -182,7 +182,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, sctp_xmit_t retval; int error = 0; - pr_debug("%s: packet:%p size:%lu chunk:%p size:%d\n", __func__, + pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__, packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1); switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { -- cgit v1.2.3 From d46e416c11c88ef1deb5c7f19271806a5be597fe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 9 Jun 2016 22:48:18 +0800 Subject: sctp: sctp should change socket state when shutdown is received Now sctp doesn't change socket state upon shutdown reception. It changes just the assoc state, even though it's a TCP-style socket. For some cases, if we really need to check sk->sk_state, it's necessary to fix this issue, at least when we use ss or netstat to dump, we can get a more exact information. As an improvement, we will change sk->sk_state when we change asoc->state to SHUTDOWN_RECEIVED, and also do it in sctp_shutdown to keep consistent with sctp_close. Signed-off-by: Xin Long Acked-by: Marcelo R. Leitner Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 4 +++- net/sctp/socket.c | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index aa3712259368..12d45193357c 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -806,8 +806,10 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, /* Set the RCV_SHUTDOWN flag when a SHUTDOWN is received. */ if (sctp_state(asoc, SHUTDOWN_RECEIVED) && - sctp_sstate(sk, ESTABLISHED)) + sctp_sstate(sk, ESTABLISHED)) { + sk->sk_state = SCTP_SS_CLOSING; sk->sk_shutdown |= RCV_SHUTDOWN; + } } if (sctp_state(asoc, COOKIE_WAIT)) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 712fb2339baa..6cae4c61ae26 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4195,6 +4195,7 @@ static void sctp_shutdown(struct sock *sk, int how) return; if (how & SEND_SHUTDOWN) { + sk->sk_state = SCTP_SS_CLOSING; ep = sctp_sk(sk)->ep; if (!list_empty(&ep->asocs)) { asoc = list_entry(ep->asocs.next, @@ -7566,10 +7567,13 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, /* If the association on the newsk is already closed before accept() * is called, set RCV_SHUTDOWN flag. */ - if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) + if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) { + newsk->sk_state = SCTP_SS_CLOSING; newsk->sk_shutdown |= RCV_SHUTDOWN; + } else { + newsk->sk_state = SCTP_SS_ESTABLISHED; + } - newsk->sk_state = SCTP_SS_ESTABLISHED; release_sock(newsk); } -- cgit v1.2.3 From a5e27d18fe64561a467b706f70cfc89ba6323f87 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 13 Jun 2016 23:08:26 +0800 Subject: sctp: fix error return code in sctp_init() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Acked-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/protocol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 40022ee885d7..3b56ae55aba3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1479,7 +1479,8 @@ static __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); } - if (sctp_transport_hashtable_init()) + status = sctp_transport_hashtable_init(); + if (status) goto err_thash_alloc; pr_info("Hash tables configured (bind %d/%d)\n", sctp_port_hashsize, -- cgit v1.2.3 From 141ddefce7c807c5e34b67be50b4a789a51f4a56 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 16 Jun 2016 01:15:06 +0800 Subject: sctp: change sk state to CLOSED instead of CLOSING in sctp_sock_migrate Commit d46e416c11c8 ("sctp: sctp should change socket state when shutdown is received") may set sk_state CLOSING in sctp_sock_migrate, but inet_accept doesn't allow the sk_state other than ESTABLISHED/ CLOSED for sctp. So we will change sk_state to CLOSED, instead of CLOSING, as actually sk is closed already there. Fixes: d46e416c11c8 ("sctp: sctp should change socket state when shutdown is received") Reported-by: Ye Xiaolong Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6cae4c61ae26..cdabbd8219b1 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7568,7 +7568,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * is called, set RCV_SHUTDOWN flag. */ if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) { - newsk->sk_state = SCTP_SS_CLOSING; + newsk->sk_state = SCTP_SS_CLOSED; newsk->sk_shutdown |= RCV_SHUTDOWN; } else { newsk->sk_state = SCTP_SS_ESTABLISHED; -- cgit v1.2.3 From f1533cce60d1f84378c1dd925f9ef1038fa93507 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 7 Jul 2016 09:39:29 -0300 Subject: sctp: fix panic when sending auth chunks When we introduced GSO support, if using auth the auth chunk was being left queued on the packet even after the final segment was generated. Later on sctp_transmit_packet it calls sctp_packet_reset, which zeroed the packet len while not accounting for this left-over. This caused more space to be used the next packet due to the chunk still being queued, but space which wasn't allocated as its size wasn't accounted. The fix is to only queue it back when we know that we are going to generate another segment. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/output.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index 1541a91d6d9d..2e9223bb1b3a 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -582,9 +582,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) */ pkt_size -= WORD_ROUND(chunk->skb->len); - if (chunk == packet->auth && !list_empty(&packet->chunk_list)) - list_add(&chunk->list, &packet->chunk_list); - else if (!sctp_chunk_is_data(chunk)) + if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) sctp_chunk_free(chunk); if (!pkt_size) @@ -605,6 +603,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) (struct sctp_auth_chunk *)auth, gfp); + if (packet->auth) { + if (!list_empty(&packet->chunk_list)) { + /* We will generate more packets, so re-queue + * auth chunk. + */ + list_add(&chunk->list, &packet->chunk_list); + } else { + sctp_chunk_free(packet->auth); + packet->auth = NULL; + } + } + if (!gso) break; @@ -735,6 +745,8 @@ err: } goto out; nomem: + if (packet->auth && list_empty(&packet->auth->list)) + sctp_chunk_free(packet->auth); err = -ENOMEM; goto err; } -- cgit v1.2.3 From 28aa4c26fce2202db8d42ae76b639ca1d9a23d25 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:40 +0800 Subject: sctp: add SCTP_PR_SUPPORTED on sctp sockopt According to section 4.5 of rfc7496, prsctp_enable should be per asoc. We will add prsctp_enable to both asoc and ep, and replace the places where it used net.sctp->prsctp_enable with asoc->prsctp_enable. ep->prsctp_enable will be initialized with net.sctp->prsctp_enable, and asoc->prsctp_enable will be initialized with ep->prsctp_enable. We can also modify it's value through sockopt SCTP_PR_SUPPORTED. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 ++-- include/uapi/linux/sctp.h | 1 + net/sctp/associola.c | 1 + net/sctp/endpointola.c | 1 + net/sctp/sm_make_chunk.c | 12 +++---- net/sctp/socket.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 93 insertions(+), 8 deletions(-) (limited to 'net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 83c5ec58b93a..07115ca9de4d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1256,7 +1256,8 @@ struct sctp_endpoint { /* SCTP-AUTH: endpoint shared keys */ struct list_head endpoint_shared_keys; __u16 active_key_id; - __u8 auth_enable; + __u8 auth_enable:1, + prsctp_enable:1; }; /* Recover the outter endpoint structure. */ @@ -1848,7 +1849,8 @@ struct sctp_association { __u16 active_key_id; __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ - temp:1; /* Is it a temporary association? */ + temp:1, /* Is it a temporary association? */ + prsctp_enable:1; struct sctp_priv_assoc_stats stats; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index ce70fe6b45df..aa08906f292d 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -112,6 +112,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */ #define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ #define SCTP_GET_ASSOC_STATS 112 /* Read only */ +#define SCTP_PR_SUPPORTED 113 /* These are bit fields for msghdr->msg_flags. See section 5.1. */ /* On user space Linux, these live in as an enum. */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index e1849f3714ad..1c23060c41a6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -268,6 +268,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a goto fail_init; asoc->active_key_id = ep->active_key_id; + asoc->prsctp_enable = ep->prsctp_enable; /* Save the hmacs and chunks list into this association */ if (ep->auth_hmacs_list) diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 9d494e35e7f9..1f03065686fe 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -163,6 +163,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, */ ep->auth_hmacs_list = auth_hmacs; ep->auth_chunk_list = auth_chunks; + ep->prsctp_enable = net->sctp.prsctp_enable; return ep; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 56f364d8f932..0e3045ef57fa 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -261,7 +261,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); - if (net->sctp.prsctp_enable) + if (asoc->prsctp_enable) chunksize += sizeof(prsctp_param); /* ADDIP: Section 4.2.7: @@ -355,7 +355,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_addto_param(retval, num_ext, extensions); } - if (net->sctp.prsctp_enable) + if (asoc->prsctp_enable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); if (sp->adaptation_ind) { @@ -2024,8 +2024,8 @@ static void sctp_process_ext_param(struct sctp_association *asoc, for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { case SCTP_CID_FWD_TSN: - if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable) - asoc->peer.prsctp_capable = 1; + if (asoc->prsctp_enable && !asoc->peer.prsctp_capable) + asoc->peer.prsctp_capable = 1; break; case SCTP_CID_AUTH: /* if the peer reports AUTH, assume that he @@ -2169,7 +2169,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_FWD_TSN_SUPPORT: - if (net->sctp.prsctp_enable) + if (ep->prsctp_enable) break; goto fallthrough; @@ -2653,7 +2653,7 @@ do_addr_param: break; case SCTP_PARAM_FWD_TSN_SUPPORT: - if (net->sctp.prsctp_enable) { + if (asoc->prsctp_enable) { asoc->peer.prsctp_capable = 1; break; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index cdabbd8219b1..7460ddebd9ce 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3661,6 +3661,39 @@ static int sctp_setsockopt_recvnxtinfo(struct sock *sk, return 0; } +static int sctp_setsockopt_pr_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + asoc->prsctp_enable = !!params.assoc_value; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + sp->ep->prsctp_enable = !!params.assoc_value; + } else { + goto out; + } + + retval = 0; + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3821,6 +3854,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_RECVNXTINFO: retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen); break; + case SCTP_PR_SUPPORTED: + retval = sctp_setsockopt_pr_supported(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6166,6 +6202,47 @@ static int sctp_getsockopt_recvnxtinfo(struct sock *sk, int len, return 0; } +static int sctp_getsockopt_pr_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + params.assoc_value = asoc->prsctp_enable; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + params.assoc_value = sp->ep->prsctp_enable; + } else { + retval = -EINVAL; + goto out; + } + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -6319,6 +6396,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_RECVNXTINFO: retval = sctp_getsockopt_recvnxtinfo(sk, len, optval, optlen); break; + case SCTP_PR_SUPPORTED: + retval = sctp_getsockopt_pr_supported(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; -- cgit v1.2.3 From f959fb442c35f4b61fea341401b8463dd0a1b959 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:41 +0800 Subject: sctp: add SCTP_DEFAULT_PRINFO into sctp sockopt This patch adds SCTP_DEFAULT_PRINFO to sctp sockopt. It is used to set/get sctp Partially Reliable Policies' default params, which includes 3 policies (ttl, rtx, prio) and their values. Still, if we set policy params in sndinfo, we will use the params of sndinfo against chunks, instead of the default params. In this patch, we will use 5-8bit of sp/asoc->default_flags to store prsctp policies, and reuse asoc->default_timetolive to store their values. It means if we enable and set prsctp policy, prior ttl timeout in sctp will not work any more. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 29 +++++++++++++++ net/sctp/socket.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) (limited to 'net/sctp') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index aa08906f292d..984cf2e9a61d 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -113,6 +113,29 @@ typedef __s32 sctp_assoc_t; #define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ #define SCTP_GET_ASSOC_STATS 112 /* Read only */ #define SCTP_PR_SUPPORTED 113 +#define SCTP_DEFAULT_PRINFO 114 + +/* PR-SCTP policies */ +#define SCTP_PR_SCTP_NONE 0x0000 +#define SCTP_PR_SCTP_TTL 0x0010 +#define SCTP_PR_SCTP_RTX 0x0020 +#define SCTP_PR_SCTP_PRIO 0x0030 +#define SCTP_PR_SCTP_MAX SCTP_PR_SCTP_PRIO +#define SCTP_PR_SCTP_MASK 0x0030 + +#define __SCTP_PR_INDEX(x) ((x >> 4) - 1) +#define SCTP_PR_INDEX(x) __SCTP_PR_INDEX(SCTP_PR_SCTP_ ## x) + +#define SCTP_PR_POLICY(x) ((x) & SCTP_PR_SCTP_MASK) +#define SCTP_PR_SET_POLICY(flags, x) \ + do { \ + flags &= ~SCTP_PR_SCTP_MASK; \ + flags |= x; \ + } while (0) + +#define SCTP_PR_TTL_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_TTL) +#define SCTP_PR_RTX_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_RTX) +#define SCTP_PR_PRIO_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_PRIO) /* These are bit fields for msghdr->msg_flags. See section 5.1. */ /* On user space Linux, these live in as an enum. */ @@ -903,4 +926,10 @@ struct sctp_paddrthlds { __u16 spt_pathpfthld; }; +struct sctp_default_prinfo { + sctp_assoc_t pr_assoc_id; + __u32 pr_value; + __u16 pr_policy; +}; + #endif /* _UAPI_SCTP_H */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7460ddebd9ce..c03fe1b76706 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3694,6 +3694,47 @@ out: return retval; } +static int sctp_setsockopt_default_prinfo(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_default_prinfo info; + struct sctp_association *asoc; + int retval = -EINVAL; + + if (optlen != sizeof(info)) + goto out; + + if (copy_from_user(&info, optval, sizeof(info))) { + retval = -EFAULT; + goto out; + } + + if (info.pr_policy & ~SCTP_PR_SCTP_MASK) + goto out; + + if (info.pr_policy == SCTP_PR_SCTP_NONE) + info.pr_value = 0; + + asoc = sctp_id2assoc(sk, info.pr_assoc_id); + if (asoc) { + SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); + asoc->default_timetolive = info.pr_value; + } else if (!info.pr_assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy); + sp->default_timetolive = info.pr_value; + } else { + goto out; + } + + retval = 0; + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3857,6 +3898,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_PR_SUPPORTED: retval = sctp_setsockopt_pr_supported(sk, optval, optlen); break; + case SCTP_DEFAULT_PRINFO: + retval = sctp_setsockopt_default_prinfo(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6243,6 +6287,49 @@ out: return retval; } +static int sctp_getsockopt_default_prinfo(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_default_prinfo info; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(info)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(info); + if (copy_from_user(&info, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, info.pr_assoc_id); + if (asoc) { + info.pr_policy = SCTP_PR_POLICY(asoc->default_flags); + info.pr_value = asoc->default_timetolive; + } else if (!info.pr_assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + info.pr_policy = SCTP_PR_POLICY(sp->default_flags); + info.pr_value = sp->default_timetolive; + } else { + retval = -EINVAL; + goto out; + } + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, &info, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -6399,6 +6486,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_PR_SUPPORTED: retval = sctp_getsockopt_pr_supported(sk, len, optval, optlen); break; + case SCTP_DEFAULT_PRINFO: + retval = sctp_getsockopt_default_prinfo(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; -- cgit v1.2.3 From 826d253d57b11f69add81c8086d2e7f1dce5ec77 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:42 +0800 Subject: sctp: add SCTP_PR_ASSOC_STATUS on sctp sockopt This patch adds SCTP_PR_ASSOC_STATUS to sctp sockopt, which is used to dump the prsctp statistics info from the asoc. The prsctp statistics includes abandoned_sent/unsent from the asoc. abandoned_sent is the count of the packets we drop packets from retransmit/transmited queue, and abandoned_unsent is the count of the packets we drop from out_queue according to the policy. Note: another option for prsctp statistics dump described in rfc is SCTP_PR_STREAM_STATUS, which is used to dump the prsctp statistics info from each stream. But by now, linux doesn't yet have per stream statistics info, it needs rfc6525 to be implemented. As the prsctp statistics for each stream has to be based on per stream statistics, we will delay it until rfc6525 is done in linux. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 +++ include/uapi/linux/sctp.h | 12 +++++++++ net/sctp/socket.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) (limited to 'net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 07115ca9de4d..d8e464aacb20 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1853,6 +1853,9 @@ struct sctp_association { prsctp_enable:1; struct sctp_priv_assoc_stats stats; + + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 984cf2e9a61d..d304f4c9792c 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -114,6 +114,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_GET_ASSOC_STATS 112 /* Read only */ #define SCTP_PR_SUPPORTED 113 #define SCTP_DEFAULT_PRINFO 114 +#define SCTP_PR_ASSOC_STATUS 115 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -926,6 +927,17 @@ struct sctp_paddrthlds { __u16 spt_pathpfthld; }; +/* + * Socket Option for Getting the Association/Stream-Specific PR-SCTP Status + */ +struct sctp_prstatus { + sctp_assoc_t sprstat_assoc_id; + __u16 sprstat_sid; + __u16 sprstat_policy; + __u64 sprstat_abandoned_unsent; + __u64 sprstat_abandoned_sent; +}; + struct sctp_default_prinfo { sctp_assoc_t pr_assoc_id; __u32 pr_value; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c03fe1b76706..c3167c43a9c1 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6330,6 +6330,64 @@ out: return retval; } +static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_prstatus params; + struct sctp_association *asoc; + int policy; + int retval = -EINVAL; + + if (len < sizeof(params)) + goto out; + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) { + retval = -EFAULT; + goto out; + } + + policy = params.sprstat_policy; + if (policy & ~SCTP_PR_SCTP_MASK) + goto out; + + asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); + if (!asoc) + goto out; + + if (policy == SCTP_PR_SCTP_NONE) { + params.sprstat_abandoned_unsent = 0; + params.sprstat_abandoned_sent = 0; + for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { + params.sprstat_abandoned_unsent += + asoc->abandoned_unsent[policy]; + params.sprstat_abandoned_sent += + asoc->abandoned_sent[policy]; + } + } else { + params.sprstat_abandoned_unsent = + asoc->abandoned_unsent[__SCTP_PR_INDEX(policy)]; + params.sprstat_abandoned_sent = + asoc->abandoned_sent[__SCTP_PR_INDEX(policy)]; + } + + if (put_user(len, optlen)) { + retval = -EFAULT; + goto out; + } + + if (copy_to_user(optval, ¶ms, len)) { + retval = -EFAULT; + goto out; + } + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -6490,6 +6548,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_default_prinfo(sk, len, optval, optlen); break; + case SCTP_PR_ASSOC_STATUS: + retval = sctp_getsockopt_pr_assocstatus(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; -- cgit v1.2.3 From a6c2f792873aff332a4689717c3cd6104f46684c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:43 +0800 Subject: sctp: implement prsctp TTL policy prsctp TTL policy is a policy to abandon chunks when they expire at the specific time in local stack. It's similar with expires_at in struct sctp_datamsg. This patch uses sinfo->sinfo_timetolive to set the specific time for TTL policy. sinfo->sinfo_timetolive is also used for msg->expires_at. So if prsctp_enable or TTL policy is not enabled, msg->expires_at still works as before. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 10 ++++++++++ net/sctp/chunk.c | 20 +++++++++++++++++--- net/sctp/output.c | 2 ++ net/sctp/sm_make_chunk.c | 12 ++++++++++++ net/sctp/socket.c | 4 ++-- 5 files changed, 43 insertions(+), 5 deletions(-) (limited to 'net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d8e464aacb20..6bcda715008e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -602,6 +602,16 @@ struct sctp_chunk { /* This needs to be recoverable for SCTP_SEND_FAILED events. */ struct sctp_sndrcvinfo sinfo; + /* We use this field to record param for prsctp policies, + * for TTL policy, it is the time_to_drop of this chunk, + * for RTX policy, it is the max_sent_count of this chunk, + * for PRIO policy, it is the priority of this chunk. + */ + unsigned long prsctp_param; + + /* How many times this chunk have been sent, for prsctp RTX policy */ + int sent_count; + /* Which association does this belong to? */ struct sctp_association *asoc; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 1eb94bf18ef4..2698d122e201 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -335,13 +335,27 @@ errout: /* Check whether this message has expired. */ int sctp_chunk_abandoned(struct sctp_chunk *chunk) { - struct sctp_datamsg *msg = chunk->msg; + if (!chunk->asoc->prsctp_enable || + !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) { + struct sctp_datamsg *msg = chunk->msg; + + if (!msg->can_abandon) + return 0; + + if (time_after(jiffies, msg->expires_at)) + return 1; - if (!msg->can_abandon) return 0; + } - if (time_after(jiffies, msg->expires_at)) + if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && + time_after(jiffies, chunk->prsctp_param)) { + if (chunk->sent_count) + chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; + else + chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; return 1; + } return 0; } diff --git a/net/sctp/output.c b/net/sctp/output.c index 2e9223bb1b3a..7425f6c23888 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -316,6 +316,8 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, packet->has_data = 1; /* timestamp the chunk for rtx purposes */ chunk->sent_at = jiffies; + /* Mainly used for prsctp RTX policy */ + chunk->sent_count++; break; case SCTP_CID_COOKIE_ECHO: packet->has_cookie_echo = 1; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 0e3045ef57fa..2c431eef3a50 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -711,6 +711,17 @@ nodata: return retval; } +static void sctp_set_prsctp_policy(struct sctp_chunk *chunk, + const struct sctp_sndrcvinfo *sinfo) +{ + if (!chunk->asoc->prsctp_enable) + return; + + if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) + chunk->prsctp_param = + jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); +} + /* Make a DATA chunk for the given association from the provided * parameters. However, do not populate the data payload. */ @@ -744,6 +755,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp); memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo)); + sctp_set_prsctp_policy(retval, sinfo); nodata: return retval; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c3167c43a9c1..08614296628a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7099,7 +7099,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) if (cmsgs->srinfo->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | - SCTP_SACK_IMMEDIATELY | + SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; @@ -7123,7 +7123,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) if (cmsgs->sinfo->snd_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | - SCTP_SACK_IMMEDIATELY | + SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; -- cgit v1.2.3 From 01aadb3af6e1b10970c1f7e510b5097c8f725d64 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:44 +0800 Subject: sctp: implement prsctp RTX policy prsctp RTX policy is a policy to abandon chunks when they are retransmitted beyond the max count. This patch uses sent_count to count how many times one chunk has been sent, and prsctp_param is the max rtx count, which is from sinfo->sinfo_timetolive in sctp_set_prsctp_policy(). So similar to TTL policy, if RTX policy is enabled, msg->expire_at won't work. Then in sctp_chunk_abandoned, this patch checks if chunk->sent_count is bigger than chunk->prsctp_param to abandon this chunk. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/chunk.c | 4 ++++ net/sctp/sm_make_chunk.c | 2 ++ 2 files changed, 6 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 2698d122e201..b3692b55a8d2 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -355,6 +355,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) else chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; return 1; + } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && + chunk->sent_count > chunk->prsctp_param) { + chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + return 1; } return 0; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 2c431eef3a50..cfde934af5c5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -720,6 +720,8 @@ static void sctp_set_prsctp_policy(struct sctp_chunk *chunk, if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) chunk->prsctp_param = jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); + else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags)) + chunk->prsctp_param = sinfo->sinfo_timetolive; } /* Make a DATA chunk for the given association from the provided -- cgit v1.2.3 From 8dbdf1f5b09cb22560e7c7173b52fe3c631046bd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:45 +0800 Subject: sctp: implement prsctp PRIO policy prsctp PRIO policy is a policy to abandon lower priority chunks when asoc doesn't have enough snd buffer, so that the current chunk with higher priority can be queued successfully. Similar to TTL/RTX policy, we will set the priority of the chunk to prsctp_param with sinfo->sinfo_timetolive in sctp_set_prsctp_policy(). So if PRIO policy is enabled, msg->expire_at won't work. asoc->sent_cnt_removable will record how many chunks can be checked to remove. If priority policy is enabled, when the chunk is queued into the out_queue, we will increase sent_cnt_removable. When the chunk is moved to abandon_queue or dequeue and free, we will decrease sent_cnt_removable. In sctp_sendmsg, we will check if there is enough snd buffer for current msg and if sent_cnt_removable is not 0. Then try to abandon chunks in sctp_prune_prsctp when sendmsg from the retransmit/transmited queue, and free chunks from out_queue in right order until the abandon+free size > msg_len - sctp_wfree. For the abandon size, we have to wait until it sends FORWARD TSN, receives the sack and the chunks are really freed. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++ net/sctp/chunk.c | 1 + net/sctp/outqueue.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++ net/sctp/sm_make_chunk.c | 3 +- net/sctp/socket.c | 3 ++ 5 files changed, 109 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6bcda715008e..8626bdd3249a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1084,6 +1084,8 @@ void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, sctp_retransmit_reason_t); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); +void sctp_prsctp_prune(struct sctp_association *asoc, + struct sctp_sndrcvinfo *sinfo, int msg_len); /* Uncork and flush an outqueue. */ static inline void sctp_outq_cork(struct sctp_outq *q) { @@ -1864,6 +1866,8 @@ struct sctp_association { struct sctp_priv_assoc_stats stats; + int sent_cnt_removable; + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index b3692b55a8d2..a55e54738b81 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -360,6 +360,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } + /* PRIO policy is processed by sendmsg, not here */ return 0; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 084718f9b3da..72e54a416af6 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -326,6 +326,9 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) sctp_chunk_hold(chunk); sctp_outq_tail_data(q, chunk); + if (chunk->asoc->prsctp_enable && + SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) + chunk->asoc->sent_cnt_removable++; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); else @@ -372,6 +375,96 @@ static void sctp_insert_list(struct list_head *head, struct list_head *new) list_add_tail(new, head); } +static int sctp_prsctp_prune_sent(struct sctp_association *asoc, + struct sctp_sndrcvinfo *sinfo, + struct list_head *queue, int msg_len) +{ + struct sctp_chunk *chk, *temp; + + list_for_each_entry_safe(chk, temp, queue, transmitted_list) { + if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || + chk->prsctp_param <= sinfo->sinfo_timetolive) + continue; + + list_del_init(&chk->transmitted_list); + sctp_insert_list(&asoc->outqueue.abandoned, + &chk->transmitted_list); + + asoc->sent_cnt_removable--; + asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; + + if (!chk->tsn_gap_acked) { + if (chk->transport) + chk->transport->flight_size -= + sctp_data_size(chk); + asoc->outqueue.outstanding_bytes -= sctp_data_size(chk); + } + + msg_len -= SCTP_DATA_SNDSIZE(chk) + + sizeof(struct sk_buff) + + sizeof(struct sctp_chunk); + if (msg_len <= 0) + break; + } + + return msg_len; +} + +static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, + struct sctp_sndrcvinfo *sinfo, + struct list_head *queue, int msg_len) +{ + struct sctp_chunk *chk, *temp; + + list_for_each_entry_safe(chk, temp, queue, list) { + if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || + chk->prsctp_param <= sinfo->sinfo_timetolive) + continue; + + list_del_init(&chk->list); + asoc->sent_cnt_removable--; + asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + + msg_len -= SCTP_DATA_SNDSIZE(chk) + + sizeof(struct sk_buff) + + sizeof(struct sctp_chunk); + sctp_chunk_free(chk); + if (msg_len <= 0) + break; + } + + return msg_len; +} + +/* Abandon the chunks according their priorities */ +void sctp_prsctp_prune(struct sctp_association *asoc, + struct sctp_sndrcvinfo *sinfo, int msg_len) +{ + struct sctp_transport *transport; + + if (!asoc->prsctp_enable || !asoc->sent_cnt_removable) + return; + + msg_len = sctp_prsctp_prune_sent(asoc, sinfo, + &asoc->outqueue.retransmit, + msg_len); + if (msg_len <= 0) + return; + + list_for_each_entry(transport, &asoc->peer.transport_addr_list, + transports) { + msg_len = sctp_prsctp_prune_sent(asoc, sinfo, + &transport->transmitted, + msg_len); + if (msg_len <= 0) + return; + } + + sctp_prsctp_prune_unsent(asoc, sinfo, + &asoc->outqueue.out_chunk_list, + msg_len); +} + /* Mark all the eligible packets on a transport for retransmission. */ void sctp_retransmit_mark(struct sctp_outq *q, struct sctp_transport *transport, @@ -962,6 +1055,9 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); + if (asoc->prsctp_enable && + SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) + asoc->sent_cnt_removable--; sctp_chunk_free(chunk); continue; } @@ -1251,6 +1347,9 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) tsn = ntohl(tchunk->subh.data_hdr->tsn); if (TSN_lte(tsn, ctsn)) { list_del_init(&tchunk->transmitted_list); + if (asoc->prsctp_enable && + SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) + asoc->sent_cnt_removable--; sctp_chunk_free(tchunk); } } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index cfde934af5c5..1c96f4740e67 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -720,7 +720,8 @@ static void sctp_set_prsctp_policy(struct sctp_chunk *chunk, if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) chunk->prsctp_param = jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); - else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags)) + else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) || + SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags)) chunk->prsctp_param = sinfo->sinfo_timetolive; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 08614296628a..71c7dc5ea62e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1914,6 +1914,9 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) goto out_free; } + if (sctp_wspace(asoc) < msg_len) + sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); -- cgit v1.2.3 From 9e238323799fb8c2add2b1de9a22edd4d4e51e30 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:55 -0300 Subject: sctp: allow others to use sctp_input_cb We process input path in other files too and having access to it is nice, so move it to a header where it's shared. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 15 +++++++++++++++ net/sctp/input.c | 11 ----------- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8626bdd3249a..966c3a40039c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -59,6 +59,7 @@ #include /* We need tq_struct. */ #include /* We need sctp* header structs. */ #include /* We need auth specific structs */ +#include /* For inet_skb_parm */ /* A convenience structure for handling sockaddr structures. * We should wean ourselves off this. @@ -1092,6 +1093,20 @@ static inline void sctp_outq_cork(struct sctp_outq *q) q->cork = 1; } +/* SCTP skb control block. + * sctp_input_cb is currently used on rx and sock rx queue + */ +struct sctp_input_cb { + union { + struct inet_skb_parm h4; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_skb_parm h6; +#endif + } header; + struct sctp_chunk *chunk; +}; +#define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) + /* These bind address data fields common between endpoints and associations */ struct sctp_bind_addr { diff --git a/net/sctp/input.c b/net/sctp/input.c index 6f8e676d285e..7a327ff71f08 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -90,17 +90,6 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb) return 0; } -struct sctp_input_cb { - union { - struct inet_skb_parm h4; -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_skb_parm h6; -#endif - } header; - struct sctp_chunk *chunk; -}; -#define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) - /* * This is the routine which IP calls when receiving an SCTP packet. */ -- cgit v1.2.3 From f5d258e60722142e88cb6f0f337d78bca67cf973 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:56 -0300 Subject: sctp: reorder sctp_ulpevent and shrink msg_flags The next patch needs 8 bytes in there. sctp_ulpevent has a hole due to bad alignment; msg_flags is using 4 bytes while it actually uses only 2, so we shrink it, and iif member (4 bytes) which can be easily fetched from another place once the next patch is there, so we remove it and thus creating space for 8 bytes. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/ulpevent.h | 10 +++++----- net/sctp/ulpevent.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net/sctp') diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index cccdcfd14973..aa342645dbce 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -48,15 +48,15 @@ */ struct sctp_ulpevent { struct sctp_association *asoc; - __u16 stream; - __u16 ssn; - __u16 flags; + unsigned int rmem_len; __u32 ppid; __u32 tsn; __u32 cumtsn; - int msg_flags; int iif; - unsigned int rmem_len; + __u16 stream; + __u16 ssn; + __u16 flags; + __u16 msg_flags; }; /* Retrieve the skb this event sits inside of. */ diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index d1e38308f615..706f5bc9f0c3 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -51,7 +51,7 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event); /* Initialize an ULP event from an given skb. */ static void sctp_ulpevent_init(struct sctp_ulpevent *event, - int msg_flags, + __u16 msg_flags, unsigned int len) { memset(event, 0, sizeof(struct sctp_ulpevent)); @@ -60,7 +60,7 @@ static void sctp_ulpevent_init(struct sctp_ulpevent *event, } /* Create a new sctp_ulpevent. */ -static struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, +static struct sctp_ulpevent *sctp_ulpevent_new(int size, __u16 msg_flags, gfp_t gfp) { struct sctp_ulpevent *event; -- cgit v1.2.3 From 1f45f78f8e511203f03138f2ccde3d2cf90d2cbf Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:57 -0300 Subject: sctp: allow GSO frags to access the chunk too SCTP will try to access original IP headers on sctp_recvmsg in order to copy the addresses used. There are also other places that do similar access to IP or even SCTP headers. But after 90017accff61 ("sctp: Add GSO support") they aren't always there because they are only present in the header skb. SCTP handles the queueing of incoming data by cloning the incoming skb and limiting to only the relevant payload. This clone has its cb updated to something different and it's then queued on socket rx queue. Thus we need to fix this in two moments. For rx path, not related to socket queue yet, this patch uses a partially copied sctp_input_cb to such GSO frags. This restores the ability to access the headers for this part of the code. Regarding the socket rx queue, it removes iif member from sctp_event and also add a chunk pointer on it. With these changes we're always able to reach the headers again. The biggest change here is that now the sctp_chunk struct and the original skb are only freed after the application consumed the buffer. Note however that the original payload was already like this due to the skb cloning. For iif, SCTP's IPv4 code doesn't use it, so no change is necessary. IPv6 now can fetch it directly from original's IPv6 CB as the original skb is still accessible. In the future we probably can simplify sctp_v*_skb_iif() stuff, as sctp_v4_skb_iif() was called but it's return value not used, and now it's not even called, but such cleanup is out of scope for this change. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 7 +++++++ include/net/sctp/ulpevent.h | 2 +- net/sctp/inqueue.c | 7 +++++++ net/sctp/ipv6.c | 9 ++++----- net/sctp/protocol.c | 1 + net/sctp/sm_statefuns.c | 3 ++- net/sctp/socket.c | 10 +++++++--- net/sctp/ulpevent.c | 10 +++++++++- 8 files changed, 38 insertions(+), 11 deletions(-) (limited to 'net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 966c3a40039c..f6f201de6fa4 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1107,6 +1107,13 @@ struct sctp_input_cb { }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) +static inline const struct sk_buff *sctp_gso_headskb(const struct sk_buff *skb) +{ + const struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; + + return chunk->head_skb ? : skb; +} + /* These bind address data fields common between endpoints and associations */ struct sctp_bind_addr { diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index aa342645dbce..2c098cd7e7e2 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -48,11 +48,11 @@ */ struct sctp_ulpevent { struct sctp_association *asoc; + struct sctp_chunk *chunk; unsigned int rmem_len; __u32 ppid; __u32 tsn; __u32 cumtsn; - int iif; __u16 stream; __u16 ssn; __u16 flags; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index edabbbdfca54..147d975b0455 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -218,6 +218,13 @@ new_skb: chunk->has_asconf = 0; chunk->end_of_packet = 0; chunk->ecn_ce_done = 0; + if (chunk->head_skb) { + struct sctp_input_cb + *cb = SCTP_INPUT_CB(chunk->skb), + *head_cb = SCTP_INPUT_CB(chunk->head_skb); + + cb->chunk = head_cb->chunk; + } } chunk->chunk_hdr = ch; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0657d18a85bf..ae6f1a2178ba 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -420,6 +420,7 @@ static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb, addr->v6.sin6_flowinfo = 0; /* FIXME */ addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif; + /* Always called on head skb, so this is safe */ sh = sctp_hdr(skb); if (is_saddr) { *port = sh->source; @@ -710,8 +711,7 @@ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) /* Where did this skb come from? */ static int sctp_v6_skb_iif(const struct sk_buff *skb) { - struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb; - return opt->iif; + return IP6CB(skb)->iif; } /* Was this packet marked by Explicit Congestion Notification? */ @@ -780,15 +780,14 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname, if (ip_hdr(skb)->version == 4) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = sh->source; - addr->v4.sin_addr.s_addr = ip_hdr(skb)->saddr; + addr->v4.sin_addr.s_addr = ip_hdr(skb)->saddr; } else { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; addr->v6.sin6_port = sh->source; addr->v6.sin6_addr = ipv6_hdr(skb)->saddr; if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) { - struct sctp_ulpevent *ev = sctp_skb2event(skb); - addr->v6.sin6_scope_id = ev->iif; + addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb); } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3b56ae55aba3..1adb9270e317 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -240,6 +240,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb, port = &addr->v4.sin_port; addr->v4.sin_family = AF_INET; + /* Always called on head skb, so this is safe */ sh = sctp_hdr(skb); if (is_saddr) { *port = sh->source; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index f1f08c8f277b..5aabf42065e2 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6125,7 +6125,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, af = sctp_get_af_specific( ipver2af(ip_hdr(chunk->skb)->version)); - if (af && af->is_ce(chunk->skb) && asoc->peer.ecn_capable) { + if (af && af->is_ce(sctp_gso_headskb(chunk->skb)) && + asoc->peer.ecn_capable) { /* Do real work as sideffect. */ sctp_add_cmd_sf(commands, SCTP_CMD_ECN_CE, SCTP_U32(tsn)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 71c7dc5ea62e..52fdd540a9ef 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2066,7 +2066,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); - struct sk_buff *skb; + struct sk_buff *skb, *head_skb; int copied; int err = 0; int skb_len; @@ -2102,12 +2102,16 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (err) goto out_free; - sock_recv_ts_and_drops(msg, sk, skb); + if (event->chunk && event->chunk->head_skb) + head_skb = event->chunk->head_skb; + else + head_skb = skb; + sock_recv_ts_and_drops(msg, sk, head_skb); if (sctp_ulpevent_is_notification(event)) { msg->msg_flags |= MSG_NOTIFICATION; sp->pf->event_msgname(event, msg->msg_name, addr_len); } else { - sp->pf->skb_msgname(skb, msg->msg_name, addr_len); + sp->pf->skb_msgname(head_skb, msg->msg_name, addr_len); } /* Check if we allow SCTP_NXTINFO. */ diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 706f5bc9f0c3..f6219b164b42 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -701,6 +701,12 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, sctp_ulpevent_receive_data(event, asoc); + /* And hold the chunk as we need it for getting the IP headers + * later in recvmsg + */ + sctp_chunk_hold(chunk); + event->chunk = chunk; + event->stream = ntohs(chunk->subh.data_hdr->stream); event->ssn = ntohs(chunk->subh.data_hdr->ssn); event->ppid = chunk->subh.data_hdr->ppid; @@ -710,11 +716,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, } event->tsn = ntohl(chunk->subh.data_hdr->tsn); event->msg_flags |= chunk->chunk_hdr->flags; - event->iif = sctp_chunk_iif(chunk); return event; fail_mark: + sctp_chunk_put(chunk); kfree_skb(skb); fail: return NULL; @@ -1007,6 +1013,7 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) done: sctp_assoc_rwnd_increase(event->asoc, len); + sctp_chunk_put(event->chunk); sctp_ulpevent_release_owner(event); } @@ -1029,6 +1036,7 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event) } done: + sctp_chunk_put(event->chunk); sctp_ulpevent_release_owner(event); } -- cgit v1.2.3 From e7487c86dc5c4a528a7dbd9dc14f453a0de61a84 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:58 -0300 Subject: sctp: avoid identifying address family many times for a chunk Identifying address family operations during rx path is not something expensive but it's ugly to the eye to have it done multiple times, specially when we already validated it during initial rx processing. This patch takes advantage of the now shared sctp_input_cb and make the pointer to the operations readily available. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/input.c | 1 + net/sctp/inqueue.c | 1 + net/sctp/sm_make_chunk.c | 20 ++++---------------- net/sctp/sm_statefuns.c | 7 ++----- 5 files changed, 9 insertions(+), 21 deletions(-) (limited to 'net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f6f201de6fa4..ce93c4b10d26 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1104,6 +1104,7 @@ struct sctp_input_cb { #endif } header; struct sctp_chunk *chunk; + struct sctp_af *af; }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) diff --git a/net/sctp/input.c b/net/sctp/input.c index 7a327ff71f08..30d72f7707b6 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -140,6 +140,7 @@ int sctp_rcv(struct sk_buff *skb) af = sctp_get_af_specific(family); if (unlikely(!af)) goto discard_it; + SCTP_INPUT_CB(skb)->af = af; /* Initialize local addresses for lookups. */ af->from_skb(&src, skb, 1); diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 147d975b0455..8fc773f9b59a 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -224,6 +224,7 @@ new_skb: *head_cb = SCTP_INPUT_CB(chunk->head_skb); cb->chunk = head_cb->chunk; + cb->af = head_cb->af; } } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 1c96f4740e67..8c77b87a8565 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -108,14 +108,9 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk) /* What was the inbound interface for this chunk? */ int sctp_chunk_iif(const struct sctp_chunk *chunk) { - struct sctp_af *af; - int iif = 0; - - af = sctp_get_af_specific(ipver2af(ip_hdr(chunk->skb)->version)); - if (af) - iif = af->skb_iif(chunk->skb); + struct sk_buff *skb = chunk->skb; - return iif; + return SCTP_INPUT_CB(skb)->af->skb_iif(skb); } /* RFC 2960 3.3.2 Initiation (INIT) (1) @@ -1600,7 +1595,6 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep, struct sctp_association *asoc; struct sk_buff *skb; sctp_scope_t scope; - struct sctp_af *af; /* Create the bare association. */ scope = sctp_scope(sctp_source(chunk)); @@ -1610,16 +1604,10 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep, asoc->temp = 1; skb = chunk->skb; /* Create an entry for the source address of the packet. */ - af = sctp_get_af_specific(ipver2af(ip_hdr(skb)->version)); - if (unlikely(!af)) - goto fail; - af->from_skb(&asoc->c.peer_addr, skb, 1); + SCTP_INPUT_CB(skb)->af->from_skb(&asoc->c.peer_addr, skb, 1); + nodata: return asoc; - -fail: - sctp_association_free(asoc); - return NULL; } /* Build a cookie representing asoc. diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5aabf42065e2..b7c1f7f3c838 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6119,13 +6119,10 @@ static int sctp_eat_data(const struct sctp_association *asoc, */ if (!chunk->ecn_ce_done) { - struct sctp_af *af; + struct sctp_af *af = SCTP_INPUT_CB(chunk->skb)->af; chunk->ecn_ce_done = 1; - af = sctp_get_af_specific( - ipver2af(ip_hdr(chunk->skb)->version)); - - if (af && af->is_ce(sctp_gso_headskb(chunk->skb)) && + if (af->is_ce(sctp_gso_headskb(chunk->skb)) && asoc->peer.ecn_capable) { /* Do real work as sideffect. */ sctp_add_cmd_sf(commands, SCTP_CMD_ECN_CE, -- cgit v1.2.3 From d9cef42529402f9fce10376b6e427a5137d90c3d Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:59 -0300 Subject: sctp: do not clear chunk->ecn_ce_done flag We should not clear that flag when switching to a new skb from a GSO skb because it would cause ECN processing to happen multiple times per GSO skb, which is not wanted. Instead, let it be processed once per chunk. That is, in other words, once per IP header available. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/inqueue.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 8fc773f9b59a..942770675f4c 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -217,7 +217,6 @@ new_skb: chunk->auth = 0; chunk->has_asconf = 0; chunk->end_of_packet = 0; - chunk->ecn_ce_done = 0; if (chunk->head_skb) { struct sctp_input_cb *cb = SCTP_INPUT_CB(chunk->skb), -- cgit v1.2.3 From 2d47fd120d23390fea38c3c7cc5ee05a5b95c49f Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:09:00 -0300 Subject: sctp: only check for ECN if peer is using it Currently only read-only checks are performed up to the point on where we check if peer is ECN capable, checks which we can avoid otherwise. The flag ecn_ce_done is only used to perform this check once per incoming packet, and nothing more. Thus this patch moves the peer check up. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b7c1f7f3c838..d88bb2b0b699 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6118,12 +6118,11 @@ static int sctp_eat_data(const struct sctp_association *asoc, * chunk later. */ - if (!chunk->ecn_ce_done) { + if (asoc->peer.ecn_capable && !chunk->ecn_ce_done) { struct sctp_af *af = SCTP_INPUT_CB(chunk->skb)->af; chunk->ecn_ce_done = 1; - if (af->is_ce(sctp_gso_headskb(chunk->skb)) && - asoc->peer.ecn_capable) { + if (af->is_ce(sctp_gso_headskb(chunk->skb))) { /* Do real work as sideffect. */ sctp_add_cmd_sf(commands, SCTP_CMD_ECN_CE, SCTP_U32(tsn)); -- cgit v1.2.3 From e5b13f3444dfe4f014343e83aa7948b83ef58168 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 15 Jul 2016 16:38:19 -0300 Subject: sctp: recvmsg should be able to run even if sock is in closing state Commit d46e416c11c8 missed to update some other places which checked for the socket being TCP-style AND Established state, as Closing state has some overlapping with the previous understanding of Established. Without this fix, one of the effects is that some already queued rx messages may not be readable anymore depending on how the association teared down, and sending may also not be possible if peer initiated the shutdown. Also merge two if() blocks into one condition on sctp_sendmsg(). Cc: Xin Long Fixes: d46e416c11c8 ("sctp: sctp should change socket state when shutdown is received") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 52fdd540a9ef..d2681cb1dd30 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -202,7 +202,7 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) * could be a TCP-style listening socket or a socket which * hasn't yet called connect() to establish an association. */ - if (!sctp_sstate(sk, ESTABLISHED)) + if (!sctp_sstate(sk, ESTABLISHED) && !sctp_sstate(sk, CLOSING)) return NULL; /* Get the first and the only association from the list. */ @@ -1068,7 +1068,7 @@ static int __sctp_connect(struct sock *sk, * is already connected. * It cannot be done even on a TCP-style listening socket. */ - if (sctp_sstate(sk, ESTABLISHED) || + if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) || (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) { err = -EISCONN; goto out_free; @@ -1705,18 +1705,19 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) if (msg_name) { /* Look for a matching association on the endpoint. */ asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport); - if (!asoc) { - /* If we could not find a matching association on the - * endpoint, make sure that it is not a TCP-style - * socket that already has an association or there is - * no peeled-off association on another socket. - */ - if ((sctp_style(sk, TCP) && - sctp_sstate(sk, ESTABLISHED)) || - sctp_endpoint_is_peeled_off(ep, &to)) { - err = -EADDRNOTAVAIL; - goto out_unlock; - } + + /* If we could not find a matching association on the + * endpoint, make sure that it is not a TCP-style + * socket that already has an association or there is + * no peeled-off association on another socket. + */ + if (!asoc && + ((sctp_style(sk, TCP) && + (sctp_sstate(sk, ESTABLISHED) || + sctp_sstate(sk, CLOSING))) || + sctp_endpoint_is_peeled_off(ep, &to))) { + err = -EADDRNOTAVAIL; + goto out_unlock; } } else { asoc = sctp_id2assoc(sk, associd); @@ -2077,7 +2078,8 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, lock_sock(sk); - if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED)) { + if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED) && + !sctp_sstate(sk, CLOSING)) { err = -ENOTCONN; goto out; } -- cgit v1.2.3 From c5c4e45c4b79acb23f07e43dac1348e67b4ddf91 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 15 Jul 2016 16:40:02 -0300 Subject: sctp: fix GSO for IPv6 commit 90017accff61 ("sctp: Add GSO support") didn't register SCTP GSO offloading for IPv6 and yet didn't put any restrictions on generating GSO packets while in IPv6, which causes all IPv6 GSO'ed packets to be silently dropped. The fix is to properly register the offload this time. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/offload.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/offload.c b/net/sctp/offload.c index a37887b373a7..7e869d0cca69 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -92,7 +92,28 @@ static const struct net_offload sctp_offload = { }, }; +static const struct net_offload sctp6_offload = { + .callbacks = { + .gso_segment = sctp_gso_segment, + }, +}; + int __init sctp_offload_init(void) { - return inet_add_offload(&sctp_offload, IPPROTO_SCTP); + int ret; + + ret = inet_add_offload(&sctp_offload, IPPROTO_SCTP); + if (ret) + goto out; + + ret = inet6_add_offload(&sctp6_offload, IPPROTO_SCTP); + if (ret) + goto ipv4; + + return ret; + +ipv4: + inet_del_offload(&sctp_offload, IPPROTO_SCTP); +out: + return ret; } -- cgit v1.2.3 From 9b97420228881e839b76c8a4506da3cb187bf004 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 22 Jul 2016 17:38:51 +0800 Subject: sctp: support ipv6 nonlocal bind This patch makes sctp support ipv6 nonlocal bind by adding sp->inet.freebind and net->ipv6.sysctl.ip_nonlocal_bind check in sctp_v6_available as what sctp did to support ipv4 nonlocal bind (commit cdac4e077489). Reported-by: Shijoe George Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ae6f1a2178ba..660c4a4cac31 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -560,6 +560,7 @@ static int sctp_v6_is_any(const union sctp_addr *addr) static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) { int type; + struct net *net = sock_net(&sp->inet.sk); const struct in6_addr *in6 = (const struct in6_addr *)&addr->v6.sin6_addr; type = ipv6_addr_type(in6); @@ -574,7 +575,8 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) if (!(type & IPV6_ADDR_UNICAST)) return 0; - return ipv6_chk_addr(sock_net(&sp->inet.sk), in6, NULL, 0); + return sp->inet.freebind || net->ipv6.sysctl.ip_nonlocal_bind || + ipv6_chk_addr(net, in6, NULL, 0); } /* This function checks if the address is a valid address to be used for -- cgit v1.2.3 From fd2d180a28cb5075163945d0b229926ec9782ab0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 22 Jul 2016 21:25:42 +0800 Subject: sctp: use inet_recvmsg to support sctp RFS well Commit 486bdee0134c ("sctp: add support for RPS and RFS") saves skb->hash into sk->sk_rxhash so that the inet_* can record it to flow table. But sctp uses sock_common_recvmsg as .recvmsg instead of inet_recvmsg, sock_common_recvmsg doesn't invoke sock_rps_record_flow to record the flow. It may cause that the receiver has no chances to record the flow if it doesn't send msg or poll the socket. So this patch fixes it by using inet_recvmsg as .recvmsg in sctp. Fixes: 486bdee0134c ("sctp: add support for RPS and RFS") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 660c4a4cac31..f473779e8b1c 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -956,7 +956,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, - .recvmsg = sock_common_recvmsg, + .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1adb9270e317..7b523e3f551f 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1028,7 +1028,7 @@ static const struct proto_ops inet_seqpacket_ops = { .setsockopt = sock_common_setsockopt, /* IP_SOL IP_OPTION is a problem */ .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, - .recvmsg = sock_common_recvmsg, + .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT -- cgit v1.2.3 From eefc1b1d105ee4d2ce907833ce675f1e9599b5e3 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Sat, 23 Jul 2016 00:32:48 -0300 Subject: sctp: fix BH handling on socket backlog Now that the backlog processing is called with BH enabled, we have to disable BH before taking the socket lock via bh_lock_sock() otherwise it may dead lock: sctp_backlog_rcv() bh_lock_sock(sk); if (sock_owned_by_user(sk)) { if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) sctp_chunk_free(chunk); else backloged = 1; } else sctp_inq_push(inqueue, chunk); bh_unlock_sock(sk); while sctp_inq_push() was disabling/enabling BH, but enabling BH triggers pending softirq, which then may try to re-lock the socket in sctp_rcv(). [ 219.187215] [ 219.187217] [] _raw_spin_lock+0x20/0x30 [ 219.187223] [] sctp_rcv+0x48c/0xba0 [sctp] [ 219.187225] [] ? nf_iterate+0x62/0x80 [ 219.187226] [] ip_local_deliver_finish+0x94/0x1e0 [ 219.187228] [] ip_local_deliver+0x6f/0xf0 [ 219.187229] [] ? ip_rcv_finish+0x3b0/0x3b0 [ 219.187230] [] ip_rcv_finish+0xd8/0x3b0 [ 219.187232] [] ip_rcv+0x282/0x3a0 [ 219.187233] [] ? update_curr+0x66/0x180 [ 219.187235] [] __netif_receive_skb_core+0x524/0xa90 [ 219.187236] [] ? update_cfs_shares+0x30/0xf0 [ 219.187237] [] ? __enqueue_entity+0x6c/0x70 [ 219.187239] [] ? enqueue_entity+0x204/0xdf0 [ 219.187240] [] __netif_receive_skb+0x18/0x60 [ 219.187242] [] process_backlog+0x9e/0x140 [ 219.187243] [] net_rx_action+0x22c/0x370 [ 219.187245] [] __do_softirq+0x112/0x2e7 [ 219.187247] [] do_softirq_own_stack+0x1c/0x30 [ 219.187247] [ 219.187248] [] do_softirq.part.14+0x38/0x40 [ 219.187249] [] __local_bh_enable_ip+0x7d/0x80 [ 219.187254] [] sctp_inq_push+0x68/0x80 [sctp] [ 219.187258] [] sctp_backlog_rcv+0x151/0x1c0 [sctp] [ 219.187260] [] __release_sock+0x87/0xf0 [ 219.187261] [] release_sock+0x30/0xa0 [ 219.187265] [] sctp_accept+0x17d/0x210 [sctp] [ 219.187266] [] ? prepare_to_wait_event+0xf0/0xf0 [ 219.187268] [] inet_accept+0x3c/0x130 [ 219.187269] [] SYSC_accept4+0x103/0x210 [ 219.187271] [] ? _raw_spin_unlock_bh+0x1a/0x20 [ 219.187272] [] ? release_sock+0x8c/0xa0 [ 219.187276] [] ? sctp_inet_listen+0x62/0x1b0 [sctp] [ 219.187277] [] SyS_accept+0x10/0x20 Fixes: 860fbbc343bf ("sctp: prepare for socket backlog behavior change") Cc: Eric Dumazet Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/input.c | 2 ++ net/sctp/inqueue.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/input.c b/net/sctp/input.c index 30d72f7707b6..c182db7d691f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -321,6 +321,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) */ sk = rcvr->sk; + local_bh_disable(); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -332,6 +333,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_inq_push(inqueue, chunk); bh_unlock_sock(sk); + local_bh_enable(); /* If the chunk was backloged again, don't drop refs */ if (backloged) diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 942770675f4c..c30ddb0f3190 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -89,12 +89,10 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) * Eventually, we should clean up inqueue to not rely * on the BH related data structures. */ - local_bh_disable(); list_add_tail(&chunk->list, &q->in_chunk_list); if (chunk->asoc) chunk->asoc->stats.ipackets++; q->immediate.func(&q->immediate); - local_bh_enable(); } /* Peek at the next chunk on the inqeue. */ -- cgit v1.2.3 From 52253db924d1480bf2543afbb9551de31381aab9 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Sat, 23 Jul 2016 00:33:44 -0300 Subject: sctp: also point GSO head_skb to the sk when it's available The head skb for GSO packets won't travel through the inner depths of SCTP stack as it doesn't contain any chunks on it. That means skb->sk doesn't get set and then when sctp_recvmsg() calls sctp_inet6_skb_msgname() on the head_skb it panics, as this last needs to check flags at the socket (sp->v4mapped). The fix is to initialize skb->sk for th head skb once we are able to do it. That is, when the first chunk is processed. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/ulpevent.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index f6219b164b42..1bc4f71aaba8 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -91,6 +91,7 @@ int sctp_ulpevent_is_notification(const struct sctp_ulpevent *event) static inline void sctp_ulpevent_set_owner(struct sctp_ulpevent *event, const struct sctp_association *asoc) { + struct sctp_chunk *chunk = event->chunk; struct sk_buff *skb; /* Cast away the const, as we are just wanting to @@ -101,6 +102,8 @@ static inline void sctp_ulpevent_set_owner(struct sctp_ulpevent *event, event->asoc = (struct sctp_association *)asoc; atomic_add(event->rmem_len, &event->asoc->rmem_alloc); sctp_skb_set_owner_r(skb, asoc->base.sk); + if (chunk && chunk->head_skb && !chunk->head_skb->sk) + chunk->head_skb->sk = asoc->base.sk; } /* A simple destructor to give up the reference to the association. */ -- cgit v1.2.3 From 5fc382d87517707ad77ea4c9c12e2a3fde2c838a Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 23 Jul 2016 09:42:35 +0200 Subject: net/sctp: terminate rhashtable walk correctly I was seeing a lot of these: BUG: sleeping function called from invalid context at mm/slab.h:388 in_atomic(): 0, irqs_disabled(): 0, pid: 14971, name: trinity-c2 Preemption disabled at:[] rhashtable_walk_start+0x46/0x150 [] preempt_count_add+0x1fb/0x280 [] _raw_spin_lock+0x12/0x40 [] console_unlock+0x2f7/0x930 [] vprintk_emit+0x2fb/0x520 [] vprintk_default+0x1a/0x20 [] printk+0x94/0xb0 [] print_stack_trace+0xe0/0x170 [] ___might_sleep+0x3be/0x460 [] __might_sleep+0x90/0x1a0 [] kmem_cache_alloc+0x153/0x1e0 [] rhashtable_walk_init+0xfe/0x2d0 [] sctp_transport_walk_start+0x1e/0x60 [] sctp_transport_seq_start+0x4d/0x150 [] seq_read+0x27b/0x1180 [] proc_reg_read+0xbc/0x180 [] __vfs_read+0xdb/0x610 [] vfs_read+0xea/0x2d0 [] SyS_pread64+0x11b/0x150 [] do_syscall_64+0x19c/0x410 [] return_from_SYSCALL_64+0x0/0x6a [] 0xffffffffffffffff Apparently we always need to call rhashtable_walk_stop(), even when rhashtable_walk_start() fails: * rhashtable_walk_start - Start a hash table walk * @iter: Hash table iterator * * Start a hash table walk. Note that we take the RCU lock in all * cases including when we return an error. So you must always call * rhashtable_walk_stop to clean up. otherwise we never call rcu_read_unlock() and we get the splat above. Fixes: 53fa1036 ("sctp: fix some rhashtable functions using in sctp proc/diag") See-also: 53fa1036 ("sctp: fix some rhashtable functions using in sctp proc/diag") See-also: f2dba9c6 ("rhashtable: Introduce rhashtable_walk_*") Cc: Xin Long Cc: Herbert Xu Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d2681cb1dd30..8812e1bf6c1c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4393,6 +4393,7 @@ int sctp_transport_walk_start(struct rhashtable_iter *iter) err = rhashtable_walk_start(iter); if (err && err != -EAGAIN) { + rhashtable_walk_stop(iter); rhashtable_walk_exit(iter); return err; } -- cgit v1.2.3