From 049b3ff5a86d0187184a189d2e31b8654d58fe22 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 11 Nov 2005 16:08:24 -0800 Subject: [SCTP]: Include ulpevents in socket receive buffer accounting. Also introduces a sysctl option to configure the receive buffer accounting policy to be either at socket or association level. Default is all the associations on the same socket share the receive buffer. Signed-off-by: Neil Horman Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sctp/input.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'net/sctp/input.c') diff --git a/net/sctp/input.c b/net/sctp/input.c index 28f32243397f..b24ff2c1aef5 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -100,21 +100,6 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb) return 0; } -/* The free routine for skbuffs that sctp receives */ -static void sctp_rfree(struct sk_buff *skb) -{ - atomic_sub(sizeof(struct sctp_chunk),&skb->sk->sk_rmem_alloc); - sock_rfree(skb); -} - -/* The ownership wrapper routine to do receive buffer accounting */ -static void sctp_rcv_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - skb_set_owner_r(skb,sk); - skb->destructor = sctp_rfree; - atomic_add(sizeof(struct sctp_chunk),&sk->sk_rmem_alloc); -} - struct sctp_input_cb { union { struct inet_skb_parm h4; @@ -217,9 +202,6 @@ int sctp_rcv(struct sk_buff *skb) rcvr = &ep->base; } - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) - goto discard_release; - /* * RFC 2960, 8.4 - Handle "Out of the blue" Packets. * An SCTP packet is called an "out of the blue" (OOTB) @@ -256,8 +238,6 @@ int sctp_rcv(struct sk_buff *skb) } SCTP_INPUT_CB(skb)->chunk = chunk; - sctp_rcv_set_owner_r(skb,sk); - /* Remember what endpoint is to handle this packet. */ chunk->rcvr = rcvr; -- cgit v1.2.3 From 52ccb8e90c0ace233b8b740f2fc5de0dbd706b27 Mon Sep 17 00:00:00 2001 From: Frank Filz Date: Thu, 22 Dec 2005 11:36:46 -0800 Subject: [SCTP]: Update SCTP_PEER_ADDR_PARAMS socket option to the latest api draft. This patch adds support to set/get heartbeat interval, maximum number of retransmissions, pathmtu, sackdelay time for a particular transport/ association/socket as per the latest SCTP sockets api draft11. Signed-off-by: Frank Filz Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 76 +++++-- include/net/sctp/user.h | 16 ++ net/sctp/associola.c | 81 ++++--- net/sctp/input.c | 36 +++- net/sctp/output.c | 17 +- net/sctp/sm_sideeffect.c | 26 ++- net/sctp/sm_statefuns.c | 20 +- net/sctp/socket.c | 511 ++++++++++++++++++++++++++++++++++----------- net/sctp/transport.c | 32 +-- 9 files changed, 595 insertions(+), 220 deletions(-) (limited to 'net/sctp/input.c') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8e7794ee27ff..f5c22d77feab 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -277,6 +277,24 @@ struct sctp_sock { __u32 default_context; __u32 default_timetolive; + /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to + * the destination address every heartbeat interval. This value + * will be inherited by all new associations. + */ + __u32 hbinterval; + + /* This is the max_retrans value for new associations. */ + __u16 pathmaxrxt; + + /* The initial Path MTU to use for new associations. */ + __u32 pathmtu; + + /* The default SACK delay timeout for new associations. */ + __u32 sackdelay; + + /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ + __u32 param_flags; + struct sctp_initmsg initmsg; struct sctp_rtoinfo rtoinfo; struct sctp_paddrparams paddrparam; @@ -845,9 +863,6 @@ struct sctp_transport { /* Data that has been sent, but not acknowledged. */ __u32 flight_size; - /* PMTU : The current known path MTU. */ - __u32 pmtu; - /* Destination */ struct dst_entry *dst; /* Source address. */ @@ -862,7 +877,22 @@ struct sctp_transport { /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to * the destination address every heartbeat interval. */ - int hb_interval; + __u32 hbinterval; + + /* This is the max_retrans value for the transport and will + * be initialized from the assocs value. This can be changed + * using SCTP_SET_PEER_ADDR_PARAMS socket option. + */ + __u16 pathmaxrxt; + + /* PMTU : The current known path MTU. */ + __u32 pathmtu; + + /* SACK delay timeout */ + __u32 sackdelay; + + /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ + __u32 param_flags; /* When was the last time (in jiffies) that we heard from this * transport? We use this to pick new active and retran paths. @@ -882,22 +912,11 @@ struct sctp_transport { */ int state; - /* hb_allowed : The current heartbeat state of this destination, - * : i.e. ALLOW-HB, NO-HEARTBEAT, etc. - */ - int hb_allowed; - /* These are the error stats for this destination. */ /* Error count : The current error count for this destination. */ unsigned short error_count; - /* This is the max_retrans value for the transport and will - * be initialized to proto.max_retrans.path. This can be changed - * using SCTP_SET_PEER_ADDR_PARAMS socket option. - */ - int max_retrans; - /* Per : A timer used by each destination. * Destination : * Timer : @@ -1502,6 +1521,28 @@ struct sctp_association { /* The largest timeout or RTO value to use in attempting an INIT */ __u16 max_init_timeo; + /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to + * the destination address every heartbeat interval. This value + * will be inherited by all new transports. + */ + __u32 hbinterval; + + /* This is the max_retrans value for new transports in the + * association. + */ + __u16 pathmaxrxt; + + /* Association : The smallest PMTU discovered for all of the + * PMTU : peer's transport addresses. + */ + __u32 pathmtu; + + /* SACK delay timeout */ + __u32 sackdelay; + + /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ + __u32 param_flags; + int timeouts[SCTP_NUM_TIMEOUT_TYPES]; struct timer_list timers[SCTP_NUM_TIMEOUT_TYPES]; @@ -1571,11 +1612,6 @@ struct sctp_association { */ wait_queue_head_t wait; - /* Association : The smallest PMTU discovered for all of the - * PMTU : peer's transport addresses. - */ - __u32 pmtu; - /* The message size at which SCTP fragmentation will occur. */ __u32 frag_point; diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index f1c3bc54526a..b9052864fa5a 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -503,11 +503,27 @@ struct sctp_setadaption { * unreachable. The following structure is used to access and modify an * address's parameters: */ +enum sctp_spp_flags { + SPP_HB_ENABLE = 1, /*Enable heartbeats*/ + SPP_HB_DISABLE = 2, /*Disable heartbeats*/ + SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE, + SPP_HB_DEMAND = 4, /*Send heartbeat immediately*/ + SPP_PMTUD_ENABLE = 8, /*Enable PMTU discovery*/ + SPP_PMTUD_DISABLE = 16, /*Disable PMTU discovery*/ + SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE, + SPP_SACKDELAY_ENABLE = 32, /*Enable SACK*/ + SPP_SACKDELAY_DISABLE = 64, /*Disable SACK*/ + SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE, +}; + struct sctp_paddrparams { sctp_assoc_t spp_assoc_id; struct sockaddr_storage spp_address; __u32 spp_hbinterval; __u16 spp_pathmaxrxt; + __u32 spp_pathmtu; + __u32 spp_sackdelay; + __u32 spp_flags; } __attribute__((packed, aligned(4))); /* diff --git a/net/sctp/associola.c b/net/sctp/associola.c index dec68a604773..9d05e13e92f6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -110,7 +110,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000; asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000) * 1000; - asoc->pmtu = 0; asoc->frag_point = 0; /* Set the association max_retrans and RTO values from the @@ -123,6 +122,25 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->overall_error_count = 0; + /* Initialize the association's heartbeat interval based on the + * sock configured value. + */ + asoc->hbinterval = msecs_to_jiffies(sp->hbinterval); + + /* Initialize path max retrans value. */ + asoc->pathmaxrxt = sp->pathmaxrxt; + + /* Initialize default path MTU. */ + asoc->pathmtu = sp->pathmtu; + + /* Set association default SACK delay */ + asoc->sackdelay = msecs_to_jiffies(sp->sackdelay); + + /* Set the association default flags controlling + * Heartbeat, SACK delay, and Path MTU Discovery. + */ + asoc->param_flags = sp->param_flags; + /* Initialize the maximum mumber of new data packets that can be sent * in a burst. */ @@ -144,8 +162,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a = 5 * asoc->rto_max; asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; - asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = - SCTP_DEFAULT_TIMEOUT_SACK; + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; @@ -540,23 +557,46 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, sctp_transport_set_owner(peer, asoc); + /* Initialize the peer's heartbeat interval based on the + * association configured value. + */ + peer->hbinterval = asoc->hbinterval; + + /* Set the path max_retrans. */ + peer->pathmaxrxt = asoc->pathmaxrxt; + + /* Initialize the peer's SACK delay timeout based on the + * association configured value. + */ + peer->sackdelay = asoc->sackdelay; + + /* Enable/disable heartbeat, SACK delay, and path MTU discovery + * based on association setting. + */ + peer->param_flags = asoc->param_flags; + /* Initialize the pmtu of the transport. */ - sctp_transport_pmtu(peer); + if (peer->param_flags & SPP_PMTUD_ENABLE) + sctp_transport_pmtu(peer); + else if (asoc->pathmtu) + peer->pathmtu = asoc->pathmtu; + else + peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT; /* If this is the first transport addr on this association, * initialize the association PMTU to the peer's PMTU. * If not and the current association PMTU is higher than the new * peer's PMTU, reset the association PMTU to the new peer's PMTU. */ - if (asoc->pmtu) - asoc->pmtu = min_t(int, peer->pmtu, asoc->pmtu); + if (asoc->pathmtu) + asoc->pathmtu = min_t(int, peer->pathmtu, asoc->pathmtu); else - asoc->pmtu = peer->pmtu; + asoc->pathmtu = peer->pathmtu; SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " - "%d\n", asoc, asoc->pmtu); + "%d\n", asoc, asoc->pathmtu); - asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); + asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); /* The asoc->peer.port might not be meaningful yet, but * initialize the packet structure anyway. @@ -574,7 +614,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, * (for example, implementations MAY use the size of the * receiver advertised window). */ - peer->cwnd = min(4*asoc->pmtu, max_t(__u32, 2*asoc->pmtu, 4380)); + peer->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); /* At this point, we may not have the receiver's advertised window, * so initialize ssthresh to the default value and it will be set @@ -585,17 +625,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, peer->partial_bytes_acked = 0; peer->flight_size = 0; - /* By default, enable heartbeat for peer address. */ - peer->hb_allowed = 1; - - /* Initialize the peer's heartbeat interval based on the - * sock configured value. - */ - peer->hb_interval = msecs_to_jiffies(sp->paddrparam.spp_hbinterval); - - /* Set the path max_retrans. */ - peer->max_retrans = sp->paddrparam.spp_pathmaxrxt; - /* Set the transport's RTO.initial value */ peer->rto = asoc->rto_initial; @@ -1155,18 +1184,18 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) /* Get the lowest pmtu of all the transports. */ list_for_each(pos, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); - if (!pmtu || (t->pmtu < pmtu)) - pmtu = t->pmtu; + if (!pmtu || (t->pathmtu < pmtu)) + pmtu = t->pathmtu; } if (pmtu) { struct sctp_sock *sp = sctp_sk(asoc->base.sk); - asoc->pmtu = pmtu; + asoc->pathmtu = pmtu; asoc->frag_point = sctp_frag_point(sp, pmtu); } SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", - __FUNCTION__, asoc, asoc->pmtu, asoc->frag_point); + __FUNCTION__, asoc, asoc->pathmtu, asoc->frag_point); } /* Should we send a SACK to update our peer? */ @@ -1179,7 +1208,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && ((asoc->rwnd - asoc->a_rwnd) >= - min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pmtu))) + min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu))) return 1; break; default: diff --git a/net/sctp/input.c b/net/sctp/input.c index b24ff2c1aef5..238f1bffa684 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -305,18 +305,36 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) { - if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - printk(KERN_WARNING "%s: Reported pmtu %d too low, " - "using default minimum of %d\n", __FUNCTION__, pmtu, - SCTP_DEFAULT_MINSEGMENT); - pmtu = SCTP_DEFAULT_MINSEGMENT; - } + if (sock_owned_by_user(sk) || !t || (t->pathmtu == pmtu)) + return; - if (!sock_owned_by_user(sk) && t && (t->pmtu != pmtu)) { - t->pmtu = pmtu; + if (t->param_flags & SPP_PMTUD_ENABLE) { + if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { + printk(KERN_WARNING "%s: Reported pmtu %d too low, " + "using default minimum of %d\n", + __FUNCTION__, pmtu, + SCTP_DEFAULT_MINSEGMENT); + /* Use default minimum segment size and disable + * pmtu discovery on this transport. + */ + t->pathmtu = SCTP_DEFAULT_MINSEGMENT; + t->param_flags = (t->param_flags & ~SPP_HB) | + SPP_PMTUD_DISABLE; + } else { + t->pathmtu = pmtu; + } + + /* Update association pmtu. */ sctp_assoc_sync_pmtu(asoc); - sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } + + /* Retransmit with the new pmtu setting. + * Normally, if PMTU discovery is disabled, an ICMP Fragmentation + * Needed will never be sent, but if a message was sent before + * PMTU discovery was disabled that was larger than the PMTU, it + * would not be fragmented, so it must be re-transmitted fragmented. + */ + sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } /* diff --git a/net/sctp/output.c b/net/sctp/output.c index 931371633464..a40991ef72c9 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -234,8 +234,8 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, goto finish; pmtu = ((packet->transport->asoc) ? - (packet->transport->asoc->pmtu) : - (packet->transport->pmtu)); + (packet->transport->asoc->pathmtu) : + (packet->transport->pathmtu)); too_big = (psize + chunk_len > pmtu); @@ -482,7 +482,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (!dst || (dst->obsolete > 1)) { dst_release(dst); sctp_transport_route(tp, NULL, sctp_sk(sk)); - sctp_assoc_sync_pmtu(asoc); + if (asoc->param_flags & SPP_PMTUD_ENABLE) { + sctp_assoc_sync_pmtu(asoc); + } } nskb->dst = dst_clone(tp->dst); @@ -492,7 +494,10 @@ int sctp_packet_transmit(struct sctp_packet *packet) SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", nskb->len); - (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok); + if (tp->param_flags & SPP_PMTUD_ENABLE) + (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok); + else + (*tp->af_specific->sctp_xmit)(nskb, tp, 1); out: packet->size = packet->overhead; @@ -577,7 +582,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * if ((flightsize + Max.Burst * MTU) < cwnd) * cwnd = flightsize + Max.Burst * MTU */ - max_burst_bytes = asoc->max_burst * asoc->pmtu; + max_burst_bytes = asoc->max_burst * asoc->pathmtu; if ((transport->flight_size + max_burst_bytes) < transport->cwnd) { transport->cwnd = transport->flight_size + max_burst_bytes; SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " @@ -622,7 +627,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * data will fit or delay in hopes of bundling a full * sized packet. */ - if (len < asoc->pmtu - packet->overhead) { + if (len < asoc->pathmtu - packet->overhead) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 823947170a33..2d7d8a5db2ac 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -157,9 +157,12 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, { __u32 ctsn, max_tsn_seen; struct sctp_chunk *sack; + struct sctp_transport *trans = asoc->peer.last_data_from; int error = 0; - if (force) + if (force || + (!trans && (asoc->param_flags & SPP_SACKDELAY_DISABLE)) || + (trans && (trans->param_flags & SPP_SACKDELAY_DISABLE))) asoc->peer.sack_needed = 1; ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); @@ -189,7 +192,22 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, if (!asoc->peer.sack_needed) { /* We will need a SACK for the next packet. */ asoc->peer.sack_needed = 1; - goto out; + + /* Set the SACK delay timeout based on the + * SACK delay for the last transport + * data was received from, or the default + * for the association. + */ + if (trans) + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = + trans->sackdelay; + else + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = + asoc->sackdelay; + + /* Restart the SACK timer. */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, + SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { if (asoc->a_rwnd > asoc->rwnd) asoc->a_rwnd = asoc->rwnd; @@ -205,7 +223,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } -out: + return error; nomem: error = -ENOMEM; @@ -415,7 +433,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, asoc->overall_error_count++; if (transport->state != SCTP_INACTIVE && - (transport->error_count++ >= transport->max_retrans)) { + (transport->error_count++ >= transport->pathmaxrxt)) { SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", " transport IP: port:%d failed.\n", asoc, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 475bfb4972d9..557a7d90b92a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -900,7 +900,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, * HEARTBEAT is sent (see Section 8.3). */ - if (transport->hb_allowed) { + if (transport->param_flags & SPP_HB_ENABLE) { if (SCTP_DISPOSITION_NOMEM == sctp_sf_heartbeat(ep, asoc, type, arg, commands)) @@ -1051,7 +1051,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_DISCARD; } - max_interval = link->hb_interval + link->rto; + max_interval = link->hbinterval + link->rto; /* Check if the timestamp looks valid. */ if (time_after(hbinfo->sent_at, jiffies) || @@ -2691,14 +2691,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, * document allow. However, an SCTP transmitter MUST NOT be * more aggressive than the following algorithms allow. */ - if (chunk->end_of_packet) { + if (chunk->end_of_packet) sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); - /* Start the SACK timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, - SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); - } - return SCTP_DISPOSITION_CONSUME; discard_force: @@ -2721,13 +2716,9 @@ discard_force: return SCTP_DISPOSITION_DISCARD; discard_noforce: - if (chunk->end_of_packet) { + if (chunk->end_of_packet) sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); - /* Start the SACK timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, - SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); - } return SCTP_DISPOSITION_DISCARD; consume: return SCTP_DISPOSITION_CONSUME; @@ -3442,9 +3433,6 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, * send another. */ sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); - /* Start the SACK timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, - SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); return SCTP_DISPOSITION_CONSUME; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9df888e932c5..adb5ee62c2a6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1941,106 +1941,275 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, * address's parameters: * * struct sctp_paddrparams { - * sctp_assoc_t spp_assoc_id; - * struct sockaddr_storage spp_address; - * uint32_t spp_hbinterval; - * uint16_t spp_pathmaxrxt; - * }; - * - * spp_assoc_id - (UDP style socket) This is filled in the application, - * and identifies the association for this query. + * sctp_assoc_t spp_assoc_id; + * struct sockaddr_storage spp_address; + * uint32_t spp_hbinterval; + * uint16_t spp_pathmaxrxt; + * uint32_t spp_pathmtu; + * uint32_t spp_sackdelay; + * uint32_t spp_flags; + * }; + * + * spp_assoc_id - (one-to-many style socket) This is filled in the + * application, and identifies the association for + * this query. * spp_address - This specifies which address is of interest. * spp_hbinterval - This contains the value of the heartbeat interval, - * in milliseconds. A value of 0, when modifying the - * parameter, specifies that the heartbeat on this - * address should be disabled. A value of UINT32_MAX - * (4294967295), when modifying the parameter, - * specifies that a heartbeat should be sent - * immediately to the peer address, and the current - * interval should remain unchanged. + * in milliseconds. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. * spp_pathmaxrxt - This contains the maximum number of * retransmissions before this address shall be - * considered unreachable. + * considered unreachable. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. + * spp_pathmtu - When Path MTU discovery is disabled the value + * specified here will be the "fixed" path mtu. + * Note that if the spp_address field is empty + * then all associations on this address will + * have this fixed path mtu set upon them. + * + * spp_sackdelay - When delayed sack is enabled, this value specifies + * the number of milliseconds that sacks will be delayed + * for. This value will apply to all addresses of an + * association if the spp_address field is empty. Note + * also, that if delayed sack is enabled and this + * value is set to 0, no change is made to the last + * recorded delayed sack timer value. + * + * spp_flags - These flags are used to control various features + * on an association. The flag field may contain + * zero or more of the following options. + * + * SPP_HB_ENABLE - Enable heartbeats on the + * specified address. Note that if the address + * field is empty all addresses for the association + * have heartbeats enabled upon them. + * + * SPP_HB_DISABLE - Disable heartbeats on the + * speicifed address. Note that if the address + * field is empty all addresses for the association + * will have their heartbeats disabled. Note also + * that SPP_HB_ENABLE and SPP_HB_DISABLE are + * mutually exclusive, only one of these two should + * be specified. Enabling both fields will have + * undetermined results. + * + * SPP_HB_DEMAND - Request a user initiated heartbeat + * to be made immediately. + * + * SPP_PMTUD_ENABLE - This field will enable PMTU + * discovery upon the specified address. Note that + * if the address feild is empty then all addresses + * on the association are effected. + * + * SPP_PMTUD_DISABLE - This field will disable PMTU + * discovery upon the specified address. Note that + * if the address feild is empty then all addresses + * on the association are effected. Not also that + * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually + * exclusive. Enabling both will have undetermined + * results. + * + * SPP_SACKDELAY_ENABLE - Setting this flag turns + * on delayed sack. The time specified in spp_sackdelay + * is used to specify the sack delay for this address. Note + * that if spp_address is empty then all addresses will + * enable delayed sack and take on the sack delay + * value specified in spp_sackdelay. + * SPP_SACKDELAY_DISABLE - Setting this flag turns + * off delayed sack. If the spp_address field is blank then + * delayed sack is disabled for the entire association. Note + * also that this field is mutually exclusive to + * SPP_SACKDELAY_ENABLE, setting both will have undefined + * results. */ +int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, + struct sctp_transport *trans, + struct sctp_association *asoc, + struct sctp_sock *sp, + int hb_change, + int pmtud_change, + int sackdelay_change) +{ + int error; + + if (params->spp_flags & SPP_HB_DEMAND && trans) { + error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); + if (error) + return error; + } + + if (params->spp_hbinterval) { + if (trans) { + trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval); + } else if (asoc) { + asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval); + } else { + sp->hbinterval = params->spp_hbinterval; + } + } + + if (hb_change) { + if (trans) { + trans->param_flags = + (trans->param_flags & ~SPP_HB) | hb_change; + } else if (asoc) { + asoc->param_flags = + (asoc->param_flags & ~SPP_HB) | hb_change; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_HB) | hb_change; + } + } + + if (params->spp_pathmtu) { + if (trans) { + trans->pathmtu = params->spp_pathmtu; + sctp_assoc_sync_pmtu(asoc); + } else if (asoc) { + asoc->pathmtu = params->spp_pathmtu; + sctp_frag_point(sp, params->spp_pathmtu); + } else { + sp->pathmtu = params->spp_pathmtu; + } + } + + if (pmtud_change) { + if (trans) { + int update = (trans->param_flags & SPP_PMTUD_DISABLE) && + (params->spp_flags & SPP_PMTUD_ENABLE); + trans->param_flags = + (trans->param_flags & ~SPP_PMTUD) | pmtud_change; + if (update) { + sctp_transport_pmtu(trans); + sctp_assoc_sync_pmtu(asoc); + } + } else if (asoc) { + asoc->param_flags = + (asoc->param_flags & ~SPP_PMTUD) | pmtud_change; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_PMTUD) | pmtud_change; + } + } + + if (params->spp_sackdelay) { + if (trans) { + trans->sackdelay = + msecs_to_jiffies(params->spp_sackdelay); + } else if (asoc) { + asoc->sackdelay = + msecs_to_jiffies(params->spp_sackdelay); + } else { + sp->sackdelay = params->spp_sackdelay; + } + } + + if (sackdelay_change) { + if (trans) { + trans->param_flags = + (trans->param_flags & ~SPP_SACKDELAY) | + sackdelay_change; + } else if (asoc) { + asoc->param_flags = + (asoc->param_flags & ~SPP_SACKDELAY) | + sackdelay_change; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_SACKDELAY) | + sackdelay_change; + } + } + + if (params->spp_pathmaxrxt) { + if (trans) { + trans->pathmaxrxt = params->spp_pathmaxrxt; + } else if (asoc) { + asoc->pathmaxrxt = params->spp_pathmaxrxt; + } else { + sp->pathmaxrxt = params->spp_pathmaxrxt; + } + } + + return 0; +} + static int sctp_setsockopt_peer_addr_params(struct sock *sk, char __user *optval, int optlen) { - struct sctp_paddrparams params; - struct sctp_transport *trans; + struct sctp_paddrparams params; + struct sctp_transport *trans = NULL; + struct sctp_association *asoc = NULL; + struct sctp_sock *sp = sctp_sk(sk); int error; + int hb_change, pmtud_change, sackdelay_change; if (optlen != sizeof(struct sctp_paddrparams)) - return -EINVAL; + return - EINVAL; + if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; - /* - * API 7. Socket Options (setting the default value for the endpoint) - * All options that support specific settings on an association by - * filling in either an association id variable or a sockaddr_storage - * SHOULD also support setting of the same value for the entire endpoint - * (i.e. future associations). To accomplish this the following logic is - * used when setting one of these options: - - * c) If neither the sockaddr_storage or association identification is - * set i.e. the sockaddr_storage is set to all 0's (INADDR_ANY) and - * the association identification is 0, the settings are a default - * and to be applied to the endpoint (all future associations). - */ + /* Validate flags and value parameters. */ + hb_change = params.spp_flags & SPP_HB; + pmtud_change = params.spp_flags & SPP_PMTUD; + sackdelay_change = params.spp_flags & SPP_SACKDELAY; + + if (hb_change == SPP_HB || + pmtud_change == SPP_PMTUD || + sackdelay_change == SPP_SACKDELAY || + params.spp_sackdelay > 500 || + (params.spp_pathmtu + && params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) + return -EINVAL; - /* update default value for endpoint (all future associations) */ - if (!params.spp_assoc_id && - sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { - /* Manual heartbeat on an endpoint is invalid. */ - if (0xffffffff == params.spp_hbinterval) + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. + */ + if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + trans = sctp_addr_id2transport(sk, ¶ms.spp_address, + params.spp_assoc_id); + if (!trans) return -EINVAL; - else if (params.spp_hbinterval) - sctp_sk(sk)->paddrparam.spp_hbinterval = - params.spp_hbinterval; - if (params.spp_pathmaxrxt) - sctp_sk(sk)->paddrparam.spp_pathmaxrxt = - params.spp_pathmaxrxt; - return 0; } - trans = sctp_addr_id2transport(sk, ¶ms.spp_address, - params.spp_assoc_id); - if (!trans) + /* Get association, if assoc_id != 0 and the socket is a one + * to many style socket, and an association was not found, then + * the id was invalid. + */ + asoc = sctp_id2assoc(sk, params.spp_assoc_id); + if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - /* Applications can enable or disable heartbeats for any peer address - * of an association, modify an address's heartbeat interval, force a - * heartbeat to be sent immediately, and adjust the address's maximum - * number of retransmissions sent before an address is considered - * unreachable. - * - * The value of the heartbeat interval, in milliseconds. A value of - * UINT32_MAX (4294967295), when modifying the parameter, specifies - * that a heartbeat should be sent immediately to the peer address, - * and the current interval should remain unchanged. - */ - if (0xffffffff == params.spp_hbinterval) { - error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); - if (error) - return error; - } else { - /* The value of the heartbeat interval, in milliseconds. A value of 0, - * when modifying the parameter, specifies that the heartbeat on this - * address should be disabled. + /* Heartbeat demand can only be sent on a transport or + * association, but not a socket. */ - if (params.spp_hbinterval) { - trans->hb_allowed = 1; - trans->hb_interval = - msecs_to_jiffies(params.spp_hbinterval); - } else - trans->hb_allowed = 0; - } + if (params.spp_flags & SPP_HB_DEMAND && !trans && !asoc) + return -EINVAL; + + /* Process parameters. */ + error = sctp_apply_peer_addr_params(¶ms, trans, asoc, sp, + hb_change, pmtud_change, + sackdelay_change); - /* spp_pathmaxrxt contains the maximum number of retransmissions - * before this address shall be considered unreachable. + if (error) + return error; + + /* If changes are for association, also apply parameters to each + * transport. */ - if (params.spp_pathmaxrxt) - trans->max_retrans = params.spp_pathmaxrxt; + if (!trans && asoc) { + struct list_head *pos; + + list_for_each(pos, &asoc->peer.transport_addr_list) { + trans = list_entry(pos, struct sctp_transport, + transports); + sctp_apply_peer_addr_params(¶ms, trans, asoc, sp, + hb_change, pmtud_change, + sackdelay_change); + } + } return 0; } @@ -2334,7 +2503,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl /* Update the frag_point of the existing associations. */ list_for_each(pos, &(sp->ep->asocs)) { asoc = list_entry(pos, struct sctp_association, asocs); - asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); + asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); } return 0; @@ -2715,8 +2884,13 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->paddrparam.spp_hbinterval = jiffies_to_msecs(sctp_hb_interval); - sp->paddrparam.spp_pathmaxrxt = sctp_max_retrans_path; + sp->hbinterval = jiffies_to_msecs(sctp_hb_interval); + sp->pathmaxrxt = sctp_max_retrans_path; + sp->pathmtu = 0; // allow default discovery + sp->sackdelay = sctp_sack_timeout; + sp->param_flags = SPP_HB_ENABLE | + SPP_PMTUD_ENABLE | + SPP_SACKDELAY_ENABLE; /* If enabled no SCTP message fragmentation will be performed. * Configure through SCTP_DISABLE_FRAGMENTS socket option. @@ -2865,7 +3039,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_primary.spinfo_cwnd = transport->cwnd; status.sstat_primary.spinfo_srtt = transport->srtt; status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto); - status.sstat_primary.spinfo_mtu = transport->pmtu; + status.sstat_primary.spinfo_mtu = transport->pathmtu; if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN) status.sstat_primary.spinfo_state = SCTP_ACTIVE; @@ -2924,7 +3098,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len, pinfo.spinfo_cwnd = transport->cwnd; pinfo.spinfo_srtt = transport->srtt; pinfo.spinfo_rto = jiffies_to_msecs(transport->rto); - pinfo.spinfo_mtu = transport->pmtu; + pinfo.spinfo_mtu = transport->pathmtu; if (pinfo.spinfo_state == SCTP_UNKNOWN) pinfo.spinfo_state = SCTP_ACTIVE; @@ -3086,69 +3260,154 @@ out: * address's parameters: * * struct sctp_paddrparams { - * sctp_assoc_t spp_assoc_id; - * struct sockaddr_storage spp_address; - * uint32_t spp_hbinterval; - * uint16_t spp_pathmaxrxt; - * }; - * - * spp_assoc_id - (UDP style socket) This is filled in the application, - * and identifies the association for this query. + * sctp_assoc_t spp_assoc_id; + * struct sockaddr_storage spp_address; + * uint32_t spp_hbinterval; + * uint16_t spp_pathmaxrxt; + * uint32_t spp_pathmtu; + * uint32_t spp_sackdelay; + * uint32_t spp_flags; + * }; + * + * spp_assoc_id - (one-to-many style socket) This is filled in the + * application, and identifies the association for + * this query. * spp_address - This specifies which address is of interest. * spp_hbinterval - This contains the value of the heartbeat interval, - * in milliseconds. A value of 0, when modifying the - * parameter, specifies that the heartbeat on this - * address should be disabled. A value of UINT32_MAX - * (4294967295), when modifying the parameter, - * specifies that a heartbeat should be sent - * immediately to the peer address, and the current - * interval should remain unchanged. + * in milliseconds. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. * spp_pathmaxrxt - This contains the maximum number of * retransmissions before this address shall be - * considered unreachable. + * considered unreachable. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. + * spp_pathmtu - When Path MTU discovery is disabled the value + * specified here will be the "fixed" path mtu. + * Note that if the spp_address field is empty + * then all associations on this address will + * have this fixed path mtu set upon them. + * + * spp_sackdelay - When delayed sack is enabled, this value specifies + * the number of milliseconds that sacks will be delayed + * for. This value will apply to all addresses of an + * association if the spp_address field is empty. Note + * also, that if delayed sack is enabled and this + * value is set to 0, no change is made to the last + * recorded delayed sack timer value. + * + * spp_flags - These flags are used to control various features + * on an association. The flag field may contain + * zero or more of the following options. + * + * SPP_HB_ENABLE - Enable heartbeats on the + * specified address. Note that if the address + * field is empty all addresses for the association + * have heartbeats enabled upon them. + * + * SPP_HB_DISABLE - Disable heartbeats on the + * speicifed address. Note that if the address + * field is empty all addresses for the association + * will have their heartbeats disabled. Note also + * that SPP_HB_ENABLE and SPP_HB_DISABLE are + * mutually exclusive, only one of these two should + * be specified. Enabling both fields will have + * undetermined results. + * + * SPP_HB_DEMAND - Request a user initiated heartbeat + * to be made immediately. + * + * SPP_PMTUD_ENABLE - This field will enable PMTU + * discovery upon the specified address. Note that + * if the address feild is empty then all addresses + * on the association are effected. + * + * SPP_PMTUD_DISABLE - This field will disable PMTU + * discovery upon the specified address. Note that + * if the address feild is empty then all addresses + * on the association are effected. Not also that + * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually + * exclusive. Enabling both will have undetermined + * results. + * + * SPP_SACKDELAY_ENABLE - Setting this flag turns + * on delayed sack. The time specified in spp_sackdelay + * is used to specify the sack delay for this address. Note + * that if spp_address is empty then all addresses will + * enable delayed sack and take on the sack delay + * value specified in spp_sackdelay. + * SPP_SACKDELAY_DISABLE - Setting this flag turns + * off delayed sack. If the spp_address field is blank then + * delayed sack is disabled for the entire association. Note + * also that this field is mutually exclusive to + * SPP_SACKDELAY_ENABLE, setting both will have undefined + * results. */ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen) { - struct sctp_paddrparams params; - struct sctp_transport *trans; + struct sctp_paddrparams params; + struct sctp_transport *trans = NULL; + struct sctp_association *asoc = NULL; + struct sctp_sock *sp = sctp_sk(sk); if (len != sizeof(struct sctp_paddrparams)) return -EINVAL; + if (copy_from_user(¶ms, optval, len)) return -EFAULT; - /* If no association id is specified retrieve the default value - * for the endpoint that will be used for all future associations + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. */ - if (!params.spp_assoc_id && - sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { - params.spp_hbinterval = sctp_sk(sk)->paddrparam.spp_hbinterval; - params.spp_pathmaxrxt = sctp_sk(sk)->paddrparam.spp_pathmaxrxt; - - goto done; + if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + trans = sctp_addr_id2transport(sk, ¶ms.spp_address, + params.spp_assoc_id); + if (!trans) { + SCTP_DEBUG_PRINTK("Failed no transport\n"); + return -EINVAL; + } } - trans = sctp_addr_id2transport(sk, ¶ms.spp_address, - params.spp_assoc_id); - if (!trans) - return -EINVAL; - - /* The value of the heartbeat interval, in milliseconds. A value of 0, - * when modifying the parameter, specifies that the heartbeat on this - * address should be disabled. + /* Get association, if assoc_id != 0 and the socket is a one + * to many style socket, and an association was not found, then + * the id was invalid. */ - if (!trans->hb_allowed) - params.spp_hbinterval = 0; - else - params.spp_hbinterval = jiffies_to_msecs(trans->hb_interval); + asoc = sctp_id2assoc(sk, params.spp_assoc_id); + if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { + SCTP_DEBUG_PRINTK("Failed no association\n"); + return -EINVAL; + } - /* spp_pathmaxrxt contains the maximum number of retransmissions - * before this address shall be considered unreachable. - */ - params.spp_pathmaxrxt = trans->max_retrans; + if (trans) { + /* Fetch transport values. */ + params.spp_hbinterval = jiffies_to_msecs(trans->hbinterval); + params.spp_pathmtu = trans->pathmtu; + params.spp_pathmaxrxt = trans->pathmaxrxt; + params.spp_sackdelay = jiffies_to_msecs(trans->sackdelay); + + /*draft-11 doesn't say what to return in spp_flags*/ + params.spp_flags = trans->param_flags; + } else if (asoc) { + /* Fetch association values. */ + params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval); + params.spp_pathmtu = asoc->pathmtu; + params.spp_pathmaxrxt = asoc->pathmaxrxt; + params.spp_sackdelay = jiffies_to_msecs(asoc->sackdelay); + + /*draft-11 doesn't say what to return in spp_flags*/ + params.spp_flags = asoc->param_flags; + } else { + /* Fetch socket values. */ + params.spp_hbinterval = sp->hbinterval; + params.spp_pathmtu = sp->pathmtu; + params.spp_sackdelay = sp->sackdelay; + params.spp_pathmaxrxt = sp->pathmaxrxt; + + /*draft-11 doesn't say what to return in spp_flags*/ + params.spp_flags = sp->param_flags; + } -done: if (copy_to_user(optval, ¶ms, len)) return -EFAULT; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 268ddaf2dc0f..68d73e2dd155 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -86,10 +86,13 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->init_sent_count = 0; peer->state = SCTP_ACTIVE; - peer->hb_allowed = 0; + peer->param_flags = SPP_HB_DISABLE | + SPP_PMTUD_ENABLE | + SPP_SACKDELAY_ENABLE; + peer->hbinterval = 0; /* Initialize the default path max_retrans. */ - peer->max_retrans = sctp_max_retrans_path; + peer->pathmaxrxt = sctp_max_retrans_path; peer->error_count = 0; INIT_LIST_HEAD(&peer->transmitted); @@ -229,10 +232,10 @@ void sctp_transport_pmtu(struct sctp_transport *transport) dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL); if (dst) { - transport->pmtu = dst_mtu(dst); + transport->pathmtu = dst_mtu(dst); dst_release(dst); } else - transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; + transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } /* Caches the dst entry and source address for a transport's destination @@ -254,8 +257,11 @@ void sctp_transport_route(struct sctp_transport *transport, af->get_saddr(asoc, dst, daddr, &transport->saddr); transport->dst = dst; + if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { + return; + } if (dst) { - transport->pmtu = dst_mtu(dst); + transport->pathmtu = dst_mtu(dst); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). @@ -264,7 +270,7 @@ void sctp_transport_route(struct sctp_transport *transport, opt->pf->af->to_sk_saddr(&transport->saddr, asoc->base.sk); } else - transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; + transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } /* Hold a reference to a transport. */ @@ -369,7 +375,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, ssthresh = transport->ssthresh; pba = transport->partial_bytes_acked; - pmtu = transport->asoc->pmtu; + pmtu = transport->asoc->pathmtu; if (cwnd <= ssthresh) { /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less @@ -441,8 +447,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pmtu); - transport->cwnd = transport->asoc->pmtu; + 4*transport->asoc->pathmtu); + transport->cwnd = transport->asoc->pathmtu; break; case SCTP_LOWER_CWND_FAST_RTX: @@ -459,7 +465,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pmtu); + 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; break; @@ -479,7 +485,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, if ((jiffies - transport->last_time_ecne_reduced) > transport->rtt) { transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pmtu); + 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; transport->last_time_ecne_reduced = jiffies; } @@ -496,7 +502,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, */ if ((jiffies - transport->last_time_used) > transport->rto) transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pmtu); + 4*transport->asoc->pathmtu); break; }; @@ -511,7 +517,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, unsigned long sctp_transport_timeout(struct sctp_transport *t) { unsigned long timeout; - timeout = t->hb_interval + t->rto + sctp_jitter(t->rto); + timeout = t->hbinterval + t->rto + sctp_jitter(t->rto); timeout += jiffies; return timeout; } -- cgit v1.2.3 From b59c270104f03960069596722fea70340579244d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 6 Jan 2006 23:06:10 -0800 Subject: [NETFILTER]: Keep conntrack reference until IPsec policy checks are done Keep the conntrack reference until policy checks have been performed for IPsec NAT support. The reference needs to be dropped before a packet is queued to avoid having the conntrack module unloadable. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 1 + net/ipv4/ip_input.c | 15 ++++++--------- net/ipv4/raw.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv4/udp.c | 2 ++ net/sctp/input.c | 1 + 6 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net/sctp/input.c') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3f244670764a..23ba177c1150 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1099,6 +1099,7 @@ int dccp_v4_destroy_sock(struct sock *sk) kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; } + nf_reset(skb); /* Clean up a referenced DCCP bind bucket. */ if (inet_csk(sk)->icsk_bind_hash != NULL) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index e45846ae570b..18d7fad474d7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -185,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb) raw_rcv(last, skb2); } last = sk; - nf_reset(skb); } } @@ -204,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) __skb_pull(skb, ihl); - /* Free reference early: we don't need it any more, and it may - hold ip_conntrack module loaded indefinitely. */ - nf_reset(skb); - /* Point into the IP datagram, just past the header. */ skb->h.raw = skb->data; @@ -232,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { int ret; - if (!ipprot->no_policy && - !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - goto out; + if (!ipprot->no_policy) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + kfree_skb(skb); + goto out; + } + nf_reset(skb); } ret = ipprot->handler(skb); if (ret < 0) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4b0d7e4d6269..165a4d81efa4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } + nf_reset(skb); skb_push(skb, skb->data - skb->nh.raw); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9f83e5b28ce..6ea353907af5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1080,6 +1080,7 @@ process: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + nf_reset(skb); if (sk_filter(sk, skb, 0)) goto discard_and_relse; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 223abaa72bc5..00840474a449 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -989,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) kfree_skb(skb); return -1; } + nf_reset(skb); if (up->encap_type) { /* @@ -1149,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + nf_reset(skb); /* No socket. Drop packet silently, if checksum is wrong */ if (udp_checksum_complete(skb)) diff --git a/net/sctp/input.c b/net/sctp/input.c index 238f1bffa684..4aa6fc60357c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -225,6 +225,7 @@ int sctp_rcv(struct sk_buff *skb) if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) goto discard_release; + nf_reset(skb); ret = sk_filter(sk, skb, 1); if (ret) -- cgit v1.2.3 From 7a48f923b8b27bfaa5f7b2a449a6fe268724ddd5 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 17 Jan 2006 11:51:28 -0800 Subject: [SCTP]: Fix potential race condition between sctp_close() and sctp_rcv(). Do not release the reference to association/endpoint if an incoming skb is added to backlog. Instead release it after the chunk is processed in sctp_backlog_rcv(). Signed-off-by: Sridhar Samudrala Signed-off-by: Vlad Yasevich --- net/sctp/input.c | 29 ++++++++++++++++++++--------- net/sctp/inqueue.c | 4 +++- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'net/sctp/input.c') diff --git a/net/sctp/input.c b/net/sctp/input.c index 4aa6fc60357c..c463e4049c52 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -262,15 +262,12 @@ int sctp_rcv(struct sk_buff *skb) else sctp_backlog_rcv(sk, skb); - /* Release the sock and any reference counts we took in the - * lookup calls. + /* Release the sock and the sock ref we took in the lookup calls. + * The asoc/ep ref will be released in sctp_backlog_rcv. */ sctp_bh_unlock_sock(sk); - if (asoc) - sctp_association_put(asoc); - else - sctp_endpoint_put(ep); sock_put(sk); + return ret; discard_it: @@ -296,9 +293,23 @@ discard_release: int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; - struct sctp_inq *inqueue = &chunk->rcvr->inqueue; - - sctp_inq_push(inqueue, chunk); + struct sctp_inq *inqueue = NULL; + struct sctp_ep_common *rcvr = NULL; + + rcvr = chunk->rcvr; + if (rcvr->dead) { + sctp_chunk_free(chunk); + } else { + inqueue = &chunk->rcvr->inqueue; + sctp_inq_push(inqueue, chunk); + } + + /* Release the asoc/ep ref we took in the lookup calls in sctp_rcv. */ + if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) + sctp_association_put(sctp_assoc(rcvr)); + else + sctp_endpoint_put(sctp_ep(rcvr)); + return 0; } diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 2d33922c044b..297b8951463e 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -73,8 +73,10 @@ void sctp_inq_free(struct sctp_inq *queue) /* If there is a packet which is currently being worked on, * free it as well. */ - if (queue->in_progress) + if (queue->in_progress) { sctp_chunk_free(queue->in_progress); + queue->in_progress = NULL; + } if (queue->malloced) { /* Dump the master memory segment. */ -- cgit v1.2.3 From c4d2444e992c4eda1d7fc3287e93ba58295bf6b9 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 17 Jan 2006 11:56:26 -0800 Subject: [SCTP]: Fix couple of races between sctp_peeloff() and sctp_rcv(). Validate and update the sk in sctp_rcv() to avoid the race where an assoc/ep could move to a different socket after we get the sk, but before the skb is added to the backlog. Also migrate the skb's in backlog queue to new sk when doing a peeloff. Signed-off-by: Sridhar Samudrala --- include/net/sctp/sctp.h | 2 ++ net/sctp/input.c | 35 ++++++++++++++++++++++++++++++++++- net/sctp/socket.c | 4 ++++ 3 files changed, 40 insertions(+), 1 deletion(-) (limited to 'net/sctp/input.c') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a553f39f6aee..e673b2c984e9 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -175,6 +175,8 @@ void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t); +void sctp_backlog_migrate(struct sctp_association *assoc, + struct sock *oldsk, struct sock *newsk); /* * Section: Macros, externs, and inlines diff --git a/net/sctp/input.c b/net/sctp/input.c index c463e4049c52..71fd56375641 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -257,12 +257,21 @@ int sctp_rcv(struct sk_buff *skb) */ sctp_bh_lock_sock(sk); + /* It is possible that the association could have moved to a different + * socket if it is peeled off. If so, update the sk. + */ + if (sk != rcvr->sk) { + sctp_bh_lock_sock(rcvr->sk); + sctp_bh_unlock_sock(sk); + sk = rcvr->sk; + } + if (sock_owned_by_user(sk)) sk_add_backlog(sk, skb); else sctp_backlog_rcv(sk, skb); - /* Release the sock and the sock ref we took in the lookup calls. + /* Release the sock and the sock ref we took in the lookup calls. * The asoc/ep ref will be released in sctp_backlog_rcv. */ sctp_bh_unlock_sock(sk); @@ -297,6 +306,9 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) struct sctp_ep_common *rcvr = NULL; rcvr = chunk->rcvr; + + BUG_TRAP(rcvr->sk == sk); + if (rcvr->dead) { sctp_chunk_free(chunk); } else { @@ -313,6 +325,27 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } +void sctp_backlog_migrate(struct sctp_association *assoc, + struct sock *oldsk, struct sock *newsk) +{ + struct sk_buff *skb; + struct sctp_chunk *chunk; + + skb = oldsk->sk_backlog.head; + oldsk->sk_backlog.head = oldsk->sk_backlog.tail = NULL; + while (skb != NULL) { + struct sk_buff *next = skb->next; + + chunk = SCTP_INPUT_CB(skb)->chunk; + skb->next = NULL; + if (&assoc->base == chunk->rcvr) + sk_add_backlog(newsk, skb); + else + sk_add_backlog(oldsk, skb); + skb = next; + } +} + /* Handle icmp frag needed error. */ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6a0b1af89932..fb1821d9f338 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5602,8 +5602,12 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, */ newsp->type = type; + spin_lock_bh(&oldsk->sk_lock.slock); + /* Migrate the backlog from oldsk to newsk. */ + sctp_backlog_migrate(assoc, oldsk, newsk); /* Migrate the association to the new socket. */ sctp_assoc_migrate(assoc, newsk); + spin_unlock_bh(&oldsk->sk_lock.slock); /* If the association on the newsk is already closed before accept() * is called, set RCV_SHUTDOWN flag. -- cgit v1.2.3 From a7d1f1b66c05ef4ebb58a34be7caad9af15546a4 Mon Sep 17 00:00:00 2001 From: Tsutomu Fujii Date: Tue, 17 Jan 2006 11:57:09 -0800 Subject: [SCTP]: Fix sctp_rcv_ootb() to handle the last chunk of a packet correctly. Signed-off-by: Tsutomu Fujii Signed-off-by: Sridhar Samudrala --- net/sctp/input.c | 13 +++++++++---- net/sctp/sm_statefuns.c | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'net/sctp/input.c') diff --git a/net/sctp/input.c b/net/sctp/input.c index 71fd56375641..cb78b50868ee 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -588,10 +588,16 @@ int sctp_rcv_ootb(struct sk_buff *skb) sctp_errhdr_t *err; ch = (sctp_chunkhdr_t *) skb->data; - ch_end = ((__u8 *) ch) + WORD_ROUND(ntohs(ch->length)); /* Scan through all the chunks in the packet. */ - while (ch_end > (__u8 *)ch && ch_end < skb->tail) { + do { + /* Break out if chunk length is less then minimal. */ + if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) + break; + + ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + if (ch_end > skb->tail) + break; /* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the * receiver MUST silently discard the OOTB packet and take no @@ -622,8 +628,7 @@ int sctp_rcv_ootb(struct sk_buff *skb) } ch = (sctp_chunkhdr_t *) ch_end; - ch_end = ((__u8 *) ch) + WORD_ROUND(ntohs(ch->length)); - } + } while (ch_end < skb->tail); return 0; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 477d7f80dba6..71c9a961c321 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3090,6 +3090,8 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, break; ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + if (ch_end > skb->tail) + break; if (SCTP_CID_SHUTDOWN_ACK == ch->type) ootb_shut_ack = 1; -- cgit v1.2.3