diff options
author | Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> | 2007-12-24 21:33:45 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-01-28 15:00:06 -0800 |
commit | 0e3a4803aa06cd7bc2cfc1d04289df4f6027640a (patch) | |
tree | c3af99ceea81cd14e14c96fe0c85f39236de933b /net/ipv4/tcp_output.c | |
parent | 7ffc49a6ee92b7138c2ee28073a8e10e58335d62 (diff) | |
download | lwn-0e3a4803aa06cd7bc2cfc1d04289df4f6027640a.tar.gz lwn-0e3a4803aa06cd7bc2cfc1d04289df4f6027640a.zip |
[TCP]: Force TSO splits to MSS boundaries
If snd_wnd - snd_nxt wasn't a multiple of MSS, the skb was split on
an odd boundary by the callers of tcp_window_allows.
We try really hard to avoid unnecessary modulos. Therefore the
old caller side check "if (skb->len < limit)" was too wide as
well, because limit is not bounded in any way by skb->len and can
cause spurious testing for trimming in the middle of the queue,
while we only wanted that to happen at the tail of the queue.
A simple additional caller side check for tcp_write_queue_tail
would likely have resulted in 2 x modulos, because the limit would
first have to be calculated from the window. However, doing that
unnecessary modulo is not mandatory. After a minor change to
the algorithm, we simply determine first whether the modulo is needed
at all and, at that point, immediately decide also which value
it should be calculated from.
This approach also kills some duplicated code.
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 51 |
1 files changed, 25 insertions, 26 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9a9510acb147..9058e0a25107 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1017,13 +1017,29 @@ static void tcp_cwnd_validate(struct sock *sk) } } -static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd) +/* Returns the portion of skb which can be sent right away without + * introducing MSS oddities to segment boundaries. In rare cases where + * mss_now != mss_cache, we will request caller to create a small skb + * per input skb which could be mostly avoided here (if desired). + */ +static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, + unsigned int mss_now, + unsigned int cwnd) { - u32 window, cwnd_len; + struct tcp_sock *tp = tcp_sk(sk); + u32 needed, window, cwnd_len; window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq); cwnd_len = mss_now * cwnd; - return min(window, cwnd_len); + + if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) + return cwnd_len; + + if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len) + return cwnd_len; + + needed = min(skb->len, window); + return needed - needed % mss_now; } /* Can at least one segment of SKB be sent right now, according to the @@ -1458,17 +1474,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) } limit = mss_now; - if (tso_segs > 1) { - limit = tcp_window_allows(tp, skb, - mss_now, cwnd_quota); - - if (skb->len < limit) { - unsigned int trim = skb->len % mss_now; - - if (trim) - limit = skb->len - trim; - } - } + if (tso_segs > 1) + limit = tcp_mss_split_point(sk, skb, mss_now, + cwnd_quota); if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now))) @@ -1515,7 +1523,6 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, */ void tcp_push_one(struct sock *sk, unsigned int mss_now) { - struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = 
tcp_send_head(sk); unsigned int tso_segs, cwnd_quota; @@ -1530,17 +1537,9 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) BUG_ON(!tso_segs); limit = mss_now; - if (tso_segs > 1) { - limit = tcp_window_allows(tp, skb, - mss_now, cwnd_quota); - - if (skb->len < limit) { - unsigned int trim = skb->len % mss_now; - - if (trim) - limit = skb->len - trim; - } - } + if (tso_segs > 1) + limit = tcp_mss_split_point(sk, skb, mss_now, + cwnd_quota); if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now))) |