diff options
author | David S. Miller <davem@davemloft.net> | 2005-07-05 15:24:38 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-07-05 15:24:38 -0700 |
commit | c1b4a7e69576d65efc31a8cea0714173c2841244 (patch) | |
tree | 92082532651cddc6f0649a9d7ca9ca63e381d310 /net/ipv4/tcp.c | |
parent | 0d9901df62fe4820aee86b49f1a074cdb5c6928e (diff) | |
download | lwn-c1b4a7e69576d65efc31a8cea0714173c2841244.tar.gz lwn-c1b4a7e69576d65efc31a8cea0714173c2841244.zip |
[TCP]: Move to new TSO segmenting scheme.
Make TSO segment transmit size decisions at send time not earlier.
The basic scheme is that we try to build as large a TSO frame as
possible when pulling in the user data, but the size of the TSO frame
output to the card is determined at transmit time.
This is guided by tp->xmit_size_goal. It is always set to a multiple
of MSS and tells sendmsg/sendpage how large an SKB to try and build.
Later, tcp_write_xmit() and tcp_push_one() chop up the packet if
necessary and conditions warrant. These routines can also decide to
"defer" in order to wait for more ACKs to arrive and thus allow larger
TSO frames to be emitted.
A general observation is that TSO elongates the pipe, thus requiring a
larger congestion window and larger buffering especially at the sender
side. Therefore, it is important that applications 1) get a large
enough socket send buffer (this is accomplished by our dynamic send
buffer expansion code) 2) do large enough writes.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 26 |
1 files changed, 15 insertions, 11 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2ba73bf3a8f9..29894c749163 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -615,7 +615,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse size_t psize, int flags) { struct tcp_sock *tp = tcp_sk(sk); - int mss_now; + int mss_now, size_goal; int err; ssize_t copied; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -628,6 +628,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + size_goal = tp->xmit_size_goal; copied = 0; err = -EPIPE; @@ -641,7 +642,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse int offset = poffset % PAGE_SIZE; int size = min_t(size_t, psize, PAGE_SIZE - offset); - if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) { + if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) { new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; @@ -652,7 +653,7 @@ new_segment: goto wait_for_memory; skb_entail(sk, tp, skb); - copy = mss_now; + copy = size_goal; } if (copy > size) @@ -693,7 +694,7 @@ new_segment: if (!(psize -= copy)) goto out; - if (skb->len != mss_now || (flags & MSG_OOB)) + if (skb->len < mss_now || (flags & MSG_OOB)) continue; if (forced_push(tp)) { @@ -713,6 +714,7 @@ wait_for_memory: goto do_error; mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + size_goal = tp->xmit_size_goal; } out: @@ -754,7 +756,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, static inline int select_size(struct sock *sk, struct tcp_sock *tp) { - int tmp = tp->mss_cache_std; + int tmp = tp->mss_cache; if (sk->sk_route_caps & NETIF_F_SG) { if (sk->sk_route_caps & NETIF_F_TSO) @@ -778,7 +780,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int iovlen, flags; - int mss_now; + int mss_now, size_goal; int err, copied; long timeo; @@ -797,6 +799,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + size_goal = tp->xmit_size_goal; /* Ok commence sending. */ iovlen = msg->msg_iovlen; @@ -819,7 +822,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, skb = sk->sk_write_queue.prev; if (!sk->sk_send_head || - (copy = mss_now - skb->len) <= 0) { + (copy = size_goal - skb->len) <= 0) { new_segment: /* Allocate new segment. If the interface is SG, @@ -842,7 +845,7 @@ new_segment: skb->ip_summed = CHECKSUM_HW; skb_entail(sk, tp, skb); - copy = mss_now; + copy = size_goal; } /* Try to append data to the end of skb. */ @@ -937,7 +940,7 @@ new_segment: if ((seglen -= copy) == 0 && iovlen == 0) goto out; - if (skb->len != mss_now || (flags & MSG_OOB)) + if (skb->len < mss_now || (flags & MSG_OOB)) continue; if (forced_push(tp)) { @@ -957,6 +960,7 @@ wait_for_memory: goto do_error; mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); + size_goal = tp->xmit_size_goal; } } @@ -2128,7 +2132,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rto = jiffies_to_usecs(tp->rto); info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); - info->tcpi_snd_mss = tp->mss_cache_std; + info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = tp->ack.rcv_mss; info->tcpi_unacked = tp->packets_out; @@ -2178,7 +2182,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, switch (optname) { case TCP_MAXSEG: - val = tp->mss_cache_std; + val = tp->mss_cache; if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) val = tp->rx_opt.user_mss; break; |