summaryrefslogtreecommitdiff
path: root/net/ipv4/ip_output.c
diff options
context:
space:
mode:
authorEric Dumazet <dada1@cosmosbay.com>2008-11-24 15:52:46 -0800
committerDavid S. Miller <davem@davemloft.net>2008-11-24 15:52:46 -0800
commit2e77d89b2fa8e3f8325b8ce7893ec3645f41aff5 (patch)
treeae40aa75449f705bd166630f9bcb5f41373d8248 /net/ipv4/ip_output.c
parent4db0acf3c0afbbbb2ae35a65f8896ca6655a47ec (diff)
downloadlwn-2e77d89b2fa8e3f8325b8ce7893ec3645f41aff5.tar.gz
lwn-2e77d89b2fa8e3f8325b8ce7893ec3645f41aff5.zip
net: avoid a pair of dst_hold()/dst_release() in ip_append_data()
We can reduce pressure on dst entry refcount that slowdown UDP transmit path on SMP machines. This pressure is visible on RTP servers when delivering content to mediagateways, especially big ones, handling thousand of streams. Several cpus send UDP frames to the same destination, hence use the same dst entry. This patch makes ip_append_data() eventually steal the refcount its callers had to take on the dst entry. This doesnt avoid all refcounting, but still gives speedups on SMP, on UDP/RAW transmit path Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r--net/ipv4/ip_output.c11
1 files changed, 8 insertions, 3 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 46d7be233eac..5516825a0751 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -778,7 +778,7 @@ int ip_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- struct ipcm_cookie *ipc, struct rtable *rt,
+ struct ipcm_cookie *ipc, struct rtable **rtp,
unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -793,6 +793,7 @@ int ip_append_data(struct sock *sk,
int offset = 0;
unsigned int maxfraglen, fragheaderlen;
int csummode = CHECKSUM_NONE;
+ struct rtable *rt;
if (flags&MSG_PROBE)
return 0;
@@ -812,7 +813,11 @@ int ip_append_data(struct sock *sk,
inet->cork.flags |= IPCORK_OPT;
inet->cork.addr = ipc->addr;
}
- dst_hold(&rt->u.dst);
+ rt = *rtp;
+ /*
+ * We steal reference to this route, caller should not release it
+ */
+ *rtp = NULL;
inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
rt->u.dst.dev->mtu :
dst_mtu(rt->u.dst.path);
@@ -1391,7 +1396,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
- &ipc, rt, MSG_DONTWAIT);
+ &ipc, &rt, MSG_DONTWAIT);
if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
if (arg->csumoffset >= 0)
*((__sum16 *)skb_transport_header(skb) +