diff options
author | Alexander Duyck <alexander.h.duyck@intel.com> | 2012-06-21 13:58:31 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-06-22 17:11:13 -0700 |
commit | 6648bd7e0e62c0c8c03b15e00c9e7015e232feff (patch) | |
tree | e5e1fdadea6ea51b4341017c40906e71c1913774 /net | |
parent | 8e27628ecf883b9e5825103e40e6f86bf8225f1a (diff) | |
download | lwn-6648bd7e0e62c0c8c03b15e00c9e7015e232feff.tar.gz lwn-6648bd7e0e62c0c8c03b15e00c9e7015e232feff.zip |
ipv4: Add sysctl knob to control early socket demux
This change is meant to add a control for disabling early socket demux.
The main motivation behind this patch is to provide an option to disable
the feature as it adds an additional cost to routing that reduces overall
throughput by up to 5%. For example one of my systems went from 12.1Mpps
to 11.6 after the early socket demux was added. It looks like the reason
for the regression is that we are now having to perform two lookups, first
the one for an established socket, and then the one for the routing table.
By adding this patch and toggling the value for ip_early_demux to 0 I am
able to get back to the 12.1Mpps I was previously seeing.
[ Move local variables in ip_rcv_finish() down into the basic
block in which they are actually used. -DaveM ]
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/ip_input.c | 22 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 7 |
2 files changed, 20 insertions, 9 deletions
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 93b092c9a394..bca25179cdb9 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -313,6 +313,8 @@ drop: return true; } +int sysctl_ip_early_demux __read_mostly = 1; + static int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); @@ -323,16 +325,18 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (skb_dst(skb) == NULL) { - const struct net_protocol *ipprot; - int protocol = iph->protocol; - int err; + int err = -ENOENT; - rcu_read_lock(); - ipprot = rcu_dereference(inet_protos[protocol]); - err = -ENOENT; - if (ipprot && ipprot->early_demux) - err = ipprot->early_demux(skb); - rcu_read_unlock(); + if (sysctl_ip_early_demux) { + const struct net_protocol *ipprot; + int protocol = iph->protocol; + + rcu_read_lock(); + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot && ipprot->early_demux) + err = ipprot->early_demux(skb); + rcu_read_unlock(); + } if (err) { err = ip_route_input_noref(skb, iph->daddr, iph->saddr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ef32956ed655..12aa0c5867c4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -301,6 +301,13 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { + .procname = "ip_early_demux", + .data = &sysctl_ip_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "ip_dynaddr", .data = &sysctl_ip_dynaddr, .maxlen = sizeof(int), |