From c6cffba4ffa26a8ffacd0bb9f3144e34f20da7de Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 Jul 2012 11:14:38 +0000 Subject: ipv4: Fix input route performance regression. With the routing cache removal we lost the "noref" code paths on input, and this can kill some routing workloads. Reinstate the noref path when we hit a cached route in the FIB nexthops. With help from Eric Dumazet. Reported-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/route.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index c29ef2733f2d..8c52bc6f1c90 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -157,8 +158,22 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 return ip_route_output_key(net, fl4); } -extern int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin); +extern int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin); + +static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin) +{ + int err; + + rcu_read_lock(); + err = ip_route_input_noref(skb, dst, src, tos, devin); + if (!err) + skb_dst_force(skb); + rcu_read_unlock(); + + return err; +} extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags); -- cgit v1.2.3 From c7109986db3c945f50ceed884a30e0fd8af3b89b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jul 2012 12:18:11 +0000 Subject: ipv6: Early TCP socket demux This is the IPv6 missing bits for infrastructure added in commit 41063e9dd1195 (ipv4: Early TCP socket demux.) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 13 +++++++------ include/net/protocol.h | 2 ++ net/ipv4/ip_input.c | 1 + net/ipv6/ip6_input.c | 13 +++++++++++-- net/ipv6/tcp_ipv6.c | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 00cbb4384c79..9e34c877a770 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -96,14 +96,15 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, const __be16 sport, const __be16 dport) { - struct sock *sk; + struct sock *sk = skb_steal_sock(skb); - if (unlikely(sk = skb_steal_sock(skb))) + if (sk) return sk; - else return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, - &ipv6_hdr(skb)->saddr, sport, - &ipv6_hdr(skb)->daddr, ntohs(dport), - inet6_iif(skb)); + + return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, + &ipv6_hdr(skb)->saddr, sport, + &ipv6_hdr(skb)->daddr, ntohs(dport), + inet6_iif(skb)); } extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, diff --git a/include/net/protocol.h b/include/net/protocol.h index 057f2d315567..929528c73fe8 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -52,6 +52,8 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { + void (*early_demux)(struct sk_buff *skb); + int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index bda8cac2ae91..981ff1eef28c 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -314,6 +314,7 @@ drop: } int sysctl_ip_early_demux __read_mostly = 1; +EXPORT_SYMBOL(sysctl_ip_early_demux); static int ip_rcv_finish(struct sk_buff *skb) { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5ab923e51af3..47975e363fcd 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -47,9 +47,18 @@ -inline int ip6_rcv_finish( struct sk_buff *skb) +int ip6_rcv_finish(struct sk_buff *skb) { - if (skb_dst(skb) == NULL) + if (sysctl_ip_early_demux && !skb_dst(skb)) { + const struct inet6_protocol *ipprot; + + rcu_read_lock(); + ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); + if (ipprot && ipprot->early_demux) + ipprot->early_demux(skb); + rcu_read_unlock(); + } + if (!skb_dst(skb)) ip6_route_input(skb); return dst_input(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f49476e2d884..221224e72507 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1674,6 +1674,43 @@ do_time_wait: goto discard_it; } +static void tcp_v6_early_demux(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) + return; + + hdr = ipv6_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return; + + sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + &hdr->saddr, th->source, + &hdr->daddr, ntohs(th->dest), + inet6_iif(skb)); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk->sk_state != TCP_TIME_WAIT) { + struct dst_entry *dst = sk->sk_rx_dst; + struct inet_sock *icsk = inet_sk(sk); + if (dst) + dst = dst_check(dst, 0); + if (dst && + icsk->rx_dst_ifindex == inet6_iif(skb)) + skb_dst_set_noref(skb, dst); + } + } +} + static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, @@ -1984,6 +2021,7 @@ struct proto tcpv6_prot = { }; static const struct inet6_protocol tcpv6_protocol = { + .early_demux = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .gso_send_check = tcp_v6_gso_send_check, -- cgit v1.2.3