From 5cbb28a4bf65c7e4daa6c25b651fed8eb888c620 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 28 Oct 2015 13:09:53 -0400 Subject: tipc: linearize arriving NAME_DISTR and LINK_PROTO buffers Testing of the new UDP bearer has revealed that reception of NAME_DISTRIBUTOR, LINK_PROTOCOL/RESET and LINK_PROTOCOL/ACTIVATE message buffers is not prepared for the case that those may be non-linear. We now linearize all such buffers before they are delivered up to the generic reception layer. In order for the commit to apply cleanly to 'net' and 'stable', we do the change in the function tipc_udp_recv() for now. Later, we will post a commit to 'net-next' moving the linearization to generic code, in tipc_named_rcv() and tipc_link_proto_rcv(). Fixes: commit d0f91938bede ("tipc: add ip/udp media type") Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 6e648d90297a..cd7c5f131e72 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -48,6 +48,7 @@ #include #include "core.h" #include "bearer.h" +#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -222,6 +223,10 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; + int usr = msg_user(buf_msg(skb)); + + if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR)) + skb_linearize(skb); ub = rcu_dereference_sk_user_data(sk); if (!ub) { -- cgit v1.2.3 From 4f823defdd5b106a5e89745ee8b163c71855de1e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 30 Oct 2015 10:23:33 +0200 Subject: ipv4: fix to not remove local route on link down When fib_netdev_event calls fib_disable_ip on NETDEV_DOWN event we should not delete the local routes if the local address is still present. The confusion comes from the fact that both fib_netdev_event and fib_inetaddr_event use the NETDEV_DOWN constant. Fix it by returning back the variable 'force'. Steps to reproduce: modprobe dummy ifconfig dummy0 192.168.168.1 up ifconfig dummy0 down ip route list table local | grep dummy | grep host local 192.168.168.1 dev dummy0 proto kernel scope host src 192.168.168.1 Fixes: 8a3d03166f19 ("net: track link-status of ipv4 nexthops") Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 13 +++++++------ net/ipv4/fib_semantics.c | 11 ++++++++--- 3 files changed, 16 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 727d6e9a9685..965fa5b1a274 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -317,7 +317,7 @@ void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); -int fib_sync_down_dev(struct net_device *dev, unsigned long event); +int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net *net, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); void fib_select_multipath(struct fib_result *res); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 690bcbc59f26..457b2cd75b85 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1110,9 +1110,10 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, unsigned long event) +static void fib_disable_ip(struct net_device *dev, unsigned long event, + bool force) { - if (fib_sync_down_dev(dev, event)) + if (fib_sync_down_dev(dev, event, force)) fib_flush(dev_net(dev)); rt_cache_flush(dev_net(dev)); arp_ifdown(dev); @@ -1140,7 +1141,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. */ - fib_disable_ip(dev, event); + fib_disable_ip(dev, event, true); } else { rt_cache_flush(dev_net(dev)); } @@ -1157,7 +1158,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo unsigned int flags; if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, event); + fib_disable_ip(dev, event, true); rt_flush_dev(dev); return NOTIFY_DONE; } @@ -1178,14 +1179,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(net); break; case NETDEV_DOWN: - fib_disable_ip(dev, event); + fib_disable_ip(dev, event, false); break; case NETDEV_CHANGE: flags = dev_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) fib_sync_up(dev, RTNH_F_LINKDOWN); else - fib_sync_down_dev(dev, event); + fib_sync_down_dev(dev, event, false); /* fall through */ case NETDEV_CHANGEMTU: rt_cache_flush(net); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 064bd3caaa4f..2aa5b5e7da75 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1281,7 +1281,13 @@ int fib_sync_down_addr(struct net *net, __be32 local) return ret; } -int fib_sync_down_dev(struct net_device *dev, unsigned long event) +/* Event force Flags Description + * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host + * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host + * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed + * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed + */ +int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -1290,8 +1296,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) struct hlist_head *head = &fib_info_devhash[hash]; struct fib_nh *nh; - if (event == NETDEV_UNREGISTER || - event == NETDEV_DOWN) + if (force) scope = -1; hlist_for_each_entry(nh, head, nh_hash) { -- cgit v1.2.3 From c9b3292eeb52c6834e972eb5b8fe38914771ed12 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 30 Oct 2015 10:23:34 +0200 Subject: ipv4: update RTNH_F_LINKDOWN flag on UP event When nexthop is part of multipath route we should clear the LINKDOWN flag when link goes UP or when first address is added. This is needed because we always set LINKDOWN flag when DEAD flag was set but now on UP the nexthop is not dead anymore. Examples when LINKDOWN bit can be forgotten when no NETDEV_CHANGE is delivered: - link goes down (LINKDOWN is set), then link goes UP and device shows carrier OK but LINKDOWN remains set - last address is deleted (LINKDOWN is set), then address is added and device shows carrier OK but LINKDOWN remains set Steps to reproduce: modprobe dummy ifconfig dummy0 192.168.168.1 up here add a multipath route where one nexthop is for dummy0: ip route add 1.2.3.4 nexthop dummy0 nexthop SOME_OTHER_DEVICE ifconfig dummy0 down ifconfig dummy0 up now ip route shows nexthop that is not dead. Now set the sysctl var: echo 1 > /proc/sys/net/ipv4/conf/dummy0/ignore_routes_with_linkdown now ip route will show a dead nexthop because the forgotten RTNH_F_LINKDOWN is propagated as RTNH_F_DEAD. Fixes: 8a3d03166f19 ("net: track link-status of ipv4 nexthops") Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2aa5b5e7da75..e966f8599b4a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1445,6 +1445,13 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) if (!(dev->flags & IFF_UP)) return 0; + if (nh_flags & RTNH_F_DEAD) { + unsigned int flags = dev_get_flags(dev); + + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + nh_flags |= RTNH_F_LINKDOWN; + } + prev_fi = NULL; hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; -- cgit v1.2.3 From 44f49dd8b5a606870a1f21101522a0f9c4414784 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Fri, 30 Oct 2015 16:54:31 -0700 Subject: ipmr: fix possible race resulting from improper usage of IP_INC_STATS_BH() in preemptible context. Fixes the following kernel BUG : BUG: using __this_cpu_add() in preemptible [00000000] code: bash/2758 caller is __this_cpu_preempt_check+0x13/0x15 CPU: 0 PID: 2758 Comm: bash Tainted: P O 3.18.19 #2 ffffffff8170eaca ffff880110d1b788 ffffffff81482b2a 0000000000000000 0000000000000000 ffff880110d1b7b8 ffffffff812010ae ffff880007cab800 ffff88001a060800 ffff88013a899108 ffff880108b84240 ffff880110d1b7c8 Call Trace: [] dump_stack+0x52/0x80 [] check_preemption_disabled+0xce/0xe1 [] __this_cpu_preempt_check+0x13/0x15 [] ipmr_queue_xmit+0x647/0x70c [] ip_mr_forward+0x32f/0x34e [] ip_mroute_setsockopt+0xe03/0x108c [] ? get_parent_ip+0x11/0x42 [] ? pollwake+0x4d/0x51 [] ? default_wake_function+0x0/0xf [] ? get_parent_ip+0x11/0x42 [] ? __wake_up_common+0x45/0x77 [] ? _raw_spin_unlock_irqrestore+0x1d/0x32 [] ? __wake_up_sync_key+0x4a/0x53 [] ? sock_def_readable+0x71/0x75 [] do_ip_setsockopt+0x9d/0xb55 [] ? unix_seqpacket_sendmsg+0x3f/0x41 [] ? sock_sendmsg+0x6d/0x86 [] ? sockfd_lookup_light+0x12/0x5d [] ? SyS_sendto+0xf3/0x11b [] ? new_sync_read+0x82/0xaa [] compat_ip_setsockopt+0x3b/0x99 [] compat_raw_setsockopt+0x11/0x32 [] compat_sock_common_setsockopt+0x18/0x1f [] compat_SyS_setsockopt+0x1a9/0x1cf [] compat_SyS_socketcall+0x180/0x1e3 [] cstar_dispatch+0x7/0x1e Signed-off-by: Ani Sinha Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 866ee89f5254..8e8203d5c520 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1682,8 +1682,8 @@ static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); + IP_INC_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -1745,7 +1745,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * to blackhole. */ - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } -- cgit v1.2.3 From ec13ad1d705c9990d55ed8ab21946cef323d30c7 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 2 Nov 2015 01:24:38 +0100 Subject: ipv6: fix crash on ICMPv6 redirects with prohibited/blackholed source There are other error values besides ip6_null_entry that can be returned by ip6_route_redirect(): fib6_rule_action() can also result in ip6_blk_hole_entry and ip6_prohibit_entry if such ip rules are installed. Only checking for ip6_null_entry in rt6_do_redirect() causes ip6_ins_rt() to be called with rt->rt6i_table == NULL in these cases, making the kernel crash. Signed-off-by: Matthias Schiffer Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 946880ad48ac..cc88a73b9ce0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2099,7 +2099,6 @@ static int ip6_route_del(struct fib6_config *cfg) static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; struct rt6_info *rt, *nrt = NULL; struct ndisc_options ndopts; @@ -2160,7 +2159,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu } rt = (struct rt6_info *) dst; - if (rt == net->ipv6.ip6_null_entry) { + if (rt->rt6i_flags & RTF_REJECT) { net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); return; } -- cgit v1.2.3 From 4ece9009774596ee3df0acba65a324b7ea79387c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Nov 2015 17:08:19 -0800 Subject: sit: fix sit0 percpu double allocations sit0 device allocates its percpu storage twice : - One time in ipip6_tunnel_init() - One time in ipip6_fb_tunnel_init() Thus we leak 48 bytes per possible cpu per network namespace dismantle. ipip6_fb_tunnel_init() can be much simpler and does not return an error, and should be called after register_netdev() Note that ipip6_tunnel_clone_6rd() also needs to be called after register_netdev() (calling ipip6_tunnel_init()) Fixes: ebe084aafb7e ("sit: Use ipip6_tunnel_init as the ndo_init function.") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/sit.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 94428fd85b2f..dcccae86190f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1394,34 +1394,20 @@ static int ipip6_tunnel_init(struct net_device *dev) return 0; } -static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) +static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); - tunnel->dev = dev; - tunnel->net = dev_net(dev); - iph->version = 4; iph->protocol = IPPROTO_IPV6; iph->ihl = 5; iph->ttl = 64; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - - tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); - if (!tunnel->dst_cache) { - free_percpu(dev->tstats); - return -ENOMEM; - } - dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); - return 0; } static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1831,23 +1817,19 @@ static int __net_init sit_init_net(struct net *net) */ sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; - err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); - if (err) - goto err_dev_free; - - ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); err = register_netdev(sitn->fb_tunnel_dev); if (err) goto err_reg_dev; + ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); + ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); + t = netdev_priv(sitn->fb_tunnel_dev); strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: - dev_put(sitn->fb_tunnel_dev); -err_dev_free: ipip6_dev_free(sitn->fb_tunnel_dev); err_alloc_dev: return err; -- cgit v1.2.3 From ebac62fe3d24c0ce22dd83afa7b07d1a2aaef44d Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Tue, 3 Nov 2015 08:51:07 +0100 Subject: ipv6: fix tunnel error handling Both tunnel6_protocol and tunnel46_protocol share the same error handler, tunnel6_err(), which traverses through tunnel6_handlers list. For ipip6 tunnels, we need to traverse tunnel46_handlers as we do e.g. in tunnel46_rcv(). Current code can generate an ICMPv6 error message with an IPv4 packet embedded in it. Fixes: 73d605d1abbd ("[IPSEC]: changing API of xfrm6_tunnel_register") Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv6/tunnel6.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 3c758007b327..dae25cad05cd 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -144,6 +144,16 @@ static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, break; } +static void tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_tunnel *handler; + + for_each_tunnel_rcu(tunnel46_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + static const struct inet6_protocol tunnel6_protocol = { .handler = tunnel6_rcv, .err_handler = tunnel6_err, @@ -152,7 +162,7 @@ static const struct inet6_protocol tunnel6_protocol = { static const struct inet6_protocol tunnel46_protocol = { .handler = tunnel46_rcv, - .err_handler = tunnel6_err, + .err_handler = tunnel46_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -- cgit v1.2.3