From b0ce3508b25ea6fa10ae3ca254de1d695b521702 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 May 2013 07:34:53 +0000 Subject: bonding: allow TSO being set on bonding master MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some situations, we need to disable TSO on bonding slaves. bonding device automatically unset TSO in bond_fix_features(), and performance is not good because : 1) We consume more cpu cycles. 2) GSO segmentation has some bugs leading to out of order TCP packets if this segmentation is done before virtual device. This particular problem will be addressed in a separate patch. This patch allows TSO being set/unset on the bonding master, so that GSO segmentation is done after bonding layer. Signed-off-by: Eric Dumazet Cc: Michał Mirosław Cc: Jay Vosburgh Cc: Andy Gospodarek Cc: Maciej Żenczykowski Cc: Tom Herbert Cc: Neal Cardwell Cc: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a94a5a0ab122..60584b185a0c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2733,6 +2733,17 @@ static inline netdev_features_t netdev_get_wanted_features( } netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask); + +/* Allow TSO being used on stacked device : + * Performing the GSO segmentation before last device + * is a performance improvement. + */ +static inline netdev_features_t netdev_add_tso_features(netdev_features_t features, + netdev_features_t mask) +{ + return netdev_increment_features(features, NETIF_F_ALL_TSO, mask); +} + int __netdev_update_features(struct net_device *dev); void netdev_update_features(struct net_device *dev); void netdev_change_features(struct net_device *dev); -- cgit v1.2.3 From 5dbe7c178d3f0a4634f088d9e729f1909b9ddcd1 Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Wed, 26 Jun 2013 17:23:42 +0200 Subject: net: fix kernel deadlock with interface rename and netdev name retrieval. When the kernel (compiled with CONFIG_PREEMPT=n) is performing the rename of a network interface, it can end up waiting for a workqueue to complete. If userland is able to invoke a SIOCGIFNAME ioctl or a SO_BINDTODEVICE getsockopt in between, the kernel will deadlock due to the fact that read_secklock_begin() will spin forever waiting for the writer process (the one doing the interface rename) to update the devnet_rename_seq sequence. This patch fixes the problem by adding a helper (netdev_get_name()) and using it in the code handling the SIOCGIFNAME ioctl and SO_BINDTODEVICE setsockopt. The netdev_get_name() helper uses raw_seqcount_begin() to avoid spinning forever, waiting for devnet_rename_seq->sequence to become even. cond_resched() is used in the contended case, before retrying the access to give the writer process a chance to finish. The use of raw_seqcount_begin() will incur some unneeded work in the reader process in the contended case, but this is better than deadlocking the system. Signed-off-by: Nicolas Schichan Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 34 ++++++++++++++++++++++++++++++++++ net/core/dev_ioctl.c | 19 ++++--------------- net/core/sock.c | 17 ++--------------- 4 files changed, 41 insertions(+), 30 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 60584b185a0c..96e4c21e15e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1695,6 +1695,7 @@ extern int init_dummy_netdev(struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); +extern int netdev_get_name(struct net *net, char *name, int ifindex); extern int dev_restart(struct net_device *dev); #ifdef CONFIG_NETPOLL_TRAP extern int netpoll_trap(void); diff --git a/net/core/dev.c b/net/core/dev.c index fc1e289397f5..faebb398fb46 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -791,6 +791,40 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) } EXPORT_SYMBOL(dev_get_by_index); +/** + * netdev_get_name - get a netdevice name, knowing its ifindex. + * @net: network namespace + * @name: a pointer to the buffer where the name will be stored. + * @ifindex: the ifindex of the interface to get the name from. + * + * The use of raw_seqcount_begin() and cond_resched() before + * retrying is required as we want to give the writers a chance + * to complete when CONFIG_PREEMPT is not set. + */ +int netdev_get_name(struct net *net, char *name, int ifindex) +{ + struct net_device *dev; + unsigned int seq; + +retry: + seq = raw_seqcount_begin(&devnet_rename_seq); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + + strcpy(name, dev->name); + rcu_read_unlock(); + if (read_seqcount_retry(&devnet_rename_seq, seq)) { + cond_resched(); + goto retry; + } + + return 0; +} + /** * dev_getbyhwaddr_rcu - find a device by its hardware address * @net: the applicable net namespace diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 6cc0481faade..5b7d0e1d0664 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -19,9 +19,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) { - struct net_device *dev; struct ifreq ifr; - unsigned seq; + int error; /* * Fetch the caller's info block. @@ -30,19 +29,9 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; -retry: - seq = read_seqcount_begin(&devnet_rename_seq); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); - if (!dev) { - rcu_read_unlock(); - return -ENODEV; - } - - strcpy(ifr.ifr_name, dev->name); - rcu_read_unlock(); - if (read_seqcount_retry(&devnet_rename_seq, seq)) - goto retry; + error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex); + if (error) + return error; if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; diff --git a/net/core/sock.c b/net/core/sock.c index 88868a9d21da..d6d024cfaaaf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -571,9 +571,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES struct net *net = sock_net(sk); - struct net_device *dev; char devname[IFNAMSIZ]; - unsigned seq; if (sk->sk_bound_dev_if == 0) { len = 0; @@ -584,20 +582,9 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, if (len < IFNAMSIZ) goto out; -retry: - seq = read_seqcount_begin(&devnet_rename_seq); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); - ret = -ENODEV; - if (!dev) { - rcu_read_unlock(); + ret = netdev_get_name(net, devname, sk->sk_bound_dev_if); + if (ret) goto out; - } - - strcpy(devname, dev->name); - rcu_read_unlock(); - if (read_seqcount_retry(&devnet_rename_seq, seq)) - goto retry; len = strlen(devname) + 1; -- cgit v1.2.3