path: root/drivers/net/tun.c
author     Linus Torvalds <torvalds@linux-foundation.org>   2018-06-06 18:39:49 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-06-06 18:39:49 -0700
commit     1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21 (patch)
tree       dcc97181d4d187252e0cc8fdf29d9b365fa3ffd0 /drivers/net/tun.c
parent     285767604576148fc1be7fcd112e4a90eb0d6ad2 (diff)
parent     7170e6045a6a8b33f4fa5753589dc77b16198e2d (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Add Maglev hashing scheduler to IPVS, from Inju Song.
 2) Lots of new TC subsystem tests from Roman Mashak.
 3) Add TCP zero copy receive and fix delayed acks and autotuning with SO_RCVLOWAT, from Eric Dumazet.
 4) Add XDP_REDIRECT support to mlx5 driver, from Jesper Dangaard Brouer.
 5) Add ttl inherit support to vxlan, from Hangbin Liu.
 6) Properly separate ipv6 routes into their logically independent components: fib6_info for the routing table, and fib6_nh for sets of nexthops, which can thus be shared. From David Ahern.
 7) Add bpf_xdp_adjust_tail helper, which can be used to generate ICMP messages from XDP programs. From Nikita V. Shirokov.
 8) Lots of long overdue cleanups to the r8169 driver, from Heiner Kallweit.
 9) Add BTF ("BPF Type Format"), from Martin KaFai Lau.
10) Add traffic condition monitoring to iwlwifi, from Luca Coelho.
11) Plumb extack down into fib_rules, from Roopa Prabhu.
12) Add Flower classifier offload support to igb, from Vinicius Costa Gomes.
13) Add UDP GSO support, from Willem de Bruijn. (A brief userspace sketch follows the shortlog below.)
14) Add documentation for eBPF helpers, from Quentin Monnet.
15) Add TLS tx offload to mlx5, from Ilya Lesokhin.
16) Allow applications to be given the number of bytes available to read on a socket via a control message returned from recvmsg(), from Soheil Hassas Yeganeh.
17) Add x86_32 eBPF JIT compiler, from Wang YanQing.
18) Add AF_XDP sockets, with zerocopy support infrastructure as well. From Björn Töpel.
19) Remove indirect load support from all of the BPF JITs and handle these operations in the verifier by translating them into native BPF instead. From Daniel Borkmann.
20) Add GRO support to ipv6 gre tunnels, from Eran Ben Elisha.
21) Allow XDP programs to do lookups in the main kernel routing tables for forwarding. From David Ahern.
22) Allow drivers to store hardware state into an ELF section of kernel dump vmcore files, and use it in cxgb4. From Rahul Lakkireddy.
23) Various RACK and loss detection improvements in TCP, from Yuchung Cheng.
24) Add TCP SACK compression, from Eric Dumazet.
25) Add User Mode Helper support and basic bpfilter infrastructure, from Alexei Starovoitov.
26) Support ports and protocol values in RTM_GETROUTE, from Roopa Prabhu.
27) Support bulking in ->ndo_xdp_xmit() API, from Jesper Dangaard Brouer.
28) Add lots of forwarding selftests, from Petr Machata.
29) Add generic network device failover driver, from Sridhar Samudrala.

* ra.kernel.org:/pub/scm/linux/kernel/git/davem/net-next: (1959 commits)
  strparser: Add __strp_unpause and use it in ktls.
  rxrpc: Fix terminal retransmission connection ID to include the channel
  net: hns3: Optimize PF CMDQ interrupt switching process
  net: hns3: Fix for VF mailbox receiving unknown message
  net: hns3: Fix for VF mailbox cannot receiving PF response
  bnx2x: use the right constant
  Revert "net: sched: cls: Fix offloading when ingress dev is vxlan"
  net: dsa: b53: Fix for brcm tag issue in Cygnus SoC
  enic: fix UDP rss bits
  netdev-FAQ: clarify DaveM's position for stable backports
  rtnetlink: validate attributes in do_setlink()
  mlxsw: Add extack messages for port_{un, }split failures
  netdevsim: Add extack error message for devlink reload
  devlink: Add extack to reload and port_{un, }split operations
  net: metrics: add proper netlink validation
  ipmr: fix error path when ipmr_new_table fails
  ip6mr: only set ip6mr_table from setsockopt when ip6mr_new_table succeeds
  net: hns3: remove unused hclgevf_cfg_func_mta_filter
  netfilter: provide udp*_lib_lookup for nf_tproxy
  qed*: Utilize FW 8.37.2.0
  ...
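As an illustration of item 13 above, the sketch below shows how a userspace sender might opt into UDP GSO via the UDP_SEGMENT socket option. It is not part of this commit or of the tun.c diff on this page; the #ifndef fallback values and the 1400-byte segment size are assumptions made for the example.

/* Hedged sketch: hand the kernel one large buffer and let it segment the
 * payload into gso_size-byte UDP datagrams (UDP GSO). The constants carry
 * #ifndef fallbacks because pre-4.18 userspace headers do not define them. */
#include <netinet/in.h>
#include <stddef.h>
#include <sys/socket.h>

#ifndef SOL_UDP
#define SOL_UDP 17		/* assumed fallback; same value as IPPROTO_UDP */
#endif
#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103		/* assumed fallback for older uapi headers */
#endif

/* Queue 'len' bytes as a train of 1400-byte UDP datagrams with one syscall. */
static int send_udp_gso(int fd, const struct sockaddr_in *dst,
			const void *buf, size_t len)
{
	int gso_size = 1400;	/* illustrative segment size */

	if (setsockopt(fd, SOL_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size)))
		return -1;

	if (sendto(fd, buf, len, 0,
		   (const struct sockaddr *)dst, sizeof(*dst)) < 0)
		return -1;
	return 0;
}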
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r--   drivers/net/tun.c   182
1 file changed, 104 insertions, 78 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 23e9eb66197f..85e14adf5207 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -70,6 +70,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
+#include <net/xdp.h>
#include <linux/seq_file.h>
#include <linux/uio.h>
#include <linux/skb_array.h>
@@ -80,6 +81,9 @@
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
+static void tun_default_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd);
+
/* Uncomment to enable debugging */
/* #define TUN_DEBUG 1 */
@@ -241,6 +245,7 @@ struct tun_struct {
struct bpf_prog __rcu *xdp_prog;
struct tun_prog __rcu *steering_prog;
struct tun_prog __rcu *filter_prog;
+ struct ethtool_link_ksettings link_ksettings;
};
struct veth {
@@ -248,11 +253,11 @@ struct veth {
__be16 h_vlan_TCI;
};
-bool tun_is_xdp_buff(void *ptr)
+bool tun_is_xdp_frame(void *ptr)
{
return (unsigned long)ptr & TUN_XDP_FLAG;
}
-EXPORT_SYMBOL(tun_is_xdp_buff);
+EXPORT_SYMBOL(tun_is_xdp_frame);
void *tun_xdp_to_ptr(void *ptr)
{
@@ -525,11 +530,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
rcu_read_lock();
- /* We may get a very small possibility of OOO during switching, not
- * worth to optimize.*/
- if (tun->numqueues == 1 || tfile->detached)
- goto unlock;
-
e = tun_flow_find(head, rxhash);
if (likely(e)) {
/* TODO: keep queueing to old queue until it's empty? */
@@ -548,7 +548,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
spin_unlock_bh(&tun->lock);
}
-unlock:
rcu_read_unlock();
}
@@ -660,10 +659,10 @@ void tun_ptr_free(void *ptr)
{
if (!ptr)
return;
- if (tun_is_xdp_buff(ptr)) {
- struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ if (tun_is_xdp_frame(ptr)) {
+ struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- put_page(virt_to_head_page(xdp->data));
+ xdp_return_frame(xdpf);
} else {
__skb_array_destroy_skb(ptr);
}
@@ -848,6 +847,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
tun->dev, tfile->queue_index);
if (err < 0)
goto out;
+ err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err < 0) {
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
+ goto out;
+ }
err = 0;
}
@@ -1284,65 +1289,69 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64,
};
-static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+static void __tun_xdp_flush_tfile(struct tun_file *tfile)
+{
+ /* Notify and wake up reader process */
+ if (tfile->flags & TUN_FASYNC)
+ kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+ tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+}
+
+static int tun_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags)
{
struct tun_struct *tun = netdev_priv(dev);
- struct xdp_buff *buff = xdp->data_hard_start;
- int headroom = xdp->data - xdp->data_hard_start;
struct tun_file *tfile;
u32 numqueues;
- int ret = 0;
-
- /* Assure headroom is available and buff is properly aligned */
- if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
- return -ENOSPC;
+ int drops = 0;
+ int cnt = n;
+ int i;
- *buff = *xdp;
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues);
if (!numqueues) {
- ret = -ENOSPC;
- goto out;
+ rcu_read_unlock();
+ return -ENXIO; /* Caller will free/return all frames */
}
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]);
- /* Encode the XDP flag into lowest bit for consumer to differ
- * XDP buffer from sk_buff.
- */
- if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
- this_cpu_inc(tun->pcpu_stats->tx_dropped);
- ret = -ENOSPC;
+
+ spin_lock(&tfile->tx_ring.producer_lock);
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdp = frames[i];
+ /* Encode the XDP flag into lowest bit for consumer to differ
+ * XDP buffer from sk_buff.
+ */
+ void *frame = tun_xdp_to_ptr(xdp);
+
+ if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
+ this_cpu_inc(tun->pcpu_stats->tx_dropped);
+ xdp_return_frame_rx_napi(xdp);
+ drops++;
+ }
}
+ spin_unlock(&tfile->tx_ring.producer_lock);
+
+ if (flags & XDP_XMIT_FLUSH)
+ __tun_xdp_flush_tfile(tfile);
-out:
rcu_read_unlock();
- return ret;
+ return cnt - drops;
}
-static void tun_xdp_flush(struct net_device *dev)
+static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
{
- struct tun_struct *tun = netdev_priv(dev);
- struct tun_file *tfile;
- u32 numqueues;
+ struct xdp_frame *frame = convert_to_xdp_frame(xdp);
- rcu_read_lock();
+ if (unlikely(!frame))
+ return -EOVERFLOW;
- numqueues = READ_ONCE(tun->numqueues);
- if (!numqueues)
- goto out;
-
- tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
- numqueues]);
- /* Notify and wake up reader process */
- if (tfile->flags & TUN_FASYNC)
- kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
- tfile->socket.sk->sk_data_ready(tfile->socket.sk);
-
-out:
- rcu_read_unlock();
+ return tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH);
}
static const struct net_device_ops tap_netdev_ops = {
@@ -1363,7 +1372,6 @@ static const struct net_device_ops tap_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64,
.ndo_bpf = tun_xdp,
.ndo_xdp_xmit = tun_xdp_xmit,
- .ndo_xdp_flush = tun_xdp_flush,
};
static void tun_flow_init(struct tun_struct *tun)
@@ -1680,14 +1688,14 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
case XDP_TX:
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
- if (tun_xdp_xmit(tun->dev, &xdp))
+ if (tun_xdp_tx(tun->dev, &xdp))
goto err_redirect;
- tun_xdp_flush(tun->dev);
rcu_read_unlock();
local_bh_enable();
return NULL;
case XDP_PASS:
delta = orig_data - xdp.data;
+ len = xdp.data_end - xdp.data;
break;
default:
bpf_warn_invalid_xdp_action(act);
@@ -1708,7 +1716,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
}
skb_reserve(skb, pad - delta);
- skb_put(skb, len + delta);
+ skb_put(skb, len);
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
@@ -1932,10 +1940,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
local_bh_enable();
}
- rcu_read_lock();
- if (!rcu_dereference(tun->steering_prog))
+ /* Compute the costly rx hash only if needed for flow updates.
+ * We may get a very small possibility of OOO during switching, not
+ * worth to optimize.
+ */
+ if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 &&
+ !tfile->detached)
rxhash = __skb_get_hash_symmetric(skb);
- rcu_read_unlock();
if (frags) {
/* Exercise flow dissector code path. */
@@ -2004,11 +2015,11 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
static ssize_t tun_put_user_xdp(struct tun_struct *tun,
struct tun_file *tfile,
- struct xdp_buff *xdp,
+ struct xdp_frame *xdp_frame,
struct iov_iter *iter)
{
int vnet_hdr_sz = 0;
- size_t size = xdp->data_end - xdp->data;
+ size_t size = xdp_frame->len;
struct tun_pcpu_stats *stats;
size_t ret;
@@ -2024,7 +2035,7 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
}
- ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
+ ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
stats = get_cpu_ptr(tun->pcpu_stats);
u64_stats_update_begin(&stats->syncp);
@@ -2192,11 +2203,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
return err;
}
- if (tun_is_xdp_buff(ptr)) {
- struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ if (tun_is_xdp_frame(ptr)) {
+ struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- ret = tun_put_user_xdp(tun, tfile, xdp, to);
- put_page(virt_to_head_page(xdp->data));
+ ret = tun_put_user_xdp(tun, tfile, xdpf, to);
+ xdp_return_frame(xdpf);
} else {
struct sk_buff *skb = ptr;
@@ -2278,6 +2289,7 @@ static void tun_setup(struct net_device *dev)
tun->owner = INVALID_UID;
tun->group = INVALID_GID;
+ tun_default_link_ksettings(dev, &tun->link_ksettings);
dev->ethtool_ops = &tun_ethtool_ops;
dev->needs_free_netdev = true;
@@ -2435,10 +2447,10 @@ out_free:
static int tun_ptr_peek_len(void *ptr)
{
if (likely(ptr)) {
- if (tun_is_xdp_buff(ptr)) {
- struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ if (tun_is_xdp_frame(ptr)) {
+ struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- return xdp->data_end - xdp->data;
+ return xdpf->len;
}
return __skb_array_len_with_tag(ptr);
} else {
@@ -2852,10 +2864,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
unsigned long arg, int ifreq_len)
{
struct tun_file *tfile = file->private_data;
+ struct net *net = sock_net(&tfile->sk);
struct tun_struct *tun;
void __user* argp = (void __user*)arg;
struct ifreq ifr;
- struct net *net;
kuid_t owner;
kgid_t group;
int sndbuf;
@@ -2879,14 +2891,18 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
*/
return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
(unsigned int __user*)argp);
- } else if (cmd == TUNSETQUEUE)
+ } else if (cmd == TUNSETQUEUE) {
return tun_set_queue(file, &ifr);
+ } else if (cmd == SIOCGSKNS) {
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ return open_related_ns(&net->ns, get_net_ns);
+ }
ret = 0;
rtnl_lock();
tun = tun_get(tfile);
- net = sock_net(&tfile->sk);
if (cmd == TUNSETIFF) {
ret = -EEXIST;
if (tun)
@@ -2916,14 +2932,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
tfile->ifindex = ifindex;
goto unlock;
}
- if (cmd == SIOCGSKNS) {
- ret = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto unlock;
-
- ret = open_related_ns(&net->ns, get_net_ns);
- goto unlock;
- }
ret = -EBADFD;
if (!tun)
@@ -3313,8 +3321,8 @@ static struct miscdevice tun_miscdev = {
/* ethtool interface */
-static int tun_get_link_ksettings(struct net_device *dev,
- struct ethtool_link_ksettings *cmd)
+static void tun_default_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
ethtool_link_ksettings_zero_link_mode(cmd, supported);
ethtool_link_ksettings_zero_link_mode(cmd, advertising);
@@ -3323,6 +3331,23 @@ static int tun_get_link_ksettings(struct net_device *dev,
cmd->base.port = PORT_TP;
cmd->base.phy_address = 0;
cmd->base.autoneg = AUTONEG_DISABLE;
+}
+
+static int tun_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ memcpy(cmd, &tun->link_ksettings, sizeof(*cmd));
+ return 0;
+}
+
+static int tun_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ memcpy(&tun->link_ksettings, cmd, sizeof(*cmd));
return 0;
}
@@ -3393,6 +3418,7 @@ static const struct ethtool_ops tun_ethtool_ops = {
.get_coalesce = tun_get_coalesce,
.set_coalesce = tun_set_coalesce,
.get_link_ksettings = tun_get_link_ksettings,
+ .set_link_ksettings = tun_set_link_ksettings,
};
static int tun_queue_resize(struct tun_struct *tun)
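For context on the new bulk transmit path, here is a minimal, hedged sketch of how a caller might drive the reworked ->ndo_xdp_xmit() signature that tun_xdp_xmit() above implements. The helper name is hypothetical, and the ownership rules are taken from this diff's own comments ("Caller will free/return all frames") rather than from code in the commit.

/* Hypothetical caller of the bulk ->ndo_xdp_xmit() API shown above.
 * A non-negative return is the number of frames the driver accepted; any
 * frames it dropped were already freed by the driver (see tun_xdp_xmit()).
 * A negative errno means the caller still owns every frame. */
#include <linux/netdevice.h>
#include <net/xdp.h>

static int example_bulk_xdp_xmit(struct net_device *dev,
				 struct xdp_frame **frames, int n)
{
	int sent, i;

	if (!dev->netdev_ops->ndo_xdp_xmit)
		return -EOPNOTSUPP;

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, n, frames, XDP_XMIT_FLUSH);
	if (sent < 0) {
		/* Whole batch rejected (e.g. -ENXIO above): free it here. */
		for (i = 0; i < n; i++)
			xdp_return_frame_rx_napi(frames[i]);
	}

	return sent;
}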