From 09605cc12c07830659a19b266503795c511a2060 Mon Sep 17 00:00:00 2001 From: Bastian Stender Date: Fri, 13 Nov 2015 11:40:34 +0100 Subject: net ipv4: use preferred log methods Replace printk calls with preferred unconditional log method calls to keep kernel messages clean. Added newline to "too small MTU" message. Signed-off-by: Bastian Stender Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 73 ++++++++++---------------- net/ipv4/netfilter/arp_tables.c | 6 +-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_nat_snmp_basic.c | 22 ++++---- 4 files changed, 44 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 0bc7412d9e14..e86e8a9738ea 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -65,15 +65,6 @@ #include #include -/* Define this to allow debugging output */ -#undef IPCONFIG_DEBUG - -#ifdef IPCONFIG_DEBUG -#define DBG(x) printk x -#else -#define DBG(x) do { } while(0) -#endif - #if defined(CONFIG_IP_PNP_DHCP) #define IPCONFIG_DHCP #endif @@ -227,7 +218,7 @@ static int __init ic_open_devs(void) if (dev->mtu >= 364) able |= IC_BOOTP; else - pr_warn("DHCP/BOOTP: Ignoring device %s, MTU %d too small", + pr_warn("DHCP/BOOTP: Ignoring device %s, MTU %d too small\n", dev->name, dev->mtu); if (!(dev->flags & IFF_NOARP)) able |= IC_RARP; @@ -254,8 +245,8 @@ static int __init ic_open_devs(void) else d->xid = 0; ic_proto_have_if |= able; - DBG(("IP-Config: %s UP (able=%d, xid=%08x)\n", - dev->name, able, d->xid)); + pr_debug("IP-Config: %s UP (able=%d, xid=%08x)\n", + dev->name, able, d->xid); } } @@ -311,7 +302,7 @@ static void __init ic_close_devs(void) next = d->next; dev = d->dev; if (dev != ic_dev && !netdev_uses_dsa(dev)) { - DBG(("IP-Config: Downing %s\n", dev->name)); + pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } kfree(d); @@ -464,7 +455,8 @@ static int __init ic_defaults(void) &ic_myaddr); return -1; } - 
printk("IP-Config: Guessing netmask %pI4\n", &ic_netmask); + pr_notice("IP-Config: Guessing netmask %pI4\n", + &ic_netmask); } return 0; @@ -675,9 +667,7 @@ ic_dhcp_init_options(u8 *options) u8 *e = options; int len; -#ifdef IPCONFIG_DEBUG - printk("DHCP: Sending message type %d\n", mt); -#endif + pr_debug("DHCP: Sending message type %d\n", mt); memcpy(e, ic_bootp_cookie, 4); /* RFC1048 Magic Cookie */ e += 4; @@ -847,7 +837,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d else if (dev->type == ARPHRD_FDDI) b->htype = ARPHRD_ETHER; else { - printk("Unknown ARP type 0x%04x for device %s\n", dev->type, dev->name); + pr_warn("Unknown ARP type 0x%04x for device %s\n", dev->type, + dev->name); b->htype = dev->type; /* can cause undefined behavior */ } @@ -904,14 +895,12 @@ static void __init ic_do_bootp_ext(u8 *ext) int i; __be16 mtu; -#ifdef IPCONFIG_DEBUG u8 *c; - printk("DHCP/BOOTP: Got extension %d:",*ext); + pr_debug("DHCP/BOOTP: Got extension %d:", *ext); for (c=ext+2; cyour_ip; ic_servaddr = server_id; -#ifdef IPCONFIG_DEBUG - printk("DHCP: Offered address %pI4 by server %pI4\n", - &ic_myaddr, &b->iph.saddr); -#endif + pr_debug("DHCP: Offered address %pI4 by server %pI4\n", + &ic_myaddr, &b->iph.saddr); /* The DHCP indicated server address takes * precedence over the bootp header one if * they are different. @@ -1254,13 +1239,13 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; - pr_cont(","); + pr_notice(","); continue; } #endif /* IPCONFIG_DHCP */ if (ic_got_reply) { - pr_cont(" OK\n"); + pr_notice(" OK\n"); break; } @@ -1268,7 +1253,7 @@ static int __init ic_dynamic(void) continue; if (! 
--retries) { - pr_cont(" timed out!\n"); + pr_notice(" timed out!\n"); break; } @@ -1278,7 +1263,7 @@ static int __init ic_dynamic(void) if (timeout > CONF_TIMEOUT_MAX) timeout = CONF_TIMEOUT_MAX; - pr_cont("."); + pr_notice("."); } #ifdef IPCONFIG_BOOTP @@ -1295,11 +1280,11 @@ static int __init ic_dynamic(void) return -1; } - printk("IP-Config: Got %s answer from %pI4, ", + pr_info("IP-Config: Got %s answer from %pI4, ", ((ic_got_reply & IC_RARP) ? "RARP" - : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), - &ic_addrservaddr); - pr_cont("my address is %pI4\n", &ic_myaddr); + : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), + &ic_addrservaddr); + pr_info("my address is %pI4\n", &ic_myaddr); return 0; } @@ -1426,7 +1411,7 @@ static int __init ip_auto_config(void) if (!ic_enable) return 0; - DBG(("IP-Config: Entered.\n")); + pr_debug("IP-Config: Entered.\n"); #ifdef IPCONFIG_DYNAMIC try_try_again: #endif @@ -1548,8 +1533,8 @@ static int __init ip_auto_config(void) } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); - pr_cont("\n"); + pr_info(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_info("\n"); #endif /* !SILENT */ return 0; @@ -1585,7 +1570,7 @@ static int __init ic_proto_name(char *name) return 1; *v = 0; if (kstrtou8(client_id, 0, dhcp_client_identifier)) - DBG("DHCP: Invalid client identifier type\n"); + pr_debug("DHCP: Invalid client identifier type\n"); strncpy(dhcp_client_identifier + 1, v + 1, 251); *v = ','; } @@ -1644,7 +1629,7 @@ static int __init ip_auto_config_setup(char *addrs) if ((cp = strchr(ip, ':'))) *cp++ = '\0'; if (strlen(ip) > 0) { - DBG(("IP-Config: Parameter #%d: `%s'\n", num, ip)); + pr_debug("IP-Config: Parameter #%d: `%s'\n", num, ip); switch (num) { case 0: if ((ic_myaddr = in_aton(ip)) == ANY) @@ -1716,7 +1701,7 @@ static int __init vendor_class_identifier_setup(char *addrs) if (strlcpy(vendor_class_identifier, addrs, 
sizeof(vendor_class_identifier)) >= sizeof(vendor_class_identifier)) - pr_warn("DHCP: vendorclass too long, truncated to \"%s\"", + pr_warn("DHCP: vendorclass too long, truncated to \"%s\"\n", vendor_class_identifier); return 1; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 11dccba474b7..b488cac9c5ca 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -38,13 +38,13 @@ MODULE_DESCRIPTION("arptables core"); /*#define DEBUG_ARP_TABLES_USER*/ #ifdef DEBUG_ARP_TABLES -#define dprintf(format, args...) printk(format , ## args) +#define dprintf(format, args...) pr_debug(format, ## args) #else #define dprintf(format, args...) #endif #ifdef DEBUG_ARP_TABLES_USER -#define duprintf(format, args...) printk(format , ## args) +#define duprintf(format, args...) pr_debug(format, ## args) #else #define duprintf(format, args...) #endif @@ -1905,7 +1905,7 @@ static int __init arp_tables_init(void) if (ret < 0) goto err4; - printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n"); + pr_info("arp_tables: (C) 2002 David S. 
Miller\n"); return 0; err4: diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 461ca926fd39..e3c46e8e2762 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -451,7 +451,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { - printk(KERN_ERR "Unable to register netfilter socket option\n"); + pr_err("Unable to register netfilter socket option\n"); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ddb894ac1458..c9b52c361da2 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1048,7 +1048,7 @@ static int snmp_parse_mangle(unsigned char *msg, if (!asn1_uint_decode (&ctx, end, &vers)) return 0; if (debug > 1) - printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1); + pr_debug("bsalg: snmp version: %u\n", vers + 1); if (vers > 1) return 1; @@ -1064,10 +1064,10 @@ static int snmp_parse_mangle(unsigned char *msg, if (debug > 1) { unsigned int i; - printk(KERN_DEBUG "bsalg: community: "); + pr_debug("bsalg: community: "); for (i = 0; i < comm.len; i++) - printk("%c", comm.data[i]); - printk("\n"); + pr_cont("%c", comm.data[i]); + pr_cont("\n"); } kfree(comm.data); @@ -1091,9 +1091,9 @@ static int snmp_parse_mangle(unsigned char *msg, }; if (pdutype > SNMP_PDU_TRAP2) - printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype); + pr_debug("bsalg: bad pdu type %u\n", pdutype); else - printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]); + pr_debug("bsalg: pdu: %s\n", pdus[pdutype]); } if (pdutype != SNMP_PDU_RESPONSE && pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2) @@ -1119,7 +1119,7 @@ static int snmp_parse_mangle(unsigned char *msg, return 0; if (debug > 1) - printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u " + pr_debug("bsalg: request: id=0x%lx 
error_status=%u " "error_index=%u\n", req.id, req.error_status, req.error_index); } @@ -1145,13 +1145,13 @@ static int snmp_parse_mangle(unsigned char *msg, } if (debug > 1) { - printk(KERN_DEBUG "bsalg: object: "); + pr_debug("bsalg: object: "); for (i = 0; i < obj->id_len; i++) { if (i > 0) - printk("."); - printk("%lu", obj->id[i]); + pr_cont("."); + pr_cont("%lu", obj->id[i]); } - printk(": type=%u\n", obj->type); + pr_cont(": type=%u\n", obj->type); } -- cgit v1.2.3 From 52bd2d62ce6758d811edcbd2256eb9ea7f6a56cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:50 -0800 Subject: net: better skb->sender_cpu and skb->napi_id cohabitation skb->sender_cpu and skb->napi_id share a common storage, and we had various bugs about this. We had to call skb_sender_cpu_clear() in some places to not leave a prior skb->napi_id and fool netdev_pick_tx() As suggested by Alexei, we could split the space so that these errors can not happen. 0 value being reserved as the common (not initialized) value, let's reserve [1 .. NR_CPUS] range for valid sender_cpu, and [NR_CPUS+1 .. ~0U] for valid napi_id. This will allow proper busy polling support over tunnels. Signed-off-by: Eric Dumazet Suggested-by: Alexei Starovoitov Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 3 --- net/core/dev.c | 33 ++++++++++++++++----------------- 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4355129fff91..c9c394bf0771 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1082,9 +1082,6 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) static inline void skb_sender_cpu_clear(struct sk_buff *skb) { -#ifdef CONFIG_XPS - skb->sender_cpu = 0; -#endif } #ifdef NET_SKBUFF_DATA_USES_OFFSET diff --git a/net/core/dev.c b/net/core/dev.c index ae00b894e675..2582c24a75c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -182,7 +182,7 @@ EXPORT_SYMBOL(dev_base_lock); /* protects napi_hash addition/deletion and napi_gen_id */ static DEFINE_SPINLOCK(napi_hash_lock); -static unsigned int napi_gen_id; +static unsigned int napi_gen_id = NR_CPUS; static DEFINE_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; @@ -3021,7 +3021,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, int queue_index = 0; #ifdef CONFIG_XPS - if (skb->sender_cpu == 0) + u32 sender_cpu = skb->sender_cpu - 1; + + if (sender_cpu >= (u32)NR_CPUS) skb->sender_cpu = raw_smp_processor_id() + 1; #endif @@ -4676,25 +4678,22 @@ EXPORT_SYMBOL_GPL(napi_by_id); void napi_hash_add(struct napi_struct *napi) { - if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { + if (test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) + return; - spin_lock(&napi_hash_lock); + spin_lock(&napi_hash_lock); - /* 0 is not a valid id, we also skip an id that is taken - * we expect both events to be extremely rare - */ - napi->napi_id = 0; - while (!napi->napi_id) { - napi->napi_id = ++napi_gen_id; - if (napi_by_id(napi->napi_id)) - napi->napi_id = 0; - } + /* 0..NR_CPUS+1 range is reserved for sender_cpu use */ + do { + if (unlikely(++napi_gen_id < NR_CPUS + 1)) + napi_gen_id = NR_CPUS + 1; + } while 
(napi_by_id(napi_gen_id)); + napi->napi_id = napi_gen_id; - hlist_add_head_rcu(&napi->napi_hash_node, - &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); - spin_unlock(&napi_hash_lock); - } + spin_unlock(&napi_hash_lock); } EXPORT_SYMBOL_GPL(napi_hash_add); -- cgit v1.2.3 From 02d62e86fe892c59a1259d089d4d16ac76977a37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:52 -0800 Subject: net: un-inline sk_busy_loop() There is really little gain from inlining this big function. We'll soon make it even bigger in following patches. This means we no longer need to export napi_by_id() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 --------- include/net/busy_poll.h | 45 +----------------------------------------- net/core/dev.c | 50 +++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bfac1abfc1..2020a89df12b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -460,15 +460,6 @@ static inline void napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } -/** - * napi_by_id - lookup a NAPI by napi_id - * @napi_id: hashed napi_id - * - * lookup @napi_id in napi_hash table - * must be called under rcu_read_lock() - */ -struct napi_struct *napi_by_id(unsigned int napi_id); - /** * napi_hash_add - add a NAPI to global hashtable * @napi: napi context diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 1d67fb6b23a0..2fbeb1313c0f 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -72,50 +72,7 @@ static inline bool busy_loop_timeout(unsigned long end_time) return time_after(now, end_time); } -/* when used in sock_poll() nonblock is known at compile time to be true - * so the loop and end_time will be optimized 
out - */ -static inline bool sk_busy_loop(struct sock *sk, int nonblock) -{ - unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; - const struct net_device_ops *ops; - struct napi_struct *napi; - int rc = false; - - /* - * rcu read lock for napi hash - * bh so we don't race with net_rx_action - */ - rcu_read_lock_bh(); - - napi = napi_by_id(sk->sk_napi_id); - if (!napi) - goto out; - - ops = napi->dev->netdev_ops; - if (!ops->ndo_busy_poll) - goto out; - - do { - rc = ops->ndo_busy_poll(napi); - - if (rc == LL_FLUSH_FAILED) - break; /* permanent failure */ - - if (rc > 0) - /* local bh are disabled so it is ok to use _BH */ - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); - cpu_relax(); - - } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && - !need_resched() && !busy_loop_timeout(end_time)); - - rc = !skb_queue_empty(&sk->sk_receive_queue); -out: - rcu_read_unlock_bh(); - return rc; -} +bool sk_busy_loop(struct sock *sk, int nonblock); /* used in the NIC receive handler to mark the skb */ static inline void skb_mark_napi_id(struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index 2582c24a75c6..74a816b299df 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -96,6 +96,7 @@ #include #include #include +#include #include #include #include @@ -4663,7 +4664,7 @@ void napi_complete_done(struct napi_struct *n, int work_done) EXPORT_SYMBOL(napi_complete_done); /* must be called under rcu_read_lock(), as we dont take a reference */ -struct napi_struct *napi_by_id(unsigned int napi_id) +static struct napi_struct *napi_by_id(unsigned int napi_id) { unsigned int hash = napi_id % HASH_SIZE(napi_hash); struct napi_struct *napi; @@ -4674,7 +4675,52 @@ struct napi_struct *napi_by_id(unsigned int napi_id) return NULL; } -EXPORT_SYMBOL_GPL(napi_by_id); + +#if defined(CONFIG_NET_RX_BUSY_POLL) +bool sk_busy_loop(struct sock *sk, int nonblock) +{ + unsigned long end_time = !nonblock ? 
sk_busy_loop_end_time(sk) : 0; + const struct net_device_ops *ops; + struct napi_struct *napi; + int rc = false; + + /* + * rcu read lock for napi hash + * bh so we don't race with net_rx_action + */ + rcu_read_lock_bh(); + + napi = napi_by_id(sk->sk_napi_id); + if (!napi) + goto out; + + ops = napi->dev->netdev_ops; + if (!ops->ndo_busy_poll) + goto out; + + do { + rc = ops->ndo_busy_poll(napi); + + if (rc == LL_FLUSH_FAILED) + break; /* permanent failure */ + + if (rc > 0) + /* local bh are disabled so it is ok to use _BH */ + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_BUSYPOLLRXPACKETS, rc); + cpu_relax(); + + } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && + !need_resched() && !busy_loop_timeout(end_time)); + + rc = !skb_queue_empty(&sk->sk_receive_queue); +out: + rcu_read_unlock_bh(); + return rc; +} +EXPORT_SYMBOL(sk_busy_loop); + +#endif /* CONFIG_NET_RX_BUSY_POLL */ void napi_hash_add(struct napi_struct *napi) { -- cgit v1.2.3 From 2a028ecb76497d05e5cd4e3e8b09d965cac2e3f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:53 -0800 Subject: net: allow BH servicing in sk_busy_loop() Instead of blocking BH in whole sk_busy_loop(), block them only around ->ndo_busy_poll() calls. This has many benefits. 1) allow tunneled traffic to use busy poll as well as native traffic. Tunnel handlers usually call netif_rx() and depend on net_rx_action() being run (from softirq handler) 2) allow RFS/RPS to be used (sending IPI to other cpus if needed) 3) use the 'lets burn cpu cycles' budget to do useful work (like TX completions, timers, RCU callbacks...) 4) reduce BH latencies, making busy poll a better citizen. Tested: Tested with SIT tunnel lpaa5:~# echo 0 >/proc/sys/net/core/busy_read lpaa5:~# ./netperf -H 2002:af6:786::1 -t TCP_RR MIGRATED TCP REQUEST/RESPONSE TEST from ::0 (::) port 0 AF_INET6 to 2002:af6:786::1 () port 0 AF_INET6 : first burst 0 Local /Remote Socket Size Request Resp. Elapsed Trans. 
Send Recv Size Size Time Rate bytes Bytes bytes bytes secs. per sec 16384 87380 1 1 10.00 37373.93 16384 87380 Now enable busy poll on both hosts lpaa5:~# echo 70 >/proc/sys/net/core/busy_read lpaa6:~# echo 70 >/proc/sys/net/core/busy_read lpaa5:~# ./netperf -H 2002:af6:786::1 -t TCP_RR MIGRATED TCP REQUEST/RESPONSE TEST from ::0 (::) port 0 AF_INET6 to 2002:af6:786::1 () port 0 AF_INET6 : first burst 0 Local /Remote Socket Size Request Resp. Elapsed Trans. Send Recv Size Size Time Rate bytes Bytes bytes bytes secs. per sec 16384 87380 1 1 10.00 58314.77 16384 87380 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 74a816b299df..2002eec2617d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4684,11 +4684,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock) struct napi_struct *napi; int rc = false; - /* - * rcu read lock for napi hash - * bh so we don't race with net_rx_action - */ - rcu_read_lock_bh(); + rcu_read_lock(); napi = napi_by_id(sk->sk_napi_id); if (!napi) @@ -4699,23 +4695,23 @@ bool sk_busy_loop(struct sock *sk, int nonblock) goto out; do { + local_bh_disable(); rc = ops->ndo_busy_poll(napi); + if (rc > 0) + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_BUSYPOLLRXPACKETS, rc); + local_bh_enable(); if (rc == LL_FLUSH_FAILED) break; /* permanent failure */ - if (rc > 0) - /* local bh are disabled so it is ok to use _BH */ - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); cpu_relax(); - } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && !need_resched() && !busy_loop_timeout(end_time)); rc = !skb_queue_empty(&sk->sk_receive_queue); out: - rcu_read_unlock_bh(); + rcu_read_unlock(); return rc; } EXPORT_SYMBOL(sk_busy_loop); -- cgit v1.2.3 From ce6aea93f7510437dde625b77a7a2f4d20b72660 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:54 
-0800 Subject: net: network drivers no longer need to implement ndo_busy_poll() Instead of having to implement complex ndo_busy_poll() method, drivers can simply rely on NAPI poll logic. Busy polling gains are mainly coming from polling itself, not on exact details on how we poll the device. ndo_busy_poll() if implemented can avoid touching napi state, but it adds extra synchronization between normal napi->poll() and busy poll handler, slowing down the common path (non busy polling) with extra atomic operations. In practice few drivers ever got busy poll because of the complexity. We could go one step further, and make busy polling available for all NAPI drivers, but this would require that all netif_napi_del() calls are done in process context so that we can call synchronize_rcu(). Full audit would be required. Before this is done, a driver still needs to call : - skb_mark_napi_id() for each skb provided to the stack. - napi_hash_add() and napi_hash_del() to allocate a napi_id per napi struct. - Make sure RCU grace period is respected after napi_hash_del() before memory containing napi structure is freed. Followup patch implements busy poll for mlx5 driver as an example. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2002eec2617d..93009610aee8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4677,10 +4677,11 @@ static struct napi_struct *napi_by_id(unsigned int napi_id) } #if defined(CONFIG_NET_RX_BUSY_POLL) +#define BUSY_POLL_BUDGET 8 bool sk_busy_loop(struct sock *sk, int nonblock) { unsigned long end_time = !nonblock ? 
sk_busy_loop_end_time(sk) : 0; - const struct net_device_ops *ops; + int (*busy_poll)(struct napi_struct *dev); struct napi_struct *napi; int rc = false; @@ -4690,13 +4691,27 @@ bool sk_busy_loop(struct sock *sk, int nonblock) if (!napi) goto out; - ops = napi->dev->netdev_ops; - if (!ops->ndo_busy_poll) - goto out; + /* Note: ndo_busy_poll method is optional in linux-4.5 */ + busy_poll = napi->dev->netdev_ops->ndo_busy_poll; do { + rc = 0; local_bh_disable(); - rc = ops->ndo_busy_poll(napi); + if (busy_poll) { + rc = busy_poll(napi); + } else if (napi_schedule_prep(napi)) { + void *have = netpoll_poll_lock(napi); + + if (test_bit(NAPI_STATE_SCHED, &napi->state)) { + rc = napi->poll(napi, BUSY_POLL_BUDGET); + trace_napi_poll(napi); + if (rc == BUSY_POLL_BUDGET) { + napi_complete_done(napi, rc); + napi_schedule(napi); + } + } + netpoll_poll_unlock(have); + } if (rc > 0) NET_ADD_STATS_BH(sock_net(sk), LINUX_MIB_BUSYPOLLRXPACKETS, rc); -- cgit v1.2.3 From 93f93a4404159ecf7e9148f5ad0718ec702ac4cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:59 -0800 Subject: net: move skb_mark_napi_id() into core networking stack We would like to automatically provide busy polling support to all NAPI drivers, without them having to implement anything. skb_mark_napi_id() can be called from napi_gro_receive() and napi_get_frags(). Few drivers are still calling skb_mark_napi_id() because they use netif_receive_skb(). They should eventually call napi_gro_receive() instead. I will leave this to drivers maintainers. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 1 - drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 -- drivers/net/ethernet/chelsio/cxgb4/sge.c | 1 - drivers/net/ethernet/emulex/benet/be_main.c | 1 - drivers/net/ethernet/intel/i40e/i40e_txrx.c | 1 - drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 1 - drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 - drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 2 +- drivers/net/ethernet/sfc/rx.c | 1 - drivers/net/virtio_net.c | 2 -- net/core/dev.c | 2 ++ 13 files changed, 4 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 53ce1222b11d..8a9b493566c9 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2024,7 +2024,6 @@ read_again: skb->dev = netdev; skb->protocol = eth_type_trans(skb, netdev); skb_record_rx_queue(skb, channel->queue_index); - skb_mark_napi_id(skb, napi); napi_gro_receive(napi, skb); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ca208a7eecd5..ab9222924bd9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -1094,8 +1094,6 @@ reuse_rx: __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(cqe_fp->vlan_tag)); - skb_mark_napi_id(skb, &fp->napi); - napi_gro_receive(&fp->napi, skb); next_rx: rx_buf->data = NULL; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index b7b93e7a643d..f650f295f264 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1864,7 +1864,6 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, skb->truesize += skb->data_len; skb->ip_summed = CHECKSUM_UNNECESSARY; 
skb_record_rx_queue(skb, rxq->rspq.idx); - skb_mark_napi_id(skb, &rxq->rspq.napi); pi = netdev_priv(skb->dev); if (pi->rxtstamp) cxgb4_sgetim_to_hwtstamp(adapter, skb_hwtstamps(skb), diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index b6ad02909d6b..c29d62496ad9 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2184,7 +2184,6 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo, skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3); skb->csum_level = rxcp->tunneled; - skb_mark_napi_id(skb, napi); if (rxcp->vlanf) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 635b3ac17877..6649ce4ba2de 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1632,7 +1632,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) continue; } #endif - skb_mark_napi_id(skb, &rx_ring->q_vector->napi); i40e_receive_skb(rx_ring, skb, vlan_tag); rx_desc->wb.qword1.status_error_len = 0; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 47e9a90d6b10..77968b184b1f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1090,7 +1090,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) continue; } #endif - skb_mark_napi_id(skb, &rx_ring->q_vector->napi); i40e_receive_skb(rx_ring, skb, vlan_tag); rx_desc->wb.qword1.status_error_len = 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 47395ff5d908..4089d776d01a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1659,6 +1659,7 @@ static void 
ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, struct sk_buff *skb) { + skb_mark_napi_id(skb, &q_vector->napi); if (ixgbe_qv_busy_polling(q_vector)) netif_receive_skb(skb); else @@ -2123,7 +2124,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } #endif /* IXGBE_FCOE */ - skb_mark_napi_id(skb, &q_vector->napi); ixgbe_rx_skb(q_vector, skb); /* update budget accounting */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 1feead34093b..41440b2b20a3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -925,7 +925,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud PKT_HASH_TYPE_L3); skb_record_rx_queue(gro_skb, cq->ring); - skb_mark_napi_id(gro_skb, &cq->napi); if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { timestamp = mlx4_en_get_cqe_ts(cqe); @@ -988,8 +987,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud timestamp); } - skb_mark_napi_id(skb, &cq->napi); - napi_gro_receive(&cq->napi, skb); next: for (nr = 0; nr < priv->num_frags; nr++) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index fe752f8e24b9..7c8c4088d1be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -243,7 +243,6 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); skb = rq->skb[wqe_counter]; prefetch(skb->data); - skb_mark_napi_id(skb, cq->napi); rq->skb[wqe_counter] = NULL; dma_unmap_single(rq->pdev, diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 83651ac8ddb9..acf866147d65 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ 
b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1488,7 +1488,6 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum) } myri10ge_vlan_rx(mgp->dev, va, skb); skb_record_rx_queue(skb, ss - &mgp->ss[0]); - skb_mark_napi_id(skb, &ss->napi); if (polling) { int hlen; @@ -1506,6 +1505,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum) skb->data_len -= hlen; skb->tail += hlen; skb->protocol = eth_type_trans(skb, dev); + skb_mark_napi_id(skb, &ss->napi); netif_receive_skb(skb); } else diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 809ea4610a77..8956995b2fe7 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -463,7 +463,6 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, skb_record_rx_queue(skb, channel->rx_queue.core_index); - skb_mark_napi_id(skb, &channel->napi_str); gro_result = napi_gro_frags(napi); if (gro_result != GRO_DROP) channel->irq_mod_score += 2; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d8838dedb7a4..d1d14cecf450 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -516,8 +516,6 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, skb_shinfo(skb)->gso_segs = 0; } - skb_mark_napi_id(skb, &rq->napi); - napi_gro_receive(&rq->napi, skb); return; diff --git a/net/core/dev.c b/net/core/dev.c index 93009610aee8..83b48747928c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4356,6 +4356,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { + skb_mark_napi_id(skb, napi); trace_napi_gro_receive_entry(skb); skb_gro_reset_offset(skb); @@ -4390,6 +4391,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = napi_alloc_skb(napi, GRO_MAX_HEAD); napi->skb = skb; + skb_mark_napi_id(skb, napi); } return skb; } -- cgit v1.2.3 From 
d64b5e85bfe2fe4c790abcbd16d9ae32391ddd7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:00 -0800 Subject: net: add netif_tx_napi_add() netif_tx_napi_add() is a variant of netif_napi_add() It should be used by drivers that use a napi structure to exclusively poll TX. We do not want to add this kind of napi in napi_hash[] in following patches, adding generic busy polling to all NAPI drivers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- .../net/ethernet/freescale/fs_enet/fs_enet-main.c | 2 +- drivers/net/ethernet/freescale/gianfar.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 4 ++-- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 4 ++-- drivers/net/ethernet/rocker/rocker.c | 2 +- drivers/net/ethernet/ti/cpsw.c | 2 +- drivers/net/ethernet/ti/netcp_core.c | 2 +- drivers/net/wireless/ath/wil6210/netdev.c | 2 +- include/linux/netdevice.h | 23 +++++++++++++++++++++- net/core/dev.c | 3 ++- 12 files changed, 38 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 858106352ce9..993c780bdfab 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1216,7 +1216,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, /* Initialize SW view of the ring */ spin_lock_init(&ring->lock); ring->priv = priv; - netif_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64); + netif_tx_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64); ring->index = index; ring->size = size; ring->alloc_size = ring->size; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 17f017ab4dac..b15a60d787c7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ 
b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2041,11 +2041,11 @@ static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv) for (i = 0; i < priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); } ring = &priv->tx_rings[DESC_INDEX]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); } static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index cf8e54652df9..48a9c176e0d1 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -1050,7 +1050,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight); - netif_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); + netif_tx_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 3e6b9b437497..c8bc43e99a35 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1347,12 +1347,12 @@ static int gfar_probe(struct platform_device *ofdev) if (priv->poll_mode == GFAR_SQ_POLLING) { netif_napi_add(dev, &priv->gfargrp[i].napi_rx, gfar_poll_rx_sq, GFAR_DEV_WEIGHT); - netif_napi_add(dev, &priv->gfargrp[i].napi_tx, + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, gfar_poll_tx_sq, 2); } else { netif_napi_add(dev, &priv->gfargrp[i].napi_rx, gfar_poll_rx, GFAR_DEV_WEIGHT); - netif_napi_add(dev, &priv->gfargrp[i].napi_tx, + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, gfar_poll_tx, 2); 
} } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index eb8a4988de63..3a6176fea78d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -156,8 +156,8 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, cq->mcq.event = mlx4_en_cq_event; if (cq->is_tx) { - netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, - NAPI_POLL_WEIGHT); + netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, + NAPI_POLL_WEIGHT); } else { netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); napi_hash_add(&cq->napi); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index d4b5085a21fa..7bd6f25b4625 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -1604,7 +1604,7 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test) { for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; - netif_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll, + netif_tx_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll, NAPI_POLL_WEIGHT); } } @@ -2135,7 +2135,7 @@ int qlcnic_83xx_napi_add(struct qlcnic_adapter *adapter, !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; - netif_napi_add(netdev, &tx_ring->napi, + netif_tx_napi_add(netdev, &tx_ring->napi, qlcnic_83xx_msix_tx_poll, NAPI_POLL_WEIGHT); } diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index e9f2349e98bc..a4ab71d43e4e 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4998,7 +4998,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) dev->netdev_ops = &rocker_port_netdev_ops; dev->ethtool_ops = 
&rocker_port_ethtool_ops; dev->switchdev_ops = &rocker_port_switchdev_ops; - netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, + netif_tx_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, NAPI_POLL_WEIGHT); netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx, NAPI_POLL_WEIGHT); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 48b92c9de12a..15322c08de80 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2469,7 +2469,7 @@ static int cpsw_probe(struct platform_device *pdev) ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; netif_napi_add(ndev, &priv->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT); - netif_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); + netif_tx_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); /* register the network device */ SET_NETDEV_DEV(ndev, &pdev->dev); diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 37b9b39192ec..e5e20e734f21 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1990,7 +1990,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device, /* NAPI register */ netif_napi_add(ndev, &netcp->rx_napi, netcp_rx_poll, NETCP_NAPI_WEIGHT); - netif_napi_add(ndev, &netcp->tx_napi, netcp_tx_poll, NETCP_NAPI_WEIGHT); + netif_tx_napi_add(ndev, &netcp->tx_napi, netcp_tx_poll, NETCP_NAPI_WEIGHT); /* Register the network device */ ndev->dev_id = 0; diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index e3b3c8fb4605..56aaa2d4fb0e 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -183,7 +183,7 @@ void *wil_if_alloc(struct device *dev) netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx, WIL6210_NAPI_BUDGET); - netif_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, + 
netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, WIL6210_NAPI_BUDGET); netif_tx_stop_all_queues(ndev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2020a89df12b..838935d1cdbb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -326,7 +326,8 @@ enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ - NAPI_STATE_HASHED, /* In NAPI hash */ + NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ + NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ }; enum gro_result { @@ -1938,6 +1939,26 @@ static inline void *netdev_priv(const struct net_device *dev) void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight); +/** + * netif_tx_napi_add - initialize a napi context + * @dev: network device + * @napi: napi context + * @poll: polling function + * @weight: default weight + * + * This variant of netif_napi_add() should be used from drivers using NAPI + * to exclusively poll a TX queue. + * This will avoid we add it into napi_hash[], thus polluting this hash table. 
+ */ +static inline void netif_tx_napi_add(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight) +{ + set_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state); + netif_napi_add(dev, napi, poll, weight); +} + /** * netif_napi_del - remove a napi context * @napi: napi context diff --git a/net/core/dev.c b/net/core/dev.c index 83b48747928c..ff58a8bc5e3c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4737,7 +4737,8 @@ EXPORT_SYMBOL(sk_busy_loop); void napi_hash_add(struct napi_struct *napi) { - if (test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) + if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || + test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) return; spin_lock(&napi_hash_lock); -- cgit v1.2.3 From 6180d9de61a5c461f9e3efef5417a844701dbbb2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:01 -0800 Subject: net: move napi_hash[] into read mostly section We do not often add/delete a napi context. Moving napi_hash[] into read mostly section avoids potential false sharing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/hashtable.h | 4 ++++ net/core/dev.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h index 519b6e2d769e..661e5c2a8e2a 100644 --- a/include/linux/hashtable.h +++ b/include/linux/hashtable.h @@ -16,6 +16,10 @@ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } +#define DEFINE_READ_MOSTLY_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] __read_mostly = \ + { [0 ... 
((1 << (bits)) - 1)] = HLIST_HEAD_INIT } + #define DECLARE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] diff --git a/net/core/dev.c b/net/core/dev.c index ff58a8bc5e3c..02dfbd91a8e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -184,7 +184,7 @@ EXPORT_SYMBOL(dev_base_lock); static DEFINE_SPINLOCK(napi_hash_lock); static unsigned int napi_gen_id = NR_CPUS; -static DEFINE_HASHTABLE(napi_hash, 8); +static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; -- cgit v1.2.3 From 34cbe27e811c591c854a39c0dee1b461bb796953 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:02 -0800 Subject: net: napi_hash_del() returns a boolean status napi_hash_del() will soon be used from both drivers (if they want) or core networking stack. Callers are responsible for ensuring an RCU grace period is respected before freeing the napi structure: napi_hash_del() can signal if this RCU grace period is needed or not. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++-- net/core/dev.c | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 838935d1cdbb..e5c33b29471b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -474,9 +474,10 @@ void napi_hash_add(struct napi_struct *napi); * @napi: napi context * * Warning: caller must observe rcu grace period - * before freeing memory containing @napi + * before freeing memory containing @napi, if + * this function returns true.
*/ -void napi_hash_del(struct napi_struct *napi); +bool napi_hash_del(struct napi_struct *napi); /** * napi_disable - prevent NAPI from scheduling diff --git a/net/core/dev.c b/net/core/dev.c index 02dfbd91a8e4..59dddac1c2e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4760,14 +4760,18 @@ EXPORT_SYMBOL_GPL(napi_hash_add); /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi */ -void napi_hash_del(struct napi_struct *napi) +bool napi_hash_del(struct napi_struct *napi) { + bool rcu_sync_needed = false; + spin_lock(&napi_hash_lock); - if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) + if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) { + rcu_sync_needed = true; hlist_del_rcu(&napi->napi_hash_node); - + } spin_unlock(&napi_hash_lock); + return rcu_sync_needed; } EXPORT_SYMBOL_GPL(napi_hash_del); -- cgit v1.2.3 From 93d05d4a320cb16712bb3d57a9658f395d8cecb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:03 -0800 Subject: net: provide generic busy polling to all NAPI drivers NAPI drivers no longer need to observe a particular protocol to benefit from busy polling (CONFIG_NET_RX_BUSY_POLL=y) napi_hash_add() and napi_hash_del() are automatically called from core networking stack, respectively from netif_napi_add() and netif_napi_del() This patch depends on free_netdev() and netif_napi_del() being called from process context, which seems to be the norm. Drivers might still prefer to call napi_hash_del() on their own, since they might combine all the rcu grace periods into a single one, knowing their NAPI structures lifetime, while core networking stack has no idea of a possible combining. Once this patch proves to not bring serious regressions, we will cleanup drivers to either remove napi_hash_del() or provide appropriate rcu grace periods combining. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 -- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 -- drivers/net/ethernet/chelsio/cxgb4/sge.c | 1 - drivers/net/ethernet/cisco/enic/enic_main.c | 2 -- drivers/net/ethernet/emulex/benet/be_main.c | 1 - drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 1 - drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 6 ++---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 - drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 1 - drivers/net/ethernet/sfc/efx.c | 1 - drivers/net/virtio_net.c | 1 - include/linux/netdevice.h | 7 +++++++ net/core/dev.c | 7 +++++++ 14 files changed, 16 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ab9222924bd9..d9add7c02e42 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -46,7 +46,6 @@ static void bnx2x_add_all_napi_cnic(struct bnx2x *bp) for_each_rx_queue_cnic(bp, i) { netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), bnx2x_poll, NAPI_POLL_WEIGHT); - napi_hash_add(&bnx2x_fp(bp, i, napi)); } } @@ -58,7 +57,6 @@ static void bnx2x_add_all_napi(struct bnx2x *bp) for_each_eth_queue(bp, i) { netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), bnx2x_poll, NAPI_POLL_WEIGHT); - napi_hash_add(&bnx2x_fp(bp, i, napi)); } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index db15c5ee09c5..f2d0dc9b1c41 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4227,12 +4227,10 @@ static void bnxt_init_napi(struct bnxt *bp) bnapi = bp->bnapi[i]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64); - napi_hash_add(&bnapi->napi); } } else { bnapi = bp->bnapi[0]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64); - napi_hash_add(&bnapi->napi); } } diff --git 
a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index f650f295f264..48d8fbb1c220 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2527,7 +2527,6 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, goto err; netif_napi_add(dev, &iq->napi, napi_rx_handler, 64); - napi_hash_add(&iq->napi); iq->cur_desc = iq->desc; iq->cidx = 0; iq->gen = 1; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index b36643ef0593..b2182d3ba3cc 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2458,13 +2458,11 @@ static int enic_dev_init(struct enic *enic) switch (vnic_dev_get_intr_mode(enic->vdev)) { default: netif_napi_add(netdev, &enic->napi[0], enic_poll, 64); - napi_hash_add(&enic->napi[0]); break; case VNIC_DEV_INTR_MODE_MSIX: for (i = 0; i < enic->rq_count; i++) { netif_napi_add(netdev, &enic->napi[i], enic_poll_msix_rq, NAPI_POLL_WEIGHT); - napi_hash_add(&enic->napi[i]); } for (i = 0; i < enic->wq_count; i++) netif_napi_add(netdev, &enic->napi[enic_cq_wq(enic, i)], diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c29d62496ad9..4cab8879f5ae 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2630,7 +2630,6 @@ static int be_evt_queues_create(struct be_adapter *adapter) eqo->affinity_mask); netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); - napi_hash_add(&eqo->napi); } return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index f3168bcc7d87..e771e764daa3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -844,7 +844,6 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, /* 
initialize NAPI */ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbe_poll, 64); - napi_hash_add(&q_vector->napi); #ifdef CONFIG_NET_RX_BUSY_POLL /* initialize busy poll */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 592ff237d692..2955186cd4f6 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2483,9 +2483,6 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) q_vector->v_idx = q_idx; netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64); -#ifdef CONFIG_NET_RX_BUSY_POLL - napi_hash_add(&q_vector->napi); -#endif adapter->q_vector[q_idx] = q_vector; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 3a6176fea78d..af975a2b74c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -155,13 +155,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, cq->mcq.comp = cq->is_tx ? 
mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; - if (cq->is_tx) { + if (cq->is_tx) netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, NAPI_POLL_WEIGHT); - } else { + else netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); - napi_hash_add(&cq->napi); - } napi_enable(&cq->napi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ffb1f9c1b973..f6a8cc787603 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -982,7 +982,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, mlx5e_build_channeltc_to_txq_map(priv, ix); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - napi_hash_add(&c->napi); err = mlx5e_open_tx_cqs(c, cparam); if (err) diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index acf866147d65..270c9eeb7ab6 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3814,7 +3814,6 @@ static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) ss->dev = mgp->dev; netif_napi_add(ss->dev, &ss->napi, myri10ge_poll, myri10ge_napi_weight); - napi_hash_add(&ss->napi); } return 0; abort: diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index a3c42a376741..4e82bcfbe3e0 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2059,7 +2059,6 @@ static void efx_init_napi_channel(struct efx_channel *channel) channel->napi_dev = efx->net_dev; netif_napi_add(channel->napi_dev, &channel->napi_str, efx_poll, napi_weight); - napi_hash_add(&channel->napi_str); efx_channel_busy_poll_init(channel); } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d1d14cecf450..b1ae4cbf2453 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1610,7 +1610,6 @@ static int 
virtnet_alloc_queues(struct virtnet_info *vi) vi->rq[i].pages = NULL; netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, napi_weight); - napi_hash_add(&vi->rq[i].napi); sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e5c33b29471b..7d2d1d7aaec7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -466,6 +466,9 @@ static inline void napi_complete(struct napi_struct *n) * @napi: napi context * * generate a new napi_id and store a @napi under it in napi_hash + * Used for busy polling (CONFIG_NET_RX_BUSY_POLL) + * Note: This is normally automatically done from netif_napi_add(), + * so might disappear in a future linux version. */ void napi_hash_add(struct napi_struct *napi); @@ -476,6 +479,10 @@ void napi_hash_add(struct napi_struct *napi); * Warning: caller must observe rcu grace period * before freeing memory containing @napi, if * this function returns true. + * Note: core networking stack automatically calls it + * from netif_napi_del() + * Drivers might want to call this helper to combine all + * the needed rcu grace periods into a single one. 
*/ bool napi_hash_del(struct napi_struct *napi); diff --git a/net/core/dev.c b/net/core/dev.c index 59dddac1c2e7..41cef3e3f558 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4807,6 +4807,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->poll_owner = -1; #endif set_bit(NAPI_STATE_SCHED, &napi->state); + napi_hash_add(napi); } EXPORT_SYMBOL(netif_napi_add); @@ -4826,8 +4827,12 @@ void napi_disable(struct napi_struct *n) } EXPORT_SYMBOL(napi_disable); +/* Must be called in process context */ void netif_napi_del(struct napi_struct *napi) { + might_sleep(); + if (napi_hash_del(napi)) + synchronize_net(); list_del_init(&napi->dev_list); napi_free_frags(napi); @@ -7227,11 +7232,13 @@ EXPORT_SYMBOL(alloc_netdev_mqs); * This function does the last stage of destroying an allocated device * interface. The reference to the device object is released. * If this is the last reference then it will be freed. + * Must be called in process context. */ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; + might_sleep(); netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); -- cgit v1.2.3 From 70f56aa2ee7142a53a8c5285a685c55987a1a990 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 1 Nov 2015 09:39:49 +0100 Subject: Bluetooth: Move BR/EDR default events behind its features There are some BR/EDR default events for Bluetooth 1.2 or later controllers that are not conditional on their features being present. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 62edbf1b114e..db423657935a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -531,10 +531,6 @@ static void hci_setup_event_mask(struct hci_request *req) if (lmp_bredr_capable(hdev)) { events[4] |= 0x01; /* Flow Specification Complete */ - events[4] |= 0x02; /* Inquiry Result with RSSI */ - events[4] |= 0x04; /* Read Remote Extended Features Complete */ - events[5] |= 0x08; /* Synchronous Connection Complete */ - events[5] |= 0x10; /* Synchronous Connection Changed */ } else { /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); @@ -555,6 +551,14 @@ static void hci_setup_event_mask(struct hci_request *req) if (lmp_inq_rssi_capable(hdev)) events[4] |= 0x02; /* Inquiry Result with RSSI */ + if (lmp_ext_feat_capable(hdev)) + events[4] |= 0x04; /* Read Remote Extended Features Complete */ + + if (lmp_esco_capable(hdev)) { + events[5] |= 0x08; /* Synchronous Connection Complete */ + events[5] |= 0x10; /* Synchronous Connection Changed */ + } + if (lmp_sniffsubr_capable(hdev)) events[5] |= 0x20; /* Sniff Subrating */ -- cgit v1.2.3 From 7d26f5c4be620a384c3c9c7590cae2828d50626f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 1 Nov 2015 09:39:51 +0100 Subject: Bluetooth: Build LE event mask based on supported commands The LE event mask should be created based on the commands that are actually supported by the controller. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index db423657935a..ea95075f1826 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -781,7 +781,6 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) u8 events[8]; memset(events, 0, sizeof(events)); - events[0] = 0x0f; if (hdev->le_features[0] & HCI_LE_ENCRYPTION) events[0] |= 0x10; /* LE Long Term Key Request */ @@ -808,6 +807,34 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) * Report */ + /* If the controller supports the LE Set Scan Enable command, + * enable the corresponding advertising report event. + */ + if (hdev->commands[26] & 0x08) + events[0] |= 0x02; /* LE Advertising Report */ + + /* If the controller supports the LE Create Connection + * command, enable the corresponding event. + */ + if (hdev->commands[26] & 0x10) + events[0] |= 0x01; /* LE Connection Complete */ + + /* If the controller supports the LE Connection Update + * command, enable the corresponding event. + */ + if (hdev->commands[27] & 0x04) + events[0] |= 0x04; /* LE Connection Update + * Complete + */ + + /* If the controller supports the LE Read Remote Used Features + * command, enable the corresponding event. + */ + if (hdev->commands[27] & 0x20) + events[0] |= 0x08; /* LE Read Remote Used + * Features Complete + */ + /* If the controller supports the LE Read Local P-256 * Public Key command, enable the corresponding event. 
*/ -- cgit v1.2.3 From 9fe759ceedcdc0c43234382425a158c3f31e6909 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 1 Nov 2015 09:45:22 +0100 Subject: Bluetooth: Fix issue with HCI_QUIRK_FIXUP_INQUIRY_MODE and event mask When setting the event mask, the HCI_QUIRK_FIXUP_INQUIRY_MODE quirk is required to be checked so that the Inquiry Result with RSSI event gets actually enabled. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ea95075f1826..556c173ccbc6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -548,7 +548,8 @@ static void hci_setup_event_mask(struct hci_request *req) } } - if (lmp_inq_rssi_capable(hdev)) + if (lmp_inq_rssi_capable(hdev) || + test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) events[4] |= 0x02; /* Inquiry Result with RSSI */ if (lmp_ext_feat_capable(hdev)) -- cgit v1.2.3 From 5c3d3b4c4f3df584a90301b944580bf4c1974f12 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 4 Nov 2015 07:17:23 +0100 Subject: Bluetooth: Make LE only events conditional on supported commands For the LE only controllers, there are events that should not be enabled if the corresponding command is not supported. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 556c173ccbc6..97734cab2538 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -534,13 +534,27 @@ static void hci_setup_event_mask(struct hci_request *req) } else { /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); - events[0] |= 0x10; /* Disconnection Complete */ - events[1] |= 0x08; /* Read Remote Version Information Complete */ events[1] |= 0x20; /* Command Complete */ events[1] |= 0x40; /* Command Status */ events[1] |= 0x80; /* Hardware Error */ - events[2] |= 0x04; /* Number of Completed Packets */ - events[3] |= 0x02; /* Data Buffer Overflow */ + + /* If the controller supports the Disconnect command, enable + * the corresponding event. In addition enable packet flow + * control related events. + */ + if (hdev->commands[0] & 0x20) { + events[0] |= 0x10; /* Disconnection Complete */ + events[2] |= 0x04; /* Number of Completed Packets */ + events[3] |= 0x02; /* Data Buffer Overflow */ + } + + /* If the controller supports the Read Remote Version + * Information command, enable the corresponding event. + */ + if (hdev->commands[2] & 0x80) + events[1] |= 0x08; /* Read Remote Version Information + * Complete + */ if (hdev->le_features[0] & HCI_LE_ENCRYPTION) { events[0] |= 0x80; /* Encryption Change */ -- cgit v1.2.3 From d79f34e32b833cb8651dfd4209d36cf99c89d1d3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 5 Nov 2015 07:10:00 +0100 Subject: Bluetooth: Use new hci_skb_pkt_* wrappers for core packet handling The new hci_skb_pkt_* wrappers only help if they are used consistently in the Bluetooth subsystem. So first convert the core packet handling. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 23 ++++++++++++----------- net/bluetooth/hci_request.c | 4 ++-- net/bluetooth/hci_sock.c | 38 +++++++++++++++++++------------------- 3 files changed, 33 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 97734cab2538..db26cbd1cd9d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3566,7 +3566,7 @@ int hci_reset_dev(struct hci_dev *hdev) if (!skb) return -ENOMEM; - bt_cb(skb)->pkt_type = HCI_EVENT_PKT; + hci_skb_pkt_type(skb) = HCI_EVENT_PKT; memcpy(skb_put(skb, 3), hw_err, 3); /* Send Hardware Error to upper stack */ @@ -3583,9 +3583,9 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) return -ENXIO; } - if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT && + hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && + hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) { kfree_skb(skb); return -EINVAL; } @@ -3607,7 +3607,7 @@ EXPORT_SYMBOL(hci_recv_frame); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb) { /* Mark as diagnostic packet */ - bt_cb(skb)->pkt_type = HCI_DIAG_PKT; + hci_skb_pkt_type(skb) = HCI_DIAG_PKT; /* Time stamp */ __net_timestamp(skb); @@ -3649,7 +3649,8 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { int err; - BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len); + BT_DBG("%s type %d len %d", hdev->name, hci_skb_pkt_type(skb), + skb->len); /* Time stamp */ __net_timestamp(skb); @@ -3762,7 +3763,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, skb->len = skb_headlen(skb); skb->data_len = 0; - bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; + hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; switch (hdev->dev_type) { case HCI_BREDR: @@ -3802,7 +3803,7 @@ static void 
hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, do { skb = list; list = list->next; - bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; + hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; hci_add_acl_hdr(skb, conn->handle, flags); BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); @@ -3840,7 +3841,7 @@ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) skb_reset_transport_header(skb); memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE); - bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; + hci_skb_pkt_type(skb) = HCI_SCODATA_PKT; skb_queue_tail(&conn->data_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); @@ -4499,7 +4500,7 @@ static void hci_rx_work(struct work_struct *work) if (test_bit(HCI_INIT, &hdev->flags)) { /* Don't process data packets in this states. */ - switch (bt_cb(skb)->pkt_type) { + switch (hci_skb_pkt_type(skb)) { case HCI_ACLDATA_PKT: case HCI_SCODATA_PKT: kfree_skb(skb); @@ -4508,7 +4509,7 @@ static void hci_rx_work(struct work_struct *work) } /* Process frame */ - switch (bt_cb(skb)->pkt_type) { + switch (hci_skb_pkt_type(skb)) { case HCI_EVENT_PKT: BT_DBG("%s Event packet", hdev->name); hci_event_packet(hdev, skb); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 981f8a202c27..bdb170995966 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -98,8 +98,8 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, BT_DBG("skb len %d", skb->len); - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - bt_cb(skb)->hci.opcode = opcode; + hci_skb_pkt_type(skb) = HCI_COMMAND_PKT; + hci_skb_opcode(skb) = opcode; return skb; } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b1eb8c09a660..235ad0fa3571 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -120,13 +120,13 @@ static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb) /* Apply filter */ flt = &hci_pi(sk)->filter; - flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS; + 
flt_type = hci_skb_pkt_type(skb) & HCI_FLT_TYPE_BITS; if (!test_bit(flt_type, &flt->type_mask)) return true; /* Extra filter for event packets only */ - if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT) + if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT) return false; flt_event = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); @@ -170,19 +170,19 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) continue; if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) { - if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && - bt_cb(skb)->pkt_type != HCI_EVENT_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) + if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT && + hci_skb_pkt_type(skb) != HCI_EVENT_PKT && + hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && + hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) continue; if (is_filtered_packet(sk, skb)) continue; } else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { if (!bt_cb(skb)->incoming) continue; - if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) + if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT && + hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && + hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) continue; } else { /* Don't send frame to other channel types */ @@ -196,7 +196,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) continue; /* Put type byte before the data */ - memcpy(skb_push(skb_copy, 1), &bt_cb(skb)->pkt_type, 1); + memcpy(skb_push(skb_copy, 1), &hci_skb_pkt_type(skb), 1); } nskb = skb_clone(skb_copy, GFP_ATOMIC); @@ -262,7 +262,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("hdev %p len %d", hdev, skb->len); - switch (bt_cb(skb)->pkt_type) { + switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: opcode = cpu_to_le16(HCI_MON_COMMAND_PKT); break; @@ -447,7 +447,7 @@ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) bt_cb(skb)->incoming = 1; 
__net_timestamp(skb); - bt_cb(skb)->pkt_type = HCI_EVENT_PKT; + hci_skb_pkt_type(skb) = HCI_EVENT_PKT; hci_send_to_sock(hdev, skb); kfree_skb(skb); } @@ -1211,7 +1211,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto drop; } - bt_cb(skb)->pkt_type = *((unsigned char *) skb->data); + hci_skb_pkt_type(skb) = *((unsigned char *) skb->data); skb_pull(skb, 1); if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { @@ -1220,16 +1220,16 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, * * However check that the packet type is valid. */ - if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT && + hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && + hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) { err = -EINVAL; goto drop; } skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); - } else if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { + } else if (hci_skb_pkt_type(skb) == HCI_COMMAND_PKT) { u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); @@ -1260,8 +1260,8 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto drop; } - if (bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + if (hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && + hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) { err = -EINVAL; goto drop; } -- cgit v1.2.3 From 44d271377479c4d4fe7f2d07d188656684773fbd Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 5 Nov 2015 09:31:40 +0200 Subject: Bluetooth: Compress the size of struct hci_ctrl We can reduce the size of the hci_ctrl struct by converting 'bool req_start' to 'u8 req_flags' and making the two function pointers a union (since only one is ever set at a time). 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 11 ++++++++--- net/bluetooth/hci_core.c | 14 +++++++------- net/bluetooth/hci_request.c | 10 +++++++--- net/bluetooth/hci_sock.c | 2 +- 4 files changed, 23 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index a85e6d3d75ef..8d38f411009c 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -296,12 +296,17 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode); typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb); +#define HCI_REQ_START BIT(0) +#define HCI_REQ_SKB BIT(1) + struct hci_ctrl { __u16 opcode; - bool req_start; + u8 req_flags; u8 req_event; - hci_req_complete_t req_complete; - hci_req_complete_skb_t req_complete_skb; + union { + hci_req_complete_t req_complete; + hci_req_complete_skb_t req_complete_skb; + }; }; struct bt_skb_cb { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index db26cbd1cd9d..bc97fc6de876 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3695,7 +3695,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, /* Stand-alone HCI commands must be flagged as * single-command requests. */ - bt_cb(skb)->hci.req_start = true; + bt_cb(skb)->hci.req_flags |= HCI_REQ_START; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -4392,7 +4392,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev) if (!skb) return true; - return bt_cb(skb)->hci.req_start; + return (bt_cb(skb)->hci.req_flags & HCI_REQ_START); } static void hci_resend_last(struct hci_dev *hdev) @@ -4452,20 +4452,20 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, * callback would be found in hdev->sent_cmd instead of the * command queue (hdev->cmd_q). 
*/ - if (bt_cb(hdev->sent_cmd)->hci.req_complete) { - *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; + if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) { + *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; return; } - if (bt_cb(hdev->sent_cmd)->hci.req_complete_skb) { - *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; + if (bt_cb(hdev->sent_cmd)->hci.req_complete) { + *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; return; } /* Remove all pending commands belonging to this request */ spin_lock_irqsave(&hdev->cmd_q.lock, flags); while ((skb = __skb_dequeue(&hdev->cmd_q))) { - if (bt_cb(skb)->hci.req_start) { + if (bt_cb(skb)->hci.req_flags & HCI_REQ_START) { __skb_queue_head(&hdev->cmd_q, skb); break; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index bdb170995966..5ba27c30e8f2 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -56,8 +56,12 @@ static int req_run(struct hci_request *req, hci_req_complete_t complete, return -ENODATA; skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->hci.req_complete = complete; - bt_cb(skb)->hci.req_complete_skb = complete_skb; + if (complete) { + bt_cb(skb)->hci.req_complete = complete; + } else if (complete_skb) { + bt_cb(skb)->hci.req_complete_skb = complete_skb; + bt_cb(skb)->hci.req_flags |= HCI_REQ_SKB; + } spin_lock_irqsave(&hdev->cmd_q.lock, flags); skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); @@ -128,7 +132,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, } if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->hci.req_start = true; + bt_cb(skb)->hci.req_flags |= HCI_REQ_START; bt_cb(skb)->hci.req_event = event; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 235ad0fa3571..19b23013c4f6 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1249,7 +1249,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, /* Stand-alone HCI commands must 
be flagged as * single-command requests. */ - bt_cb(skb)->hci.req_start = true; + bt_cb(skb)->hci.req_flags |= HCI_REQ_START; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); -- cgit v1.2.3 From 1982162bbe20672941897566f2f42d51a306a155 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 6 Nov 2015 07:42:20 +0100 Subject: Bluetooth: Add missing hci_skb_opcode for raw socket commands When HCI commands are injected via the raw socket, the core was not including the decoded opcode value. So ensure that it is actually set. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 19b23013c4f6..32caa6271a92 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1242,6 +1242,11 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto drop; } + /* Since the opcode has already been extracted here, store + * a copy of the value for later use by the drivers. + */ + hci_skb_opcode(skb) = opcode; + if (ogf == 0x3f) { skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); -- cgit v1.2.3 From 0ebc181884e8f538c4786840ed4abef828d4dc9b Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 6 Nov 2015 13:35:33 +0200 Subject: Bluetooth: Add clarifying comment why schedule_work is used It's not obvious why schedule_work is used instead of queue_work. Add a comment explaining why. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 85b82f7adbd2..fd6120a41138 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -178,6 +178,10 @@ static void hci_connect_le_scan_remove(struct hci_conn *conn) hci_dev_hold(conn->hdev); hci_conn_get(conn); + /* Even though we hold a reference to the hdev, many other + * things might get cleaned up meanwhile, including the hdev's + * own workqueue, so we can't use that for scheduling. + */ schedule_work(&conn->le_scan_cleanup); } -- cgit v1.2.3 From 8528d3f738386706a6d2af05d7bdb542594bc95c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 8 Nov 2015 07:47:11 +0100 Subject: Bluetooth: Fix casting coding style within HCI sockets The HCI sockets code has still some old casting coding style. Fix this to match with the rest of the code. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 32caa6271a92..18a41eae295c 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -294,7 +294,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) return; /* Put header before the data */ - hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE); + hdr = (void *)skb_push(skb_copy, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len); @@ -375,7 +375,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) __net_timestamp(skb); - hdr = (void *) skb_push(skb, HCI_MON_HDR_SIZE); + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len - 
HCI_MON_HDR_SIZE); @@ -436,11 +436,11 @@ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) if (!skb) return; - hdr = (void *) skb_put(skb, HCI_EVENT_HDR_SIZE); + hdr = (void *)skb_put(skb, HCI_EVENT_HDR_SIZE); hdr->evt = HCI_EV_STACK_INTERNAL; hdr->plen = sizeof(*ev) + dlen; - ev = (void *) skb_put(skb, sizeof(*ev) + dlen); + ev = (void *)skb_put(skb, sizeof(*ev) + dlen); ev->type = type; memcpy(ev->data, data, dlen); @@ -653,20 +653,20 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, return -EOPNOTSUPP; case HCIGETCONNINFO: - return hci_get_conn_info(hdev, (void __user *) arg); + return hci_get_conn_info(hdev, (void __user *)arg); case HCIGETAUTHINFO: - return hci_get_auth_info(hdev, (void __user *) arg); + return hci_get_auth_info(hdev, (void __user *)arg); case HCIBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; - return hci_sock_blacklist_add(hdev, (void __user *) arg); + return hci_sock_blacklist_add(hdev, (void __user *)arg); case HCIUNBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; - return hci_sock_blacklist_del(hdev, (void __user *) arg); + return hci_sock_blacklist_del(hdev, (void __user *)arg); } return -ENOIOCTLCMD; @@ -675,7 +675,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - void __user *argp = (void __user *) arg; + void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; int err; @@ -926,7 +926,7 @@ done: static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer) { - struct sockaddr_hci *haddr = (struct sockaddr_hci *) addr; + struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr; struct sock *sk = sock->sk; struct hci_dev *hdev; int err = 0; @@ -991,8 +991,8 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, } } -static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) 
+static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -1211,7 +1211,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto drop; } - hci_skb_pkt_type(skb) = *((unsigned char *) skb->data); + hci_skb_pkt_type(skb) = skb->data[0]; skb_pull(skb, 1); if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { -- cgit v1.2.3 From dd31506d4aece48943802c2bca3f1f7d2e7266b4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 8 Nov 2015 07:47:12 +0100 Subject: Bluetooth: Add support for sending system notes to monitor channel The monitor channel can be used to send generic system notes as text strings for debugging purposes. This adds the system note monitor code and uses it for including kernel and subsystem version into traces. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 2 ++ include/net/bluetooth/hci_mon.h | 1 + net/bluetooth/af_bluetooth.c | 8 +++----- net/bluetooth/hci_sock.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 8d38f411009c..bfd1590821d6 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -29,6 +29,8 @@ #include #include +#define BT_SUBSYS_VERSION "2.21" + #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 #define PF_BLUETOOTH AF_BLUETOOTH diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 2b67567cf28d..c91bb23eb29e 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -43,6 +43,7 @@ struct hci_mon_hdr { #define HCI_MON_CLOSE_INDEX 9 #define HCI_MON_INDEX_INFO 10 #define HCI_MON_VENDOR_DIAG 11 +#define HCI_MON_SYSTEM_NOTE 12 struct hci_mon_new_index { __u8 type; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 
a3bffd1ec2b4..34c53d5862f6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -33,8 +33,6 @@ #include "selftest.h" -#define VERSION "2.21" - /* Bluetooth sockets */ #define BT_MAX_PROTO 8 static const struct net_proto_family *bt_proto[BT_MAX_PROTO]; @@ -715,7 +713,7 @@ static int __init bt_init(void) sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); - BT_INFO("Core ver %s", VERSION); + BT_INFO("Core ver %s", BT_SUBSYS_VERSION); err = bt_selftest(); if (err < 0) @@ -789,7 +787,7 @@ subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); -MODULE_VERSION(VERSION); +MODULE_DESCRIPTION("Bluetooth Core ver " BT_SUBSYS_VERSION); +MODULE_VERSION(BT_SUBSYS_VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 18a41eae295c..710265c35d16 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -26,6 +26,8 @@ #include #include +#include +#include #include #include @@ -383,6 +385,29 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return skb; } +static void send_monitor_note(struct sock *sk, const char *text) +{ + size_t len = strlen(text); + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(len + 1, GFP_ATOMIC); + if (!skb) + return; + + strcpy(skb_put(skb, len + 1), text); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_SYSTEM_NOTE); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + if (sock_queue_rcv_skb(sk, skb)) + kfree_skb(skb); +} + static void send_monitor_replay(struct sock *sk) { struct hci_dev *hdev; @@ -872,6 +897,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, */ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + send_monitor_note(sk, "Linux version " UTS_RELEASE + " (" 
UTS_MACHINE ")"); + send_monitor_note(sk, "Bluetooth subsystem version " + BT_SUBSYS_VERSION); send_monitor_replay(sk); atomic_inc(&monitor_promisc); -- cgit v1.2.3 From ac71494934c475e3f51e5e3e64a12f57618d82a4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 8 Nov 2015 07:47:13 +0100 Subject: Bluetooth: Add support for controller specific logging To enable controller specific logging, the userspace daemon has to have the ability to log per controller. To facilitate this support, provide a dedicated logging channel. Messages in this channel will be included in the monitor queue and with that also forwarded to monitoring tools along with the actual hardware traces. All messages from the logging channel are timestamped and with that allow an easy correlation between userspace messages and hardware events. This will increase the ability to debug problems faster. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_mon.h | 1 + include/net/bluetooth/hci_sock.h | 1 + net/bluetooth/hci_sock.c | 102 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index c91bb23eb29e..587d0131b349 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -44,6 +44,7 @@ struct hci_mon_hdr { #define HCI_MON_INDEX_INFO 10 #define HCI_MON_VENDOR_DIAG 11 #define HCI_MON_SYSTEM_NOTE 12 +#define HCI_MON_USER_LOGGING 13 struct hci_mon_new_index { __u8 type; diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 9a46d665c1b5..8e9138acdae1 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -45,6 +45,7 @@ struct sockaddr_hci { #define HCI_CHANNEL_USER 1 #define HCI_CHANNEL_MONITOR 2 #define HCI_CHANNEL_CONTROL 3 +#define HCI_CHANNEL_LOGGING 4 struct hci_filter { unsigned long type_mask; diff --git a/net/bluetooth/hci_sock.c 
b/net/bluetooth/hci_sock.c index 710265c35d16..41f579ba447b 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -906,6 +906,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, atomic_inc(&monitor_promisc); break; + case HCI_CHANNEL_LOGGING: + if (haddr.hci_dev != HCI_DEV_NONE) { + err = -EINVAL; + goto done; + } + + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + goto done; + } + break; + default: if (!hci_mgmt_chan_find(haddr.hci_channel)) { err = -EINVAL; @@ -1033,6 +1045,9 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (flags & MSG_OOB) return -EOPNOTSUPP; + if (hci_pi(sk)->channel == HCI_CHANNEL_LOGGING) + return -EOPNOTSUPP; + if (sk->sk_state == BT_CLOSED) return 0; @@ -1179,6 +1194,90 @@ done: return err; } +static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + struct hci_dev *hdev; + u16 index; + int err; + + /* The logging frame consists at minimum of the standard header, + * the priority byte, the ident length byte and at least one string + * terminator NUL byte. Anything shorter are invalid packets. + */ + if (len < sizeof(*hdr) + 3) + return -EINVAL; + + skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return err; + + if (memcpy_from_msg(skb_put(skb, len), msg, len)) { + err = -EFAULT; + goto drop; + } + + hdr = (void *)skb->data; + + if (__le16_to_cpu(hdr->len) != len - sizeof(*hdr)) { + err = -EINVAL; + goto drop; + } + + if (__le16_to_cpu(hdr->opcode) == 0x0000) { + __u8 priority = skb->data[sizeof(*hdr)]; + __u8 ident_len = skb->data[sizeof(*hdr) + 1]; + + /* Only the priorities 0-7 are valid and with that any other + * value results in an invalid packet. + * + * The priority byte is followed by an ident length byte and + * the NUL terminated ident string. 
Check that the ident + * length is not overflowing the packet and also that the + * ident string itself is NUL terminated. In case the ident + * length is zero, the length value actually doubles as NUL + * terminator identifier. + * + * The message follows the ident string (if present) and + * must be NUL terminated. Otherwise it is not a valid packet. + */ + if (priority > 7 || skb->data[len - 1] != 0x00 || + ident_len > len - sizeof(*hdr) - 3 || + skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) { + err = -EINVAL; + goto drop; + } + } else { + err = -EINVAL; + goto drop; + } + + index = __le16_to_cpu(hdr->index); + + if (index != MGMT_INDEX_NONE) { + hdev = hci_dev_get(index); + if (!hdev) { + err = -ENODEV; + goto drop; + } + } else { + hdev = NULL; + } + + hdr->opcode = cpu_to_le16(HCI_MON_USER_LOGGING); + + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); + err = len; + + if (hdev) + hci_dev_put(hdev); + +drop: + kfree_skb(skb); + return err; +} + static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1208,6 +1307,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, case HCI_CHANNEL_MONITOR: err = -EOPNOTSUPP; goto done; + case HCI_CHANNEL_LOGGING: + err = hci_logging_frame(sk, msg, len); + goto done; default: mutex_lock(&mgmt_chan_list_lock); chan = __hci_mgmt_chan_find(hci_pi(sk)->channel); -- cgit v1.2.3 From 030e7f8141a262e32dc064d7cf12377d769d45c2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 10 Nov 2015 09:44:53 +0200 Subject: Bluetooth: Remove unnecessary call to hci_update_background_scan The hci_conn_params_clear_all() function is only called from hci_unregister_dev() at which point it's completely futile to try to do any LE scanning updates. Simply remove this unnecessary function call. At the same time we can make the function static since it's only accessed from within the same c-file. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_core.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1878d0a96333..15e6a2bffc2b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1036,7 +1036,6 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); -void hci_conn_params_clear_all(struct hci_dev *hdev); void hci_conn_params_clear_disabled(struct hci_dev *hdev); struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bc97fc6de876..ea648e9913f9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3070,15 +3070,13 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -void hci_conn_params_clear_all(struct hci_dev *hdev) +static void hci_conn_params_clear_all(struct hci_dev *hdev) { struct hci_conn_params *params, *tmp; list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) hci_conn_params_free(params); - hci_update_background_scan(hdev); - BT_DBG("All LE connection parameters were removed"); } -- cgit v1.2.3 From be91cd05704d5a547de086d0e61c249ee62d2e13 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 10 Nov 2015 09:44:54 +0200 Subject: Bluetooth: Move synchronous request handling into hci_request.c hci_request.c is a more natural place for the synchronous request handling. Furthermore, we will soon need access to some of the previously private-to-hci_core.c functions from hci_request.c. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 193 -------------------------------------------- net/bluetooth/hci_request.c | 184 +++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 11 +++ 3 files changed, 195 insertions(+), 193 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ea648e9913f9..aa18ec701816 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -56,15 +56,6 @@ DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); -/* ----- HCI requests ----- */ - -#define HCI_REQ_DONE 0 -#define HCI_REQ_PEND 1 -#define HCI_REQ_CANCELED 2 - -#define hci_req_lock(d) mutex_lock(&d->req_lock) -#define hci_req_unlock(d) mutex_unlock(&d->req_lock) - /* ---- HCI debugfs entries ---- */ static ssize_t dut_mode_read(struct file *file, char __user *user_buf, @@ -198,190 +189,6 @@ static void hci_debugfs_create_basic(struct hci_dev *hdev) &vendor_diag_fops); } -/* ---- HCI requests ---- */ - -static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, - struct sk_buff *skb) -{ - BT_DBG("%s result 0x%2.2x", hdev->name, result); - - if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = result; - hdev->req_status = HCI_REQ_DONE; - if (skb) - hdev->req_skb = skb_get(skb); - wake_up_interruptible(&hdev->req_wait_q); - } -} - -static void hci_req_cancel(struct hci_dev *hdev, int err) -{ - BT_DBG("%s err 0x%2.2x", hdev->name, err); - - if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = err; - hdev->req_status = HCI_REQ_CANCELED; - wake_up_interruptible(&hdev->req_wait_q); - } -} - -struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u8 event, u32 timeout) -{ - DECLARE_WAITQUEUE(wait, current); - struct hci_request req; - struct sk_buff *skb; - int err = 0; - - BT_DBG("%s", hdev->name); - - hci_req_init(&req, hdev); - - 
hci_req_add_ev(&req, opcode, plen, param, event); - - hdev->req_status = HCI_REQ_PEND; - - add_wait_queue(&hdev->req_wait_q, &wait); - set_current_state(TASK_INTERRUPTIBLE); - - err = hci_req_run_skb(&req, hci_req_sync_complete); - if (err < 0) { - remove_wait_queue(&hdev->req_wait_q, &wait); - set_current_state(TASK_RUNNING); - return ERR_PTR(err); - } - - schedule_timeout(timeout); - - remove_wait_queue(&hdev->req_wait_q, &wait); - - if (signal_pending(current)) - return ERR_PTR(-EINTR); - - switch (hdev->req_status) { - case HCI_REQ_DONE: - err = -bt_to_errno(hdev->req_result); - break; - - case HCI_REQ_CANCELED: - err = -hdev->req_result; - break; - - default: - err = -ETIMEDOUT; - break; - } - - hdev->req_status = hdev->req_result = 0; - skb = hdev->req_skb; - hdev->req_skb = NULL; - - BT_DBG("%s end: err %d", hdev->name, err); - - if (err < 0) { - kfree_skb(skb); - return ERR_PTR(err); - } - - if (!skb) - return ERR_PTR(-ENODATA); - - return skb; -} -EXPORT_SYMBOL(__hci_cmd_sync_ev); - -struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u32 timeout) -{ - return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); -} -EXPORT_SYMBOL(__hci_cmd_sync); - -/* Execute request and wait for completion. */ -static int __hci_req_sync(struct hci_dev *hdev, - void (*func)(struct hci_request *req, - unsigned long opt), - unsigned long opt, __u32 timeout) -{ - struct hci_request req; - DECLARE_WAITQUEUE(wait, current); - int err = 0; - - BT_DBG("%s start", hdev->name); - - hci_req_init(&req, hdev); - - hdev->req_status = HCI_REQ_PEND; - - func(&req, opt); - - add_wait_queue(&hdev->req_wait_q, &wait); - set_current_state(TASK_INTERRUPTIBLE); - - err = hci_req_run_skb(&req, hci_req_sync_complete); - if (err < 0) { - hdev->req_status = 0; - - remove_wait_queue(&hdev->req_wait_q, &wait); - set_current_state(TASK_RUNNING); - - /* ENODATA means the HCI request command queue is empty. 
- * This can happen when a request with conditionals doesn't - * trigger any commands to be sent. This is normal behavior - * and should not trigger an error return. - */ - if (err == -ENODATA) - return 0; - - return err; - } - - schedule_timeout(timeout); - - remove_wait_queue(&hdev->req_wait_q, &wait); - - if (signal_pending(current)) - return -EINTR; - - switch (hdev->req_status) { - case HCI_REQ_DONE: - err = -bt_to_errno(hdev->req_result); - break; - - case HCI_REQ_CANCELED: - err = -hdev->req_result; - break; - - default: - err = -ETIMEDOUT; - break; - } - - hdev->req_status = hdev->req_result = 0; - - BT_DBG("%s end: err %d", hdev->name, err); - - return err; -} - -static int hci_req_sync(struct hci_dev *hdev, - void (*req)(struct hci_request *req, - unsigned long opt), - unsigned long opt, __u32 timeout) -{ - int ret; - - if (!test_bit(HCI_UP, &hdev->flags)) - return -ENETDOWN; - - /* Serialize all requests */ - hci_req_lock(hdev); - ret = __hci_req_sync(hdev, req, opt, timeout); - hci_req_unlock(hdev); - - return ret; -} - static void hci_reset_req(struct hci_request *req, unsigned long opt) { BT_DBG("%s %ld", req->hdev->name, opt); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 5ba27c30e8f2..aa868f6f5a90 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -27,6 +27,10 @@ #include "smp.h" #include "hci_request.h" +#define HCI_REQ_DONE 0 +#define HCI_REQ_PEND 1 +#define HCI_REQ_CANCELED 2 + void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { skb_queue_head_init(&req->cmd_q); @@ -82,6 +86,186 @@ int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete) return req_run(req, NULL, complete); } +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, + struct sk_buff *skb) +{ + BT_DBG("%s result 0x%2.2x", hdev->name, result); + + if (hdev->req_status == HCI_REQ_PEND) { + hdev->req_result = result; + hdev->req_status = HCI_REQ_DONE; + if (skb) + 
hdev->req_skb = skb_get(skb); + wake_up_interruptible(&hdev->req_wait_q); + } +} + +void hci_req_cancel(struct hci_dev *hdev, int err) +{ + BT_DBG("%s err 0x%2.2x", hdev->name, err); + + if (hdev->req_status == HCI_REQ_PEND) { + hdev->req_result = err; + hdev->req_status = HCI_REQ_CANCELED; + wake_up_interruptible(&hdev->req_wait_q); + } +} + +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout) +{ + DECLARE_WAITQUEUE(wait, current); + struct hci_request req; + struct sk_buff *skb; + int err = 0; + + BT_DBG("%s", hdev->name); + + hci_req_init(&req, hdev); + + hci_req_add_ev(&req, opcode, plen, param, event); + + hdev->req_status = HCI_REQ_PEND; + + add_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + err = hci_req_run_skb(&req, hci_req_sync_complete); + if (err < 0) { + remove_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_RUNNING); + return ERR_PTR(err); + } + + schedule_timeout(timeout); + + remove_wait_queue(&hdev->req_wait_q, &wait); + + if (signal_pending(current)) + return ERR_PTR(-EINTR); + + switch (hdev->req_status) { + case HCI_REQ_DONE: + err = -bt_to_errno(hdev->req_result); + break; + + case HCI_REQ_CANCELED: + err = -hdev->req_result; + break; + + default: + err = -ETIMEDOUT; + break; + } + + hdev->req_status = hdev->req_result = 0; + skb = hdev->req_skb; + hdev->req_skb = NULL; + + BT_DBG("%s end: err %d", hdev->name, err); + + if (err < 0) { + kfree_skb(skb); + return ERR_PTR(err); + } + + if (!skb) + return ERR_PTR(-ENODATA); + + return skb; +} +EXPORT_SYMBOL(__hci_cmd_sync_ev); + +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout) +{ + return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); +} +EXPORT_SYMBOL(__hci_cmd_sync); + +/* Execute request and wait for completion. 
*/ +int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout) +{ + struct hci_request req; + DECLARE_WAITQUEUE(wait, current); + int err = 0; + + BT_DBG("%s start", hdev->name); + + hci_req_init(&req, hdev); + + hdev->req_status = HCI_REQ_PEND; + + func(&req, opt); + + add_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + err = hci_req_run_skb(&req, hci_req_sync_complete); + if (err < 0) { + hdev->req_status = 0; + + remove_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_RUNNING); + + /* ENODATA means the HCI request command queue is empty. + * This can happen when a request with conditionals doesn't + * trigger any commands to be sent. This is normal behavior + * and should not trigger an error return. + */ + if (err == -ENODATA) + return 0; + + return err; + } + + schedule_timeout(timeout); + + remove_wait_queue(&hdev->req_wait_q, &wait); + + if (signal_pending(current)) + return -EINTR; + + switch (hdev->req_status) { + case HCI_REQ_DONE: + err = -bt_to_errno(hdev->req_result); + break; + + case HCI_REQ_CANCELED: + err = -hdev->req_result; + break; + + default: + err = -ETIMEDOUT; + break; + } + + hdev->req_status = hdev->req_result = 0; + + BT_DBG("%s end: err %d", hdev->name, err); + + return err; +} + +int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout) +{ + int ret; + + if (!test_bit(HCI_UP, &hdev->flags)) + return -ENETDOWN; + + /* Serialize all requests */ + hci_req_lock(hdev); + ret = __hci_req_sync(hdev, req, opt, timeout); + hci_req_unlock(hdev); + + return ret; +} + struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param) { diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 25c7f1305dcb..6e6bad4ca4ab 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -20,6 
+20,9 @@ SOFTWARE IS DISCLAIMED. */ +#define hci_req_lock(d) mutex_lock(&d->req_lock) +#define hci_req_unlock(d) mutex_unlock(&d->req_lock) + struct hci_request { struct hci_dev *hdev; struct sk_buff_head cmd_q; @@ -41,6 +44,14 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, hci_req_complete_t *req_complete, hci_req_complete_skb_t *req_complete_skb); +int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout); +int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout); +void hci_req_cancel(struct hci_dev *hdev, int err); + struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); -- cgit v1.2.3 From b504430c868c2979d2dbee9be051e425fdeb36ac Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 10 Nov 2015 09:44:55 +0200 Subject: Bluetooth: Add 'sync' specifier to synchronous request APIs To make it clear which HCI request APIs target specifically synchronous requests, add 'sync' to the API names. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 28 ++++++++++++++-------------- net/bluetooth/hci_request.c | 6 +++--- net/bluetooth/hci_request.h | 6 +++--- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index aa18ec701816..ec1bebaade32 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -92,14 +92,14 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE)) return -EALREADY; - hci_req_lock(hdev); + hci_req_sync_lock(hdev); if (enable) skb = __hci_cmd_sync(hdev, HCI_OP_ENABLE_DUT_MODE, 0, NULL, HCI_CMD_TIMEOUT); else skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_CMD_TIMEOUT); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -156,9 +156,9 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, !test_bit(HCI_RUNNING, &hdev->flags)) goto done; - hci_req_lock(hdev); + hci_req_sync_lock(hdev); err = hdev->set_diag(hdev, enable); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); if (err < 0) return err; @@ -1257,7 +1257,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) BT_DBG("%s %p", hdev->name, hdev); - hci_req_lock(hdev); + hci_req_sync_lock(hdev); if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { ret = -ENODEV; @@ -1410,7 +1410,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) } done: - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); return ret; } @@ -1504,12 +1504,12 @@ int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work(&hdev->power_off); - hci_req_cancel(hdev, ENODEV); - hci_req_lock(hdev); + hci_req_sync_cancel(hdev, ENODEV); + hci_req_sync_lock(hdev); if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { cancel_delayed_work_sync(&hdev->cmd_timer); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); return 0; } @@ -1607,7 +1607,7 @@ int 
hci_dev_do_close(struct hci_dev *hdev) memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); bacpy(&hdev->random_addr, BDADDR_ANY); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); hci_dev_put(hdev); return 0; @@ -1643,7 +1643,7 @@ static int hci_dev_do_reset(struct hci_dev *hdev) BT_DBG("%s %p", hdev->name, hdev); - hci_req_lock(hdev); + hci_req_sync_lock(hdev); /* Drop queues */ skb_queue_purge(&hdev->rx_q); @@ -1667,7 +1667,7 @@ static int hci_dev_do_reset(struct hci_dev *hdev) ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); return ret; } @@ -3537,9 +3537,9 @@ struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); - hci_req_lock(hdev); + hci_req_sync_lock(hdev); skb = __hci_cmd_sync(hdev, opcode, plen, param, timeout); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); return skb; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index aa868f6f5a90..ae19bce89616 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -100,7 +100,7 @@ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, } } -void hci_req_cancel(struct hci_dev *hdev, int err) +void hci_req_sync_cancel(struct hci_dev *hdev, int err) { BT_DBG("%s err 0x%2.2x", hdev->name, err); @@ -259,9 +259,9 @@ int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, return -ENETDOWN; /* Serialize all requests */ - hci_req_lock(hdev); + hci_req_sync_lock(hdev); ret = __hci_req_sync(hdev, req, opt, timeout); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); return ret; } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 6e6bad4ca4ab..5b3240cf9eb7 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -20,8 +20,8 @@ SOFTWARE IS DISCLAIMED. 
*/ -#define hci_req_lock(d) mutex_lock(&d->req_lock) -#define hci_req_unlock(d) mutex_unlock(&d->req_lock) +#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) +#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) struct hci_request { struct hci_dev *hdev; @@ -50,7 +50,7 @@ int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, unsigned long opt), unsigned long opt, __u32 timeout); -void hci_req_cancel(struct hci_dev *hdev, int err); +void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); -- cgit v1.2.3 From 5fc16cc4f3044551587dfee8e12422cbf59303e8 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:16 +0200 Subject: Bluetooth: Add stubs for synchronous HCI request functionality Prepare hci_request.c to have code for doing synchronous HCI requests, such as LE scanning or advertising changes. The necessary work callbacks will be set up in hci_request_setup() and cleaned up in hci_request_cancel_all(). The former is used when an HCI device gets registered, and the latter each time it is powered off (or unregistered).
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 4 ++++ net/bluetooth/hci_request.c | 8 ++++++++ net/bluetooth/hci_request.h | 3 +++ 3 files changed, 15 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ec1bebaade32..965bc01a0d91 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1609,6 +1609,8 @@ int hci_dev_do_close(struct hci_dev *hdev) hci_req_sync_unlock(hdev); + hci_request_cancel_all(hdev); + hci_dev_put(hdev); return 0; } @@ -3161,6 +3163,8 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout); + hci_request_setup(hdev); + hci_init_sysfs(hdev); discovery_init(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index ae19bce89616..d48206277fe4 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -845,3 +845,11 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) return 0; } + +void hci_request_setup(struct hci_dev *hdev) +{ +} + +void hci_request_cancel_all(struct hci_dev *hdev) +{ +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 5b3240cf9eb7..9759b7175f8e 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -70,3 +70,6 @@ void __hci_update_background_scan(struct hci_request *req); int hci_abort_conn(struct hci_conn *conn, u8 reason); void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason); + +void hci_request_setup(struct hci_dev *hdev); +void hci_request_cancel_all(struct hci_dev *hdev); -- cgit v1.2.3 From 2e93e53b8f86fb38a9a3c3bd08e539c40b3f8d89 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:17 +0200 Subject: Bluetooth: Run all background scan updates through req_workqueue Instead of firing off a simple async request, queue all background scan updates through req_workqueue and use hci_req_sync() there to ensure that no two updates overlap with
each other. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 ++ net/bluetooth/hci_request.c | 39 +++++++++++++++++---------------------- net/bluetooth/hci_request.h | 6 +++++- net/bluetooth/mgmt.c | 2 +- 4 files changed, 25 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 15e6a2bffc2b..c2ca6a58d1e0 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -327,6 +327,8 @@ struct hci_dev { struct work_struct cmd_work; struct work_struct tx_work; + struct work_struct bg_scan_update; + struct sk_buff_head rx_q; struct sk_buff_head raw_q; struct sk_buff_head cmd_q; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index d48206277fe4..0adbb59ec2f0 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -731,28 +731,6 @@ void __hci_update_background_scan(struct hci_request *req) } } -static void update_background_scan_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - if (status) - BT_DBG("HCI request failed to update background scanning: " - "status 0x%2.2x", status); -} - -void hci_update_background_scan(struct hci_dev *hdev) -{ - int err; - struct hci_request req; - - hci_req_init(&req, hdev); - - __hci_update_background_scan(&req); - - err = hci_req_run(&req, update_background_scan_complete); - if (err && err != -ENODATA) - BT_ERR("Failed to run HCI request: err %d", err); -} - void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -846,10 +824,27 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) return 0; } +static void update_bg_scan(struct hci_request *req, unsigned long opt) +{ + hci_dev_lock(req->hdev); + __hci_update_background_scan(req); + hci_dev_unlock(req->hdev); +} + +static void bg_scan_update(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + bg_scan_update); 
+ + hci_req_sync(hdev, update_bg_scan, 0, HCI_CMD_TIMEOUT); +} + void hci_request_setup(struct hci_dev *hdev) { + INIT_WORK(&hdev->bg_scan_update, bg_scan_update); } void hci_request_cancel_all(struct hci_dev *hdev) { + cancel_work_sync(&hdev->bg_scan_update); } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 9759b7175f8e..983e687fee22 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -64,12 +64,16 @@ void __hci_update_page_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, u8 *own_addr_type); -void hci_update_background_scan(struct hci_dev *hdev); void __hci_update_background_scan(struct hci_request *req); int hci_abort_conn(struct hci_conn *conn, u8 reason); void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason); +static inline void hci_update_background_scan(struct hci_dev *hdev) +{ + queue_work(hdev->req_workqueue, &hdev->bg_scan_update); +} + void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7f22119276f3..29c9fec814b4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2510,8 +2510,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) hci_req_init(&req, hdev); update_adv_data(&req); update_scan_rsp_data(&req); - __hci_update_background_scan(&req); hci_req_run(&req, NULL); + hci_update_background_scan(hdev); } unlock: -- cgit v1.2.3 From 51d7a94d56f842a6bd752c11de2f80f2cbc4a507 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:18 +0200 Subject: Bluetooth: Don't wait for HCI in Add/Remove Device There's no point in waiting for HCI activity in Add/Remove Device since the effects of these calls are long-lasting and we can anyway not report up to the application all HCI failures. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 161 ++++++++++++++++----------------------------------- 1 file changed, 50 insertions(+), 111 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 29c9fec814b4..27504949e995 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6076,10 +6076,9 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) } /* This function requires the caller holds hdev->lock */ -static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, +static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, u8 auto_connect) { - struct hci_dev *hdev = req->hdev; struct hci_conn_params *params; params = hci_conn_params_add(hdev, addr, addr_type); @@ -6099,26 +6098,17 @@ static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, */ if (params->explicit_connect) list_add(&params->action, &hdev->pend_le_conns); - - __hci_update_background_scan(req); break; case HCI_AUTO_CONN_REPORT: if (params->explicit_connect) list_add(&params->action, &hdev->pend_le_conns); else list_add(&params->action, &hdev->pend_le_reports); - __hci_update_background_scan(req); break; case HCI_AUTO_CONN_DIRECT: case HCI_AUTO_CONN_ALWAYS: - if (!is_connected(hdev, addr, addr_type)) { + if (!is_connected(hdev, addr, addr_type)) list_add(&params->action, &hdev->pend_le_conns); - /* If we are in scan phase of connecting, we were - * already added to pend_le_conns and scanning.
- */ - if (params->auto_connect != HCI_AUTO_CONN_EXPLICIT) - __hci_update_background_scan(req); - } break; } @@ -6142,25 +6132,6 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } -static void add_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - struct mgmt_pending_cmd *cmd; - - BT_DBG("status 0x%02x", status); - - hci_dev_lock(hdev); - - cmd = pending_find(MGMT_OP_ADD_DEVICE, hdev); - if (!cmd) - goto unlock; - - cmd->cmd_complete(cmd, mgmt_status(status)); - mgmt_pending_remove(cmd); - -unlock: - hci_dev_unlock(hdev); -} - static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -6198,9 +6169,10 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } @@ -6229,33 +6201,31 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, * hci_conn_params_lookup. */ if (!hci_is_identity_address(&cp->addr.bdaddr, addr_type)) { - err = cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } /* If the connection parameters don't exist for this device, * they will be created and configured with defaults. 
*/ - if (hci_conn_params_set(&req, &cp->addr.bdaddr, addr_type, + if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type, auto_conn) < 0) { - err = cmd->cmd_complete(cmd, MGMT_STATUS_FAILED); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_FAILED, &cp->addr, + sizeof(cp->addr)); goto unlock; } + hci_update_background_scan(hdev); + added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); - err = hci_req_run(&req, add_device_complete); - if (err < 0) { - /* ENODATA means no HCI commands were needed (e.g. if - * the adapter is powered off). - */ - if (err == -ENODATA) - err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); - mgmt_pending_remove(cmd); - } + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_SUCCESS, &cp->addr, + sizeof(cp->addr)); unlock: hci_dev_unlock(hdev); @@ -6273,55 +6243,25 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); } -static void remove_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - struct mgmt_pending_cmd *cmd; - - BT_DBG("status 0x%02x", status); - - hci_dev_lock(hdev); - - cmd = pending_find(MGMT_OP_REMOVE_DEVICE, hdev); - if (!cmd) - goto unlock; - - cmd->cmd_complete(cmd, mgmt_status(status)); - mgmt_pending_remove(cmd); - -unlock: - hci_dev_unlock(hdev); -} - static int remove_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_device *cp = data; - struct mgmt_pending_cmd *cmd; - struct hci_request req; int err; BT_DBG("%s", hdev->name); - hci_req_init(&req, hdev); - hci_dev_lock(hdev); - cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_DEVICE, hdev, data, len); - if (!cmd) { - err = -ENOMEM; - goto unlock; - } - - cmd->cmd_complete = addr_cmd_complete; - if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { struct hci_conn_params *params; u8 addr_type; if (!bdaddr_type_is_valid(cp->addr.type)) { - err = 
cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } @@ -6330,13 +6270,15 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, &cp->addr.bdaddr, cp->addr.type); if (err) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, + sizeof(cp->addr)); goto unlock; } - __hci_update_page_scan(&req); + hci_update_page_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -6351,33 +6293,36 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, * hci_conn_params_lookup. */ if (!hci_is_identity_address(&cp->addr.bdaddr, addr_type)) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (!params) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } if (params->auto_connect == HCI_AUTO_CONN_DISABLED || params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } list_del(&params->action); list_del(&params->list); kfree(params); - __hci_update_background_scan(&req); + hci_update_background_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); } else { @@ -6385,9 +6330,10 @@
static int remove_device(struct sock *sk, struct hci_dev *hdev, struct bdaddr_list *b, *btmp; if (cp->addr.type) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } @@ -6397,7 +6343,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - __hci_update_page_scan(&req); + hci_update_page_scan(hdev); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -6414,20 +6360,13 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, BT_DBG("All LE connection parameters were removed"); - __hci_update_background_scan(&req); + hci_update_background_scan(hdev); } complete: - err = hci_req_run(&req, remove_device_complete); - if (err < 0) { - /* ENODATA means no HCI commands were needed (e.g. if - * the adapter is powered off). - */ - if (err == -ENODATA) - err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); - mgmt_pending_remove(cmd); - } - + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_SUCCESS, &cp->addr, + sizeof(cp->addr)); unlock: hci_dev_unlock(hdev); return err; -- cgit v1.2.3 From 4ebeee2dff9815619be6ff9a845d33716f48468c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:19 +0200 Subject: Bluetooth: Add HCI status return parameter to hci_req_sync() In some cases it may be important to get the exact HCI status rather than the converted HCI-to-errno value. Add an optional return parameter to the hci_req_sync() API to allow for this. Since there are no good HCI translation candidates for cancelation and timeout, use the "unknown" status code for those cases. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 3 ++- net/bluetooth/hci_core.c | 26 +++++++++++++------------- net/bluetooth/hci_request.c | 12 +++++++++--- net/bluetooth/hci_request.h | 4 ++-- 4 files changed, 26 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0205b80cc90b..cc2216727655 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -452,7 +452,8 @@ enum { #define HCI_ERROR_REMOTE_POWER_OFF 0x15 #define HCI_ERROR_LOCAL_HOST_TERM 0x16 #define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18 -#define HCI_ERROR_INVALID_LL_PARAMS 0x1E +#define HCI_ERROR_INVALID_LL_PARAMS 0x1e +#define HCI_ERROR_UNSPECIFIED 0x1f #define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c /* Flow control modes */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 965bc01a0d91..029d7798cffa 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -768,14 +768,14 @@ static int __hci_init(struct hci_dev *hdev) { int err; - err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; if (hci_dev_test_flag(hdev, HCI_SETUP)) hci_debugfs_create_basic(hdev); - err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; @@ -786,11 +786,11 @@ static int __hci_init(struct hci_dev *hdev) if (hdev->dev_type != HCI_BREDR) return 0; - err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; - err = __hci_req_sync(hdev, hci_init4_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init4_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; @@ -846,7 +846,7 @@ static int __hci_unconf_init(struct hci_dev *hdev) if 
(test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return 0; - err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; @@ -1204,7 +1204,7 @@ int hci_inquiry(void __user *arg) if (do_inquiry) { err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir, - timeo); + timeo, NULL); if (err < 0) goto done; @@ -1570,7 +1570,7 @@ int hci_dev_do_close(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) && !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { set_bit(HCI_INIT, &hdev->flags); - __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); + __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT, NULL); clear_bit(HCI_INIT, &hdev->flags); } @@ -1667,7 +1667,7 @@ static int hci_dev_do_reset(struct hci_dev *hdev) atomic_set(&hdev->cmd_cnt, 1); hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; - ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); + ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT, NULL); hci_req_sync_unlock(hdev); return ret; @@ -1802,7 +1802,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); break; case HCISETENCRYPT: @@ -1814,18 +1814,18 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); if (err) break; } err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); break; case HCISETSCAN: err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); /* Ensure that the connectable and discoverable states * get correctly modified as this was a non-mgmt change. 
@@ -1836,7 +1836,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) case HCISETLINKPOL: err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); break; case HCISETLINKMODE: diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 0adbb59ec2f0..b1d4d5bba7c1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -186,7 +186,7 @@ EXPORT_SYMBOL(__hci_cmd_sync); /* Execute request and wait for completion. */ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout) + unsigned long opt, u32 timeout, u8 *hci_status) { struct hci_request req; DECLARE_WAITQUEUE(wait, current); @@ -231,14 +231,20 @@ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, switch (hdev->req_status) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); + if (hci_status) + *hci_status = hdev->req_result; break; case HCI_REQ_CANCELED: err = -hdev->req_result; + if (hci_status) + *hci_status = HCI_ERROR_UNSPECIFIED; break; default: err = -ETIMEDOUT; + if (hci_status) + *hci_status = HCI_ERROR_UNSPECIFIED; break; } @@ -251,7 +257,7 @@ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout) + unsigned long opt, u32 timeout, u8 *hci_status) { int ret; @@ -260,7 +266,7 @@ int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, /* Serialize all requests */ hci_req_sync_lock(hdev); - ret = __hci_req_sync(hdev, req, opt, timeout); + ret = __hci_req_sync(hdev, req, opt, timeout, hci_status); hci_req_sync_unlock(hdev); return ret; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 983e687fee22..8441d12a62dd 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -46,10 +46,10 @@ void 
hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout); + unsigned long opt, u32 timeout, u8 *hci_status); int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout); + unsigned long opt, u32 timeout, u8 *hci_status); void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, -- cgit v1.2.3 From 84235d222a297a281dbe984ef4f28519cacc5fe3 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:20 +0200 Subject: Bluetooth: Use req_workqueue for explicit connect requests Since explicit connect requests are also a sub-category of passive scan updates, run them through the same workqueue as the other passive scan changes. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 39 ++++----------------------------------- net/bluetooth/hci_request.c | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fd6120a41138..1ed1e153b3fa 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -933,26 +933,6 @@ done: return conn; } -static void hci_connect_le_scan_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - struct hci_conn *conn; - - if (!status) - return; - - BT_ERR("Failed to add device to auto conn whitelist: status 0x%2.2x", - status); - - hci_dev_lock(hdev); - - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (conn) - hci_le_conn_failed(conn, status); - - hci_dev_unlock(hdev); -} - static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) { struct hci_conn *conn; @@ -968,10 +948,9 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) } /* This 
function requires the caller holds hdev->lock */ -static int hci_explicit_conn_params_set(struct hci_request *req, +static int hci_explicit_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) { - struct hci_dev *hdev = req->hdev; struct hci_conn_params *params; if (is_connected(hdev, addr, addr_type)) @@ -999,7 +978,6 @@ static int hci_explicit_conn_params_set(struct hci_request *req, } params->explicit_connect = true; - __hci_update_background_scan(req); BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, params->auto_connect); @@ -1013,8 +991,6 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u16 conn_timeout, u8 role) { struct hci_conn *conn; - struct hci_request req; - int err; /* Let's make sure that le is enabled.*/ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { @@ -1046,25 +1022,18 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, if (!conn) return ERR_PTR(-ENOMEM); - hci_req_init(&req, hdev); - - if (hci_explicit_conn_params_set(&req, dst, dst_type) < 0) + if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) return ERR_PTR(-EBUSY); conn->state = BT_CONNECT; set_bit(HCI_CONN_SCANNING, &conn->flags); - - err = hci_req_run(&req, hci_connect_le_scan_complete); - if (err && err != -ENODATA) { - hci_conn_del(conn); - return ERR_PTR(err); - } - conn->dst_type = dst_type; conn->sec_level = BT_SECURITY_LOW; conn->pending_sec_level = sec_level; conn->conn_timeout = conn_timeout; + hci_update_background_scan(hdev); + done: hci_conn_hold(conn); return conn; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b1d4d5bba7c1..c0ea310a116a 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -841,8 +841,21 @@ static void bg_scan_update(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, bg_scan_update); + struct hci_conn *conn; + u8 status; + int err; + + err = hci_req_sync(hdev, update_bg_scan, 0, 
HCI_CMD_TIMEOUT, &status); + if (!err) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + hci_le_conn_failed(conn, status); - hci_req_sync(hdev, update_bg_scan, 0, HCI_CMD_TIMEOUT); + hci_dev_unlock(hdev); } void hci_request_setup(struct hci_dev *hdev) -- cgit v1.2.3 From af02dd446999796a742e3940d1a25f2b35b6eeba Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:21 +0200 Subject: Bluetooth: Use req_workqueue for background scanning when powering on We can easily use the new req_workqueue based background scan update for the power on case. This also removes the last external user of __hci_update_background_scan(). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 27504949e995..bb870c3aadae 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7465,9 +7465,8 @@ void mgmt_index_removed(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -static void restart_le_actions(struct hci_request *req) +static void restart_le_actions(struct hci_dev *hdev) { - struct hci_dev *hdev = req->hdev; struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { @@ -7488,8 +7487,6 @@ static void restart_le_actions(struct hci_request *req) break; } } - - __hci_update_background_scan(req); } static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) @@ -7505,6 +7502,9 @@ static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) * decide if the public address or static address is used. 
*/ smp_register(hdev); + + restart_le_actions(hdev); + hci_update_background_scan(hdev); } hci_dev_lock(hdev); @@ -7583,8 +7583,6 @@ static int powered_update_hci(struct hci_dev *hdev) hdev->cur_adv_instance) schedule_adv_instance(&req, hdev->cur_adv_instance, true); - - restart_le_actions(&req); } link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); -- cgit v1.2.3 From 145a0913ef180af6be7af2c50056ae171c2a2b94 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:22 +0200 Subject: Bluetooth: Make __hci_update_background_scan private to hci_request.c There are no more external users so this API can be made private. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 2 +- net/bluetooth/hci_request.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index c0ea310a116a..8aa06cc545c3 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -670,7 +670,7 @@ void hci_update_page_scan(struct hci_dev *hdev) * * This function requires the caller holds hdev->lock. 
*/ -void __hci_update_background_scan(struct hci_request *req) +static void __hci_update_background_scan(struct hci_request *req) { struct hci_dev *hdev = req->hdev; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 8441d12a62dd..1f1194628652 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -64,8 +64,6 @@ void __hci_update_page_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, u8 *own_addr_type); -void __hci_update_background_scan(struct hci_request *req); - int hci_abort_conn(struct hci_conn *conn, u8 reason); void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason); -- cgit v1.2.3 From 7c1fbed23981faff2840ddc8909e7c78d80ade30 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:23 +0200 Subject: Bluetooth: Move LE scan disable/restart behind req_workqueue To avoid any risks of races, place also these LE scan modification work callbacks behind the same work queue as the other LE scan changes. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 5 +- net/bluetooth/hci_core.c | 168 ------------------------------------ net/bluetooth/hci_request.c | 179 +++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 4 +- 4 files changed, 183 insertions(+), 173 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c2ca6a58d1e0..1f75aebbd8c4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -328,6 +328,8 @@ struct hci_dev { struct work_struct tx_work; struct work_struct bg_scan_update; + struct delayed_work le_scan_disable; + struct delayed_work le_scan_restart; struct sk_buff_head rx_q; struct sk_buff_head raw_q; @@ -372,9 +374,6 @@ struct hci_dev { DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS); - struct delayed_work le_scan_disable; - struct delayed_work le_scan_restart; - __s8 adv_tx_power; __u8 adv_data[HCI_MAX_AD_LENGTH]; __u8 adv_data_len; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 029d7798cffa..0655521dd8bc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1527,9 +1527,6 @@ int hci_dev_do_close(struct hci_dev *hdev) if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) cancel_delayed_work(&hdev->service_cache); - cancel_delayed_work_sync(&hdev->le_scan_disable); - cancel_delayed_work_sync(&hdev->le_scan_restart); - if (hci_dev_test_flag(hdev, HCI_MGMT)) cancel_delayed_work_sync(&hdev->rpa_expired); @@ -2889,169 +2886,6 @@ static void hci_conn_params_clear_all(struct hci_dev *hdev) BT_DBG("All LE connection parameters were removed"); } -static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - if (status) { - BT_ERR("Failed to start inquiry: status %d", status); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - return; - } -} - -static void le_scan_disable_work_complete(struct hci_dev 
*hdev, u8 status, - u16 opcode) -{ - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - struct hci_cp_inquiry cp; - int err; - - if (status) { - BT_ERR("Failed to disable LE scanning: status %d", status); - return; - } - - hdev->discovery.scan_start = 0; - - switch (hdev->discovery.type) { - case DISCOV_TYPE_LE: - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - break; - - case DISCOV_TYPE_INTERLEAVED: - hci_dev_lock(hdev); - - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { - /* If we were running LE only scan, change discovery - * state. If we were running both LE and BR/EDR inquiry - * simultaneously, and BR/EDR inquiry is already - * finished, stop discovery, otherwise BR/EDR inquiry - * will stop discovery when finished. If we will resolve - * remote device name, do not change discovery state. - */ - if (!test_bit(HCI_INQUIRY, &hdev->flags) && - hdev->discovery.state != DISCOVERY_RESOLVING) - hci_discovery_set_state(hdev, - DISCOVERY_STOPPED); - } else { - struct hci_request req; - - hci_inquiry_cache_flush(hdev); - - hci_req_init(&req, hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; - hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - err = hci_req_run(&req, inquiry_complete); - if (err) { - BT_ERR("Inquiry request failed: err %d", err); - hci_discovery_set_state(hdev, - DISCOVERY_STOPPED); - } - } - - hci_dev_unlock(hdev); - break; - } -} - -static void le_scan_disable_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_disable.work); - struct hci_request req; - int err; - - BT_DBG("%s", hdev->name); - - cancel_delayed_work_sync(&hdev->le_scan_restart); - - hci_req_init(&req, hdev); - - hci_req_add_le_scan_disable(&req); - - err = hci_req_run(&req, le_scan_disable_work_complete); - if (err) - BT_ERR("Disable LE scanning request 
failed: err %d", err); -} - -static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - unsigned long timeout, duration, scan_start, now; - - BT_DBG("%s", hdev->name); - - if (status) { - BT_ERR("Failed to restart LE scan: status %d", status); - return; - } - - if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || - !hdev->discovery.scan_start) - return; - - /* When the scan was started, hdev->le_scan_disable has been queued - * after duration from scan_start. During scan restart this job - * has been canceled, and we need to queue it again after proper - * timeout, to make sure that scan does not run indefinitely. - */ - duration = hdev->discovery.scan_duration; - scan_start = hdev->discovery.scan_start; - now = jiffies; - if (now - scan_start <= duration) { - int elapsed; - - if (now >= scan_start) - elapsed = now - scan_start; - else - elapsed = ULONG_MAX - scan_start + now; - - timeout = duration - elapsed; - } else { - timeout = 0; - } - queue_delayed_work(hdev->workqueue, - &hdev->le_scan_disable, timeout); -} - -static void le_scan_restart_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_restart.work); - struct hci_request req; - struct hci_cp_le_set_scan_enable cp; - int err; - - BT_DBG("%s", hdev->name); - - /* If controller is not scanning we are done. */ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return; - - hci_req_init(&req, hdev); - - hci_req_add_le_scan_disable(&req); - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_ENABLE; - cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - - err = hci_req_run(&req, le_scan_restart_work_complete); - if (err) - BT_ERR("Restart LE scan request failed: err %d", err); -} - /* Copy the Identity Address of the controller. * * If the controller has a public BD_ADDR, then by default use that one. 
@@ -3151,8 +2985,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); - INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); - INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, hci_adv_timeout_expire); skb_queue_head_init(&hdev->rx_q); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 8aa06cc545c3..4588fe2bfc0e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -858,12 +858,191 @@ static void bg_scan_update(struct work_struct *work) hci_dev_unlock(hdev); } +static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + if (status) { + BT_ERR("Failed to start inquiry: status %d", status); + + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + return; + } +} + +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +{ + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; + struct hci_cp_inquiry cp; + int err; + + if (status) { + BT_ERR("Failed to disable LE scanning: status %d", status); + return; + } + + hdev->discovery.scan_start = 0; + + switch (hdev->discovery.type) { + case DISCOV_TYPE_LE: + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + break; + + case DISCOV_TYPE_INTERLEAVED: + hci_dev_lock(hdev); + + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, + &hdev->quirks)) { + /* If we were running LE only scan, change discovery + * state. If we were running both LE and BR/EDR inquiry + * simultaneously, and BR/EDR inquiry is already + * finished, stop discovery, otherwise BR/EDR inquiry + * will stop discovery when finished. If we will resolve + * remote device name, do not change discovery state. 
+ */ + if (!test_bit(HCI_INQUIRY, &hdev->flags) && + hdev->discovery.state != DISCOVERY_RESOLVING) + hci_discovery_set_state(hdev, + DISCOVERY_STOPPED); + } else { + struct hci_request req; + + hci_inquiry_cache_flush(hdev); + + hci_req_init(&req, hdev); + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + err = hci_req_run(&req, inquiry_complete); + if (err) { + BT_ERR("Inquiry request failed: err %d", err); + hci_discovery_set_state(hdev, + DISCOVERY_STOPPED); + } + } + + hci_dev_unlock(hdev); + break; + } +} + +static void le_scan_disable(struct hci_request *req, unsigned long opt) +{ + hci_req_add_le_scan_disable(req); +} + +static void le_scan_disable_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_disable.work); + u8 status; + int err; + + BT_DBG("%s", hdev->name); + + cancel_delayed_work(&hdev->le_scan_restart); + + err = hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); + if (err) + return; + + le_scan_disable_work_complete(hdev, status); +} + +static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status) +{ + unsigned long timeout, duration, scan_start, now; + + BT_DBG("%s", hdev->name); + + if (status) { + BT_ERR("Failed to restart LE scan: status %d", status); + return; + } + + hci_dev_lock(hdev); + + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || + !hdev->discovery.scan_start) + goto unlock; + + /* When the scan was started, hdev->le_scan_disable has been queued + * after duration from scan_start. During scan restart this job + * has been canceled, and we need to queue it again after proper + * timeout, to make sure that scan does not run indefinitely. 
+ */ + duration = hdev->discovery.scan_duration; + scan_start = hdev->discovery.scan_start; + now = jiffies; + if (now - scan_start <= duration) { + int elapsed; + + if (now >= scan_start) + elapsed = now - scan_start; + else + elapsed = ULONG_MAX - scan_start + now; + + timeout = duration - elapsed; + } else { + timeout = 0; + } + + queue_delayed_work(hdev->req_workqueue, + &hdev->le_scan_disable, timeout); + +unlock: + hci_dev_unlock(hdev); +} + +static void le_scan_restart(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_enable cp; + + /* If controller is not scanning we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return; + + hci_req_add_le_scan_disable(req); + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); +} + +static void le_scan_restart_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); + u8 status; + int err; + + BT_DBG("%s", hdev->name); + + err = hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); + if (err) + return; + + le_scan_restart_work_complete(hdev, status); +} + void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->bg_scan_update, bg_scan_update); + INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); + INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); } void hci_request_cancel_all(struct hci_dev *hdev) { cancel_work_sync(&hdev->bg_scan_update); + cancel_delayed_work_sync(&hdev->le_scan_disable); + cancel_delayed_work_sync(&hdev->le_scan_restart); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index bb870c3aadae..a229cfd0530e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4367,7 +4367,7 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, hdev->discovery.scan_duration = timeout; } - 
queue_delayed_work(hdev->workqueue, + queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, timeout); } @@ -8389,7 +8389,7 @@ static void restart_le_scan(struct hci_dev *hdev) hdev->discovery.scan_duration)) return; - queue_delayed_work(hdev->workqueue, &hdev->le_scan_restart, + queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_restart, DISCOV_LE_RESTART_DELAY); } -- cgit v1.2.3 From 591752afbcc8179979296698cae698541d2e5431 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:24 +0200 Subject: Bluetooth: Add discovery type validity helper As preparation for moving the discovery HCI commands behind req_workqueue, add a helper and do the validity checks of the given discovery type before proceeding further. This way we don't need to do them again in hci_request.c. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a229cfd0530e..e634b4d85249 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4375,6 +4375,33 @@ unlock: hci_dev_unlock(hdev); } +static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, + uint8_t *mgmt_status) +{ + switch (type) { + case DISCOV_TYPE_LE: + *mgmt_status = mgmt_le_support(hdev); + if (*mgmt_status) + return false; + break; + case DISCOV_TYPE_INTERLEAVED: + *mgmt_status = mgmt_le_support(hdev); + if (*mgmt_status) + return false; + /* Intentional fall-through */ + case DISCOV_TYPE_BREDR: + *mgmt_status = mgmt_bredr_support(hdev); + if (*mgmt_status) + return false; + break; + default: + *mgmt_status = MGMT_STATUS_INVALID_PARAMS; + return false; + } + + return true; +} + static int start_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -4403,6 +4430,12 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } + if 
(!discovery_type_is_valid(hdev, cp->type, &status)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + status, &cp->type, sizeof(cp->type)); + goto failed; + } + cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); if (!cmd) { err = -ENOMEM; @@ -4502,6 +4535,13 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } + if (!discovery_type_is_valid(hdev, cp->type, &status)) { + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + status, &cp->type, sizeof(cp->type)); + goto failed; + } + cmd = mgmt_pending_add(sk, MGMT_OP_START_SERVICE_DISCOVERY, hdev, data, len); if (!cmd) { -- cgit v1.2.3 From a1d01db1202ee6795c0a665b43896293ad4e2a77 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:25 +0200 Subject: Bluetooth: Add error return value to hci_req_sync callback In some circumstances it may be useful to abort the request through checks done in the request callback. To make the feature possible this patch changes the return value of the request callback from void to int and aborts the request if a non-zero value is returned. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 45 ++++++++++++++++++++++++++++++++------------- net/bluetooth/hci_request.c | 27 ++++++++++++++++++--------- net/bluetooth/hci_request.h | 8 ++++---- 3 files changed, 54 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0655521dd8bc..fb618d6bcded 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -189,13 +189,14 @@ static void hci_debugfs_create_basic(struct hci_dev *hdev) &vendor_diag_fops); } -static void hci_reset_req(struct hci_request *req, unsigned long opt) +static int hci_reset_req(struct hci_request *req, unsigned long opt) { BT_DBG("%s %ld", req->hdev->name, opt); /* Reset device */ set_bit(HCI_RESET, &req->hdev->flags); hci_req_add(req, HCI_OP_RESET, 0, NULL); + return 0; } static void bredr_init(struct hci_request *req) @@ -235,7 +236,7 @@ static void amp_init1(struct hci_request *req) hci_req_add(req, HCI_OP_READ_LOCATION_DATA, 0, NULL); } -static void amp_init2(struct hci_request *req) +static int amp_init2(struct hci_request *req) { /* Read Local Supported Features. 
Not all AMP controllers * support this so it's placed conditionally in the second @@ -243,9 +244,11 @@ static void amp_init2(struct hci_request *req) */ if (req->hdev->commands[14] & 0x20) hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); + + return 0; } -static void hci_init1_req(struct hci_request *req, unsigned long opt) +static int hci_init1_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -268,6 +271,8 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt) BT_ERR("Unknown device type %d", hdev->dev_type); break; } + + return 0; } static void bredr_setup(struct hci_request *req) @@ -416,7 +421,7 @@ static void hci_setup_event_mask(struct hci_request *req) hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events); } -static void hci_init2_req(struct hci_request *req, unsigned long opt) +static int hci_init2_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -496,6 +501,8 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), &enable); } + + return 0; } static void hci_setup_link_policy(struct hci_request *req) @@ -570,7 +577,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events); } -static void hci_init3_req(struct hci_request *req, unsigned long opt) +static int hci_init3_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; u8 p; @@ -709,9 +716,11 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp), &cp); } + + return 0; } -static void hci_init4_req(struct hci_request *req, unsigned long opt) +static int hci_init4_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -762,6 +771,8 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) 
hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, sizeof(support), &support); } + + return 0; } static int __hci_init(struct hci_dev *hdev) @@ -821,7 +832,7 @@ static int __hci_init(struct hci_dev *hdev) return 0; } -static void hci_init0_req(struct hci_request *req, unsigned long opt) +static int hci_init0_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -837,6 +848,8 @@ static void hci_init0_req(struct hci_request *req, unsigned long opt) /* Read BD Address */ if (hdev->set_bdaddr) hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); + + return 0; } static int __hci_unconf_init(struct hci_dev *hdev) @@ -856,7 +869,7 @@ static int __hci_unconf_init(struct hci_dev *hdev) return 0; } -static void hci_scan_req(struct hci_request *req, unsigned long opt) +static int hci_scan_req(struct hci_request *req, unsigned long opt) { __u8 scan = opt; @@ -864,9 +877,10 @@ static void hci_scan_req(struct hci_request *req, unsigned long opt) /* Inquiry and Page scans */ hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + return 0; } -static void hci_auth_req(struct hci_request *req, unsigned long opt) +static int hci_auth_req(struct hci_request *req, unsigned long opt) { __u8 auth = opt; @@ -874,9 +888,10 @@ static void hci_auth_req(struct hci_request *req, unsigned long opt) /* Authentication */ hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); + return 0; } -static void hci_encrypt_req(struct hci_request *req, unsigned long opt) +static int hci_encrypt_req(struct hci_request *req, unsigned long opt) { __u8 encrypt = opt; @@ -884,9 +899,10 @@ static void hci_encrypt_req(struct hci_request *req, unsigned long opt) /* Encryption */ hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); + return 0; } -static void hci_linkpol_req(struct hci_request *req, unsigned long opt) +static int hci_linkpol_req(struct hci_request *req, unsigned long opt) { __le16 policy = cpu_to_le16(opt); @@ -894,6 +910,7 @@ static void hci_linkpol_req(struct hci_request 
*req, unsigned long opt) /* Default link policy */ hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); + return 0; } /* Get HCI device by index. @@ -1138,7 +1155,7 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf) return copied; } -static void hci_inq_req(struct hci_request *req, unsigned long opt) +static int hci_inq_req(struct hci_request *req, unsigned long opt) { struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; struct hci_dev *hdev = req->hdev; @@ -1147,13 +1164,15 @@ static void hci_inq_req(struct hci_request *req, unsigned long opt) BT_DBG("%s", hdev->name); if (test_bit(HCI_INQUIRY, &hdev->flags)) - return; + return 0; /* Start Inquiry */ memcpy(&cp.lap, &ir->lap, 3); cp.length = ir->length; cp.num_rsp = ir->num_rsp; hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + return 0; } int hci_inquiry(void __user *arg) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 4588fe2bfc0e..ecfa4105e00d 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -184,8 +184,8 @@ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, EXPORT_SYMBOL(__hci_cmd_sync); /* Execute request and wait for completion. 
*/ -int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, - unsigned long opt), +int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, + unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status) { struct hci_request req; @@ -198,7 +198,12 @@ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, hdev->req_status = HCI_REQ_PEND; - func(&req, opt); + err = func(&req, opt); + if (err) { + if (hci_status) + *hci_status = HCI_ERROR_UNSPECIFIED; + return err; + } add_wait_queue(&hdev->req_wait_q, &wait); set_current_state(TASK_INTERRUPTIBLE); @@ -255,8 +260,8 @@ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, return err; } -int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, - unsigned long opt), +int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req, + unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status) { int ret; @@ -830,11 +835,12 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) return 0; } -static void update_bg_scan(struct hci_request *req, unsigned long opt) +static int update_bg_scan(struct hci_request *req, unsigned long opt) { hci_dev_lock(req->hdev); __hci_update_background_scan(req); hci_dev_unlock(req->hdev); + return 0; } static void bg_scan_update(struct work_struct *work) @@ -932,9 +938,10 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) } } -static void le_scan_disable(struct hci_request *req, unsigned long opt) +static int le_scan_disable(struct hci_request *req, unsigned long opt) { hci_req_add_le_scan_disable(req); + return 0; } static void le_scan_disable_work(struct work_struct *work) @@ -1000,14 +1007,14 @@ unlock: hci_dev_unlock(hdev); } -static void le_scan_restart(struct hci_request *req, unsigned long opt) +static int le_scan_restart(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; struct 
hci_cp_le_set_scan_enable cp; /* If controller is not scanning we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return; + return 0; hci_req_add_le_scan_disable(req); @@ -1015,6 +1022,8 @@ static void le_scan_restart(struct hci_request *req, unsigned long opt) cp.enable = LE_SCAN_ENABLE; cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + + return 0; } static void le_scan_restart_work(struct work_struct *work) diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 1f1194628652..1927013f5e67 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -44,11 +44,11 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, hci_req_complete_t *req_complete, hci_req_complete_skb_t *req_complete_skb); -int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, - unsigned long opt), +int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req, + unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status); -int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, - unsigned long opt), +int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, + unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status); void hci_req_sync_cancel(struct hci_dev *hdev, int err); -- cgit v1.2.3 From e68f072b7396574df5324e1cf93e4b0c92460735 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:30:30 +0200 Subject: Bluetooth: Move Start Discovery to req_workqueue Since discovery also deals with LE scanning it makes sense to move it behind the same req_workqueue as other LE scanning changes. This also simplifies the logic since we do many of the actions in a synchronous manner. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 + net/bluetooth/hci_request.c | 202 +++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 238 ++------------------------------------- 3 files changed, 213 insertions(+), 229 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1f75aebbd8c4..72ea8a6d7d70 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -327,6 +327,7 @@ struct hci_dev { struct work_struct cmd_work; struct work_struct tx_work; + struct work_struct discov_update; struct work_struct bg_scan_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -1473,6 +1474,7 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); +void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index ecfa4105e00d..da1e30b85e77 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1042,8 +1042,209 @@ static void le_scan_restart_work(struct work_struct *work) le_scan_restart_work_complete(hdev, status); } +static int bredr_inquiry(struct hci_request *req, unsigned long opt) +{ + struct hci_cp_inquiry cp; + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; + + BT_DBG("%s", req->hdev->name); + + hci_dev_lock(req->hdev); + hci_inquiry_cache_flush(req->hdev); + hci_dev_unlock(req->hdev); + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_BREDR_INQUIRY_LEN; + + 
hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + return 0; +} + +static void cancel_adv_timeout(struct hci_dev *hdev) +{ + if (hdev->adv_instance_timeout) { + hdev->adv_instance_timeout = 0; + cancel_delayed_work(&hdev->adv_instance_expire); + } +} + +static void disable_advertising(struct hci_request *req) +{ + u8 enable = 0x00; + + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); +} + +static int active_scan(struct hci_request *req, unsigned long opt) +{ + uint16_t interval = opt; + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + u8 own_addr_type; + int err; + + BT_DBG("%s", hdev->name); + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { + hci_dev_lock(hdev); + + /* Don't let discovery abort an outgoing connection attempt + * that's using directed advertising. + */ + if (hci_lookup_le_connect(hdev)) { + hci_dev_unlock(hdev); + return -EBUSY; + } + + cancel_adv_timeout(hdev); + hci_dev_unlock(hdev); + + disable_advertising(req); + } + + /* If controller is scanning, it means the background scanning is + * running. Thus, we should temporarily stop it in order to set the + * discovery scanning parameters. + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(req); + + /* All active scans will be done with either a resolvable private + * address (when privacy feature has been enabled) or non-resolvable + * private address. 
+ */ + err = hci_update_random_address(req, true, &own_addr_type); + if (err < 0) + own_addr_type = ADDR_LE_DEV_PUBLIC; + + memset(&param_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_ACTIVE; + param_cp.interval = cpu_to_le16(interval); + param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); + param_cp.own_address_type = own_addr_type; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + &param_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); + + return 0; +} + +static int interleaved_discov(struct hci_request *req, unsigned long opt) +{ + int err; + + BT_DBG("%s", req->hdev->name); + + err = active_scan(req, opt); + if (err) + return err; + + return bredr_inquiry(req, opt); +} + +static void start_discovery(struct hci_dev *hdev, u8 *status) +{ + unsigned long timeout; + + BT_DBG("%s type %u", hdev->name, hdev->discovery.type); + + switch (hdev->discovery.type) { + case DISCOV_TYPE_BREDR: + if (!hci_dev_test_flag(hdev, HCI_INQUIRY)) + hci_req_sync(hdev, bredr_inquiry, 0, HCI_CMD_TIMEOUT, + status); + return; + case DISCOV_TYPE_INTERLEAVED: + /* When running simultaneous discovery, the LE scanning time + * should occupy the whole discovery time sine BR/EDR inquiry + * and LE scanning are scheduled by the controller. + * + * For interleaving discovery in comparison, BR/EDR inquiry + * and LE scanning are done sequentially with separate + * timeouts. + */ + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, + &hdev->quirks)) { + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); + /* During simultaneous discovery, we double LE scan + * interval. We must leave some time for the controller + * to do BR/EDR inquiry. 
+ */ + hci_req_sync(hdev, interleaved_discov, + DISCOV_LE_SCAN_INT * 2, HCI_CMD_TIMEOUT, + status); + break; + } + + timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); + hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT, + HCI_CMD_TIMEOUT, status); + break; + case DISCOV_TYPE_LE: + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); + hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT, + HCI_CMD_TIMEOUT, status); + break; + default: + *status = HCI_ERROR_UNSPECIFIED; + return; + } + + if (*status) + return; + + BT_DBG("%s timeout %u ms", hdev->name, jiffies_to_msecs(timeout)); + + /* When service discovery is used and the controller has a + * strict duplicate filter, it is important to remember the + * start and duration of the scan. This is required for + * restarting scanning during the discovery phase. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && + hdev->discovery.result_filtering) { + hdev->discovery.scan_start = jiffies; + hdev->discovery.scan_duration = timeout; + } + + queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, + timeout); +} + +static void discov_update(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discov_update); + u8 status = 0; + + switch (hdev->discovery.state) { + case DISCOVERY_STARTING: + start_discovery(hdev, &status); + mgmt_start_discovery_complete(hdev, status); + if (status) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + else + hci_discovery_set_state(hdev, DISCOVERY_FINDING); + break; + case DISCOVERY_STOPPED: + default: + return; + } +} + void hci_request_setup(struct hci_dev *hdev) { + INIT_WORK(&hdev->discov_update, discov_update); INIT_WORK(&hdev->bg_scan_update, bg_scan_update); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); @@ -1051,6 +1252,7 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { + 
cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e634b4d85249..63b099471c92 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4164,145 +4164,9 @@ done: return err; } -static bool trigger_bredr_inquiry(struct hci_request *req, u8 *status) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_inquiry cp; - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - - *status = mgmt_bredr_support(hdev); - if (*status) - return false; - - if (hci_dev_test_flag(hdev, HCI_INQUIRY)) { - *status = MGMT_STATUS_BUSY; - return false; - } - - hci_inquiry_cache_flush(hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_BREDR_INQUIRY_LEN; - - hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - return true; -} - -static bool trigger_le_scan(struct hci_request *req, u16 interval, u8 *status) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - u8 own_addr_type; - int err; - - *status = mgmt_le_support(hdev); - if (*status) - return false; - - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { - /* Don't let discovery abort an outgoing connection attempt - * that's using directed advertising. - */ - if (hci_lookup_le_connect(hdev)) { - *status = MGMT_STATUS_REJECTED; - return false; - } - - cancel_adv_timeout(hdev); - disable_advertising(req); - } - - /* If controller is scanning, it means the background scanning is - * running. Thus, we should temporarily stop it in order to set the - * discovery scanning parameters. 
- */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req); - - /* All active scans will be done with either a resolvable private - * address (when privacy feature has been enabled) or non-resolvable - * private address. - */ - err = hci_update_random_address(req, true, &own_addr_type); - if (err < 0) { - *status = MGMT_STATUS_FAILED; - return false; - } - - memset(&param_cp, 0, sizeof(param_cp)); - param_cp.type = LE_SCAN_ACTIVE; - param_cp.interval = cpu_to_le16(interval); - param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); - param_cp.own_address_type = own_addr_type; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - &param_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); - - return true; -} - -static bool trigger_discovery(struct hci_request *req, u8 *status) -{ - struct hci_dev *hdev = req->hdev; - - switch (hdev->discovery.type) { - case DISCOV_TYPE_BREDR: - if (!trigger_bredr_inquiry(req, status)) - return false; - break; - - case DISCOV_TYPE_INTERLEAVED: - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { - /* During simultaneous discovery, we double LE scan - * interval. We must leave some time for the controller - * to do BR/EDR inquiry. 
- */ - if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT * 2, - status)) - return false; - - if (!trigger_bredr_inquiry(req, status)) - return false; - - return true; - } - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { - *status = MGMT_STATUS_NOT_SUPPORTED; - return false; - } - /* fall through */ - - case DISCOV_TYPE_LE: - if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT, status)) - return false; - break; - - default: - *status = MGMT_STATUS_INVALID_PARAMS; - return false; - } - - return true; -} - -static void start_discovery_complete(struct hci_dev *hdev, u8 status, - u16 opcode) +void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - unsigned long timeout; BT_DBG("status %d", status); @@ -4317,61 +4181,6 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, mgmt_pending_remove(cmd); } - if (status) { - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - goto unlock; - } - - hci_discovery_set_state(hdev, DISCOVERY_FINDING); - - /* If the scan involves LE scan, pick proper timeout to schedule - * hdev->le_scan_disable that will stop it. - */ - switch (hdev->discovery.type) { - case DISCOV_TYPE_LE: - timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); - break; - case DISCOV_TYPE_INTERLEAVED: - /* When running simultaneous discovery, the LE scanning time - * should occupy the whole discovery time sine BR/EDR inquiry - * and LE scanning are scheduled by the controller. - * - * For interleaving discovery in comparison, BR/EDR inquiry - * and LE scanning are done sequentially with separate - * timeouts. 
- */ - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) - timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); - else - timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); - break; - case DISCOV_TYPE_BREDR: - timeout = 0; - break; - default: - BT_ERR("Invalid discovery type %d", hdev->discovery.type); - timeout = 0; - break; - } - - if (timeout) { - /* When service discovery is used and the controller has - * a strict duplicate filter, it is important to remember - * the start and duration of the scan. This is required - * for restarting scanning during the discovery phase. - */ - if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, - &hdev->quirks) && - hdev->discovery.result_filtering) { - hdev->discovery.scan_start = jiffies; - hdev->discovery.scan_duration = timeout; - } - - queue_delayed_work(hdev->req_workqueue, - &hdev->le_scan_disable, timeout); - } - -unlock: hci_dev_unlock(hdev); } @@ -4407,7 +4216,6 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_start_discovery *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; u8 status; int err; @@ -4436,14 +4244,6 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); - if (!cmd) { - err = -ENOMEM; - goto failed; - } - - cmd->cmd_complete = generic_cmd_complete; - /* Clear the discovery filter first to free any previously * allocated memory for the UUID list. 
*/ @@ -4452,22 +4252,17 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.type = cp->type; hdev->discovery.report_invalid_rssi = false; - hci_req_init(&req, hdev); - - if (!trigger_discovery(&req, &status)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - status, &cp->type, sizeof(cp->type)); - mgmt_pending_remove(cmd); + cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; } - err = hci_req_run(&req, start_discovery_complete); - if (err < 0) { - mgmt_pending_remove(cmd); - goto failed; - } + cmd->cmd_complete = generic_cmd_complete; hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; failed: hci_dev_unlock(hdev); @@ -4486,7 +4281,6 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_start_service_discovery *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; const u16 max_uuid_count = ((U16_MAX - sizeof(*cp)) / 16); u16 uuid_count, expected_len; u8 status; @@ -4574,23 +4368,9 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, } } - hci_req_init(&req, hdev); - - if (!trigger_discovery(&req, &status)) { - err = mgmt_cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - status, &cp->type, sizeof(cp->type)); - mgmt_pending_remove(cmd); - goto failed; - } - - err = hci_req_run(&req, start_discovery_complete); - if (err < 0) { - mgmt_pending_remove(cmd); - goto failed; - } - hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; failed: hci_dev_unlock(hdev); -- cgit v1.2.3 From 2154d3f4fb83c812a161c4910948dd876997e111 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:30:45 +0200 Subject: Bluetooth: Move Stop Discovery to req_workqueue Since discovery also deals with LE scanning it makes sense to move it behind the 
same req_workqueue as other LE scanning changes. This also simplifies the logic since we do many of the actions in a synchronous manner. Part of this refactoring is moving hci_req_stop_discovery() to hci_request.c. At the same time the function receives support for properly handling the STOPPING state since that's the state we'll be in when stopping through the req_workqueue. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_request.c | 62 ++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 3 ++ net/bluetooth/mgmt.c | 72 +++------------------------------------- 4 files changed, 71 insertions(+), 67 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 72ea8a6d7d70..609f4a03899c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1475,6 +1475,7 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status); +void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index da1e30b85e77..3219ee66faad 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1221,6 +1221,62 @@ static void start_discovery(struct hci_dev *hdev, u8 *status) timeout); } +bool hci_req_stop_discovery(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct discovery_state *d = &hdev->discovery; + struct hci_cp_remote_name_req_cancel cp; + struct inquiry_entry *e; + bool ret = false; + + BT_DBG("%s state %u", hdev->name, hdev->discovery.state); + + if 
(d->state == DISCOVERY_FINDING || d->state == DISCOVERY_STOPPING) { + if (test_bit(HCI_INQUIRY, &hdev->flags)) + hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL); + + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + cancel_delayed_work(&hdev->le_scan_disable); + hci_req_add_le_scan_disable(req); + } + + ret = true; + } else { + /* Passive scanning */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + hci_req_add_le_scan_disable(req); + ret = true; + } + } + + /* No further actions needed for LE-only discovery */ + if (d->type == DISCOV_TYPE_LE) + return ret; + + if (d->state == DISCOVERY_RESOLVING || d->state == DISCOVERY_STOPPING) { + e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, + NAME_PENDING); + if (!e) + return ret; + + bacpy(&cp.bdaddr, &e->data.bdaddr); + hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), + &cp); + ret = true; + } + + return ret; +} + +static int stop_discovery(struct hci_request *req, unsigned long opt) +{ + hci_dev_lock(req->hdev); + hci_req_stop_discovery(req); + hci_dev_unlock(req->hdev); + + return 0; +} + static void discov_update(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1236,6 +1292,12 @@ static void discov_update(struct work_struct *work) else hci_discovery_set_state(hdev, DISCOVERY_FINDING); break; + case DISCOVERY_STOPPING: + hci_req_sync(hdev, stop_discovery, 0, HCI_CMD_TIMEOUT, &status); + mgmt_stop_discovery_complete(hdev, status); + if (!status) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + break; case DISCOVERY_STOPPED: default: return; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 1927013f5e67..6b9e59f7f7a9 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -58,6 +58,9 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); +/* Returns true if HCI commands were 
queued */ +bool hci_req_stop_discovery(struct hci_request *req); + void hci_update_page_scan(struct hci_dev *hdev); void __hci_update_page_scan(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 63b099471c92..8cdacef6b108 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1416,49 +1416,6 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) } } -static bool hci_stop_discovery(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_remote_name_req_cancel cp; - struct inquiry_entry *e; - - switch (hdev->discovery.state) { - case DISCOVERY_FINDING: - if (test_bit(HCI_INQUIRY, &hdev->flags)) - hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL); - - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - cancel_delayed_work(&hdev->le_scan_disable); - hci_req_add_le_scan_disable(req); - } - - return true; - - case DISCOVERY_RESOLVING: - e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, - NAME_PENDING); - if (!e) - break; - - bacpy(&cp.bdaddr, &e->data.bdaddr); - hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), - &cp); - - return true; - - default: - /* Passive scanning */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - hci_req_add_le_scan_disable(req); - return true; - } - - break; - } - - return false; -} - static void advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance) { @@ -1636,7 +1593,7 @@ static int clean_up_hci_state(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_LE_ADV)) disable_advertising(&req); - discov_stopped = hci_stop_discovery(&req); + discov_stopped = hci_req_stop_discovery(&req); list_for_each_entry(conn, &hdev->conn_hash.list, list) { /* 0x15 == Terminated due to Power Off */ @@ -4377,7 +4334,7 @@ failed: return err; } -static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) +void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; @@ -4391,9 +4348,6 @@ 
static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) mgmt_pending_remove(cmd); } - if (!status) - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); } @@ -4402,7 +4356,6 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_stop_discovery *mgmt_cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; int err; BT_DBG("%s", hdev->name); @@ -4431,24 +4384,9 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, cmd->cmd_complete = generic_cmd_complete; - hci_req_init(&req, hdev); - - hci_stop_discovery(&req); - - err = hci_req_run(&req, stop_discovery_complete); - if (!err) { - hci_discovery_set_state(hdev, DISCOVERY_STOPPING); - goto unlock; - } - - mgmt_pending_remove(cmd); - - /* If no HCI commands were sent we're done */ - if (err == -ENODATA) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0, - &mgmt_cp->type, sizeof(mgmt_cp->type)); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - } + hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; unlock: hci_dev_unlock(hdev); -- cgit v1.2.3 From 2f27498107c298b9cdd93c77d9e3ad409949b36b Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 10:36:15 +0200 Subject: Bluetooth: Fix BR/EDR Page Scan update with Add Device The recent changes to remove dependency on HCI in Add Device missed out relevant changes for BR/EDR. This patch removes the left-overs and ensures the right HCI command gets queued for BR/EDR. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8cdacef6b108..e4ad0457547a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5894,8 +5894,6 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_device *cp = data; - struct mgmt_pending_cmd *cmd; - struct hci_request req; u8 auto_conn, addr_type; int err; @@ -5912,18 +5910,8 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); - hci_req_init(&req, hdev); - hci_dev_lock(hdev); - cmd = mgmt_pending_add(sk, MGMT_OP_ADD_DEVICE, hdev, data, len); - if (!cmd) { - err = -ENOMEM; - goto unlock; - } - - cmd->cmd_complete = addr_cmd_complete; - if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { @@ -5939,7 +5927,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - __hci_update_page_scan(&req); + hci_update_page_scan(hdev); goto added; } -- cgit v1.2.3 From 7df26b56297456a133b8cc2efb069065d6b9a555 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 12:24:21 +0200 Subject: Bluetooth: Pass inquiry length to bredr_inquiry() Passing the needed inquiry length to bredr_inquiry() makes it possible to also use this helper for interleaved discovery where the controller doesn't support simultaneous Inquiry & LE scan. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 3219ee66faad..98827e7631ca 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1044,6 +1044,7 @@ static void le_scan_restart_work(struct work_struct *work) static int bredr_inquiry(struct hci_request *req, unsigned long opt) { + u8 length = opt; struct hci_cp_inquiry cp; /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; @@ -1056,7 +1057,7 @@ static int bredr_inquiry(struct hci_request *req, unsigned long opt) memset(&cp, 0, sizeof(cp)); memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_BREDR_INQUIRY_LEN; + cp.length = length; hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); @@ -1150,7 +1151,7 @@ static int interleaved_discov(struct hci_request *req, unsigned long opt) if (err) return err; - return bredr_inquiry(req, opt); + return bredr_inquiry(req, DISCOV_BREDR_INQUIRY_LEN); } static void start_discovery(struct hci_dev *hdev, u8 *status) @@ -1162,7 +1163,8 @@ static void start_discovery(struct hci_dev *hdev, u8 *status) switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: if (!hci_dev_test_flag(hdev, HCI_INQUIRY)) - hci_req_sync(hdev, bredr_inquiry, 0, HCI_CMD_TIMEOUT, + hci_req_sync(hdev, bredr_inquiry, + DISCOV_BREDR_INQUIRY_LEN, HCI_CMD_TIMEOUT, status); return; case DISCOV_TYPE_INTERLEAVED: -- cgit v1.2.3 From f4a2cb4d8f792350ec38b35b94026fc2c4be8d0f Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 12:24:22 +0200 Subject: Bluetooth: Simplify le_scan_disable_work() Merge le_scan_disable_work_complete into the main le_scan_disable_work function and take advantage of the updated bredr_inquiry() to run the Inquiry through hci_req_sync(). 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 148 +++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 91 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 98827e7631ca..04c3357b1e1c 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -864,83 +864,31 @@ static void bg_scan_update(struct work_struct *work) hci_dev_unlock(hdev); } -static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) +static int le_scan_disable(struct hci_request *req, unsigned long opt) { - if (status) { - BT_ERR("Failed to start inquiry: status %d", status); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - return; - } + hci_req_add_le_scan_disable(req); + return 0; } -static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +static int bredr_inquiry(struct hci_request *req, unsigned long opt) { + u8 length = opt; /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; struct hci_cp_inquiry cp; - int err; - - if (status) { - BT_ERR("Failed to disable LE scanning: status %d", status); - return; - } - - hdev->discovery.scan_start = 0; - - switch (hdev->discovery.type) { - case DISCOV_TYPE_LE: - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - break; - case DISCOV_TYPE_INTERLEAVED: - hci_dev_lock(hdev); - - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { - /* If we were running LE only scan, change discovery - * state. If we were running both LE and BR/EDR inquiry - * simultaneously, and BR/EDR inquiry is already - * finished, stop discovery, otherwise BR/EDR inquiry - * will stop discovery when finished. If we will resolve - * remote device name, do not change discovery state. 
- */ - if (!test_bit(HCI_INQUIRY, &hdev->flags) && - hdev->discovery.state != DISCOVERY_RESOLVING) - hci_discovery_set_state(hdev, - DISCOVERY_STOPPED); - } else { - struct hci_request req; - - hci_inquiry_cache_flush(hdev); - - hci_req_init(&req, hdev); + BT_DBG("%s", req->hdev->name); - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; - hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + hci_dev_lock(req->hdev); + hci_inquiry_cache_flush(req->hdev); + hci_dev_unlock(req->hdev); - err = hci_req_run(&req, inquiry_complete); - if (err) { - BT_ERR("Inquiry request failed: err %d", err); - hci_discovery_set_state(hdev, - DISCOVERY_STOPPED); - } - } + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = length; - hci_dev_unlock(hdev); - break; - } -} + hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); -static int le_scan_disable(struct hci_request *req, unsigned long opt) -{ - hci_req_add_le_scan_disable(req); return 0; } @@ -949,17 +897,57 @@ static void le_scan_disable_work(struct work_struct *work) struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan_disable.work); u8 status; - int err; BT_DBG("%s", hdev->name); + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return; + cancel_delayed_work(&hdev->le_scan_restart); - err = hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); - if (err) + hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); + if (status) { + BT_ERR("Failed to disable LE scan: status 0x%02x", status); + return; + } + + hdev->discovery.scan_start = 0; + + /* If we were running LE only scan, change discovery state. If + * we were running both LE and BR/EDR inquiry simultaneously, + * and BR/EDR inquiry is already finished, stop discovery, + * otherwise BR/EDR inquiry will stop discovery when finished. + * If we will resolve remote device name, do not change + * discovery state. 
+ */ + + if (hdev->discovery.type == DISCOV_TYPE_LE) + goto discov_stopped; + + if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED) + return; + + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) { + if (!test_bit(HCI_INQUIRY, &hdev->flags) && + hdev->discovery.state != DISCOVERY_RESOLVING) + goto discov_stopped; + return; + } + + hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN, + HCI_CMD_TIMEOUT, &status); + if (status) { + BT_ERR("Inquiry failed: status 0x%02x", status); + goto discov_stopped; + } + + return; - le_scan_disable_work_complete(hdev, status); +discov_stopped: + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); } static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status) @@ -1042,28 +1030,6 @@ static void le_scan_restart_work(struct work_struct *work) le_scan_restart_work_complete(hdev, status); } -static int bredr_inquiry(struct hci_request *req, unsigned long opt) -{ - u8 length = opt; - struct hci_cp_inquiry cp; - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - - BT_DBG("%s", req->hdev->name); - - hci_dev_lock(req->hdev); - hci_inquiry_cache_flush(req->hdev); - hci_dev_unlock(req->hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = length; - - hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - return 0; -} - static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { -- cgit v1.2.3 From 3dfe5905a7505bc0cbf5f63405631d8e188d9235 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 12:24:23 +0200 Subject: Bluetooth: Remove unnecessary le_scan_restart_work_complete() function The only user of this, le_scan_restart_work(), is so short and simple that it makes sense to just merge the code there. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 60 ++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 04c3357b1e1c..e8345d8106b5 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -950,12 +950,35 @@ discov_stopped: hci_dev_unlock(hdev); } -static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status) +static int le_scan_restart(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_enable cp; + + /* If controller is not scanning we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return 0; + + hci_req_add_le_scan_disable(req); + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + + return 0; +} + +static void le_scan_restart_work(struct work_struct *work) { + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); unsigned long timeout, duration, scan_start, now; + u8 status; BT_DBG("%s", hdev->name); + hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); if (status) { BT_ERR("Failed to restart LE scan: status %d", status); return; @@ -995,41 +1018,6 @@ unlock: hci_dev_unlock(hdev); } -static int le_scan_restart(struct hci_request *req, unsigned long opt) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_scan_enable cp; - - /* If controller is not scanning we are done. 
*/ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return 0; - - hci_req_add_le_scan_disable(req); - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_ENABLE; - cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - - return 0; -} - -static void le_scan_restart_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_restart.work); - u8 status; - int err; - - BT_DBG("%s", hdev->name); - - err = hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); - if (err) - return; - - le_scan_restart_work_complete(hdev, status); -} - static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { -- cgit v1.2.3 From 0ad06aa6a7682319bb1adcc187a1fa8db6b2da2c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 14:44:57 +0200 Subject: Bluetooth: Fix specifying role for LE connections The hci_connect_le_scan() is (as the name implies) a master/central role API, so it makes no sense in passing a role parameter to it. At the same time this patch also fixes the direct advertising support for LE L2CAP sockets where we now call the more appropriate hci_le_connect() API if slave/peripheral role is desired. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/l2cap_core.c | 15 +++++++-------- net/bluetooth/mgmt.c | 3 +-- 4 files changed, 11 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 609f4a03899c..55ce209157b1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -877,7 +877,7 @@ struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle); struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, - u16 conn_timeout, u8 role); + u16 conn_timeout); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout, u8 role); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1ed1e153b3fa..673c2254935b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -988,7 +988,7 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev, /* This function requires the caller holds hdev->lock */ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, - u16 conn_timeout, u8 role) + u16 conn_timeout) { struct hci_conn *conn; @@ -1018,7 +1018,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, BT_DBG("requesting refresh of dst_addr"); - conn = hci_conn_add(hdev, LE_LINK, dst, role); + conn = hci_conn_add(hdev, LE_LINK, dst, HCI_ROLE_MASTER); if (!conn) return ERR_PTR(-ENOMEM); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 66e8b6ee19a5..139da8106b04 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7113,8 +7113,6 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, chan->dcid = cid; if (bdaddr_type_is_le(dst_type)) { - u8 role; - /* Convert from L2CAP channel address type to 
HCI address type */ if (dst_type == BDADDR_LE_PUBLIC) @@ -7123,14 +7121,15 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, dst_type = ADDR_LE_DEV_RANDOM; if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - role = HCI_ROLE_SLAVE; + hcon = hci_connect_le(hdev, dst, dst_type, + chan->sec_level, + HCI_LE_CONN_TIMEOUT, + HCI_ROLE_SLAVE); else - role = HCI_ROLE_MASTER; + hcon = hci_connect_le_scan(hdev, dst, dst_type, + chan->sec_level, + HCI_LE_CONN_TIMEOUT); - hcon = hci_connect_le_scan(hdev, dst, dst_type, - chan->sec_level, - HCI_LE_CONN_TIMEOUT, - role); } else { u8 auth_type = l2cap_get_auth_type(chan); hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e4ad0457547a..eca203e891d2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3518,8 +3518,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr, addr_type, sec_level, - HCI_LE_CONN_TIMEOUT, - HCI_ROLE_MASTER); + HCI_LE_CONN_TIMEOUT); } if (IS_ERR(conn)) { -- cgit v1.2.3 From 658aead94bb65c0141391f20f8c24f51e971b6ea Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 14:44:58 +0200 Subject: Bluetooth: Move check for ongoing connect earlier in hci_connect_le() This helps simplify the logic in further patches (less cleanups to do in this failure branch). 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 673c2254935b..08a291dd0f3a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -798,6 +798,12 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-EOPNOTSUPP); } + /* Since the controller supports only one LE connection attempt at a + * time, we return -EBUSY if there is any connection attempt running. + */ + if (hci_lookup_le_connect(hdev)) + return ERR_PTR(-EBUSY); + /* Some devices send ATT messages as soon as the physical link is * established. To be able to handle these ATT messages, the user- * space first establishes the connection and then starts the pairing @@ -821,12 +827,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, } } - /* Since the controller supports only one LE connection attempt at a - * time, we return -EBUSY if there is any connection attempt running. - */ - if (hci_lookup_le_connect(hdev)) - return ERR_PTR(-EBUSY); - /* When given an identity address with existing identity * resolving key, the connection needs to be established * to a resolvable random address. -- cgit v1.2.3 From e2caced40734731e2a17b501840809e30a08141a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 14:44:59 +0200 Subject: Bluetooth: Remove conn_unfinished variable from hci_connect_le() The conn_unfinished variable makes the entire logic of hci_connect_le() rather confusing. By restructuring and clarifying the logic we can actually remove the conn_unfinished variable and still keep the same behavior. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 48 ++++++++++++------------------------------------ 1 file changed, 12 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 08a291dd0f3a..2d334e07fd77 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -785,7 +785,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 role) { struct hci_conn_params *params; - struct hci_conn *conn, *conn_unfinished; + struct hci_conn *conn; struct smp_irk *irk; struct hci_request req; int err; @@ -804,27 +804,14 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, if (hci_lookup_le_connect(hdev)) return ERR_PTR(-EBUSY); - /* Some devices send ATT messages as soon as the physical link is - * established. To be able to handle these ATT messages, the user- - * space first establishes the connection and then starts the pairing - * process. - * - * So if a hci_conn object already exists for the following connection - * attempt, we simply update pending_sec_level and auth_type fields - * and return the object found. + /* If there's already a connection object but it's not in + * scanning state it means it must already be established, in + * which case we can't do anything else except report a failure + * to connect. 
*/ conn = hci_conn_hash_lookup_le(hdev, dst, dst_type); - conn_unfinished = NULL; - if (conn) { - if (conn->state == BT_CONNECT && - test_bit(HCI_CONN_SCANNING, &conn->flags)) { - BT_DBG("will continue unfinished conn %pMR", dst); - conn_unfinished = conn; - } else { - if (conn->pending_sec_level < sec_level) - conn->pending_sec_level = sec_level; - goto done; - } + if (conn && !test_bit(HCI_CONN_SCANNING, &conn->flags)) { + return ERR_PTR(-EBUSY); } /* When given an identity address with existing identity @@ -842,23 +829,20 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, dst_type = ADDR_LE_DEV_RANDOM; } - if (conn_unfinished) { - conn = conn_unfinished; + if (conn) { bacpy(&conn->dst, dst); } else { conn = hci_conn_add(hdev, LE_LINK, dst, role); + if (!conn) + return ERR_PTR(-ENOMEM); + hci_conn_hold(conn); + conn->pending_sec_level = sec_level; } - if (!conn) - return ERR_PTR(-ENOMEM); - conn->dst_type = dst_type; conn->sec_level = BT_SECURITY_LOW; conn->conn_timeout = conn_timeout; - if (!conn_unfinished) - conn->pending_sec_level = sec_level; - hci_req_init(&req, hdev); /* Disable advertising if we're active. For master role @@ -922,14 +906,6 @@ create_conn: return ERR_PTR(err); } -done: - /* If this is continuation of connect started by hci_connect_le_scan, - * it already called hci_conn_hold and calling it again would mess the - * counter. - */ - if (!conn_unfinished) - hci_conn_hold(conn); - return conn; } -- cgit v1.2.3 From 7df0f73ece45c2e499b416cbc90949e0226eb134 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 12 Nov 2015 15:15:00 +0200 Subject: Bluetooth: Simplify request cleanup code The hci_req_sync_cancel() is just as much related to the request cleanup as hci_request_cancel_all() is. Just move the former into the latter and do the cleanup from a single place in hci_dev_do_close(). 
The important thing is to avoid deadlocks by holding the req_sync lock: previously hci_request_cancel_all was done right after releasing the lock and with this patch it's right before taking it. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 4 +--- net/bluetooth/hci_request.c | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fb618d6bcded..63fd31d7b27a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1523,7 +1523,7 @@ int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work(&hdev->power_off); - hci_req_sync_cancel(hdev, ENODEV); + hci_request_cancel_all(hdev); hci_req_sync_lock(hdev); if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { @@ -1625,8 +1625,6 @@ int hci_dev_do_close(struct hci_dev *hdev) hci_req_sync_unlock(hdev); - hci_request_cancel_all(hdev); - hci_dev_put(hdev); return 0; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e8345d8106b5..76bd912be9fe 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1270,6 +1270,8 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { + hci_req_sync_cancel(hdev, ENODEV); + cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); -- cgit v1.2.3 From 56f9ebe641d613916d3dce710004d48ab66660fa Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 14 Nov 2015 20:22:41 +0100 Subject: mac802154: Delete an unnecessary check before the function call "kfree_skb" The kfree_skb() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Marcel Holtmann --- net/mac802154/rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 42e96729dae6..446e1300383e 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -217,8 +217,7 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local, break; } - if (skb) - kfree_skb(skb); + kfree_skb(skb); } static void -- cgit v1.2.3 From 06fbb3d5c7ff366fe7ab7b4157bdb3096fca6d09 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 14 Nov 2015 22:00:27 +0100 Subject: Bluetooth: Delete an unnecessary check before the function call "kfree_skb" The kfree_skb() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Marcel Holtmann --- net/bluetooth/cmtp/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 298ed37010e6..9e59b6654126 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -178,8 +178,7 @@ static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff * cmtp_add_msgpart(session, id, skb->data + hdrlen, len); break; default: - if (session->reassembly[id] != NULL) - kfree_skb(session->reassembly[id]); + kfree_skb(session->reassembly[id]); session->reassembly[id] = NULL; break; } -- cgit v1.2.3 From f37590bd772243db8ce47071a56c3a2b84cb282b Mon Sep 17 00:00:00 2001 From: Prasanna Karthik Date: Tue, 17 Nov 2015 11:06:53 +0000 Subject: Bluetooth: clean up af_bluetooth code Fix error reported by checkpatch. 
ERROR:"foo* bar" should be "foo *bar" Signed-off-by: Prasanna Karthik Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 34c53d5862f6..a83c6a73f562 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -669,7 +669,7 @@ static const struct file_operations bt_fops = { }; int bt_procfs_init(struct net *net, const char *name, - struct bt_sock_list* sk_list, + struct bt_sock_list *sk_list, int (* seq_show)(struct seq_file *, void *)) { sk_list->custom_seq_show = seq_show; @@ -685,7 +685,7 @@ void bt_procfs_cleanup(struct net *net, const char *name) } #else int bt_procfs_init(struct net *net, const char *name, - struct bt_sock_list* sk_list, + struct bt_sock_list *sk_list, int (* seq_show)(struct seq_file *, void *)) { return 0; -- cgit v1.2.3 From 74b93e9f4ee0ae9292730de1a1e7d919c59c8ad2 Mon Sep 17 00:00:00 2001 From: Prasanna Karthik Date: Wed, 18 Nov 2015 12:38:41 +0000 Subject: Bluetooth: Clean up hci_core code Fix errors reported by checkpatch. - ERROR: spaces required around that ':' (ctx:VxW) - ERROR: open brace '{' following function declarations go on the next line Signed-off-by: Prasanna Karthik Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 63fd31d7b27a..89af7e4fac02 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -64,7 +64,7 @@ static ssize_t dut_mode_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 
'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -124,7 +124,7 @@ static ssize_t vendor_diag_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -2600,7 +2600,8 @@ struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance) } /* This function requires the caller holds hdev->lock */ -struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) { +struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) +{ struct adv_info *cur_instance; cur_instance = hci_find_adv_instance(hdev, instance); -- cgit v1.2.3 From 9a54421018d76c50c2fa82f88dffbfa6af0383d6 Mon Sep 17 00:00:00 2001 From: Prasanna Karthik Date: Thu, 19 Nov 2015 12:05:35 +0000 Subject: Bluetooth: remove unneeded variable in l2cap_stream_rx Remove unneeded variable used to store return value. Error reported by coccicheck. 
Signed-off-by: Prasanna Karthik Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 139da8106b04..39a5149f3010 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6538,8 +6538,6 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb) { - int err = 0; - BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb, chan->rx_state); @@ -6570,7 +6568,7 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, chan->last_acked_seq = control->txseq; chan->expected_tx_seq = __next_seq(chan, control->txseq); - return err; + return 0; } static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) -- cgit v1.2.3 From c7cad0d6f70cd4ce8644ffe528a4df1cdc2e77f5 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:40 -0500 Subject: tipc: move linearization of buffers to generic code In commit 5cbb28a4bf65c7e4 ("tipc: linearize arriving NAME_DISTR and LINK_PROTO buffers") we added linearization of NAME_DISTRIBUTOR, LINK_PROTOCOL/RESET and LINK_PROTOCOL/ACTIVATE to the function tipc_udp_recv(). The location of the change was selected in order to make the commit easily appliable to 'net' and 'stable'. We now move this linearization to where it should be done, in the functions tipc_named_rcv() and tipc_link_proto_rcv() respectively. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/link.c | 2 ++ net/tipc/name_distr.c | 1 + net/tipc/udp_media.c | 5 ----- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 9efbdbde2b08..fa452fb5f34e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1260,6 +1260,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* fall thru' */ case ACTIVATE_MSG: + skb_linearize(skb); + hdr = buf_msg(skb); /* Complete own link name with peer's interface name */ if_name = strrchr(l->name, ':') + 1; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index c07612bab95c..f51c8bdbea1c 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -397,6 +397,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) spin_lock_bh(&tn->nametbl_lock); for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { + skb_linearize(skb); msg = buf_msg(skb); mtype = msg_type(msg); item = (struct distr_item *)msg_data(msg); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ad2719ad4c1b..816914ef228d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -48,7 +48,6 @@ #include #include "core.h" #include "bearer.h" -#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -221,10 +220,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; - int usr = msg_user(buf_msg(skb)); - - if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR)) - skb_linearize(skb); ub = rcu_dereference_sk_user_data(sk); if (!ub) { -- cgit v1.2.3 From 5c10e9794013143eec80d494603d46dcb219970a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:41 -0500 Subject: tipc: small cleanup of function tipc_node_check_state() The function tipc_node_check_state() contains the core logics for handling link synchronization and failover. For this reason, it is important to keep it as comprehensible as possible. 
In this commit, we make three small cleanups. 1) If the node is in state SELF_DOWN_PEER_LEAVING and the received packet confirms that the peer has lost contact, there will be no further action in this function. To make this clearer, we return from the function directly after the state change. 2) Since commit 0f8b8e28fb3241f9fd ("tipc: eliminate risk of stalled link synchronization") only the logically first TUNNEL_PROTO/SYNCH packet can alter the link state and set the synch point, independently of arrival order. Hence, there is no longer any need to adjust the synch value in case such packets arrive out of order. We remove this adjustment. 3) It is the intention that any message arriving on any of the links may trigger a check for, and possible termination of, a node SYNCH state. A redundant and unnoticed check for tipc_link_is_synching() obviously defeats this purpose, with the effect that only packets arriving on the synching link may currently end the synch state. We remove this check. This change will further shorten the synchronization period between parallel links. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/node.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/node.c b/net/tipc/node.c index 20cddec0a43c..7756804034e2 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1187,6 +1187,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, if (msg_peer_node_is_up(hdr)) return false; tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + return true; } /* Ignore duplicate packets */ @@ -1232,12 +1233,10 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT); } - if (less(syncpt, n->sync_point)) - n->sync_point = syncpt; } /* Open tunnel link when parallel link reaches synch point */ - if ((n->state == NODE_SYNCHING) && tipc_link_is_synching(l)) { + if (n->state == NODE_SYNCHING) { if (tipc_link_is_synching(l)) { tnl = l; } else { -- cgit v1.2.3 From 1d7e1c2595bd20c5274a8e49d89cf0cf483759de Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:42 -0500 Subject: tipc: reduce code dependency between binding table and node layer The file name_distr.c currently contains three functions, named_cluster_distribute(), tipc_publ_subscribe() and tipc_publ_unsubscribe() that all directly access fields in struct tipc_node. We want to eliminate such dependencies, so we move those functions to the file node.c and rename them to tipc_node_broadcast(), tipc_node_subscribe() and tipc_node_unsubscribe() respectively. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/core.h | 5 ++++ net/tipc/name_distr.c | 67 +++------------------------------------------------ net/tipc/name_distr.h | 1 - net/tipc/name_table.c | 5 ++-- net/tipc/node.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ net/tipc/node.h | 3 +++ 6 files changed, 74 insertions(+), 67 deletions(-) (limited to 'net') diff --git a/net/tipc/core.h b/net/tipc/core.h index 18e95a8020cd..5504d63503df 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -118,6 +118,11 @@ static inline int tipc_netid(struct net *net) return tipc_net(net)->net_id; } +static inline struct list_head *tipc_nodes(struct net *net) +{ + return &tipc_net(net)->node_list; +} + static inline u16 mod(u16 x) { return x & 0xffffu; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index f51c8bdbea1c..ebe9d0ff6e9e 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -84,31 +84,6 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, return buf; } -void named_cluster_distribute(struct net *net, struct sk_buff *skb) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *oskb; - struct tipc_node *node; - u32 dnode; - - rcu_read_lock(); - list_for_each_entry_rcu(node, &tn->node_list, list) { - dnode = node->addr; - if (in_own_node(net, dnode)) - continue; - if (!tipc_node_is_up(node)) - continue; - oskb = pskb_copy(skb, GFP_ATOMIC); - if (!oskb) - break; - msg_set_destnode(buf_msg(oskb), dnode); - tipc_node_xmit_skb(net, oskb, dnode, 0); - } - rcu_read_unlock(); - - kfree_skb(skb); -} - /** * tipc_named_publish - tell other nodes about a new publication by this node */ @@ -226,42 +201,6 @@ void tipc_named_node_up(struct net *net, u32 dnode) tipc_node_xmit(net, &head, dnode, 0); } -static void tipc_publ_subscribe(struct net *net, struct publication *publ, - u32 addr) -{ - struct tipc_node *node; - - if (in_own_node(net, addr)) - return; - - node = tipc_node_find(net, addr); - if (!node) { - pr_warn("Node subscription 
rejected, unknown node 0x%x\n", - addr); - return; - } - - tipc_node_lock(node); - list_add_tail(&publ->nodesub_list, &node->publ_list); - tipc_node_unlock(node); - tipc_node_put(node); -} - -static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, - u32 addr) -{ - struct tipc_node *node; - - node = tipc_node_find(net, addr); - if (!node) - return; - - tipc_node_lock(node); - list_del_init(&publ->nodesub_list); - tipc_node_unlock(node); - tipc_node_put(node); -} - /** * tipc_publ_purge - remove publication associated with a failed node * @@ -277,7 +216,7 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->node, publ->ref, publ->key); if (p) - tipc_publ_unsubscribe(net, p, addr); + tipc_node_unsubscribe(net, &p->nodesub_list, addr); spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { @@ -317,7 +256,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, TIPC_CLUSTER_SCOPE, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_subscribe(net, publ, node); + tipc_node_subscribe(net, &publ->nodesub_list, node); return true; } } else if (dtype == WITHDRAWAL) { @@ -326,7 +265,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_unsubscribe(net, publ, node); + tipc_node_unsubscribe(net, &publ->nodesub_list, node); kfree_rcu(publ, rcu); return true; } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index dd2d9fd80da2..1264ba0af937 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -69,7 +69,6 @@ struct distr_item { struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); -void named_cluster_distribute(struct net *net, struct sk_buff *buf); void tipc_named_node_up(struct net *net, u32 dnode); void tipc_named_rcv(struct net 
*net, struct sk_buff_head *msg_queue); void tipc_named_reinit(struct net *net); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 0f47f08bf38f..91fce70291a8 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -42,6 +42,7 @@ #include "subscr.h" #include "bcast.h" #include "addr.h" +#include "node.h" #include #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -677,7 +678,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, spin_unlock_bh(&tn->nametbl_lock); if (buf) - named_cluster_distribute(net, buf); + tipc_node_broadcast(net, buf); return publ; } @@ -709,7 +710,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, spin_unlock_bh(&tn->nametbl_lock); if (skb) { - named_cluster_distribute(net, skb); + tipc_node_broadcast(net, skb); return 1; } return 0; diff --git a/net/tipc/node.c b/net/tipc/node.c index 7756804034e2..932195258551 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -234,6 +234,42 @@ void tipc_node_stop(struct net *net) spin_unlock_bh(&tn->node_list_lock); } +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) +{ + struct tipc_node *n; + + if (in_own_node(net, addr)) + return; + + n = tipc_node_find(net, addr); + if (!n) { + pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr); + return; + } + tipc_node_lock(n); + list_add_tail(subscr, &n->publ_list); + tipc_node_unlock(n); + tipc_node_put(n); +} + +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) +{ + struct tipc_node *n; + + if (in_own_node(net, addr)) + return; + + n = tipc_node_find(net, addr); + if (!n) { + pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr); + return; + } + tipc_node_lock(n); + list_del_init(subscr); + tipc_node_unlock(n); + tipc_node_put(n); +} + int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; @@ -1075,6 +1111,30 @@ int tipc_node_xmit_skb(struct net 
*net, struct sk_buff *skb, u32 dnode, return 0; } +void tipc_node_broadcast(struct net *net, struct sk_buff *skb) +{ + struct sk_buff *txskb; + struct tipc_node *n; + u32 dst; + + rcu_read_lock(); + list_for_each_entry_rcu(n, tipc_nodes(net), list) { + dst = n->addr; + if (in_own_node(net, dst)) + continue; + if (!tipc_node_is_up(n)) + continue; + txskb = pskb_copy(skb, GFP_ATOMIC); + if (!txskb) + break; + msg_set_destnode(buf_msg(txskb), dst); + tipc_node_xmit_skb(net, txskb, dst, 0); + } + rcu_read_unlock(); + + kfree_skb(skb); +} + /** * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node * @net: the applicable net namespace diff --git a/net/tipc/node.h b/net/tipc/node.h index 6734562d3c6e..dd79e9742bd6 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -149,6 +149,9 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); -- cgit v1.2.3 From 2312bf61ae365fdd6b9bfb24558a417859759447 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:43 -0500 Subject: tipc: introduce per-link spinlock As a preparation to allow parallel links to work more independently from each other we introduce a per-link spinlock, to be stored in the struct nodes's link entry area. Since the node lock still is a regular spinlock there is no increase in parallellism at this stage. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/link.c | 9 +++++---- net/tipc/node.c | 39 ++++++++++++++++++--------------------- net/tipc/node.h | 3 ++- 3 files changed, 25 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index fa452fb5f34e..b5e895c6f1aa 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1995,6 +1995,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct net *net = sock_net(skb->sk); + struct tipc_link_entry *le; if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2020,17 +2021,17 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) node = tipc_link_find_owner(net, link_name, &bearer_id); if (!node) return -EINVAL; - + le = &node->links[bearer_id]; tipc_node_lock(node); - - link = node->links[bearer_id].link; + spin_lock_bh(&le->lock); + link = le->link; if (!link) { tipc_node_unlock(node); return -EINVAL; } link_reset_statistics(link); - + spin_unlock_bh(&le->lock); tipc_node_unlock(node); return 0; diff --git a/net/tipc/node.c b/net/tipc/node.c index 932195258551..572063a0190e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -339,11 +339,13 @@ static void tipc_node_timeout(unsigned long data) for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { tipc_node_lock(n); le = &n->links[bearer_id]; + spin_lock_bh(&le->lock); if (le->link) { /* Link tolerance may change asynchronously: */ tipc_node_calculate_timer(n, le->link); rc = tipc_link_timeout(le->link, &xmitq); } + spin_unlock_bh(&le->lock); tipc_node_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr); if (rc & TIPC_LINK_DOWN_EVT) @@ -654,6 +656,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); le->link = l; + spin_lock_init(&le->lock); n->link_cnt++; tipc_node_calculate_timer(n, l); if (n->link_cnt == 1) @@ -1033,20 +1036,6 @@ 
msg_full: return -EMSGSIZE; } -static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel, - int *bearer_id, - struct tipc_media_addr **maddr) -{ - int id = n->active_links[sel & 1]; - - if (unlikely(id < 0)) - return NULL; - - *bearer_id = id; - *maddr = &n->links[id].maddr; - return n->links[id].link; -} - /** * tipc_node_xmit() is the general link level function for message sending * @net: the applicable net namespace @@ -1059,26 +1048,32 @@ static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel, int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) { - struct tipc_link *l = NULL; + struct tipc_link_entry *le; struct tipc_node *n; struct sk_buff_head xmitq; - struct tipc_media_addr *maddr; - int bearer_id; + struct tipc_media_addr *maddr = NULL; + int bearer_id = -1; int rc = -EHOSTUNREACH; __skb_queue_head_init(&xmitq); n = tipc_node_find(net, dnode); if (likely(n)) { tipc_node_lock(n); - l = tipc_node_select_link(n, selector, &bearer_id, &maddr); - if (likely(l)) - rc = tipc_link_xmit(l, list, &xmitq); + bearer_id = n->active_links[selector & 1]; + if (bearer_id >= 0) { + le = &n->links[bearer_id]; + maddr = &le->maddr; + spin_lock_bh(&le->lock); + if (likely(le->link)) + rc = tipc_link_xmit(le->link, list, &xmitq); + spin_unlock_bh(&le->lock); + } tipc_node_unlock(n); if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); } - if (likely(!rc)) { + if (likely(!skb_queue_empty(&xmitq))) { tipc_bearer_xmit(net, bearer_id, &xmitq, maddr); return 0; } @@ -1374,7 +1369,9 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Check and if necessary update node state */ if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { + spin_lock_bh(&le->lock); rc = tipc_link_rcv(le->link, skb, &xmitq); + spin_unlock_bh(&le->lock); skb = NULL; } unlock: diff --git a/net/tipc/node.h b/net/tipc/node.h index dd79e9742bd6..8784907486c0 100644 
--- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -69,6 +69,7 @@ enum { struct tipc_link_entry { struct tipc_link *link; + spinlock_t lock; /* per-link */ u32 mtu; struct sk_buff_head inputq; struct tipc_media_addr maddr; @@ -86,7 +87,7 @@ struct tipc_bclink_entry { * struct tipc_node - TIPC node structure * @addr: network address of node * @ref: reference counter to node object - * @lock: spinlock governing access to structure + * @lock: rwlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain * @inputq: pointer to input queue containing messages for msg event -- cgit v1.2.3 From 5405ff6e15f40f2f53e37d2dcd7de521e2b7a96f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:44 -0500 Subject: tipc: convert node lock to rwlock According to the node FSM a node in state SELF_UP_PEER_UP cannot change state inside a lock context, except when a TUNNEL_PROTOCOL (SYNCH or FAILOVER) packet arrives. However, the node's individual links may still change state. Since each link now is protected by its own spinlock, we finally have the conditions in place to convert the node spinlock to an rwlock_t. If the node state and arriving packet type are rigth, we can let the link directly receive the packet under protection of its own spinlock and the node lock in read mode. In all other cases we use the node lock in write mode. This enables full concurrent execution between parallel links during steady-state traffic situations, i.e., 99+ % of the time. This commit implements this change. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/link.c | 32 ++++---- net/tipc/node.c | 227 +++++++++++++++++++++++++++++--------------------------- net/tipc/node.h | 10 +-- 3 files changed, 136 insertions(+), 133 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index b5e895c6f1aa..1dda46e5dd83 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1547,7 +1547,7 @@ static struct tipc_node *tipc_link_find_owner(struct net *net, *bearer_id = 0; rcu_read_lock(); list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - tipc_node_lock(n_ptr); + tipc_node_read_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { l_ptr = n_ptr->links[i].link; if (l_ptr && !strcmp(l_ptr->name, link_name)) { @@ -1556,7 +1556,7 @@ static struct tipc_node *tipc_link_find_owner(struct net *net, break; } } - tipc_node_unlock(n_ptr); + tipc_node_read_unlock(n_ptr); if (found_node) break; } @@ -1658,7 +1658,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) if (!node) return -EINVAL; - tipc_node_lock(node); + tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { @@ -1699,7 +1699,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) } out: - tipc_node_unlock(node); + tipc_node_read_unlock(node); return res; } @@ -1898,10 +1898,10 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry_continue_rcu(node, &tn->node_list, list) { - tipc_node_lock(node); + tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); - tipc_node_unlock(node); + tipc_node_read_unlock(node); if (err) goto out; @@ -1913,10 +1913,10 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out; list_for_each_entry_rcu(node, &tn->node_list, list) { - tipc_node_lock(node); + tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); - tipc_node_unlock(node); + tipc_node_read_unlock(node); if (err) goto out; @@ -1967,16 +1967,16 @@ int 
tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) if (!node) return -EINVAL; - tipc_node_lock(node); + tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { - tipc_node_unlock(node); + tipc_node_read_unlock(node); nlmsg_free(msg.skb); return -EINVAL; } err = __tipc_nl_add_link(net, &msg, link, 0); - tipc_node_unlock(node); + tipc_node_read_unlock(node); if (err) { nlmsg_free(msg.skb); return err; @@ -2021,18 +2021,18 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) node = tipc_link_find_owner(net, link_name, &bearer_id); if (!node) return -EINVAL; + le = &node->links[bearer_id]; - tipc_node_lock(node); + tipc_node_read_lock(node); spin_lock_bh(&le->lock); link = le->link; if (!link) { - tipc_node_unlock(node); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); return -EINVAL; } - link_reset_statistics(link); spin_unlock_bh(&le->lock); - tipc_node_unlock(node); - + tipc_node_read_unlock(node); return 0; } diff --git a/net/tipc/node.c b/net/tipc/node.c index 572063a0190e..47d5f84c90c5 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -141,10 +141,63 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) return NULL; } +void tipc_node_read_lock(struct tipc_node *n) +{ + read_lock_bh(&n->lock); +} + +void tipc_node_read_unlock(struct tipc_node *n) +{ + read_unlock_bh(&n->lock); +} + +static void tipc_node_write_lock(struct tipc_node *n) +{ + write_lock_bh(&n->lock); +} + +static void tipc_node_write_unlock(struct tipc_node *n) +{ + struct net *net = n->net; + u32 addr = 0; + u32 flags = n->action_flags; + u32 link_id = 0; + struct list_head *publ_list; + + if (likely(!flags)) { + write_unlock_bh(&n->lock); + return; + } + + addr = n->addr; + link_id = n->link_id; + publ_list = &n->publ_list; + + n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); + + write_unlock_bh(&n->lock); + + if (flags & TIPC_NOTIFY_NODE_DOWN) + 
tipc_publ_notify(net, publ_list, addr); + + if (flags & TIPC_NOTIFY_NODE_UP) + tipc_named_node_up(net, addr); + + if (flags & TIPC_NOTIFY_LINK_UP) + tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, + TIPC_NODE_SCOPE, link_id, addr); + + if (flags & TIPC_NOTIFY_LINK_DOWN) + tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, + link_id, addr); +} + struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n_ptr, *temp_node; + int i; spin_lock_bh(&tn->node_list_lock); n_ptr = tipc_node_find(net, addr); @@ -159,7 +212,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) n_ptr->net = net; n_ptr->capabilities = capabilities; kref_init(&n_ptr->kref); - spin_lock_init(&n_ptr->lock); + rwlock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); @@ -168,6 +221,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) skb_queue_head_init(&n_ptr->bc_entry.inputq1); __skb_queue_head_init(&n_ptr->bc_entry.arrvq); skb_queue_head_init(&n_ptr->bc_entry.inputq2); + for (i = 0; i < MAX_BEARERS; i++) + spin_lock_init(&n_ptr->links[i].lock); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) @@ -246,9 +301,9 @@ void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr); return; } - tipc_node_lock(n); + tipc_node_write_lock(n); list_add_tail(subscr, &n->publ_list); - tipc_node_unlock(n); + tipc_node_write_unlock(n); tipc_node_put(n); } @@ -264,9 +319,9 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr); return; } - tipc_node_lock(n); + tipc_node_write_lock(n); list_del_init(subscr); - 
tipc_node_unlock(n); + tipc_node_write_unlock(n); tipc_node_put(n); } @@ -293,9 +348,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) conn->port = port; conn->peer_port = peer_port; - tipc_node_lock(node); + tipc_node_write_lock(node); list_add_tail(&conn->list, &node->conn_sks); - tipc_node_unlock(node); + tipc_node_write_unlock(node); exit: tipc_node_put(node); return err; @@ -313,14 +368,14 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) if (!node) return; - tipc_node_lock(node); + tipc_node_write_lock(node); list_for_each_entry_safe(conn, safe, &node->conn_sks, list) { if (port != conn->port) continue; list_del(&conn->list); kfree(conn); } - tipc_node_unlock(node); + tipc_node_write_unlock(node); tipc_node_put(node); } @@ -337,7 +392,7 @@ static void tipc_node_timeout(unsigned long data) __skb_queue_head_init(&xmitq); for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { - tipc_node_lock(n); + tipc_node_read_lock(n); le = &n->links[bearer_id]; spin_lock_bh(&le->lock); if (le->link) { @@ -346,7 +401,7 @@ static void tipc_node_timeout(unsigned long data) rc = tipc_link_timeout(le->link, &xmitq); } spin_unlock_bh(&le->lock); - tipc_node_unlock(n); + tipc_node_read_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr); if (rc & TIPC_LINK_DOWN_EVT) tipc_node_link_down(n, bearer_id, false); @@ -425,9 +480,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, static void tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { - tipc_node_lock(n); + tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); - tipc_node_unlock(n); + tipc_node_write_unlock(n); } /** @@ -516,7 +571,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) __skb_queue_head_init(&xmitq); - tipc_node_lock(n); + tipc_node_write_lock(n); if (!tipc_link_is_establishing(l)) { __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); if (delete) { 
@@ -528,7 +583,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) /* Defuse pending tipc_node_link_up() */ tipc_link_fsm_evt(l, LINK_RESET_EVT); } - tipc_node_unlock(n); + tipc_node_write_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } @@ -561,7 +616,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, if (!n) return; - tipc_node_lock(n); + tipc_node_write_lock(n); le = &n->links[b->identity]; @@ -656,7 +711,6 @@ void tipc_node_check_dest(struct net *net, u32 onode, if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); le->link = l; - spin_lock_init(&le->lock); n->link_cnt++; tipc_node_calculate_timer(n, l); if (n->link_cnt == 1) @@ -665,7 +719,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, } memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: - tipc_node_unlock(n); + tipc_node_write_unlock(n); if (reset && !tipc_link_is_reset(l)) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); @@ -873,24 +927,6 @@ illegal_evt: pr_err("Illegal node fsm evt %x in state %x\n", evt, state); } -bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr) -{ - int state = n->state; - - if (likely(state == SELF_UP_PEER_UP)) - return true; - - if (state == SELF_LEAVING_PEER_DOWN) - return false; - - if (state == SELF_DOWN_PEER_LEAVING) { - if (msg_peer_node_is_up(hdr)) - return false; - } - - return true; -} - static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq) { @@ -952,56 +988,18 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, if (bearer_id >= MAX_BEARERS) goto exit; - tipc_node_lock(node); + tipc_node_read_lock(node); link = node->links[bearer_id].link; if (link) { strncpy(linkname, link->name, len); err = 0; } exit: - tipc_node_unlock(node); + tipc_node_read_unlock(node); tipc_node_put(node); return err; } -void tipc_node_unlock(struct tipc_node *node) -{ - struct net *net = node->net; 
- u32 addr = 0; - u32 flags = node->action_flags; - u32 link_id = 0; - struct list_head *publ_list; - - if (likely(!flags)) { - spin_unlock_bh(&node->lock); - return; - } - - addr = node->addr; - link_id = node->link_id; - publ_list = &node->publ_list; - - node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | - TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); - - spin_unlock_bh(&node->lock); - - if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr); - - if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr); - - if (flags & TIPC_NOTIFY_LINK_UP) - tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, - TIPC_NODE_SCOPE, link_id, addr); - - if (flags & TIPC_NOTIFY_LINK_DOWN) - tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, - link_id, addr); - -} - /* Caller should hold node lock for the passed node */ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) { @@ -1048,40 +1046,38 @@ msg_full: int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) { - struct tipc_link_entry *le; + struct tipc_link_entry *le = NULL; struct tipc_node *n; struct sk_buff_head xmitq; - struct tipc_media_addr *maddr = NULL; int bearer_id = -1; int rc = -EHOSTUNREACH; __skb_queue_head_init(&xmitq); n = tipc_node_find(net, dnode); if (likely(n)) { - tipc_node_lock(n); + tipc_node_read_lock(n); bearer_id = n->active_links[selector & 1]; if (bearer_id >= 0) { le = &n->links[bearer_id]; - maddr = &le->maddr; spin_lock_bh(&le->lock); - if (likely(le->link)) - rc = tipc_link_xmit(le->link, list, &xmitq); + rc = tipc_link_xmit(le->link, list, &xmitq); spin_unlock_bh(&le->lock); } - tipc_node_unlock(n); + tipc_node_read_unlock(n); + if (likely(!skb_queue_empty(&xmitq))) { + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + return 0; + } if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); + return rc; } - if (likely(!skb_queue_empty(&xmitq))) { - 
tipc_bearer_xmit(net, bearer_id, &xmitq, maddr); - return 0; - } - if (likely(in_own_node(net, dnode))) { - tipc_sk_rcv(net, list); - return 0; - } - return rc; + + if (unlikely(!in_own_node(net, dnode))) + return rc; + tipc_sk_rcv(net, list); + return 0; } /* tipc_node_xmit_skb(): send single buffer to destination @@ -1171,9 +1167,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /* Broadcast ACKs are sent on a unicast link */ if (rc & TIPC_LINK_SND_BC_ACK) { - tipc_node_lock(n); + tipc_node_read_lock(n); tipc_link_build_ack_msg(le->link, &xmitq); - tipc_node_unlock(n); + tipc_node_read_unlock(n); } if (!skb_queue_empty(&xmitq)) @@ -1229,7 +1225,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, } } - /* Update node accesibility if applicable */ + /* Check and update node accesibility if applicable */ if (state == SELF_UP_PEER_COMING) { if (!tipc_link_is_up(l)) return true; @@ -1245,6 +1241,9 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, return true; } + if (state == SELF_LEAVING_PEER_DOWN) + return false; + /* Ignore duplicate packets */ if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt)) return true; @@ -1361,21 +1360,29 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) else if (unlikely(n->bc_entry.link->acked != bc_ack)) tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); - tipc_node_lock(n); - - /* Is reception permitted at the moment ? 
*/ - if (!tipc_node_filter_pkt(n, hdr)) - goto unlock; - - /* Check and if necessary update node state */ - if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { + /* Receive packet directly if conditions permit */ + tipc_node_read_lock(n); + if (likely((n->state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) { spin_lock_bh(&le->lock); - rc = tipc_link_rcv(le->link, skb, &xmitq); + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } spin_unlock_bh(&le->lock); - skb = NULL; } -unlock: - tipc_node_unlock(n); + tipc_node_read_unlock(n); + + /* Check/update node state before receiving */ + if (unlikely(skb)) { + tipc_node_write_lock(n); + if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } + } + tipc_node_write_unlock(n); + } if (unlikely(rc & TIPC_LINK_UP_EVT)) tipc_node_link_up(n, bearer_id, &xmitq); @@ -1440,15 +1447,15 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } - tipc_node_lock(node); + tipc_node_read_lock(node); err = __tipc_nl_add_node(&msg, node); if (err) { last_addr = node->addr; - tipc_node_unlock(node); + tipc_node_read_unlock(node); goto out; } - tipc_node_unlock(node); + tipc_node_read_unlock(node); } done = 1; out: diff --git a/net/tipc/node.h b/net/tipc/node.h index 8784907486c0..651a1581a210 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -109,7 +109,7 @@ struct tipc_bclink_entry { struct tipc_node { u32 addr; struct kref kref; - spinlock_t lock; + rwlock_t lock; struct net *net; struct hlist_node hash; int active_links[2]; @@ -145,7 +145,8 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); bool tipc_node_is_up(struct tipc_node *n); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); -void tipc_node_unlock(struct tipc_node *node); +void tipc_node_read_lock(struct tipc_node *n); +void 
tipc_node_read_unlock(struct tipc_node *node); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, @@ -157,11 +158,6 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); -static inline void tipc_node_lock(struct tipc_node *node) -{ - spin_lock_bh(&node->lock); -} - static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) { int bearer_id = n->active_links[sel & 1]; -- cgit v1.2.3 From 5be9c086715c10fb9ae3ffc0ef580dc3a165f98a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:45 -0500 Subject: tipc: narrow down exposure of struct tipc_node In our effort to have less code and include dependencies between entities such as node, link and bearer, we try to narrow down the exposed interface towards the node as much as possible. In this commit, we move the definition of struct tipc_node, along with many of its associated function declarations, from node.h to node.c. We also move some function definitions from link.c and name_distr.c to node.c, since they access fields in struct tipc_node that should not be externally visible. The moved functions are renamed according to new location, and made static whenever possible. There are no functional changes in this commit. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/bcast.h | 1 + net/tipc/link.c | 337 +--------------------------------- net/tipc/link.h | 9 +- net/tipc/netlink.c | 6 +- net/tipc/netlink_compat.c | 4 +- net/tipc/node.c | 449 +++++++++++++++++++++++++++++++++++++++++++++- net/tipc/node.h | 117 +----------- 7 files changed, 462 insertions(+), 461 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 2855b9356a15..1944c6c00bb9 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -43,6 +43,7 @@ struct tipc_node; struct tipc_msg; struct tipc_nl_msg; struct tipc_node_map; +extern const char tipc_bclink_name[]; int tipc_bcast_init(struct net *net); void tipc_bcast_reinit(struct net *net); diff --git a/net/tipc/link.c b/net/tipc/link.c index 1dda46e5dd83..c513a807b3a1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -50,23 +50,6 @@ */ static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; -static const char tipc_bclink_name[] = "broadcast-link"; - -static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { - [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_LINK_NAME] = { - .type = NLA_STRING, - .len = TIPC_MAX_LINK_NAME - }, - [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, - [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, - [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, - [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, - [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } -}; /* Properties valid for media, bearar and link */ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { @@ -117,7 +100,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); -static void 
link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); @@ -1527,49 +1509,11 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } -/* tipc_link_find_owner - locate owner node of link by link's name - * @net: the applicable net namespace - * @name: pointer to link name string - * @bearer_id: pointer to index in 'node->links' array where the link was found. - * - * Returns pointer to node owning the link, or 0 if no matching link is found. - */ -static struct tipc_node *tipc_link_find_owner(struct net *net, - const char *link_name, - unsigned int *bearer_id) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - struct tipc_node *found_node = NULL; - int i; - - *bearer_id = 0; - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - tipc_node_read_lock(n_ptr); - for (i = 0; i < MAX_BEARERS; i++) { - l_ptr = n_ptr->links[i].link; - if (l_ptr && !strcmp(l_ptr->name, link_name)) { - *bearer_id = i; - found_node = n_ptr; - break; - } - } - tipc_node_read_unlock(n_ptr); - if (found_node) - break; - } - rcu_read_unlock(); - - return found_node; -} - /** * link_reset_statistics - reset link statistics * @l_ptr: pointer to link */ -static void link_reset_statistics(struct tipc_link *l_ptr) +void link_reset_statistics(struct tipc_link *l_ptr) { memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); l_ptr->stats.sent_info = l_ptr->snd_nxt; @@ -1626,84 +1570,6 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) return 0; } -int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) -{ - int err; - int res = 0; - int bearer_id; - char *name; - struct tipc_link *link; - struct tipc_node *node; - struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; - struct net *net = 
sock_net(skb->sk); - - if (!info->attrs[TIPC_NLA_LINK]) - return -EINVAL; - - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); - if (err) - return err; - - if (!attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - - name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - - if (strcmp(name, tipc_bclink_name) == 0) - return tipc_nl_bc_link_set(net, attrs); - - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return -EINVAL; - - tipc_node_read_lock(node); - - link = node->links[bearer_id].link; - if (!link) { - res = -EINVAL; - goto out; - } - - if (attrs[TIPC_NLA_LINK_PROP]) { - struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; - - err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], - props); - if (err) { - res = err; - goto out; - } - - if (props[TIPC_NLA_PROP_TOL]) { - u32 tol; - - tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - link->tolerance = tol; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); - } - if (props[TIPC_NLA_PROP_PRIO]) { - u32 prio; - - prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); - link->priority = prio; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); - } - if (props[TIPC_NLA_PROP_WIN]) { - u32 win; - - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - tipc_link_set_queue_limits(link, win); - } - } - -out: - tipc_node_read_unlock(node); - - return res; -} - static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) { int i; @@ -1770,8 +1636,8 @@ msg_full: } /* Caller should hold appropriate locks to protect the link */ -static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, - struct tipc_link *link, int nlflags) +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags) { int err; void *hdr; @@ -1839,200 +1705,3 @@ msg_full: return -EMSGSIZE; } - -/* Caller should hold node lock */ -static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, - struct tipc_node *node, u32 *prev_link) -{ 
- u32 i; - int err; - - for (i = *prev_link; i < MAX_BEARERS; i++) { - *prev_link = i; - - if (!node->links[i].link) - continue; - - err = __tipc_nl_add_link(net, msg, - node->links[i].link, NLM_F_MULTI); - if (err) - return err; - } - *prev_link = 0; - - return 0; -} - -int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node; - struct tipc_nl_msg msg; - u32 prev_node = cb->args[0]; - u32 prev_link = cb->args[1]; - int done = cb->args[2]; - int err; - - if (done) - return 0; - - msg.skb = skb; - msg.portid = NETLINK_CB(cb->skb).portid; - msg.seq = cb->nlh->nlmsg_seq; - - rcu_read_lock(); - if (prev_node) { - node = tipc_node_find(net, prev_node); - if (!node) { - /* We never set seq or call nl_dump_check_consistent() - * this means that setting prev_seq here will cause the - * consistence check to fail in the netlink callback - * handler. Resulting in the last NLMSG_DONE message - * having the NLM_F_DUMP_INTR flag set. 
- */ - cb->prev_seq = 1; - goto out; - } - tipc_node_put(node); - - list_for_each_entry_continue_rcu(node, &tn->node_list, - list) { - tipc_node_read_lock(node); - err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); - tipc_node_read_unlock(node); - if (err) - goto out; - - prev_node = node->addr; - } - } else { - err = tipc_nl_add_bc_link(net, &msg); - if (err) - goto out; - - list_for_each_entry_rcu(node, &tn->node_list, list) { - tipc_node_read_lock(node); - err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); - tipc_node_read_unlock(node); - if (err) - goto out; - - prev_node = node->addr; - } - } - done = 1; -out: - rcu_read_unlock(); - - cb->args[0] = prev_node; - cb->args[1] = prev_link; - cb->args[2] = done; - - return skb->len; -} - -int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) -{ - struct net *net = genl_info_net(info); - struct tipc_nl_msg msg; - char *name; - int err; - - msg.portid = info->snd_portid; - msg.seq = info->snd_seq; - - if (!info->attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); - - msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (!msg.skb) - return -ENOMEM; - - if (strcmp(name, tipc_bclink_name) == 0) { - err = tipc_nl_add_bc_link(net, &msg); - if (err) { - nlmsg_free(msg.skb); - return err; - } - } else { - int bearer_id; - struct tipc_node *node; - struct tipc_link *link; - - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return -EINVAL; - - tipc_node_read_lock(node); - link = node->links[bearer_id].link; - if (!link) { - tipc_node_read_unlock(node); - nlmsg_free(msg.skb); - return -EINVAL; - } - - err = __tipc_nl_add_link(net, &msg, link, 0); - tipc_node_read_unlock(node); - if (err) { - nlmsg_free(msg.skb); - return err; - } - } - - return genlmsg_reply(msg.skb, info); -} - -int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) -{ - int err; - char *link_name; - unsigned int bearer_id; - struct 
tipc_link *link; - struct tipc_node *node; - struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; - struct net *net = sock_net(skb->sk); - struct tipc_link_entry *le; - - if (!info->attrs[TIPC_NLA_LINK]) - return -EINVAL; - - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); - if (err) - return err; - - if (!attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - - link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - - if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(net); - if (err) - return err; - return 0; - } - - node = tipc_link_find_owner(net, link_name, &bearer_id); - if (!node) - return -EINVAL; - - le = &node->links[bearer_id]; - tipc_node_read_lock(node); - spin_lock_bh(&le->lock); - link = le->link; - if (!link) { - spin_unlock_bh(&le->lock); - tipc_node_read_unlock(node); - return -EINVAL; - } - link_reset_statistics(link); - spin_unlock_bh(&le->lock); - tipc_node_read_unlock(node); - return 0; -} diff --git a/net/tipc/link.h b/net/tipc/link.h index 66d859b66c84..a7ee806e1ee4 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -249,14 +249,15 @@ bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); void tipc_link_set_active(struct tipc_link *l, bool active); void tipc_link_reset(struct tipc_link *l_ptr); +void link_reset_statistics(struct tipc_link *l); int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); +void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, + u32 gap, u32 tolerance, u32 priority); void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); - +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags); int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); -int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); -int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); 
-int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 7f6475efc984..29dfcc94b6a5 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -101,18 +101,18 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_LINK_GET, - .doit = tipc_nl_link_get, + .doit = tipc_nl_node_get_link, .dumpit = tipc_nl_link_dump, .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_SET, - .doit = tipc_nl_link_set, + .doit = tipc_nl_node_set_link, .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_RESET_STATS, - .doit = tipc_nl_link_reset_stats, + .doit = tipc_nl_node_reset_link_stats, .policy = tipc_nl_policy, }, { diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1eadc95e1132..acda1ce57151 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1036,12 +1036,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) case TIPC_CMD_SET_LINK_PRI: case TIPC_CMD_SET_LINK_WINDOW: msg->req_type = TIPC_TLV_LINK_CONFIG; - doit.doit = tipc_nl_link_set; + doit.doit = tipc_nl_node_set_link; doit.transcode = tipc_nl_compat_link_set; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_RESET_LINK_STATS: msg->req_type = TIPC_TLV_LINK_NAME; - doit.doit = tipc_nl_link_reset_stats; + doit.doit = tipc_nl_node_reset_link_stats; doit.transcode = tipc_nl_compat_link_reset_stats; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_SHOW_NAME_TABLE: diff --git a/net/tipc/node.c b/net/tipc/node.c index 47d5f84c90c5..e110ba67422e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,87 @@ #include "bcast.h" #include "discover.h" +/* Out-of-range value for node signature */ +#define INVALID_NODE_SIG 0x10000 + +#define INVALID_BEARER_ID -1 + +/* 
Flags used to take different actions according to flag type + * TIPC_NOTIFY_NODE_DOWN: notify node is down + * TIPC_NOTIFY_NODE_UP: notify node is up + * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type + */ +enum { + TIPC_NOTIFY_NODE_DOWN = (1 << 3), + TIPC_NOTIFY_NODE_UP = (1 << 4), + TIPC_NOTIFY_LINK_UP = (1 << 6), + TIPC_NOTIFY_LINK_DOWN = (1 << 7) +}; + +struct tipc_link_entry { + struct tipc_link *link; + spinlock_t lock; /* per link */ + u32 mtu; + struct sk_buff_head inputq; + struct tipc_media_addr maddr; +}; + +struct tipc_bclink_entry { + struct tipc_link *link; + struct sk_buff_head inputq1; + struct sk_buff_head arrvq; + struct sk_buff_head inputq2; + struct sk_buff_head namedq; +}; + +/** + * struct tipc_node - TIPC node structure + * @addr: network address of node + * @ref: reference counter to node object + * @lock: rwlock governing access to structure + * @net: the applicable net namespace + * @hash: links to adjacent nodes in unsorted hash chain + * @inputq: pointer to input queue containing messages for msg event + * @namedq: pointer to name table input queue with name table messages + * @active_links: bearer ids of active links, used as index into links[] array + * @links: array containing references to all links to node + * @action_flags: bit mask of different types of node actions + * @state: connectivity state vs peer node + * @sync_point: sequence number where synch/failover is finished + * @list: links to adjacent nodes in sorted list of cluster's nodes + * @working_links: number of working links to node (both active and standby) + * @link_cnt: number of links to node + * @capabilities: bitmap, indicating peer node's functional capabilities + * @signature: node instance identifier + * @link_id: local and remote bearer ids of changing link, if any + * @publ_list: list of publications + * @rcu: rcu struct for tipc_node + */ +struct tipc_node { + u32 addr; + struct kref kref; + rwlock_t lock; + struct net *net; + struct 
hlist_node hash; + int active_links[2]; + struct tipc_link_entry links[MAX_BEARERS]; + struct tipc_bclink_entry bc_entry; + int action_flags; + struct list_head list; + int state; + u16 sync_point; + int link_cnt; + u16 working_links; + u16 capabilities; + u32 signature; + u32 link_id; + struct list_head publ_list; + struct list_head conn_sks; + unsigned long keepalive_intv; + struct timer_list timer; + struct rcu_head rcu; +}; + /* Node FSM states and events: */ enum { @@ -75,6 +156,9 @@ static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(unsigned long data); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); +static struct tipc_node *tipc_node_find(struct net *net, u32 addr); +static void tipc_node_put(struct tipc_node *node); +static bool tipc_node_is_up(struct tipc_node *n); struct tipc_sock_conn { u32 port; @@ -83,12 +167,54 @@ struct tipc_sock_conn { struct list_head list; }; +static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { + [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_LINK_NAME] = { + .type = NLA_STRING, + .len = TIPC_MAX_LINK_NAME + }, + [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } +}; + static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = { [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 }, [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG } }; +static struct tipc_link *node_active_link(struct tipc_node *n, int sel) +{ + int bearer_id = n->active_links[sel & 1]; + + if (unlikely(bearer_id == INVALID_BEARER_ID)) 
+ return NULL; + + return n->links[bearer_id].link; +} + +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) +{ + struct tipc_node *n; + int bearer_id; + unsigned int mtu = MAX_MSG_SIZE; + + n = tipc_node_find(net, addr); + if (unlikely(!n)) + return mtu; + + bearer_id = n->active_links[sel & 1]; + if (likely(bearer_id != INVALID_BEARER_ID)) + mtu = n->links[bearer_id].mtu; + tipc_node_put(n); + return mtu; +} /* * A trivial power-of-two bitmask technique is used for speed, since this * operation is done for every incoming TIPC packet. The number of hash table @@ -107,7 +233,7 @@ static void tipc_node_kref_release(struct kref *kref) tipc_node_delete(node); } -void tipc_node_put(struct tipc_node *node) +static void tipc_node_put(struct tipc_node *node) { kref_put(&node->kref, tipc_node_kref_release); } @@ -120,7 +246,7 @@ static void tipc_node_get(struct tipc_node *node) /* * tipc_node_find - locate specified node object, if it exists */ -struct tipc_node *tipc_node_find(struct net *net, u32 addr) +static struct tipc_node *tipc_node_find(struct net *net, u32 addr) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; @@ -141,12 +267,12 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) return NULL; } -void tipc_node_read_lock(struct tipc_node *n) +static void tipc_node_read_lock(struct tipc_node *n) { read_lock_bh(&n->lock); } -void tipc_node_read_unlock(struct tipc_node *n) +static void tipc_node_read_unlock(struct tipc_node *n) { read_unlock_bh(&n->lock); } @@ -588,7 +714,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_sk_rcv(n->net, &le->inputq); } -bool tipc_node_is_up(struct tipc_node *n) +static bool tipc_node_is_up(struct tipc_node *n) { return n->active_links[0] != INVALID_BEARER_ID; } @@ -1465,3 +1591,316 @@ out: return skb->len; } + +/* tipc_link_find_owner - locate owner node of link by link's name + * @net: the applicable net namespace + * @name: pointer to link name 
string + * @bearer_id: pointer to index in 'node->links' array where the link was found. + * + * Returns pointer to node owning the link, or 0 if no matching link is found. + */ +static struct tipc_node *tipc_link_find_owner(struct net *net, + const char *link_name, + unsigned int *bearer_id) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l_ptr; + struct tipc_node *n_ptr; + struct tipc_node *found_node = NULL; + int i; + + *bearer_id = 0; + rcu_read_lock(); + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { + tipc_node_read_lock(n_ptr); + for (i = 0; i < MAX_BEARERS; i++) { + l_ptr = n_ptr->links[i].link; + if (l_ptr && !strcmp(l_ptr->name, link_name)) { + *bearer_id = i; + found_node = n_ptr; + break; + } + } + tipc_node_read_unlock(n_ptr); + if (found_node) + break; + } + rcu_read_unlock(); + + return found_node; +} + +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int res = 0; + int bearer_id; + char *name; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(name, tipc_bclink_name) == 0) + return tipc_nl_bc_link_set(net, attrs); + + node = tipc_link_find_owner(net, name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_read_lock(node); + + link = node->links[bearer_id].link; + if (!link) { + res = -EINVAL; + goto out; + } + + if (attrs[TIPC_NLA_LINK_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], + props); + if (err) { + res = err; + goto out; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = 
nla_get_u32(props[TIPC_NLA_PROP_TOL]); + link->tolerance = tol; + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); + } + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + link->priority = prio; + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); + } + if (props[TIPC_NLA_PROP_WIN]) { + u32 win; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, win); + } + } + +out: + tipc_node_read_unlock(node); + + return res; +} + +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct tipc_nl_msg msg; + char *name; + int err; + + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + if (!info->attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) + return -ENOMEM; + + if (strcmp(name, tipc_bclink_name) == 0) { + err = tipc_nl_add_bc_link(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } else { + int bearer_id; + struct tipc_node *node; + struct tipc_link *link; + + node = tipc_link_find_owner(net, name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_read_lock(node); + link = node->links[bearer_id].link; + if (!link) { + tipc_node_read_unlock(node); + nlmsg_free(msg.skb); + return -EINVAL; + } + + err = __tipc_nl_add_link(net, &msg, link, 0); + tipc_node_read_unlock(node); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } + + return genlmsg_reply(msg.skb, info); +} + +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *link_name; + unsigned int bearer_id; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_link_entry *le; + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, 
TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(link_name, tipc_bclink_name) == 0) { + err = tipc_bclink_reset_stats(net); + if (err) + return err; + return 0; + } + + node = tipc_link_find_owner(net, link_name, &bearer_id); + if (!node) + return -EINVAL; + + le = &node->links[bearer_id]; + tipc_node_read_lock(node); + spin_lock_bh(&le->lock); + link = node->links[bearer_id].link; + if (!link) { + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return -EINVAL; + } + link_reset_statistics(link); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return 0; +} + +/* Caller should hold node lock */ +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link) +{ + u32 i; + int err; + + for (i = *prev_link; i < MAX_BEARERS; i++) { + *prev_link = i; + + if (!node->links[i].link) + continue; + + err = __tipc_nl_add_link(net, msg, + node->links[i].link, NLM_F_MULTI); + if (err) + return err; + } + *prev_link = 0; + + return 0; +} + +int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_node *node; + struct tipc_nl_msg msg; + u32 prev_node = cb->args[0]; + u32 prev_link = cb->args[1]; + int done = cb->args[2]; + int err; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + if (prev_node) { + node = tipc_node_find(net, prev_node); + if (!node) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. 
+ */ + cb->prev_seq = 1; + goto out; + } + tipc_node_put(node); + + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } else { + err = tipc_nl_add_bc_link(net, &msg); + if (err) + goto out; + + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } + done = 1; +out: + rcu_read_unlock(); + + cb->args[0] = prev_node; + cb->args[1] = prev_link; + cb->args[2] = done; + + return skb->len; +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 651a1581a210..1fbed29d9a25 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -42,23 +42,8 @@ #include "bearer.h" #include "msg.h" -/* Out-of-range value for node signature */ -#define INVALID_NODE_SIG 0x10000 - #define INVALID_BEARER_ID -1 -/* Flags used to take different actions according to flag type - * TIPC_NOTIFY_NODE_DOWN: notify node is down - * TIPC_NOTIFY_NODE_UP: notify node is up - * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type - */ -enum { - TIPC_NOTIFY_NODE_DOWN = (1 << 3), - TIPC_NOTIFY_NODE_UP = (1 << 4), - TIPC_NOTIFY_LINK_UP = (1 << 6), - TIPC_NOTIFY_LINK_DOWN = (1 << 7) -}; - /* Optional capabilities supported by this code version */ enum { @@ -67,72 +52,6 @@ enum { #define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH -struct tipc_link_entry { - struct tipc_link *link; - spinlock_t lock; /* per-link */ - u32 mtu; - struct sk_buff_head inputq; - struct tipc_media_addr maddr; -}; - -struct tipc_bclink_entry { - struct tipc_link *link; - struct sk_buff_head inputq1; - struct sk_buff_head arrvq; - struct sk_buff_head inputq2; - struct sk_buff_head namedq; -}; - -/** - * struct tipc_node - TIPC node structure - * @addr: network 
address of node - * @ref: reference counter to node object - * @lock: rwlock governing access to structure - * @net: the applicable net namespace - * @hash: links to adjacent nodes in unsorted hash chain - * @inputq: pointer to input queue containing messages for msg event - * @namedq: pointer to name table input queue with name table messages - * @active_links: bearer ids of active links, used as index into links[] array - * @links: array containing references to all links to node - * @action_flags: bit mask of different types of node actions - * @state: connectivity state vs peer node - * @sync_point: sequence number where synch/failover is finished - * @list: links to adjacent nodes in sorted list of cluster's nodes - * @working_links: number of working links to node (both active and standby) - * @link_cnt: number of links to node - * @capabilities: bitmap, indicating peer node's functional capabilities - * @signature: node instance identifier - * @link_id: local and remote bearer ids of changing link, if any - * @publ_list: list of publications - * @rcu: rcu struct for tipc_node - */ -struct tipc_node { - u32 addr; - struct kref kref; - rwlock_t lock; - struct net *net; - struct hlist_node hash; - int active_links[2]; - struct tipc_link_entry links[MAX_BEARERS]; - struct tipc_bclink_entry bc_entry; - int action_flags; - struct list_head list; - int state; - u16 sync_point; - int link_cnt; - u16 working_links; - u16 capabilities; - u32 signature; - u32 link_id; - struct list_head publ_list; - struct list_head conn_sks; - unsigned long keepalive_intv; - struct timer_list timer; - struct rcu_head rcu; -}; - -struct tipc_node *tipc_node_find(struct net *net, u32 addr); -void tipc_node_put(struct tipc_node *node); void tipc_node_stop(struct net *net); void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_bearer *bearer, @@ -140,13 +59,8 @@ void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_media_addr *maddr, bool *respond, bool 
*dupl_addr); void tipc_node_delete_links(struct net *net, int bearer_id); -void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -bool tipc_node_is_up(struct tipc_node *n); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); -void tipc_node_read_lock(struct tipc_node *n); -void tipc_node_read_unlock(struct tipc_node *node); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, @@ -156,33 +70,10 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr); void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); - -static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) -{ - int bearer_id = n->active_links[sel & 1]; - - if (unlikely(bearer_id == INVALID_BEARER_ID)) - return NULL; - - return n->links[bearer_id].link; -} - -static inline unsigned int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) -{ - struct tipc_node *n; - int bearer_id; - unsigned int mtu = MAX_MSG_SIZE; - - n = tipc_node_find(net, addr); - if (unlikely(!n)) - return mtu; - - bearer_id = n->active_links[sel & 1]; - if (likely(bearer_id != INVALID_BEARER_ID)) - mtu = n->links[bearer_id].mtu; - tipc_node_put(n); - return mtu; -} +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); #endif -- cgit v1.2.3 From 38206d5939068415c413ac253be6f364d06e672f Mon 
Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:46 -0500 Subject: tipc: narrow down interface towards struct tipc_link We move the definition of struct tipc_link from link.h to link.c in order to minimize its exposure to the rest of the code. When needed, we define new functions to make it possible for external entities to access and set data in the link. Apart from the above, there are no functional changes. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 126 +---------------- net/tipc/link.c | 346 ++++++++++++++++++++++++++++++++++++++++++++-- net/tipc/link.h | 170 ++--------------------- net/tipc/netlink.c | 2 +- net/tipc/netlink_compat.c | 4 +- net/tipc/node.c | 108 +++++++-------- net/tipc/node.h | 4 +- 7 files changed, 415 insertions(+), 345 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 9dc239dfe192..e401108360a2 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -332,131 +332,15 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) tipc_sk_rcv(net, inputq); } -static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, - struct tipc_stats *stats) -{ - int i; - struct nlattr *nest; - - struct nla_map { - __u32 key; - __u32 val; - }; - - struct nla_map map[] = { - {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, - {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, - {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, - {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, - {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, - {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, - {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, - {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, - {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, - {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, - {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, - {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, - {TIPC_NLA_STATS_TX_NACKS, 
stats->sent_nacks}, - {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, - {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, - {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, - {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, - {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, - {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? - (stats->accu_queue_sz / stats->queue_sz_counts) : 0} - }; - - nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); - if (!nest) - return -EMSGSIZE; - - for (i = 0; i < ARRAY_SIZE(map); i++) - if (nla_put_u32(skb, map[i].key, map[i].val)) - goto msg_full; - - nla_nest_end(skb, nest); - - return 0; -msg_full: - nla_nest_cancel(skb, nest); - - return -EMSGSIZE; -} - -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) -{ - int err; - void *hdr; - struct nlattr *attrs; - struct nlattr *prop; - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - - if (!bcl) - return 0; - - tipc_bcast_lock(net); - - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, - NLM_F_MULTI, TIPC_NL_LINK_GET); - if (!hdr) - return -EMSGSIZE; - - attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); - if (!attrs) - goto msg_full; - - /* The broadcast link is always up */ - if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) - goto attr_msg_full; - - if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) - goto attr_msg_full; - if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt)) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt)) - goto attr_msg_full; - - prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); - if (!prop) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) - goto prop_msg_full; - nla_nest_end(msg->skb, prop); - - err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); - if (err) - goto attr_msg_full; - - tipc_bcast_unlock(net); - nla_nest_end(msg->skb, 
attrs); - genlmsg_end(msg->skb, hdr); - - return 0; - -prop_msg_full: - nla_nest_cancel(msg->skb, prop); -attr_msg_full: - nla_nest_cancel(msg->skb, attrs); -msg_full: - tipc_bcast_unlock(net); - genlmsg_cancel(msg->skb, hdr); - - return -EMSGSIZE; -} - int tipc_bclink_reset_stats(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + struct tipc_link *l = tipc_bc_sndlink(net); - if (!bcl) + if (!l) return -ENOPROTOOPT; tipc_bcast_lock(net); - memset(&bcl->stats, 0, sizeof(bcl->stats)); + tipc_link_reset_stats(l); tipc_bcast_unlock(net); return 0; } @@ -530,9 +414,7 @@ enomem: void tipc_bcast_reinit(struct net *net) { - struct tipc_bc_base *b = tipc_bc_base(net); - - msg_set_prevnode(b->link->pmsg, tipc_own_addr(net)); + tipc_link_reinit(tipc_bc_sndlink(net), tipc_own_addr(net)); } void tipc_bcast_stop(struct net *net) diff --git a/net/tipc/link.c b/net/tipc/link.c index c513a807b3a1..4380eb119796 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -45,6 +45,151 @@ #include +struct tipc_stats { + u32 sent_info; /* used in counting # sent packets */ + u32 recv_info; /* used in counting # recv'd packets */ + u32 sent_states; + u32 recv_states; + u32 sent_probes; + u32 recv_probes; + u32 sent_nacks; + u32 recv_nacks; + u32 sent_acks; + u32 sent_bundled; + u32 sent_bundles; + u32 recv_bundled; + u32 recv_bundles; + u32 retransmitted; + u32 sent_fragmented; + u32 sent_fragments; + u32 recv_fragmented; + u32 recv_fragments; + u32 link_congs; /* # port sends blocked by congestion */ + u32 deferred_recv; + u32 duplicates; + u32 max_queue_sz; /* send queue size high water mark */ + u32 accu_queue_sz; /* used for send queue size profiling */ + u32 queue_sz_counts; /* used for send queue size profiling */ + u32 msg_length_counts; /* used for message length profiling */ + u32 msg_lengths_total; /* used for message length profiling */ + u32 msg_length_profile[7]; /* used for msg. 
length profiling */ +}; + +/** + * struct tipc_link - TIPC link data structure + * @addr: network address of link's peer node + * @name: link name character string + * @media_addr: media address to use when sending messages over link + * @timer: link timer + * @net: pointer to namespace struct + * @refcnt: reference counter for permanent references (owner node & timer) + * @peer_session: link session # being used by peer end of link + * @peer_bearer_id: bearer id used by link's peer endpoint + * @bearer_id: local bearer id used by link + * @tolerance: minimum link continuity loss needed to reset link [in ms] + * @keepalive_intv: link keepalive timer interval + * @abort_limit: # of unacknowledged continuity probes needed to reset link + * @state: current state of link FSM + * @peer_caps: bitmap describing capabilities of peer node + * @silent_intv_cnt: # of timer intervals without any reception from peer + * @proto_msg: template for control messages generated by link + * @pmsg: convenience pointer to "proto_msg" field + * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') + * @backlog_limit: backlog queue congestion thresholds (indexed by importance) + * @exp_msg_count: # of tunnelled messages expected during link changeover + * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset + * @mtu: current maximum packet size for this link + * @advertised_mtu: advertised own mtu when link is being established + * @transmitq: queue for sent, non-acked messages + * @backlogq: queue for messages waiting to be sent + * @snt_nxt: next sequence number to use for outbound messages + * @last_retransmitted: sequence number of most recently retransmitted message + * @stale_count: # of identical retransmit requests made by peer + * @ackers: # of peers that needs to ack each packet before it can be released + * @acked: # last packet acked by a certain peer. Used for broadcast. 
+ * @rcv_nxt: next sequence number to expect for inbound messages + * @deferred_queue: deferred queue saved OOS b'cast message received from node + * @unacked_window: # of inbound messages rx'd without ack'ing back to peer + * @inputq: buffer queue for messages to be delivered upwards + * @namedq: buffer queue for name table messages to be delivered upwards + * @next_out: ptr to first unsent outbound message in queue + * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate + * @long_msg_seq_no: next identifier to use for outbound fragmented messages + * @reasm_buf: head of partially reassembled inbound message fragments + * @bc_rcvr: marks that this is a broadcast receiver link + * @stats: collects statistics regarding link activity + */ +struct tipc_link { + u32 addr; + char name[TIPC_MAX_LINK_NAME]; + struct tipc_media_addr *media_addr; + struct net *net; + + /* Management and link supervision data */ + u32 peer_session; + u32 peer_bearer_id; + u32 bearer_id; + u32 tolerance; + unsigned long keepalive_intv; + u32 abort_limit; + u32 state; + u16 peer_caps; + bool active; + u32 silent_intv_cnt; + struct { + unchar hdr[INT_H_SIZE]; + unchar body[TIPC_MAX_IF_NAME]; + } proto_msg; + struct tipc_msg *pmsg; + u32 priority; + char net_plane; + + /* Failover/synch */ + u16 drop_point; + struct sk_buff *failover_reasm_skb; + + /* Max packet negotiation */ + u16 mtu; + u16 advertised_mtu; + + /* Sending */ + struct sk_buff_head transmq; + struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + } backlog[5]; + u16 snd_nxt; + u16 last_retransm; + u16 window; + u32 stale_count; + + /* Reception */ + u16 rcv_nxt; + u32 rcv_unacked; + struct sk_buff_head deferdq; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; + + /* Congestion handling */ + struct sk_buff_head wakeupq; + + /* Fragmentation/reassembly */ + struct sk_buff *reasm_buf; + + /* Broadcast */ + u16 ackers; + u16 acked; + struct tipc_link *bc_rcvlink; + struct tipc_link 
*bc_sndlink; + int nack_state; + bool bc_peer_is_up; + + /* Statistics */ + struct tipc_stats stats; +}; + /* * Error message prefixes */ @@ -165,6 +310,36 @@ void tipc_link_set_active(struct tipc_link *l, bool active) l->active = active; } +u32 tipc_link_id(struct tipc_link *l) +{ + return l->peer_bearer_id << 16 | l->bearer_id; +} + +int tipc_link_window(struct tipc_link *l) +{ + return l->window; +} + +int tipc_link_prio(struct tipc_link *l) +{ + return l->priority; +} + +unsigned long tipc_link_tolerance(struct tipc_link *l) +{ + return l->tolerance; +} + +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l) +{ + return l->inputq; +} + +char tipc_link_plane(struct tipc_link *l) +{ + return l->net_plane; +} + void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq) @@ -207,11 +382,31 @@ int tipc_link_mtu(struct tipc_link *l) return l->mtu; } +u16 tipc_link_rcv_nxt(struct tipc_link *l) +{ + return l->rcv_nxt; +} + +u16 tipc_link_acked(struct tipc_link *l) +{ + return l->acked; +} + +char *tipc_link_name(struct tipc_link *l) +{ + return l->name; +} + static u32 link_own_addr(struct tipc_link *l) { return msg_prevnode(l->pmsg); } +void tipc_link_reinit(struct tipc_link *l, u32 addr) +{ + msg_set_prevnode(l->pmsg, addr); +} + /** * tipc_link_create - create a new link * @n: pointer to associated node @@ -674,7 +869,7 @@ void tipc_link_reset(struct tipc_link *l) l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; - link_reset_statistics(l); + tipc_link_reset_stats(l); } /** @@ -1067,8 +1262,9 @@ drop: /* * Send protocol message to the other endpoint. 
*/ -void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority) +static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, + int probe_msg, u32 gap, u32 tolerance, + u32 priority) { struct sk_buff *skb = NULL; struct sk_buff_head xmitq; @@ -1510,14 +1706,16 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) } /** - * link_reset_statistics - reset link statistics + * link_reset_stats - reset link statistics * @l_ptr: pointer to link */ -void link_reset_statistics(struct tipc_link *l_ptr) +void tipc_link_reset_stats(struct tipc_link *l) { - memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); - l_ptr->stats.sent_info = l_ptr->snd_nxt; - l_ptr->stats.recv_info = l_ptr->rcv_nxt; + memset(&l->stats, 0, sizeof(l->stats)); + if (!link_is_bc_sndlink(l)) { + l->stats.sent_info = l->snd_nxt; + l->stats.recv_info = l->rcv_nxt; + } } static void link_print(struct tipc_link *l, const char *str) @@ -1705,3 +1903,135 @@ msg_full: return -EMSGSIZE; } + +static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, + struct tipc_stats *stats) +{ + int i; + struct nlattr *nest; + + struct nla_map { + __u32 key; + __u32 val; + }; + + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, + {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, + {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, + {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, + {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, 
stats->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? + (stats->accu_queue_sz / stats->queue_sz_counts) : 0} + }; + + nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + if (!nest) + return -EMSGSIZE; + + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; + + nla_nest_end(skb, nest); + + return 0; +msg_full: + nla_nest_cancel(skb, nest); + + return -EMSGSIZE; +} + +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) +{ + int err; + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + + if (!bcl) + return 0; + + tipc_bcast_lock(net); + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_LINK_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; + + /* The broadcast link is always up */ + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; + + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) + goto attr_msg_full; + if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) + goto prop_msg_full; + nla_nest_end(msg->skb, prop); + + err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); + if (err) + goto attr_msg_full; + + tipc_bcast_unlock(net); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); 
+attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + tipc_bcast_unlock(net); + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol) +{ + l->tolerance = tol; + tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0); +} + +void tipc_link_set_prio(struct tipc_link *l, u32 prio) +{ + l->priority = prio; + tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio); +} + +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) +{ + l->abort_limit = limit; +} diff --git a/net/tipc/link.h b/net/tipc/link.h index a7ee806e1ee4..616fc808f23a 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -45,10 +45,6 @@ */ #define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ -/* Out-of-range value for link sequence numbers - */ -#define INVALID_LINK_SEQ 0x10000 - /* Link FSM events: */ enum { @@ -75,151 +71,6 @@ enum { */ #define MAX_PKT_DEFAULT 1500 -struct tipc_stats { - u32 sent_info; /* used in counting # sent packets */ - u32 recv_info; /* used in counting # recv'd packets */ - u32 sent_states; - u32 recv_states; - u32 sent_probes; - u32 recv_probes; - u32 sent_nacks; - u32 recv_nacks; - u32 sent_acks; - u32 sent_bundled; - u32 sent_bundles; - u32 recv_bundled; - u32 recv_bundles; - u32 retransmitted; - u32 sent_fragmented; - u32 sent_fragments; - u32 recv_fragmented; - u32 recv_fragments; - u32 link_congs; /* # port sends blocked by congestion */ - u32 deferred_recv; - u32 duplicates; - u32 max_queue_sz; /* send queue size high water mark */ - u32 accu_queue_sz; /* used for send queue size profiling */ - u32 queue_sz_counts; /* used for send queue size profiling */ - u32 msg_length_counts; /* used for message length profiling */ - u32 msg_lengths_total; /* used for message length profiling */ - u32 msg_length_profile[7]; /* used for msg. 
length profiling */ -}; - -/** - * struct tipc_link - TIPC link data structure - * @addr: network address of link's peer node - * @name: link name character string - * @media_addr: media address to use when sending messages over link - * @timer: link timer - * @net: pointer to namespace struct - * @refcnt: reference counter for permanent references (owner node & timer) - * @peer_session: link session # being used by peer end of link - * @peer_bearer_id: bearer id used by link's peer endpoint - * @bearer_id: local bearer id used by link - * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @keepalive_intv: link keepalive timer interval - * @abort_limit: # of unacknowledged continuity probes needed to reset link - * @state: current state of link FSM - * @peer_caps: bitmap describing capabilities of peer node - * @silent_intv_cnt: # of timer intervals without any reception from peer - * @proto_msg: template for control messages generated by link - * @pmsg: convenience pointer to "proto_msg" field - * @priority: current link priority - * @net_plane: current link network plane ('A' through 'H') - * @backlog_limit: backlog queue congestion thresholds (indexed by importance) - * @exp_msg_count: # of tunnelled messages expected during link changeover - * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset - * @mtu: current maximum packet size for this link - * @advertised_mtu: advertised own mtu when link is being established - * @transmitq: queue for sent, non-acked messages - * @backlogq: queue for messages waiting to be sent - * @snt_nxt: next sequence number to use for outbound messages - * @last_retransmitted: sequence number of most recently retransmitted message - * @stale_count: # of identical retransmit requests made by peer - * @ackers: # of peers that needs to ack each packet before it can be released - * @acked: # last packet acked by a certain peer. Used for broadcast. 
- * @rcv_nxt: next sequence number to expect for inbound messages - * @deferred_queue: deferred queue saved OOS b'cast message received from node - * @unacked_window: # of inbound messages rx'd without ack'ing back to peer - * @inputq: buffer queue for messages to be delivered upwards - * @namedq: buffer queue for name table messages to be delivered upwards - * @next_out: ptr to first unsent outbound message in queue - * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate - * @long_msg_seq_no: next identifier to use for outbound fragmented messages - * @reasm_buf: head of partially reassembled inbound message fragments - * @bc_rcvr: marks that this is a broadcast receiver link - * @stats: collects statistics regarding link activity - */ -struct tipc_link { - u32 addr; - char name[TIPC_MAX_LINK_NAME]; - struct tipc_media_addr *media_addr; - struct net *net; - - /* Management and link supervision data */ - u32 peer_session; - u32 peer_bearer_id; - u32 bearer_id; - u32 tolerance; - unsigned long keepalive_intv; - u32 abort_limit; - u32 state; - u16 peer_caps; - bool active; - u32 silent_intv_cnt; - struct { - unchar hdr[INT_H_SIZE]; - unchar body[TIPC_MAX_IF_NAME]; - } proto_msg; - struct tipc_msg *pmsg; - u32 priority; - char net_plane; - - /* Failover/synch */ - u16 drop_point; - struct sk_buff *failover_reasm_skb; - - /* Max packet negotiation */ - u16 mtu; - u16 advertised_mtu; - - /* Sending */ - struct sk_buff_head transmq; - struct sk_buff_head backlogq; - struct { - u16 len; - u16 limit; - } backlog[5]; - u16 snd_nxt; - u16 last_retransm; - u16 window; - u32 stale_count; - - /* Reception */ - u16 rcv_nxt; - u32 rcv_unacked; - struct sk_buff_head deferdq; - struct sk_buff_head *inputq; - struct sk_buff_head *namedq; - - /* Congestion handling */ - struct sk_buff_head wakeupq; - - /* Fragmentation/reassembly */ - struct sk_buff *reasm_buf; - - /* Broadcast */ - u16 ackers; - u16 acked; - struct tipc_link *bc_rcvlink; - struct tipc_link 
*bc_sndlink; - int nack_state; - bool bc_peer_is_up; - - /* Statistics */ - struct tipc_stats stats; -}; - bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, int window, u32 session, u32 ownnode, u32 peer, @@ -235,11 +86,11 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, struct sk_buff_head *namedq, struct tipc_link *bc_sndlink, struct tipc_link **link); +void tipc_link_reinit(struct tipc_link *l, u32 addr); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); -void tipc_link_reset_fragments(struct tipc_link *l_ptr); bool tipc_link_is_up(struct tipc_link *l); bool tipc_link_peer_is_down(struct tipc_link *l); bool tipc_link_is_reset(struct tipc_link *l); @@ -249,15 +100,24 @@ bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); void tipc_link_set_active(struct tipc_link *l, bool active); void tipc_link_reset(struct tipc_link *l_ptr); -void link_reset_statistics(struct tipc_link *l); -int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, +void tipc_link_reset_stats(struct tipc_link *l); +int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); -void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority); +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l); +u16 tipc_link_rcv_nxt(struct tipc_link *l); +u16 tipc_link_acked(struct tipc_link *l); +u32 tipc_link_id(struct tipc_link *l); +char *tipc_link_name(struct tipc_link *l); +char tipc_link_plane(struct tipc_link *l); +int tipc_link_prio(struct tipc_link *l); +int tipc_link_window(struct tipc_link *l); +unsigned long tipc_link_tolerance(struct tipc_link *l); +void 
tipc_link_set_tolerance(struct tipc_link *l, u32 tol); +void tipc_link_set_prio(struct tipc_link *l, u32 prio); +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *link, int nlflags); -int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 29dfcc94b6a5..8975b0135b76 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -102,7 +102,7 @@ static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_LINK_GET, .doit = tipc_nl_node_get_link, - .dumpit = tipc_nl_link_dump, + .dumpit = tipc_nl_node_dump_link, .policy = tipc_nl_policy, }, { diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index acda1ce57151..2c016fdefe97 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1023,13 +1023,13 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) msg->req_type = TIPC_TLV_LINK_NAME; msg->rep_size = ULTRA_STRING_MAX_LEN; msg->rep_type = TIPC_TLV_ULTRA_STRING; - dump.dumpit = tipc_nl_link_dump; + dump.dumpit = tipc_nl_node_dump_link; dump.format = tipc_nl_compat_link_stat_dump; return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_GET_LINKS: msg->req_type = TIPC_TLV_NET_ADDR; msg->rep_size = ULTRA_STRING_MAX_LEN; - dump.dumpit = tipc_nl_link_dump; + dump.dumpit = tipc_nl_node_dump_link; dump.format = tipc_nl_compat_link_dump; return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_SET_LINK_TOL: diff --git a/net/tipc/node.c b/net/tipc/node.c index e110ba67422e..82c05e9dd0ee 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,11 +42,8 @@ #include "bcast.h" #include 
"discover.h" -/* Out-of-range value for node signature */ #define INVALID_NODE_SIG 0x10000 -#define INVALID_BEARER_ID -1 - /* Flags used to take different actions according to flag type * TIPC_NOTIFY_NODE_DOWN: notify node is down * TIPC_NOTIFY_NODE_UP: notify node is up @@ -360,7 +357,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) n_ptr->active_links[0] = INVALID_BEARER_ID; n_ptr->active_links[1] = INVALID_BEARER_ID; if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr, - U16_MAX, tipc_bc_sndlink(net)->window, + U16_MAX, + tipc_link_window(tipc_bc_sndlink(net)), n_ptr->capabilities, &n_ptr->bc_entry.inputq1, &n_ptr->bc_entry.namedq, @@ -381,7 +379,7 @@ exit: static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) { - unsigned long tol = l->tolerance; + unsigned long tol = tipc_link_tolerance(l); unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; unsigned long keepalive_intv = msecs_to_jiffies(intv); @@ -390,7 +388,7 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) n->keepalive_intv = keepalive_intv; /* Ensure link's abort limit corresponds to current interval */ - l->abort_limit = l->tolerance / jiffies_to_msecs(n->keepalive_intv); + tipc_link_set_abort_limit(l, tol / jiffies_to_msecs(n->keepalive_intv)); } static void tipc_node_delete(struct tipc_node *node) @@ -559,16 +557,16 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, n->working_links++; n->action_flags |= TIPC_NOTIFY_LINK_UP; - n->link_id = nl->peer_bearer_id << 16 | bearer_id; + n->link_id = tipc_link_id(nl); /* Leave room for tunnel header when returning 'mtu' to users: */ - n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE; + n->links[bearer_id].mtu = tipc_link_mtu(nl) - INT_H_SIZE; tipc_bearer_add_dest(n->net, bearer_id, n->addr); tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); pr_debug("Established link <%s> on network plane %c\n", - nl->name, nl->net_plane); + 
tipc_link_name(nl), tipc_link_plane(nl)); /* First link? => give it both slots */ if (!ol) { @@ -581,17 +579,17 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, } /* Second link => redistribute slots */ - if (nl->priority > ol->priority) { - pr_debug("Old link <%s> becomes standby\n", ol->name); + if (tipc_link_prio(nl) > tipc_link_prio(ol)) { + pr_debug("Old link <%s> becomes standby\n", tipc_link_name(ol)); *slot0 = bearer_id; *slot1 = bearer_id; tipc_link_set_active(nl, true); tipc_link_set_active(ol, false); - } else if (nl->priority == ol->priority) { + } else if (tipc_link_prio(nl) == tipc_link_prio(ol)) { tipc_link_set_active(nl, true); *slot1 = bearer_id; } else { - pr_debug("New link <%s> is standby\n", nl->name); + pr_debug("New link <%s> is standby\n", tipc_link_name(nl)); } /* Prepare synchronization with first link */ @@ -621,7 +619,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, struct tipc_link_entry *le = &n->links[*bearer_id]; int *slot0 = &n->active_links[0]; int *slot1 = &n->active_links[1]; - int i, highest = 0; + int i, highest = 0, prio; struct tipc_link *l, *_l, *tnl; l = n->links[*bearer_id].link; @@ -630,12 +628,12 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, n->working_links--; n->action_flags |= TIPC_NOTIFY_LINK_DOWN; - n->link_id = l->peer_bearer_id << 16 | *bearer_id; + n->link_id = tipc_link_id(l); tipc_bearer_remove_dest(n->net, *bearer_id, n->addr); pr_debug("Lost link <%s> on network plane %c\n", - l->name, l->net_plane); + tipc_link_name(l), tipc_link_plane(l)); /* Select new active link if any available */ *slot0 = INVALID_BEARER_ID; @@ -646,10 +644,11 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, continue; if (_l == l) continue; - if (_l->priority < highest) + prio = tipc_link_prio(_l); + if (prio < highest) continue; - if (_l->priority > highest) { - highest = _l->priority; + if (prio > highest) { + highest = prio; *slot0 = 
i; *slot1 = i; continue; @@ -672,17 +671,17 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); /* There is still a working link => initiate failover */ - tnl = node_active_link(n, 0); + *bearer_id = n->active_links[0]; + tnl = n->links[*bearer_id].link; tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); - n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); + n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); - *maddr = &n->links[tnl->bearer_id].maddr; - *bearer_id = tnl->bearer_id; + *maddr = &n->links[*bearer_id].maddr; } static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) @@ -1117,7 +1116,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, tipc_node_read_lock(node); link = node->links[bearer_id].link; if (link) { - strncpy(linkname, link->name, len); + strncpy(linkname, tipc_link_name(link), len); err = 0; } exit: @@ -1328,25 +1327,25 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, u16 oseqno = msg_seqno(hdr); u16 iseqno = msg_seqno(msg_get_wrapped(hdr)); u16 exp_pkts = msg_msgcnt(hdr); - u16 rcv_nxt, syncpt, dlv_nxt; + u16 rcv_nxt, syncpt, dlv_nxt, inputq_len; int state = n->state; struct tipc_link *l, *tnl, *pl = NULL; struct tipc_media_addr *maddr; - int i, pb_id; + int pb_id; l = n->links[bearer_id].link; if (!l) return false; - rcv_nxt = l->rcv_nxt; + rcv_nxt = tipc_link_rcv_nxt(l); if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) return true; /* Find parallel link, if any */ - for (i = 0; i < MAX_BEARERS; i++) { - if ((i != bearer_id) && n->links[i].link) { - pl = n->links[i].link; + for (pb_id = 0; pb_id < MAX_BEARERS; pb_id++) { + if ((pb_id 
!= bearer_id) && n->links[pb_id].link) { + pl = n->links[pb_id].link; break; } } @@ -1378,9 +1377,9 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { syncpt = oseqno + exp_pkts - 1; if (pl && tipc_link_is_up(pl)) { - pb_id = pl->bearer_id; __tipc_node_link_down(n, &pb_id, xmitq, &maddr); - tipc_skb_queue_splice_tail_init(pl->inputq, l->inputq); + tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), + tipc_link_inputq(l)); } /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) @@ -1423,7 +1422,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tnl = pl; pl = l; } - dlv_nxt = pl->rcv_nxt - mod(skb_queue_len(pl->inputq)); + inputq_len = skb_queue_len(tipc_link_inputq(pl)); + dlv_nxt = tipc_link_rcv_nxt(pl) - inputq_len; if (more(dlv_nxt, n->sync_point)) { tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); @@ -1483,7 +1483,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Ensure broadcast reception is in synch with peer's send state */ if (unlikely(usr == LINK_PROTOCOL)) tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); - else if (unlikely(n->bc_entry.link->acked != bc_ack)) + else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); /* Receive packet directly if conditions permit */ @@ -1592,36 +1592,36 @@ out: return skb->len; } -/* tipc_link_find_owner - locate owner node of link by link's name +/* tipc_node_find_by_name - locate owner node of link by link's name * @net: the applicable net namespace * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. * * Returns pointer to node owning the link, or 0 if no matching link is found. 
*/ -static struct tipc_node *tipc_link_find_owner(struct net *net, - const char *link_name, - unsigned int *bearer_id) +static struct tipc_node *tipc_node_find_by_name(struct net *net, + const char *link_name, + unsigned int *bearer_id) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; + struct tipc_link *l; + struct tipc_node *n; struct tipc_node *found_node = NULL; int i; *bearer_id = 0; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - tipc_node_read_lock(n_ptr); + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_read_lock(n); for (i = 0; i < MAX_BEARERS; i++) { - l_ptr = n_ptr->links[i].link; - if (l_ptr && !strcmp(l_ptr->name, link_name)) { + l = n->links[i].link; + if (l && !strcmp(tipc_link_name(l), link_name)) { *bearer_id = i; - found_node = n_ptr; + found_node = n; break; } } - tipc_node_read_unlock(n_ptr); + tipc_node_read_unlock(n); if (found_node) break; } @@ -1658,7 +1658,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) if (strcmp(name, tipc_bclink_name) == 0) return tipc_nl_bc_link_set(net, attrs); - node = tipc_link_find_owner(net, name, &bearer_id); + node = tipc_node_find_by_name(net, name, &bearer_id); if (!node) return -EINVAL; @@ -1684,15 +1684,13 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) u32 tol; tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - link->tolerance = tol; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); + tipc_link_set_tolerance(link, tol); } if (props[TIPC_NLA_PROP_PRIO]) { u32 prio; prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); - link->priority = prio; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); + tipc_link_set_prio(link, prio); } if (props[TIPC_NLA_PROP_WIN]) { u32 win; @@ -1737,7 +1735,7 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) struct tipc_node *node; struct tipc_link *link; - node = tipc_link_find_owner(net, name, 
&bearer_id); + node = tipc_node_find_by_name(net, name, &bearer_id); if (!node) return -EINVAL; @@ -1792,7 +1790,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) return 0; } - node = tipc_link_find_owner(net, link_name, &bearer_id); + node = tipc_node_find_by_name(net, link_name, &bearer_id); if (!node) return -EINVAL; @@ -1805,7 +1803,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) tipc_node_read_unlock(node); return -EINVAL; } - link_reset_statistics(link); + tipc_link_reset_stats(link); spin_unlock_bh(&le->lock); tipc_node_read_unlock(node); return 0; @@ -1834,7 +1832,7 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, return 0; } -int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); diff --git a/net/tipc/node.h b/net/tipc/node.h index 1fbed29d9a25..f39d9d06e8bb 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -42,8 +42,6 @@ #include "bearer.h" #include "msg.h" -#define INVALID_BEARER_ID -1 - /* Optional capabilities supported by this code version */ enum { @@ -51,6 +49,7 @@ enum { }; #define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH +#define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); void tipc_node_check_dest(struct net *net, u32 onode, @@ -72,6 +71,7 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); int 
tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); -- cgit v1.2.3 From 1a90632da8c17a27e0c93538ee987764adee43a5 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:47 -0500 Subject: tipc: eliminate remnants of hungarian notation The number of variables with Hungarian notation (l_ptr, n_ptr etc.) has been significantly reduced over the last couple of years. We now root out the last traces of this practice. There are no functional changes in this commit. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 140 ++++++++++++++++++++++++++-------------------------- net/tipc/bearer.h | 8 +-- net/tipc/discover.c | 38 +++++++------- net/tipc/link.c | 4 +- net/tipc/link.h | 2 +- net/tipc/node.c | 74 +++++++++++++-------------- 6 files changed, 133 insertions(+), 133 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 648f2a67f314..802ffad3200d 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -71,7 +71,7 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } }; -static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr); +static void bearer_disable(struct net *net, struct tipc_bearer *b); /** * tipc_media_find - locates specified media object by name @@ -107,13 +107,13 @@ static struct tipc_media *media_find_id(u8 type) void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) { char addr_str[MAX_ADDR_STR]; - struct tipc_media *m_ptr; + struct tipc_media *m; int ret; - m_ptr = media_find_id(a->media_id); + m = media_find_id(a->media_id); - if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str))) - ret = scnprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); + if (m && !m->addr2str(a, addr_str, sizeof(addr_str))) + ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str); else { u32 i; @@ -175,13 +175,13 @@ static int 
bearer_name_validate(const char *name, struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (b_ptr && (!strcmp(b_ptr->name, name))) - return b_ptr; + b = rtnl_dereference(tn->bearer_list[i]); + if (b && (!strcmp(b->name, name))) + return b; } return NULL; } @@ -189,24 +189,24 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) - tipc_disc_add_dest(b_ptr->link_req); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b) + tipc_disc_add_dest(b->link_req); rcu_read_unlock(); } void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) - tipc_disc_remove_dest(b_ptr->link_req); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b) + tipc_disc_remove_dest(b->link_req); rcu_read_unlock(); } @@ -218,8 +218,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, struct nlattr *attr[]) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; - struct tipc_media *m_ptr; + struct tipc_bearer *b; + struct tipc_media *m; struct tipc_bearer_names b_names; char addr_string[16]; u32 bearer_id; @@ -255,31 +255,31 @@ static int tipc_enable_bearer(struct net *net, const char *name, return -EINVAL; } - m_ptr = tipc_media_find(b_names.media_name); - if (!m_ptr) { + m = tipc_media_find(b_names.media_name); + if (!m) 
{ pr_warn("Bearer <%s> rejected, media <%s> not registered\n", name, b_names.media_name); return -EINVAL; } if (priority == TIPC_MEDIA_LINK_PRI) - priority = m_ptr->priority; + priority = m->priority; restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (!b_ptr) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { bearer_id = i; continue; } - if (!strcmp(name, b_ptr->name)) { + if (!strcmp(name, b->name)) { pr_warn("Bearer <%s> rejected, already enabled\n", name); return -EINVAL; } - if ((b_ptr->priority == priority) && + if ((b->priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { pr_warn("Bearer <%s> rejected, duplicate priority\n", @@ -297,35 +297,35 @@ restart: return -EINVAL; } - b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC); - if (!b_ptr) + b = kzalloc(sizeof(*b), GFP_ATOMIC); + if (!b) return -ENOMEM; - strcpy(b_ptr->name, name); - b_ptr->media = m_ptr; - res = m_ptr->enable_media(net, b_ptr, attr); + strcpy(b->name, name); + b->media = m; + res = m->enable_media(net, b, attr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); return -EINVAL; } - b_ptr->identity = bearer_id; - b_ptr->tolerance = m_ptr->tolerance; - b_ptr->window = m_ptr->window; - b_ptr->domain = disc_domain; - b_ptr->net_plane = bearer_id + 'A'; - b_ptr->priority = priority; + b->identity = bearer_id; + b->tolerance = m->tolerance; + b->window = m->window; + b->domain = disc_domain; + b->net_plane = bearer_id + 'A'; + b->priority = priority; - res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); + res = tipc_disc_create(net, b, &b->bcast_addr); if (res) { - bearer_disable(net, b_ptr); + bearer_disable(net, b); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); return -EINVAL; } - rcu_assign_pointer(tn->bearer_list[bearer_id], b_ptr); + rcu_assign_pointer(tn->bearer_list[bearer_id], b); pr_info("Enabled bearer <%s>, 
discovery domain %s, priority %u\n", name, @@ -336,11 +336,11 @@ restart: /** * tipc_reset_bearer - Reset all links established over this bearer */ -static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) { - pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_node_delete_links(net, b_ptr->identity); - tipc_disc_reset(net, b_ptr); + pr_info("Resetting bearer <%s>\n", b->name); + tipc_node_delete_links(net, b->identity); + tipc_disc_reset(net, b); return 0; } @@ -349,26 +349,26 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) * * Note: This routine assumes caller holds RTNL lock. */ -static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr) +static void bearer_disable(struct net *net, struct tipc_bearer *b) { struct tipc_net *tn = net_generic(net, tipc_net_id); u32 i; - pr_info("Disabling bearer <%s>\n", b_ptr->name); - b_ptr->media->disable_media(b_ptr); + pr_info("Disabling bearer <%s>\n", b->name); + b->media->disable_media(b); - tipc_node_delete_links(net, b_ptr->identity); - RCU_INIT_POINTER(b_ptr->media_ptr, NULL); - if (b_ptr->link_req) - tipc_disc_delete(b_ptr->link_req); + tipc_node_delete_links(net, b->identity); + RCU_INIT_POINTER(b->media_ptr, NULL); + if (b->link_req) + tipc_disc_delete(b->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == rtnl_dereference(tn->bearer_list[i])) { + if (b == rtnl_dereference(tn->bearer_list[i])) { RCU_INIT_POINTER(tn->bearer_list[i], NULL); break; } } - kfree_rcu(b_ptr, rcu); + kfree_rcu(b, rcu); } int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, @@ -411,7 +411,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface * @buf: the packet to be sent - * @b_ptr: the bearer through which the packet is to be sent + * @b: the bearer through which the packet is to be sent * @dest: peer destination address */ 
int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, @@ -532,14 +532,14 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); - if (likely(b_ptr)) { + b = rcu_dereference_rtnl(dev->tipc_ptr); + if (likely(b)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; - tipc_rcv(dev_net(dev), buf, b_ptr); + tipc_rcv(dev_net(dev), buf, b); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -564,13 +564,13 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; - b_ptr = rtnl_dereference(dev->tipc_ptr); - if (!b_ptr) + b = rtnl_dereference(dev->tipc_ptr); + if (!b) return NOTIFY_DONE; - b_ptr->mtu = dev->mtu; + b->mtu = dev->mtu; switch (evt) { case NETDEV_CHANGE: @@ -578,16 +578,16 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_GOING_DOWN: case NETDEV_CHANGEMTU: - tipc_reset_bearer(net, b_ptr); + tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: - b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, + b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); - tipc_reset_bearer(net, b_ptr); + tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - bearer_disable(dev_net(dev), b_ptr); + bearer_disable(dev_net(dev), b); break; } return NOTIFY_OK; @@ -623,13 +623,13 @@ void tipc_bearer_cleanup(void) void tipc_bearer_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (b_ptr) { - 
bearer_disable(net, b_ptr); + b = rtnl_dereference(tn->bearer_list[i]); + if (b) { + bearer_disable(net, b); tn->bearer_list[i] = NULL; } } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 552185bc4773..e31820516774 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -103,11 +103,11 @@ struct tipc_bearer; */ struct tipc_media { int (*send_msg)(struct net *net, struct sk_buff *buf, - struct tipc_bearer *b_ptr, + struct tipc_bearer *b, struct tipc_media_addr *dest); - int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr, + int (*enable_media)(struct net *net, struct tipc_bearer *b, struct nlattr *attr[]); - void (*disable_media)(struct tipc_bearer *b_ptr); + void (*disable_media)(struct tipc_bearer *b); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, int bufsz); @@ -176,7 +176,7 @@ struct tipc_bearer_names { * TIPC routines available to supported media types */ -void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b); /* * Routines made available to TIPC by supported media types diff --git a/net/tipc/discover.c b/net/tipc/discover.c index afe8c47c4085..f1e738e80535 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -75,14 +75,14 @@ struct tipc_link_req { * tipc_disc_init_msg - initialize a link setup message * @net: the applicable net namespace * @type: message type (request or response) - * @b_ptr: ptr to bearer issuing message + * @b: ptr to bearer issuing message */ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, - struct tipc_bearer *b_ptr) + struct tipc_bearer *b) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg; - u32 dest_domain = b_ptr->domain; + u32 dest_domain = b->domain; msg = buf_msg(buf); tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, @@ -92,16 +92,16 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, 
msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); - b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); + b->media->addr2msg(msg_media_addr(msg), &b->addr); } /** * disc_dupl_alert - issue node address duplication alert - * @b_ptr: pointer to bearer detecting duplication + * @b: pointer to bearer detecting duplication * @node_addr: duplicated node address * @media_addr: media address advertised by duplicated node */ -static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, +static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr, struct tipc_media_addr *media_addr) { char node_addr_str[16]; @@ -111,7 +111,7 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str), media_addr); pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str, - media_addr_str, b_ptr->name); + media_addr_str, b->name); } /** @@ -261,13 +261,13 @@ exit: /** * tipc_disc_create - create object to send periodic link setup requests * @net: the applicable net namespace - * @b_ptr: ptr to bearer issuing requests + * @b: ptr to bearer issuing requests * @dest: destination address for request messages * @dest_domain: network domain to which links can be established * * Returns 0 if successful, otherwise -errno. 
*/ -int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, +int tipc_disc_create(struct net *net, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct tipc_link_req *req; @@ -282,17 +282,17 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, return -ENOMEM; } - tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b); memcpy(&req->dest, dest, sizeof(*dest)); req->net = net; - req->bearer_id = b_ptr->identity; - req->domain = b_ptr->domain; + req->bearer_id = b->identity; + req->domain = b->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); - b_ptr->link_req = req; + b->link_req = req; skb = skb_clone(req->buf, GFP_ATOMIC); if (skb) tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); @@ -313,19 +313,19 @@ void tipc_disc_delete(struct tipc_link_req *req) /** * tipc_disc_reset - reset object to send periodic link setup requests * @net: the applicable net namespace - * @b_ptr: ptr to bearer issuing requests + * @b: ptr to bearer issuing requests * @dest_domain: network domain to which links can be established */ -void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) +void tipc_disc_reset(struct net *net, struct tipc_bearer *b) { - struct tipc_link_req *req = b_ptr->link_req; + struct tipc_link_req *req = b->link_req; struct sk_buff *skb; spin_lock_bh(&req->lock); - tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b); req->net = net; - req->bearer_id = b_ptr->identity; - req->domain = b_ptr->domain; + req->bearer_id = b->identity; + req->domain = b->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; mod_timer(&req->timer, jiffies + req->timer_intv); diff --git a/net/tipc/link.c b/net/tipc/link.c index 4380eb119796..b11afe71dfc1 100644 --- 
a/net/tipc/link.c +++ b/net/tipc/link.c @@ -245,7 +245,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); -static void link_print(struct tipc_link *l_ptr, const char *str); +static void link_print(struct tipc_link *l, const char *str); static void tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, @@ -1707,7 +1707,7 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) /** * link_reset_stats - reset link statistics - * @l_ptr: pointer to link + * @l: pointer to link */ void tipc_link_reset_stats(struct tipc_link *l) { diff --git a/net/tipc/link.h b/net/tipc/link.h index 616fc808f23a..b2ae0f4276af 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -99,7 +99,7 @@ bool tipc_link_is_synching(struct tipc_link *l); bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); void tipc_link_set_active(struct tipc_link *l, bool active); -void tipc_link_reset(struct tipc_link *l_ptr); +void tipc_link_reset(struct tipc_link *l); void tipc_link_reset_stats(struct tipc_link *l); int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); diff --git a/net/tipc/node.c b/net/tipc/node.c index 82c05e9dd0ee..3f7a4ed71990 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -319,62 +319,62 @@ static void tipc_node_write_unlock(struct tipc_node *n) struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *n_ptr, *temp_node; + struct tipc_node *n, *temp_node; int i; spin_lock_bh(&tn->node_list_lock); - n_ptr = tipc_node_find(net, addr); - if (n_ptr) + n = tipc_node_find(net, addr); + if (n) goto exit; - n_ptr = kzalloc(sizeof(*n_ptr), 
GFP_ATOMIC); - if (!n_ptr) { + n = kzalloc(sizeof(*n), GFP_ATOMIC); + if (!n) { pr_warn("Node creation failed, no memory\n"); goto exit; } - n_ptr->addr = addr; - n_ptr->net = net; - n_ptr->capabilities = capabilities; - kref_init(&n_ptr->kref); - rwlock_init(&n_ptr->lock); - INIT_HLIST_NODE(&n_ptr->hash); - INIT_LIST_HEAD(&n_ptr->list); - INIT_LIST_HEAD(&n_ptr->publ_list); - INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->bc_entry.namedq); - skb_queue_head_init(&n_ptr->bc_entry.inputq1); - __skb_queue_head_init(&n_ptr->bc_entry.arrvq); - skb_queue_head_init(&n_ptr->bc_entry.inputq2); + n->addr = addr; + n->net = net; + n->capabilities = capabilities; + kref_init(&n->kref); + rwlock_init(&n->lock); + INIT_HLIST_NODE(&n->hash); + INIT_LIST_HEAD(&n->list); + INIT_LIST_HEAD(&n->publ_list); + INIT_LIST_HEAD(&n->conn_sks); + skb_queue_head_init(&n->bc_entry.namedq); + skb_queue_head_init(&n->bc_entry.inputq1); + __skb_queue_head_init(&n->bc_entry.arrvq); + skb_queue_head_init(&n->bc_entry.inputq2); for (i = 0; i < MAX_BEARERS; i++) - spin_lock_init(&n_ptr->links[i].lock); - hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); + spin_lock_init(&n->links[i].lock); + hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { - if (n_ptr->addr < temp_node->addr) + if (n->addr < temp_node->addr) break; } - list_add_tail_rcu(&n_ptr->list, &temp_node->list); - n_ptr->state = SELF_DOWN_PEER_LEAVING; - n_ptr->signature = INVALID_NODE_SIG; - n_ptr->active_links[0] = INVALID_BEARER_ID; - n_ptr->active_links[1] = INVALID_BEARER_ID; - if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr, + list_add_tail_rcu(&n->list, &temp_node->list); + n->state = SELF_DOWN_PEER_LEAVING; + n->signature = INVALID_NODE_SIG; + n->active_links[0] = INVALID_BEARER_ID; + n->active_links[1] = INVALID_BEARER_ID; + if (!tipc_link_bc_create(net, tipc_own_addr(net), n->addr, U16_MAX, 
tipc_link_window(tipc_bc_sndlink(net)), - n_ptr->capabilities, - &n_ptr->bc_entry.inputq1, - &n_ptr->bc_entry.namedq, + n->capabilities, + &n->bc_entry.inputq1, + &n->bc_entry.namedq, tipc_bc_sndlink(net), - &n_ptr->bc_entry.link)) { + &n->bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); - kfree(n_ptr); - n_ptr = NULL; + kfree(n); + n = NULL; goto exit; } - tipc_node_get(n_ptr); - setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); - n_ptr->keepalive_intv = U32_MAX; + tipc_node_get(n); + setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n); + n->keepalive_intv = U32_MAX; exit: spin_unlock_bh(&tn->node_list_lock); - return n_ptr; + return n; } static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) -- cgit v1.2.3 From e2f9dc3bd213792ac006e83f50a5453f23b8c354 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Nov 2015 12:11:23 -0800 Subject: net: avoid NULL deref in napi_get_frags() napi_alloc_skb() can return NULL. We should not crash should this happen. Fixes: 93f93a440415 ("net: move skb_mark_napi_id() into core networking stack") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 41cef3e3f558..5df6cbce727c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4390,8 +4390,10 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = napi_alloc_skb(napi, GRO_MAX_HEAD); - napi->skb = skb; - skb_mark_napi_id(skb, napi); + if (skb) { + napi->skb = skb; + skb_mark_napi_id(skb, napi); + } } return skb; } -- cgit v1.2.3 From ceff86af56e09469d21732c16fd27a7337983c48 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 19 Nov 2015 16:16:41 +0100 Subject: Bluetooth: Add instance range check for Add Advertising command The instance range check for Add Advertising command is missing. 
If the provided instance is out of range an Invalid Parameters error should be returned. At the moment, the generic Failed error is returned. This extra check ensures that clear error messages are returned. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index eca203e891d2..2c6533a3f937 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6839,6 +6839,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, status); + if (cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + flags = __le32_to_cpu(cp->flags); timeout = __le16_to_cpu(cp->timeout); duration = __le16_to_cpu(cp->duration); -- cgit v1.2.3 From 31a3248dd97be9268859abed9a30c1040b2f4090 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 19 Nov 2015 16:16:42 +0100 Subject: Bluetooth: Simplify if statements in tlv_data_is_valid function The if statements for checking the flags parameter could be written a bit easier to read. This changes this. No functional behavior has been changed. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2c6533a3f937..05370e76feb0 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6700,17 +6700,19 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, int i, cur_len; bool flags_managed = false; bool tx_power_managed = false; - u32 flags_params = MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV | - MGMT_ADV_FLAG_MANAGED_FLAGS; - if (is_adv_data && (adv_flags & flags_params)) { - flags_managed = true; - max_len -= 3; - } + if (is_adv_data) { + if (adv_flags & (MGMT_ADV_FLAG_DISCOV | + MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS)) { + flags_managed = true; + max_len -= 3; + } - if (is_adv_data && (adv_flags & MGMT_ADV_FLAG_TX_POWER)) { - tx_power_managed = true; - max_len -= 3; + if (adv_flags & MGMT_ADV_FLAG_TX_POWER) { + tx_power_managed = true; + max_len -= 3; + } } if (len > max_len) -- cgit v1.2.3 From 40b25fe5dc57a6557b96241b75ae63dce716a487 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 19 Nov 2015 16:16:43 +0100 Subject: Bluetooth: Add support for Get Advertising Size Information command The Get Advertising Size Information command allows to retrieve size information for advertising data and scan response data fields depending on the selected flags. This is useful if applications want to know the available size ahead of time. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/mgmt.h | 13 ++++++++++ net/bluetooth/mgmt.c | 58 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index b831242d48a4..af17774c9416 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -571,6 +571,19 @@ struct mgmt_rp_remove_advertising { __u8 instance; } __packed; +#define MGMT_OP_GET_ADV_SIZE_INFO 0x0040 +struct mgmt_cp_get_adv_size_info { + __u8 instance; + __le32 flags; +} __packed; +#define MGMT_GET_ADV_SIZE_INFO_SIZE 5 +struct mgmt_rp_get_adv_size_info { + __u8 instance; + __le32 flags; + __u8 max_adv_data_len; + __u8 max_scan_rsp_len; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 05370e76feb0..dc8e428050d9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -102,6 +102,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_READ_ADV_FEATURES, MGMT_OP_ADD_ADVERTISING, MGMT_OP_REMOVE_ADVERTISING, + MGMT_OP_GET_ADV_SIZE_INFO, }; static const u16 mgmt_events[] = { @@ -7059,6 +7060,62 @@ unlock: return err; } +static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) +{ + u8 max_len = HCI_MAX_AD_LENGTH; + + if (is_adv_data) { + if (adv_flags & (MGMT_ADV_FLAG_DISCOV | + MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS)) + max_len -= 3; + + if (adv_flags & MGMT_ADV_FLAG_TX_POWER) + max_len -= 3; + } + + return max_len; +} + +static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_get_adv_size_info *cp = data; + struct mgmt_rp_get_adv_size_info rp; + u32 flags, supported_flags; + int err; + + BT_DBG("%s", hdev->name); + + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_REJECTED); + + if 
(cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_INVALID_PARAMS); + + flags = __le32_to_cpu(cp->flags); + + /* The current implementation only supports a subset of the specified + * flags. + */ + supported_flags = get_supported_adv_flags(hdev); + if (flags & ~supported_flags) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_INVALID_PARAMS); + + rp.instance = cp->instance; + rp.flags = cp->flags; + rp.max_adv_data_len = tlv_data_max_len(flags, true); + rp.max_scan_rsp_len = tlv_data_max_len(flags, false); + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + + return err; +} + static const struct hci_mgmt_handler mgmt_handlers[] = { { NULL }, /* 0x0000 (no command) */ { read_version, MGMT_READ_VERSION_SIZE, @@ -7146,6 +7203,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { add_advertising, MGMT_ADD_ADVERTISING_SIZE, HCI_MGMT_VAR_LEN }, { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, + { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) -- cgit v1.2.3 From b811580d91e9c0945b0a923dcec3e10cce04ac30 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 19 Nov 2015 12:24:22 -0800 Subject: net: IPv6 fib lookup tracepoint Add tracepoint to show fib6 table lookups and result. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/trace/events/fib6.h | 76 +++++++++++++++++++++++++++++++++++++++++++++ net/core/net-traces.c | 4 +++ net/ipv6/route.c | 10 ++++++ 3 files changed, 90 insertions(+) create mode 100644 include/trace/events/fib6.h (limited to 'net') diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h new file mode 100644 index 000000000000..4cf6bac4686d --- /dev/null +++ b/include/trace/events/fib6.h @@ -0,0 +1,76 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM fib6 + +#if !defined(_TRACE_FIB6_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FIB6_H + +#include +#include +#include +#include + +TRACE_EVENT(fib6_table_lookup, + + TP_PROTO(const struct net *net, const struct rt6_info *rt, + u32 tb_id, const struct flowi6 *flp), + + TP_ARGS(net, rt, tb_id, flp), + + TP_STRUCT__entry( + __field( u32, tb_id ) + + __field( int, oif ) + __field( int, iif ) + __field( __u8, tos ) + __field( __u8, scope ) + __field( __u8, flags ) + __array( __u8, src, 16 ) + __array( __u8, dst, 16 ) + + __dynamic_array( char, name, IFNAMSIZ ) + __array( __u8, gw, 16 ) + ), + + TP_fast_assign( + struct in6_addr *in6; + + __entry->tb_id = tb_id; + __entry->oif = flp->flowi6_oif; + __entry->iif = flp->flowi6_iif; + __entry->tos = flp->flowi6_tos; + __entry->scope = flp->flowi6_scope; + __entry->flags = flp->flowi6_flags; + + in6 = (struct in6_addr *)__entry->src; + *in6 = flp->saddr; + + in6 = (struct in6_addr *)__entry->dst; + *in6 = flp->daddr; + + if (rt->rt6i_idev) { + __assign_str(name, rt->rt6i_idev->dev->name); + } else { + __assign_str(name, ""); + } + if (rt == net->ipv6.ip6_null_entry) { + struct in6_addr in6_zero = {}; + + in6 = (struct in6_addr *)__entry->gw; + *in6 = in6_zero; + + } else if (rt) { + in6 = (struct in6_addr *)__entry->gw; + *in6 = rt->rt6i_gateway; + } + ), + + TP_printk("table %3u oif %d iif %d src %pI6c dst %pI6c tos %d scope %d flags %x ==> dev %s gw %pI6c", + __entry->tb_id, __entry->oif, __entry->iif, + __entry->src, __entry->dst, 
__entry->tos, __entry->scope, + __entry->flags, __get_str(name), __entry->gw) +); + +#endif /* _TRACE_FIB6_H */ + +/* This part must be outside protection */ +#include diff --git a/net/core/net-traces.c b/net/core/net-traces.c index adef015b2f41..92da5e4ceb4f 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -32,6 +32,10 @@ #include #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); +#endif EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6f01fe122abd..89758be9c6a6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -62,6 +62,7 @@ #include #include #include +#include #include @@ -865,6 +866,9 @@ restart: } dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); + + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + return rt; } @@ -1078,6 +1082,8 @@ redo_rt6_select: read_unlock_bh(&table->tb6_lock); rt6_dst_from_metrics_check(rt); + + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && !(rt->rt6i_flags & RTF_GATEWAY))) { @@ -1101,6 +1107,8 @@ redo_rt6_select: uncached_rt = net->ipv6.ip6_null_entry; dst_hold(&uncached_rt->dst); + + trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6); return uncached_rt; } else { @@ -1125,6 +1133,7 @@ redo_rt6_select: dst_release(&rt->dst); } + trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); return pcpu_rt; } @@ -1474,6 +1483,7 @@ out: read_unlock_bh(&table->tb6_lock); + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; }; -- cgit v1.2.3 From 568f44f63621e00af9895f09c70aa38025be8813 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 23 Nov 2015 14:40:47 +0200 Subject: Bluetooth: Fix returning proper HCI status from __hci_req_sync There were a couple of code paths missed by the previous patch that added a HCI status return parameter to __hci_req_sync. 
This patch adds the missing assignments for them. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 76bd912be9fe..e639671f54bd 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -220,8 +220,14 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, * trigger any commands to be sent. This is normal behavior * and should not trigger an error return. */ - if (err == -ENODATA) + if (err == -ENODATA) { + if (hci_status) + *hci_status = 0; return 0; + } + + if (hci_status) + *hci_status = HCI_ERROR_UNSPECIFIED; return err; } -- cgit v1.2.3 From e59a554235b960b3b251772ac1bb743e49d09cee Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Sun, 22 Nov 2015 21:42:21 +0100 Subject: Bluetooth: Fix powering on with privacy and advertising In order to enable advertising with privacy enabled, SMP has to be registered in order to generate new RPA. During power on, it will be registered at the very end which is the reason why advertising is not enabled and it's not possible to enable it anymore due to mismatch between hci_dev settings and actual controller state. This fixes this problem by moving SMP registration earlier, just after controller is powered (which is ok, because LE SMP will be already able to decide on identity address to be used), but before advertising is enabled. 
Signed-off-by: Andrzej Kaczmarek Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index dc8e428050d9..e8a2f8baf958 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7305,13 +7305,6 @@ static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) BT_DBG("status 0x%02x", status); if (!status) { - /* Register the available SMP channels (BR/EDR and LE) only - * when successfully powering on the controller. This late - * registration is required so that LE SMP can clearly - * decide if the public address or static address is used. - */ - smp_register(hdev); - restart_le_actions(hdev); hci_update_background_scan(hdev); } @@ -7423,6 +7416,13 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) return 0; if (powered) { + /* Register the available SMP channels (BR/EDR and LE) only + * when successfully powering on the controller. This late + * registration is required so that LE SMP can clearly + * decide if the public address or static address is used. + */ + smp_register(hdev); + if (powered_update_hci(hdev) == 0) return 0; -- cgit v1.2.3 From dc4270c0cd880f1b28dd48f2a31d869d22da941e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 23 Nov 2015 15:07:51 +0200 Subject: Bluetooth: Increment management interface revision This patch increments the management interface revision due to introduction of a new Get Advertising Size Information command and various other fixes & improvements. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e8a2f8baf958..3d9d2e4839c5 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,7 +38,7 @@ #include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 10 +#define MGMT_REVISION 11 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, -- cgit v1.2.3 From cc30c16344fc3a25153175c7eb9037b2136cd466 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 20 Nov 2015 03:56:23 +0100 Subject: net: dsa: Add support for a switch reset gpio Some boards have a gpio line tied to the switch reset pin. Allow this gpio to be retrieved from the device tree, and take the switch out of reset before performing the probe. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/dsa.txt | 3 +++ include/net/dsa.h | 8 ++++++++ net/dsa/dsa.c | 17 +++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'net') diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt index 04e6bef3ac3f..5fdbbcdf8c4b 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.txt +++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt @@ -31,6 +31,8 @@ A switch child node has the following optional property: switch. Must be set if the switch can not detect the presence and/or size of a connected EEPROM, otherwise optional. +- reset-gpios : phandle and specifier to a gpio line connected to + reset pin of the switch chip. 
A switch may have multiple "port" children nodes @@ -114,6 +116,7 @@ Example: #size-cells = <0>; reg = <17 1>; /* MDIO address 17, switch 1 in tree */ mii-bus = <&mii_bus1>; + reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; switch1port0: port@0 { reg = <0>; diff --git a/include/net/dsa.h b/include/net/dsa.h index 82a4c6011173..3f23dd9d6a69 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -64,6 +65,13 @@ struct dsa_chip_data { * NULL if there is only one switch chip. */ s8 *rtable; + + /* + * A switch may have a GPIO line tied to its reset pin. Parse + * this from the device tree, and use it before performing + * switch soft reset. + */ + struct gpio_desc *reset; }; struct dsa_platform_data { diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1eba07feb34a..0b5565f923cc 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include "dsa_priv.h" @@ -688,6 +689,9 @@ static int dsa_of_probe(struct device *dev) const char *port_name; int chip_index, port_index; const unsigned int *sw_addr, *port_reg; + int gpio; + enum of_gpio_flags of_flags; + unsigned long flags; u32 eeprom_len; int ret; @@ -766,6 +770,19 @@ static int dsa_of_probe(struct device *dev) put_device(cd->host_dev); cd->host_dev = &mdio_bus_switch->dev; } + gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, + &of_flags); + if (gpio_is_valid(gpio)) { + flags = (of_flags == OF_GPIO_ACTIVE_LOW ? 
+ GPIOF_ACTIVE_LOW : 0); + ret = devm_gpio_request_one(dev, gpio, flags, + "switch_reset"); + if (ret) + goto out_free_chip; + + cd->reset = gpio_to_desc(gpio); + gpiod_direction_output(cd->reset, 0); + } for_each_available_child_of_node(child, port) { port_reg = of_get_property(port, "reg", NULL); -- cgit v1.2.3 From 052a4bc49de9f959682140a200e7bcff98ca2cdf Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Mon, 26 Oct 2015 09:10:40 +0000 Subject: netfilter-bridge: Cleanse indentation Fixes a bunch of issues detected by checkpatch with regards to code indentation. No changes detected by objdiff. Signed-off-by: Ian Morris Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_stp.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtable_nat.c | 2 +- net/bridge/netfilter/ebtables.c | 44 +++++++++++++++++------------------ 4 files changed, 25 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 0c40570069ba..6b731e12ecfa 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -41,7 +41,7 @@ struct stp_config_pdu { #define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) static bool ebt_filter_config(const struct ebt_stp_info *info, - const struct stp_config_pdu *stpc) + const struct stp_config_pdu *stpc) { const struct ebt_stp_config_info *c; uint16_t v16; diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 32eccd101f26..593a1bdc079e 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -12,7 +12,7 @@ #include #define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \ - (1 << NF_BR_LOCAL_OUT)) + (1 << NF_BR_LOCAL_OUT)) static struct ebt_entries initial_chains[] = { { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index ec55358f00c8..eb33919821ee 100644 --- 
a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -12,7 +12,7 @@ #include #define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \ - (1 << NF_BR_POST_ROUTING)) + (1 << NF_BR_POST_ROUTING)) static struct ebt_entries initial_chains[] = { { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f46ca417bf2d..2a0b2f67dad6 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -46,7 +46,7 @@ #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) #define COUNTER_OFFSET(n) (SMP_ALIGN(n * sizeof(struct ebt_counter))) #define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \ - COUNTER_OFFSET(n) * cpu)) + COUNTER_OFFSET(n) * cpu)) @@ -126,7 +126,7 @@ ebt_dev_check(const char *entry, const struct net_device *device) /* process standard matches */ static inline int ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out) + const struct net_device *in, const struct net_device *out) { const struct ethhdr *h = eth_hdr(skb); const struct net_bridge_port *p; @@ -323,7 +323,7 @@ letscontinue: /* If it succeeds, returns element and locks mutex */ static inline void * find_inlist_lock_noload(struct list_head *head, const char *name, int *error, - struct mutex *mutex) + struct mutex *mutex) { struct { struct list_head list; @@ -342,7 +342,7 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error, static void * find_inlist_lock(struct list_head *head, const char *name, const char *prefix, - int *error, struct mutex *mutex) + int *error, struct mutex *mutex) { return try_then_request_module( find_inlist_lock_noload(head, name, error, mutex), @@ -493,9 +493,9 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, */ static inline int ebt_check_entry_size_and_hooks(const struct ebt_entry *e, - const struct ebt_table_info *newinfo, - 
unsigned int *n, unsigned int *cnt, - unsigned int *totalcnt, unsigned int *udc_cnt) + const struct ebt_table_info *newinfo, + unsigned int *n, unsigned int *cnt, + unsigned int *totalcnt, unsigned int *udc_cnt) { int i; @@ -562,7 +562,7 @@ struct ebt_cl_stack */ static inline int ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, - unsigned int *n, struct ebt_cl_stack *udc) + unsigned int *n, struct ebt_cl_stack *udc) { int i; @@ -649,9 +649,9 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) static inline int ebt_check_entry(struct ebt_entry *e, struct net *net, - const struct ebt_table_info *newinfo, - const char *name, unsigned int *cnt, - struct ebt_cl_stack *cl_s, unsigned int udc_cnt) + const struct ebt_table_info *newinfo, + const char *name, unsigned int *cnt, + struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { struct ebt_entry_target *t; struct xt_target *target; @@ -764,7 +764,7 @@ cleanup_matches: * accessed. This mask is a parameter to the check() functions of the extensions */ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack *cl_s, - unsigned int udc_cnt, unsigned int hooknr, char *base) + unsigned int udc_cnt, unsigned int hooknr, char *base) { int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict; const struct ebt_entry *e = (struct ebt_entry *)chain->data; @@ -955,7 +955,7 @@ static int translate_table(struct net *net, const char *name, /* called under write_lock */ static void get_counters(const struct ebt_counter *oldcounters, - struct ebt_counter *counters, unsigned int nentries) + struct ebt_counter *counters, unsigned int nentries) { int i, cpu; struct ebt_counter *counter_base; @@ -1342,7 +1342,7 @@ static int update_counters(struct net *net, const void __user *user, } static inline int ebt_make_matchname(const struct ebt_entry_match *m, - const char *base, char __user *ubase) + const char *base, char __user *ubase) { char __user *hlp = ubase + 
((char *)m - base); char name[EBT_FUNCTION_MAXNAMELEN] = {}; @@ -1356,7 +1356,7 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m, } static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, - const char *base, char __user *ubase) + const char *base, char __user *ubase) { char __user *hlp = ubase + ((char *)w - base); char name[EBT_FUNCTION_MAXNAMELEN] = {}; @@ -1367,8 +1367,8 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, return 0; } -static inline int -ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) +static inline int ebt_make_names(struct ebt_entry *e, const char *base, + char __user *ubase) { int ret; char __user *hlp; @@ -1394,9 +1394,9 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) } static int copy_counters_to_user(struct ebt_table *t, - const struct ebt_counter *oldcounters, - void __user *user, unsigned int num_counters, - unsigned int nentries) + const struct ebt_counter *oldcounters, + void __user *user, unsigned int num_counters, + unsigned int nentries) { struct ebt_counter *counterstmp; int ret = 0; @@ -1427,7 +1427,7 @@ static int copy_counters_to_user(struct ebt_table *t, /* called with ebt_mutex locked */ static int copy_everything_to_user(struct ebt_table *t, void __user *user, - const int *len, int cmd) + const int *len, int cmd) { struct ebt_replace tmp; const struct ebt_counter *oldcounters; @@ -2305,7 +2305,7 @@ static int compat_do_ebt_set_ctl(struct sock *sk, break; default: ret = -EINVAL; - } + } return ret; } -- cgit v1.2.3 From 7f495ad946a6be7bd78df752fad3a084d2710ee2 Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Mon, 26 Oct 2015 09:10:41 +0000 Subject: netfilter-bridge: use netdev style comments Changes comments to use netdev style. No changes detected by objdiff. 
Signed-off-by: Ian Morris Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_log.c | 3 +- net/bridge/netfilter/ebt_vlan.c | 15 +++++--- net/bridge/netfilter/ebtables.c | 84 +++++++++++++++++++++-------------------- 3 files changed, 56 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 0ad639a96142..f22284d8a454 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -152,7 +152,8 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ntohs(ah->ar_op)); /* If it's for Ethernet and the lengths are OK, - * then log the ARP payload */ + * then log the ARP payload + */ if (ah->ar_hrd == htons(1) && ah->ar_hln == ETH_ALEN && ah->ar_pln == sizeof(__be32)) { diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 618568888128..98c221dbf059 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -66,7 +66,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) * - Canonical Format Indicator (CFI). The Canonical Format Indicator * (CFI) is a single bit flag value. Currently ignored. * - VLAN Identifier (VID). The VID is encoded as - * an unsigned binary number. */ + * an unsigned binary number. + */ id = TCI & VLAN_VID_MASK; prio = (TCI >> 13) & 0x7; @@ -98,7 +99,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) } /* Check for bitmask range - * True if even one bit is out of mask */ + * True if even one bit is out of mask + */ if (info->bitmask & ~EBT_VLAN_MASK) { pr_debug("bitmask %2X is out of mask (%2X)\n", info->bitmask, EBT_VLAN_MASK); @@ -117,7 +119,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) * 0 - The null VLAN ID. * 1 - The default Port VID (PVID) * 0x0FFF - Reserved for implementation use. - * if_vlan.h: VLAN_N_VID 4096. */ + * if_vlan.h: VLAN_N_VID 4096. 
+ */ if (GET_BITMASK(EBT_VLAN_ID)) { if (!!info->id) { /* if id!=0 => check vid range */ if (info->id > VLAN_N_VID) { @@ -128,7 +131,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) /* Note: This is valid VLAN-tagged frame point. * Any value of user_priority are acceptable, * but should be ignored according to 802.1Q Std. - * So we just drop the prio flag. */ + * So we just drop the prio flag. + */ info->bitmask &= ~EBT_VLAN_PRIO; } /* Else, id=0 (null VLAN ID) => user_priority range (any?) */ @@ -143,7 +147,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) } /* Check for encapsulated proto range - it is possible to be * any value for u_short range. - * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS */ + * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS + */ if (GET_BITMASK(EBT_VLAN_ENCAP)) { if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) { pr_debug("encap frame length %d is less than " diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 2a0b2f67dad6..62090e273aed 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -35,8 +35,7 @@ "report to author: "format, ## args) /* #define BUGPRINT(format, args...) 
*/ -/* - * Each cpu has its own set of counters, so there is no need for write_lock in +/* Each cpu has its own set of counters, so there is no need for write_lock in * the softirq * For reading or updating the counters, the user context needs to * get a write_lock @@ -237,7 +236,8 @@ unsigned int ebt_do_table(struct sk_buff *skb, (*(counter_base + i)).bcnt += skb->len; /* these should only watch: not modify, nor tell us - what to do with the packet */ + * what to do with the packet + */ EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar); t = (struct ebt_entry_target *) @@ -451,7 +451,8 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) { if (e->bitmask != 0) { /* we make userspace set this right, - so there is no misunderstanding */ + * so there is no misunderstanding + */ BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " "in distinguisher\n"); return -EINVAL; @@ -487,8 +488,7 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, return 0; } -/* - * this one is very careful, as it is the first function +/* this one is very careful, as it is the first function * to parse the userspace data */ static inline int @@ -504,10 +504,12 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, break; } /* beginning of a new chain - if i == NF_BR_NUMHOOKS it must be a user defined chain */ + * if i == NF_BR_NUMHOOKS it must be a user defined chain + */ if (i != NF_BR_NUMHOOKS || !e->bitmask) { /* this checks if the previous chain has as many entries - as it said it has */ + * as it said it has + */ if (*n != *cnt) { BUGPRINT("nentries does not equal the nr of entries " "in the chain\n"); @@ -556,8 +558,7 @@ struct ebt_cl_stack unsigned int hookmask; }; -/* - * we need these positions to check that the jumps to a different part of the +/* We need these positions to check that the jumps to a different part of the * entries is a jump to the beginning of a new chain. 
*/ static inline int @@ -687,7 +688,8 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, break; } /* (1 << NF_BR_NUMHOOKS) tells the check functions the rule is on - a base chain */ + * a base chain + */ if (i < NF_BR_NUMHOOKS) hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS); else { @@ -758,8 +760,7 @@ cleanup_matches: return ret; } -/* - * checks for loops and sets the hook mask for udc +/* checks for loops and sets the hook mask for udc * the hook mask for udc tells us from which base chains the udc can be * accessed. This mask is a parameter to the check() functions of the extensions */ @@ -853,7 +854,8 @@ static int translate_table(struct net *net, const char *name, return -EINVAL; } /* make sure chains are ordered after each other in same order - as their corresponding hooks */ + * as their corresponding hooks + */ for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { if (!newinfo->hook_entry[j]) continue; @@ -868,7 +870,8 @@ static int translate_table(struct net *net, const char *name, i = 0; /* holds the expected nr. of entries for the chain */ j = 0; /* holds the up to now counted entries for the chain */ k = 0; /* holds the total nr. of entries, should equal - newinfo->nentries afterwards */ + * newinfo->nentries afterwards + */ udc_cnt = 0; /* will hold the nr. 
of user defined chains (udc) */ ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_check_entry_size_and_hooks, newinfo, @@ -888,10 +891,12 @@ static int translate_table(struct net *net, const char *name, } /* get the location of the udc, put them in an array - while we're at it, allocate the chainstack */ + * while we're at it, allocate the chainstack + */ if (udc_cnt) { /* this will get free'd in do_replace()/ebt_register_table() - if an error occurs */ + * if an error occurs + */ newinfo->chainstack = vmalloc(nr_cpu_ids * sizeof(*(newinfo->chainstack))); if (!newinfo->chainstack) @@ -932,14 +937,15 @@ static int translate_table(struct net *net, const char *name, } /* we now know the following (along with E=mc²): - - the nr of entries in each chain is right - - the size of the allocated space is right - - all valid hooks have a corresponding chain - - there are no loops - - wrong data can still be on the level of a single entry - - could be there are jumps to places that are not the - beginning of a chain. This can only occur in chains that - are not accessible from any base chains, so we don't care. */ + * - the nr of entries in each chain is right + * - the size of the allocated space is right + * - all valid hooks have a corresponding chain + * - there are no loops + * - wrong data can still be on the level of a single entry + * - could be there are jumps to places that are not the + * beginning of a chain. This can only occur in chains that + * are not accessible from any base chains, so we don't care. 
+ */ /* used to know what we need to clean up if something goes wrong */ i = 0; @@ -986,7 +992,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, struct ebt_table *t; /* the user wants counters back - the check on the size is done later, when we have the lock */ + * the check on the size is done later, when we have the lock + */ if (repl->num_counters) { unsigned long size = repl->num_counters * sizeof(*counterstmp); counterstmp = vmalloc(size); @@ -1038,9 +1045,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, write_unlock_bh(&t->lock); mutex_unlock(&ebt_mutex); /* so, a user can change the chains while having messed up her counter - allocation. Only reason why this is done is because this way the lock - is held only once, while this doesn't bring the kernel into a - dangerous state. */ + * allocation. Only reason why this is done is because this way the lock + * is held only once, while this doesn't bring the kernel into a + * dangerous state. + */ if (repl->num_counters && copy_to_user(repl->counters, counterstmp, repl->num_counters * sizeof(struct ebt_counter))) { @@ -1348,7 +1356,8 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m, char name[EBT_FUNCTION_MAXNAMELEN] = {}; /* ebtables expects 32 bytes long names but xt_match names are 29 bytes - long. Copy 29 bytes and fill remaining bytes with zeroes. */ + * long. Copy 29 bytes and fill remaining bytes with zeroes. + */ strlcpy(name, m->u.match->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; @@ -1595,8 +1604,7 @@ static int ebt_compat_entry_padsize(void) static int ebt_compat_match_offset(const struct xt_match *match, unsigned int userlen) { - /* - * ebt_among needs special handling. The kernel .matchsize is + /* ebt_among needs special handling. The kernel .matchsize is * set to -1 at registration time; at runtime an EBT_ALIGN()ed * value is expected. 
* Example: userspace sends 4500, ebt_among.c wants 4504. @@ -1966,8 +1974,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, return off + match_size; } -/* - * return size of all matches, watchers or target, including necessary +/* return size of all matches, watchers or target, including necessary * alignment and padding. */ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, @@ -2070,8 +2077,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, if (ret < 0) return ret; buf_start = (char *) entry; - /* - * 0: matches offset, always follows ebt_entry. + /* 0: matches offset, always follows ebt_entry. * 1: watchers offset, from ebt_entry structure * 2: target offset, from ebt_entry structure * 3: next ebt_entry offset, from ebt_entry structure @@ -2115,8 +2121,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, return 0; } -/* - * repl->entries_size is the size of the ebt_entry blob in userspace. +/* repl->entries_size is the size of the ebt_entry blob in userspace. * It might need more memory when copied to a 64 bit kernel in case * userspace is 32-bit. So, first task: find out how much memory is needed. * @@ -2360,8 +2365,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, break; case EBT_SO_GET_ENTRIES: case EBT_SO_GET_INIT_ENTRIES: - /* - * try real handler first in case of userland-side padding. + /* try real handler first in case of userland-side padding. * in case we are dealing with an 'ordinary' 32 bit binary * without 64bit compatibility padding, this will fail right * after copy_from_user when the *len argument is validated. -- cgit v1.2.3 From abcdd9a6239d42851faac86ba32158fbfee71b22 Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Mon, 26 Oct 2015 09:10:42 +0000 Subject: netfilter-bridge: brace placement Change brace placement to eliminate checkpatch error. No changes detected by objdiff. 
Signed-off-by: Ian Morris Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_log.c | 6 ++---- net/bridge/netfilter/ebtables.c | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index f22284d8a454..152300d164ac 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -36,14 +36,12 @@ static int ebt_log_tg_check(const struct xt_tgchk_param *par) return 0; } -struct tcpudphdr -{ +struct tcpudphdr { __be16 src; __be16 dst; }; -struct arppayload -{ +struct arppayload { unsigned char mac_src[ETH_ALEN]; unsigned char ip_src[4]; unsigned char mac_dst[ETH_ALEN]; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 62090e273aed..b13ea69c1bc1 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -551,8 +551,7 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, return 0; } -struct ebt_cl_stack -{ +struct ebt_cl_stack { struct ebt_chainstack cs; int from; unsigned int hookmask; -- cgit v1.2.3 From c1bc1d257bd06943413f9b0e943028c028eb34a6 Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Mon, 26 Oct 2015 09:10:43 +0000 Subject: netfilter-bridge: layout of if statements Eliminate some checkpatch issues by improved layout of if statements. No changes detected by objdiff. 
Signed-off-by: Ian Morris Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_ip6.c | 4 ++-- net/bridge/netfilter/ebtables.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 17fd5f2cb4b8..98de6e7fd86d 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -65,8 +65,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) return false; - if (!(info->bitmask & ( EBT_IP6_DPORT | - EBT_IP6_SPORT | EBT_IP6_ICMP6))) + if (!(info->bitmask & (EBT_IP6_DPORT | + EBT_IP6_SPORT | EBT_IP6_ICMP6))) return true; /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index b13ea69c1bc1..67b2e27999aa 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -161,7 +161,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, for (i = 0; i < 6; i++) verdict |= (h->h_source[i] ^ e->sourcemac[i]) & e->sourcemsk[i]; - if (FWINV2(verdict != 0, EBT_ISOURCE) ) + if (FWINV2(verdict != 0, EBT_ISOURCE)) return 1; } if (e->bitmask & EBT_DESTMAC) { @@ -169,7 +169,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, for (i = 0; i < 6; i++) verdict |= (h->h_dest[i] ^ e->destmac[i]) & e->destmsk[i]; - if (FWINV2(verdict != 0, EBT_IDEST) ) + if (FWINV2(verdict != 0, EBT_IDEST)) return 1; } return 0; @@ -673,7 +673,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, BUGPRINT("Unknown flag for inv bitmask\n"); return -EINVAL; } - if ( (e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3) ) { + if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) { BUGPRINT("NOPROTO & 802_3 not allowed\n"); return -EINVAL; } @@ -1370,7 +1370,7 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, 
char name[EBT_FUNCTION_MAXNAMELEN] = {}; strlcpy(name, w->u.watcher->name, sizeof(name)); - if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN)) + if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; } -- cgit v1.2.3 From a18fd970ce99eee5105a511621d7064812b8cc8c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 17 Nov 2015 13:45:53 -0800 Subject: netfilter: remove duplicate include Signed-off-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_reject_ipv4.c | 1 - net/ipv6/netfilter/nf_reject_ipv6.c | 1 - 2 files changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index c747b2d9eb77..b6ea57ec5e14 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -14,7 +14,6 @@ #include #include #include -#include const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *_oth, int hook) diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index e0f922b777e3..4709f657b7b6 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -14,7 +14,6 @@ #include #include #include -#include const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *otcph, -- cgit v1.2.3 From 029f7f3b8701cc7aca8bdb31f0c7edd6a479e357 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Nov 2015 23:32:39 +0100 Subject: netfilter: ipv6: nf_defrag: avoid/free clone operations commit 6aafeef03b9d9ecf ("netfilter: push reasm skb through instead of original frag skbs") changed ipv6 defrag to not use the original skbs anymore. So rather than keeping the original skbs around just to discard them afterwards just use the original skbs directly for the fraglist of the newly assembled skb and remove the extra clone/free operations. 
The skb that completes the fragment queue is morphed into the reassembled one instead, just like ipv4 defrag. openvswitch doesn't need any additional skb_morph magic anymore to deal with this situation so just remove that. A followup patch can then also remove the NF_HOOK (re)invocation in the ipv6 netfilter defrag hook. Cc: Joe Stringer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 1 - net/ipv6/netfilter/nf_conntrack_reasm.c | 105 +++++++++++----------------- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 6 -- net/openvswitch/conntrack.c | 14 ---- 4 files changed, 40 insertions(+), 86 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index fb7da5bb76cc..fcd20cf8f5d5 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -6,7 +6,6 @@ void nf_defrag_ipv6_enable(void); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); -void nf_ct_frag6_consume_orig(struct sk_buff *skb); struct inet_frags_ctl; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d5efeb87350e..1a86a08adbe5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -56,7 +56,6 @@ struct nf_ct_frag6_skb_cb { struct inet6_skb_parm h; int offset; - struct sk_buff *orig; }; #define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb)) @@ -170,12 +169,6 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q) return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); } -static void nf_skb_free(struct sk_buff *skb) -{ - if (NFCT_FRAG6_CB(skb)->orig) - kfree_skb(NFCT_FRAG6_CB(skb)->orig); -} - static void nf_ct_frag6_expire(unsigned long data) { struct frag_queue *fq; @@ -376,9 +369,9 @@ err: * the last and the
first frames arrived and all the bits are here. */ static struct sk_buff * -nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) +nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { - struct sk_buff *fp, *op, *head = fq->q.fragments; + struct sk_buff *fp, *head = fq->q.fragments; int payload_len; u8 ecn; @@ -429,10 +422,38 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->csum = 0; clone->ip_summed = head->ip_summed; - NFCT_FRAG6_CB(clone)->orig = NULL; add_frag_mem_limit(fq->q.net, clone->truesize); } + /* morph head into last received skb: prev. + * + * This allows callers of ipv6 conntrack defrag to continue + * to use the last skb(frag) passed into the reasm engine. + * The last skb frag 'silently' turns into the full reassembled skb. + * + * Since prev is also part of q->fragments we have to clone it first. + */ + if (head != prev) { + struct sk_buff *iter; + + fp = skb_clone(prev, GFP_ATOMIC); + if (!fp) + goto out_oom; + + fp->next = prev->next; + skb_queue_walk(head, iter) { + if (iter->next != prev) + continue; + iter->next = fp; + break; + } + + skb_morph(prev, head); + prev->next = head->next; + consume_skb(head); + head = prev; + } + /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; @@ -473,21 +494,6 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) fq->q.fragments = NULL; fq->q.fragments_tail = NULL; - /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ - fp = skb_shinfo(head)->frag_list; - if (fp && NFCT_FRAG6_CB(fp)->orig == NULL) - /* at above code, head skb is divided into two skbs. 
*/ - fp = fp->next; - - op = NFCT_FRAG6_CB(head)->orig; - for (; fp; fp = fp->next) { - struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig; - - op->next = orig; - op = orig; - NFCT_FRAG6_CB(fp)->orig = NULL; - } - return head; out_oversize: @@ -565,7 +571,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { - struct sk_buff *clone; struct net_device *dev = skb->dev; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -583,42 +588,30 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) return skb; - clone = skb_clone(skb, GFP_ATOMIC); - if (clone == NULL) { - pr_debug("Can't clone skb\n"); + if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) return skb; - } - NFCT_FRAG6_CB(clone)->orig = skb; - - if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { - pr_debug("message is too short.\n"); - goto ret_orig; - } - - skb_set_transport_header(clone, fhoff); - hdr = ipv6_hdr(clone); - fhdr = (struct frag_hdr *)skb_transport_header(clone); + skb_set_transport_header(skb, fhoff); + hdr = ipv6_hdr(skb); + fhdr = (struct frag_hdr *)skb_transport_header(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); - if (fq == NULL) { - pr_debug("Can't find and can't create new queue\n"); - goto ret_orig; - } + if (fq == NULL) + return skb; spin_lock_bh(&fq->q.lock); - if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { + if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); inet_frag_put(&fq->q, &nf_frags); - goto ret_orig; + return skb; } if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { - ret_skb = nf_ct_frag6_reasm(fq, dev); + ret_skb = nf_ct_frag6_reasm(fq, skb, dev); if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } @@ 
-626,26 +619,9 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use inet_frag_put(&fq->q, &nf_frags); return ret_skb; - -ret_orig: - kfree_skb(clone); - return skb; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); -void nf_ct_frag6_consume_orig(struct sk_buff *skb) -{ - struct sk_buff *s, *s2; - - for (s = NFCT_FRAG6_CB(skb)->orig; s;) { - s2 = s->next; - s->next = NULL; - consume_skb(s); - s = s2; - } -} -EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); - static int nf_ct_net_init(struct net *net) { int res; @@ -680,7 +656,6 @@ int nf_ct_frag6_init(void) nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; - nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 4fdbed5ebfb6..fb96b1018884 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -69,12 +69,6 @@ static unsigned int ipv6_defrag(void *priv, if (reasm == NULL) return NF_STOLEN; - /* error occurred or not fragmented */ - if (reasm == skb) - return NF_ACCEPT; - - nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, state->in, state->out, state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c2cc11168fd5..cac2169f2909 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -321,21 +321,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, if (!reasm) return -EINPROGRESS; - if (skb == reasm) { - kfree_skb(skb); - return -EINVAL; - } - - /* Don't free 'skb' even though it is one of the original - * fragments, as we're going to morph it into the head. 
- */ - skb_get(skb); - nf_ct_frag6_consume_orig(reasm); - key->ip.proto = ipv6_hdr(reasm)->nexthdr; - skb_morph(skb, reasm); - skb->next = reasm->next; - consume_skb(reasm); ovs_cb.mru = IP6CB(skb)->frag_max_size; #endif } else { -- cgit v1.2.3 From daaa7d647f81f3f1494d9a9029d611b666d63181 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Nov 2015 23:32:40 +0100 Subject: netfilter: ipv6: avoid nf_iterate recursion The previous patch changed nf_ct_frag6_gather() to morph reassembled skb with the previous one. This means that the return value is always NULL or the skb argument. So change it to an err value. Instead of invoking NF_HOOK recursively with threshold to skip already-called hooks we can now just return NF_ACCEPT to move on to the next hook except for -EINPROGRESS (which means skb has been queued for reassembly), in which case we return NF_STOLEN. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 71 +++++++++++++---------------- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 14 ++---- net/openvswitch/conntrack.c | 11 ++--- 4 files changed, 42 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index fcd20cf8f5d5..ddf162f7966f 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -5,7 +5,7 @@ void nf_defrag_ipv6_enable(void); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); -struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); +int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); struct inet_frags_ctl; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1a86a08adbe5..912bc3afc183 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c 
@@ -361,14 +361,15 @@ err: /* * Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. + * + * returns true if *prev skb has been transformed into the reassembled + * skb, false otherwise. */ -static struct sk_buff * +static bool nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { struct sk_buff *fp, *head = fq->q.fragments; @@ -382,22 +383,21 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) - goto out_fail; + return false; /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { - pr_debug("payload len is too large.\n"); - goto out_oversize; + net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", + payload_len); + return false; } /* Head of list must not be cloned. 
*/ - if (skb_unclone(head, GFP_ATOMIC)) { - pr_debug("skb is cloned but can't expand head"); - goto out_oom; - } + if (skb_unclone(head, GFP_ATOMIC)) + return false; /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part @@ -408,7 +408,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic clone = alloc_skb(0, GFP_ATOMIC); if (clone == NULL) - goto out_oom; + return false; clone->next = head->next; head->next = clone; @@ -438,7 +438,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic fp = skb_clone(prev, GFP_ATOMIC); if (!fp) - goto out_oom; + return false; fp->next = prev->next; skb_queue_walk(head, iter) { @@ -494,16 +494,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic fq->q.fragments = NULL; fq->q.fragments_tail = NULL; - return head; - -out_oversize: - net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", - payload_len); - goto out_fail; -out_oom: - net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n"); -out_fail: - return NULL; + return true; } /* @@ -569,27 +560,26 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return 0; } -struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) +int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { struct net_device *dev = skb->dev; + int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; - int fhoff, nhoff; u8 prevhdr; - struct sk_buff *ret_skb = NULL; /* Jumbo payload inhibits frag. 
header */ if (ipv6_hdr(skb)->payload_len == 0) { pr_debug("payload len = 0\n"); - return skb; + return -EINVAL; } if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) - return skb; + return -EINVAL; if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) - return skb; + return -ENOMEM; skb_set_transport_header(skb, fhoff); hdr = ipv6_hdr(skb); @@ -598,27 +588,28 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); if (fq == NULL) - return skb; + return -ENOMEM; spin_lock_bh(&fq->q.lock); if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { - spin_unlock_bh(&fq->q.lock); - pr_debug("Can't insert skb to queue\n"); - inet_frag_put(&fq->q, &nf_frags); - return skb; + ret = -EINVAL; + goto out_unlock; } + /* after queue has assumed skb ownership, only 0 or -EINPROGRESS + * must be returned. + */ + ret = -EINPROGRESS; if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) { - ret_skb = nf_ct_frag6_reasm(fq, skb, dev); - if (ret_skb == NULL) - pr_debug("Can't reassemble fragmented packets\n"); - } - spin_unlock_bh(&fq->q.lock); + fq->q.meat == fq->q.len && + nf_ct_frag6_reasm(fq, skb, dev)) + ret = 0; +out_unlock: + spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q, &nf_frags); - return ret_skb; + return ret; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index fb96b1018884..f7aab5ab93a5 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -55,7 +55,7 @@ static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - struct sk_buff *reasm; + int err; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? 
*/ @@ -63,17 +63,13 @@ static unsigned int ipv6_defrag(void *priv, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(state->net, skb, - nf_ct6_defrag_user(state->hook, skb)); + err = nf_ct_frag6_gather(state->net, skb, + nf_ct6_defrag_user(state->hook, skb)); /* queued */ - if (reasm == NULL) + if (err == -EINPROGRESS) return NF_STOLEN; - NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, - state->in, state->out, - state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); - - return NF_STOLEN; + return NF_ACCEPT; } static struct nf_hook_ops ipv6_defrag_ops[] = { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index cac2169f2909..0c68c8e46d0b 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -300,10 +300,10 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, u16 zone, struct sk_buff *skb) { struct ovs_skb_cb ovs_cb = *OVS_CB(skb); + int err; if (key->eth.type == htons(ETH_P_IP)) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; - int err; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); err = ip_defrag(net, skb, user); @@ -314,14 +314,13 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - struct sk_buff *reasm; memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - reasm = nf_ct_frag6_gather(net, skb, user); - if (!reasm) - return -EINPROGRESS; + err = nf_ct_frag6_gather(net, skb, user); + if (err) + return err; - key->ip.proto = ipv6_hdr(reasm)->nexthdr; + key->ip.proto = ipv6_hdr(skb)->nexthdr; ovs_cb.mru = IP6CB(skb)->frag_max_size; #endif } else { -- cgit v1.2.3 From 1113ebbcf9e43c80fe5ef05c48b4cd1c25b306b2 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:24 +0100 Subject: net: ipmr: move the tbl id check in ipmr_new_table Move the table id check in ipmr_new_table 
and make it return error pointer. We need this change for the upcoming netlink table manipulation support in order to avoid code duplication and a race condition. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 92dd4b74d513..5271e2eee110 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -252,8 +252,8 @@ static int __net_init ipmr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv4.mr_tables); mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - if (!mrt) { - err = -ENOMEM; + if (IS_ERR(mrt)) { + err = PTR_ERR(mrt); goto err1; } @@ -301,8 +301,13 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, static int __net_init ipmr_rules_init(struct net *net) { - net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - return net->ipv4.mrt ? 0 : -ENOMEM; + struct mr_table *mrt; + + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + if (IS_ERR(mrt)) + return PTR_ERR(mrt); + net->ipv4.mrt = mrt; + return 0; } static void __net_exit ipmr_rules_exit(struct net *net) @@ -319,13 +324,17 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) struct mr_table *mrt; unsigned int i; + /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ + if (id != RT_TABLE_DEFAULT && id >= 1000000000) + return ERR_PTR(-EINVAL); + mrt = ipmr_get_table(net, id); if (mrt) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); if (!mrt) - return NULL; + return ERR_PTR(-ENOMEM); write_pnet(&mrt->net, net); mrt->id = id; @@ -1407,17 +1416,14 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi if (get_user(v, (u32 __user *)optval)) return -EFAULT; - /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ - if (v != RT_TABLE_DEFAULT && v >= 1000000000) - return -EINVAL; - rtnl_lock(); ret = 0; if (sk == rtnl_dereference(mrt->mroute_sk)) { ret = -EBUSY; } else { - 
if (!ipmr_new_table(net, v)) - ret = -ENOMEM; + mrt = ipmr_new_table(net, v); + if (IS_ERR(mrt)) + ret = PTR_ERR(mrt); else raw_sk(sk)->ipmr_table = v; } -- cgit v1.2.3 From f3d431810e85bad13635669402ca1153bb7e398c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:25 +0100 Subject: net: ipmr: always define mroute_reg_vif_num Before mroute_reg_vif_num was defined only if any of the CONFIG_PIMSM_ options were set, but that's not really necessary as the size of the struct is the same in both cases (checked with pahole, both cases size is 3256 bytes) and we can remove some unnecessary ifdefs to simplify the code. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5271e2eee110..dd2462f70d34 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -84,9 +84,7 @@ struct mr_table { atomic_t cache_resolve_queue_len; bool mroute_do_assert; bool mroute_do_pim; -#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) int mroute_reg_vif_num; -#endif }; struct ipmr_rule { @@ -347,9 +345,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, (unsigned long)mrt); -#ifdef CONFIG_IP_PIMSM mrt->mroute_reg_vif_num = -1; -#endif #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); #endif @@ -584,10 +580,8 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, return -EADDRNOTAVAIL; } -#ifdef CONFIG_IP_PIMSM if (vifi == mrt->mroute_reg_vif_num) mrt->mroute_reg_vif_num = -1; -#endif if (vifi + 1 == mrt->maxvif) { int tmp; @@ -824,10 +818,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* And finish update writing critical data */ write_lock_bh(&mrt_lock); v->dev = dev; -#ifdef CONFIG_IP_PIMSM if (v->flags & VIFF_REGISTER) mrt->mroute_reg_vif_num = vifi; -#endif if 
(vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); -- cgit v1.2.3 From c316c629f12e01e5d7710e456248a1ebef8426ef Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:26 +0100 Subject: net: ipmr: remove some pimsm ifdefs and simplify Add the helper pimsm_enabled() which replaces the old CONFIG_IP_PIMSM define and is used to check if any version of PIM-SM has been enabled. Use a single if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) for the pim-sm shared code. This is okay w.r.t IGMPMSG_WHOLEPKT because only a VIFF_REGISTER device can send such packet, and it can't be created if pimsm_enabled() is false. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 180 ++++++++++++++++++++++++++------------------------------ 1 file changed, 84 insertions(+), 96 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index dd2462f70d34..e153ab7b17a1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,10 +67,6 @@ #include #include -#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) -#define CONFIG_IP_PIMSM 1 -#endif - struct mr_table { struct list_head list; possible_net_t net; @@ -95,6 +91,11 @@ struct ipmr_result { struct mr_table *mrt; }; +static inline bool pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + /* Big lock, protecting vif table, mrt cache and mroute socket state. * Note that the changes are semaphored via rtnl_lock. 
*/ @@ -454,8 +455,7 @@ failure: return NULL; } -#ifdef CONFIG_IP_PIMSM - +#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); @@ -552,6 +552,51 @@ failure: unregister_netdevice(dev); return NULL; } + +/* called with rcu_read_lock() */ +static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, + unsigned int pimlen) +{ + struct net_device *reg_dev = NULL; + struct iphdr *encap; + + encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); + /* + * Check that: + * a. packet is really sent to a multicast group + * b. packet is not a NULL-REGISTER + * c. packet is not truncated + */ + if (!ipv4_is_multicast(encap->daddr) || + encap->tot_len == 0 || + ntohs(encap->tot_len) + pimlen > skb->len) + return 1; + + read_lock(&mrt_lock); + if (mrt->mroute_reg_vif_num >= 0) + reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; + read_unlock(&mrt_lock); + + if (!reg_dev) + return 1; + + skb->mac_header = skb->network_header; + skb_pull(skb, (u8 *)encap - skb->data); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + skb->ip_summed = CHECKSUM_NONE; + + skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); + + netif_rx(skb); + + return NET_RX_SUCCESS; +} +#else +static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) +{ + return NULL; +} #endif /** @@ -734,10 +779,10 @@ static int vif_add(struct net *net, struct mr_table *mrt, return -EADDRINUSE; switch (vifc->vifc_flags) { -#ifdef CONFIG_IP_PIMSM case VIFF_REGISTER: - /* - * Special Purpose VIF in PIM + if (!pimsm_enabled()) + return -EINVAL; + /* Special Purpose VIF in PIM * All the packets will be sent to the daemon */ if (mrt->mroute_reg_vif_num >= 0) @@ -752,7 +797,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, return err; } break; -#endif case VIFF_TUNNEL: dev = ipmr_new_tunnel(net, vifc); if (!dev) @@ -942,34 +986,29 @@ static void 
ipmr_cache_resolve(struct net *net, struct mr_table *mrt, } } -/* - * Bounce a cache query up to mrouted. We could use netlink for this but mrouted - * expects the following bizarre scheme. +/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted + * expects the following bizarre scheme. * - * Called under mrt_lock. + * Called under mrt_lock. */ - static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { - struct sk_buff *skb; const int ihl = ip_hdrlen(pkt); + struct sock *mroute_sk; struct igmphdr *igmp; struct igmpmsg *msg; - struct sock *mroute_sk; + struct sk_buff *skb; int ret; -#ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); else -#endif skb = alloc_skb(128, GFP_ATOMIC); if (!skb) return -ENOBUFS; -#ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) { /* Ugly, but we have no choice with this interface. * Duplicate old header, fix ihl, length etc. @@ -987,28 +1026,23 @@ static int ipmr_cache_report(struct mr_table *mrt, ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); - } else -#endif - { - - /* Copy the IP header */ - - skb_set_network_header(skb, skb->len); - skb_put(skb, ihl); - skb_copy_to_linear_data(skb, pkt->data, ihl); - ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ - msg = (struct igmpmsg *)skb_network_header(skb); - msg->im_vif = vifi; - skb_dst_set(skb, dst_clone(skb_dst(pkt))); - - /* Add our header */ - - igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); - igmp->type = - msg->im_msgtype = assert; - igmp->code = 0; - ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ - skb->transport_header = skb->network_header; + } else { + /* Copy the IP header */ + skb_set_network_header(skb, skb->len); + skb_put(skb, ihl); + skb_copy_to_linear_data(skb, pkt->data, ihl); + /* Flag to the kernel this is a 
route add */ + ip_hdr(skb)->protocol = 0; + msg = (struct igmpmsg *)skb_network_header(skb); + msg->im_vif = vifi; + skb_dst_set(skb, dst_clone(skb_dst(pkt))); + /* Add our header */ + igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); + igmp->type = assert; + msg->im_msgtype = assert; + igmp->code = 0; + ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ + skb->transport_header = skb->network_header; } rcu_read_lock(); @@ -1020,7 +1054,6 @@ static int ipmr_cache_report(struct mr_table *mrt, } /* Deliver to mrouted */ - ret = sock_queue_rcv_skb(mroute_sk, skb); rcu_read_unlock(); if (ret < 0) { @@ -1377,11 +1410,12 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi mrt->mroute_do_assert = v; return 0; } -#ifdef CONFIG_IP_PIMSM case MRT_PIM: { int v; + if (!pimsm_enabled()) + return -ENOPROTOOPT; if (optlen != sizeof(v)) return -EINVAL; if (get_user(v, (int __user *)optval)) @@ -1397,7 +1431,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; } -#endif #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES case MRT_TABLE: { @@ -1452,9 +1485,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int return -ENOENT; if (optname != MRT_VERSION && -#ifdef CONFIG_IP_PIMSM optname != MRT_PIM && -#endif optname != MRT_ASSERT) return -ENOPROTOOPT; @@ -1467,14 +1498,15 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int if (put_user(olr, optlen)) return -EFAULT; - if (optname == MRT_VERSION) + if (optname == MRT_VERSION) { val = 0x0305; -#ifdef CONFIG_IP_PIMSM - else if (optname == MRT_PIM) + } else if (optname == MRT_PIM) { + if (!pimsm_enabled()) + return -ENOPROTOOPT; val = mrt->mroute_do_pim; -#endif - else + } else { val = mrt->mroute_do_assert; + } if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; @@ -1707,7 +1739,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if (!vif->dev) 
goto out_free; -#ifdef CONFIG_IP_PIMSM if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; @@ -1716,7 +1747,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } -#endif if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, @@ -2047,48 +2077,6 @@ dont_forward: return 0; } -#ifdef CONFIG_IP_PIMSM -/* called with rcu_read_lock() */ -static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, - unsigned int pimlen) -{ - struct net_device *reg_dev = NULL; - struct iphdr *encap; - - encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); - /* - * Check that: - * a. packet is really sent to a multicast group - * b. packet is not a NULL-REGISTER - * c. packet is not truncated - */ - if (!ipv4_is_multicast(encap->daddr) || - encap->tot_len == 0 || - ntohs(encap->tot_len) + pimlen > skb->len) - return 1; - - read_lock(&mrt_lock); - if (mrt->mroute_reg_vif_num >= 0) - reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; - read_unlock(&mrt_lock); - - if (!reg_dev) - return 1; - - skb->mac_header = skb->network_header; - skb_pull(skb, (u8 *)encap - skb->data); - skb_reset_network_header(skb); - skb->protocol = htons(ETH_P_IP); - skb->ip_summed = CHECKSUM_NONE; - - skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); - - netif_rx(skb); - - return NET_RX_SUCCESS; -} -#endif - #ifdef CONFIG_IP_PIMSM_V1 /* * Handle IGMP messages of PIMv1 -- cgit v1.2.3 From 7ef8f65df976369588fa1b6466668b1b6a26eb3c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:27 +0100 Subject: net: ipmr: fix code and comment style Trivial code and comment style fixes, also removed some extra newlines, spaces and tabs. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/uapi/linux/mroute.h | 59 ++++++------------ net/ipv4/ipmr.c | 142 ++++++++++++-------------------------------- 2 files changed, 54 insertions(+), 147 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index a382d2c04a42..cf943016930f 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -4,15 +4,13 @@ #include #include -/* - * Based on the MROUTING 3.5 defines primarily to keep - * source compatibility with BSD. +/* Based on the MROUTING 3.5 defines primarily to keep + * source compatibility with BSD. * - * See the mrouted code for the original history. - * - * Protocol Independent Multicast (PIM) data structures included - * Carlos Picoto (cap@di.fc.ul.pt) + * See the mrouted code for the original history. * + * Protocol Independent Multicast (PIM) data structures included + * Carlos Picoto (cap@di.fc.ul.pt) */ #define MRT_BASE 200 @@ -34,15 +32,13 @@ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) #define SIOCGETRPF (SIOCPROTOPRIVATE+2) -#define MAXVIFS 32 +#define MAXVIFS 32 typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */ typedef unsigned short vifi_t; #define ALL_VIFS ((vifi_t)(-1)) -/* - * Same idea as select - */ - +/* Same idea as select */ + #define VIFM_SET(n,m) ((m)|=(1<<(n))) #define VIFM_CLR(n,m) ((m)&=~(1<<(n))) #define VIFM_ISSET(n,m) ((m)&(1<<(n))) @@ -50,11 +46,9 @@ typedef unsigned short vifi_t; #define VIFM_COPY(mfrom,mto) ((mto)=(mfrom)) #define VIFM_SAME(m1,m2) ((m1)==(m2)) -/* - * Passed by mrouted for an MRT_ADD_VIF - again we use the - * mrouted 3.6 structures for compatibility +/* Passed by mrouted for an MRT_ADD_VIF - again we use the + * mrouted 3.6 structures for compatibility */ - struct vifctl { vifi_t vifc_vifi; /* Index of VIF */ unsigned char vifc_flags; /* VIFF_ flags */ @@ -73,10 +67,7 @@ struct vifctl { #define VIFF_USE_IFINDEX 0x8 /* use vifc_lcl_ifindex instead of vifc_lcl_addr to find an interface */ -/* - * 
Cache manipulation structures for mrouted and PIMd - */ - +/* Cache manipulation structures for mrouted and PIMd */ struct mfcctl { struct in_addr mfcc_origin; /* Origin of mcast */ struct in_addr mfcc_mcastgrp; /* Group in question */ @@ -88,10 +79,7 @@ struct mfcctl { int mfcc_expire; }; -/* - * Group count retrieval for mrouted - */ - +/* Group count retrieval for mrouted */ struct sioc_sg_req { struct in_addr src; struct in_addr grp; @@ -100,10 +88,7 @@ struct sioc_sg_req { unsigned long wrong_if; }; -/* - * To get vif packet counts - */ - +/* To get vif packet counts */ struct sioc_vif_req { vifi_t vifi; /* Which iface */ unsigned long icount; /* In packets */ @@ -112,11 +97,9 @@ struct sioc_vif_req { unsigned long obytes; /* Out bytes */ }; -/* - * This is the format the mroute daemon expects to see IGMP control - * data. Magically happens to be like an IP packet as per the original +/* This is the format the mroute daemon expects to see IGMP control + * data. Magically happens to be like an IP packet as per the original */ - struct igmpmsg { __u32 unused1,unused2; unsigned char im_msgtype; /* What is this */ @@ -126,21 +109,13 @@ struct igmpmsg { struct in_addr im_src,im_dst; }; -/* - * That's all usermode folks - */ - - +/* That's all usermode folks */ #define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. 
of asserts */ -/* - * Pseudo messages used by mrouted - */ - +/* Pseudo messages used by mrouted */ #define IGMPMSG_NOCACHE 1 /* Kern cache fill request to mrouted */ #define IGMPMSG_WRONGVIF 2 /* For PIM assert processing (unused) */ #define IGMPMSG_WHOLEPKT 3 /* For PIM Register processing */ - #endif /* _UAPI__LINUX_MROUTE_H */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index e153ab7b17a1..286ede3716ee 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -102,9 +102,7 @@ static inline bool pimsm_enabled(void) static DEFINE_RWLOCK(mrt_lock); -/* - * Multicast router control variables - */ +/* Multicast router control variables */ #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) @@ -393,8 +391,7 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) } } -static -struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) +static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { struct net_device *dev; @@ -561,8 +558,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, struct iphdr *encap; encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); - /* - * Check that: + /* Check that: * a. packet is really sent to a multicast group * b. packet is not a NULL-REGISTER * c. packet is not truncated @@ -603,7 +599,6 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) * vif_delete - Delete a VIF entry * @notify: Set to 1, if the caller is a notifier_call */ - static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { @@ -673,7 +668,6 @@ static inline void ipmr_cache_free(struct mfc_cache *c) /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. 
*/ - static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) { struct net *net = read_pnet(&mrt->net); @@ -701,9 +695,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) ipmr_cache_free(c); } - /* Timer process for the unresolved queue. */ - static void ipmr_expire_process(unsigned long arg) { struct mr_table *mrt = (struct mr_table *)arg; @@ -743,7 +735,6 @@ out: } /* Fill oifs list. It is called under write locked mrt_lock. */ - static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, unsigned char *ttls) { @@ -808,7 +799,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, return err; } break; - case VIFF_USE_IFINDEX: case 0: if (vifc->vifc_flags == VIFF_USE_IFINDEX) { @@ -928,9 +918,7 @@ skip: return ipmr_cache_find_any_parent(mrt, vifi); } -/* - * Allocate a multicast cache entry - */ +/* Allocate a multicast cache entry */ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); @@ -951,10 +939,7 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void) return c; } -/* - * A cache entry has gone into a resolved state from queued - */ - +/* A cache entry has gone into a resolved state from queued */ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc_cache *uc, struct mfc_cache *c) { @@ -962,7 +947,6 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct nlmsgerr *e; /* Play the pending entries through our router */ - while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); @@ -1064,12 +1048,9 @@ static int ipmr_cache_report(struct mr_table *mrt, return ret; } -/* - * Queue a packet for resolution. It gets locked cache entry! - */ - -static int -ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) +/* Queue a packet for resolution. 
It gets locked cache entry! */ +static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, + struct sk_buff *skb) { bool found = false; int err; @@ -1087,7 +1068,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) if (!found) { /* Create a new entry if allowable */ - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || (c = ipmr_cache_alloc_unres()) == NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -1097,13 +1077,11 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) } /* Fill in the new cache entry */ - c->mfc_parent = -1; c->mfc_origin = iph->saddr; c->mfc_mcastgrp = iph->daddr; /* Reflect first query at mrouted. */ - err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry @@ -1125,7 +1103,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) } /* See if we can append the packet */ - if (c->mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; @@ -1138,9 +1115,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) return err; } -/* - * MFC cache manipulation by user space mroute daemon - */ +/* MFC cache manipulation by user space mroute daemon */ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { @@ -1211,9 +1186,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, list_add_rcu(&c->list, &mrt->mfc_cache_array[line]); - /* - * Check to see if we resolved a queued list. If so we - * need to send on the frames and tidy up. + /* Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. 
*/ found = false; spin_lock_bh(&mfc_unres_lock); @@ -1238,10 +1212,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } -/* - * Close the multicast socket, and clear the vif tables etc - */ - +/* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt) { int i; @@ -1249,7 +1220,6 @@ static void mroute_clean_tables(struct mr_table *mrt) struct mfc_cache *c, *next; /* Shut down all active vif entries */ - for (i = 0; i < mrt->maxvif; i++) { if (!(mrt->vif_table[i].flags & VIFF_STATIC)) vif_delete(mrt, i, 0, &list); @@ -1257,7 +1227,6 @@ static void mroute_clean_tables(struct mr_table *mrt) unregister_netdevice_many(&list); /* Wipe the cache */ - for (i = 0; i < MFC_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { if (c->mfc_flags & MFC_STATIC) @@ -1301,11 +1270,10 @@ static void mrtsock_destruct(struct sock *sk) rtnl_unlock(); } -/* - * Socket options and virtual interface manipulation. The whole - * virtual interface system is a complete heap, but unfortunately - * that's how BSD mrouted happens to think. Maybe one day with a proper - * MOSPF/PIM router set up we can clean this up. +/* Socket options and virtual interface manipulation. The whole + * virtual interface system is a complete heap, but unfortunately + * that's how BSD mrouted happens to think. Maybe one day with a proper + * MOSPF/PIM router set up we can clean this up. */ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) @@ -1373,10 +1341,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; - /* - * Manipulate the forwarding caches. These live - * in a sort of kernel/user symbiosis. - */ + /* Manipulate the forwarding caches. These live + * in a sort of kernel/user symbiosis. 
+ */ case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; @@ -1397,9 +1364,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi parent); rtnl_unlock(); return ret; - /* - * Control PIM assert. - */ + /* Control PIM assert. */ case MRT_ASSERT: { int v; @@ -1456,19 +1421,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return ret; } #endif - /* - * Spurious command, or MRT_VERSION which you cannot - * set. - */ + /* Spurious command, or MRT_VERSION which you cannot set. */ default: return -ENOPROTOOPT; } } -/* - * Getsock opt support for the multicast routing system. - */ - +/* Getsock opt support for the multicast routing system. */ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int olr; @@ -1512,10 +1471,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int return 0; } -/* - * The IP multicast ioctl support routines. - */ - +/* The IP multicast ioctl support routines. */ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) { struct sioc_sg_req sr; @@ -1648,7 +1604,6 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif - static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -1670,17 +1625,14 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v return NOTIFY_DONE; } - static struct notifier_block ip_mr_notifier = { .notifier_call = ipmr_device_event, }; -/* - * Encapsulate a packet by attaching a valid IPIP header to it. - * This avoids tunnel drivers and other mess and gives us the speed so - * important for multicast video. +/* Encapsulate a packet by attaching a valid IPIP header to it. + * This avoids tunnel drivers and other mess and gives us the speed so + * important for multicast video. 
*/ - static void ip_encap(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr) { @@ -1722,9 +1674,7 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } -/* - * Processing handlers for ipmr_forward - */ +/* Processing handlers for ipmr_forward */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, int vifi) @@ -1773,7 +1723,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * allow to send ICMP, so that packets will disappear * to blackhole. */ - IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; @@ -1805,8 +1754,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, IPCB(skb)->flags |= IPSKB_FORWARDED; - /* - * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output * interfaces. It is clear, if mrouter runs a multicasting * program, it should receive packets not depending to what interface @@ -1837,7 +1785,6 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) } /* "local" means that we should preserve one skb (for local delivery) */ - static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local) @@ -1862,9 +1809,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, goto forward; } - /* - * Wrong interface: drop packet and (maybe) send PIM assert. - */ + /* Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. 
@@ -1903,9 +1848,7 @@ forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; - /* - * Forward the frame - */ + /* Forward the frame */ if (cache->mfc_origin == htonl(INADDR_ANY) && cache->mfc_mcastgrp == htonl(INADDR_ANY)) { if (true_vifi >= 0 && @@ -1979,11 +1922,9 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) return mrt; } -/* - * Multicast packets for forwarding arrive here - * Called with rcu_read_lock(); +/* Multicast packets for forwarding arrive here + * Called with rcu_read_lock(); */ - int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; @@ -2034,9 +1975,7 @@ int ip_mr_input(struct sk_buff *skb) vif); } - /* - * No usable cache entry - */ + /* No usable cache entry */ if (!cache) { int vif; @@ -2078,10 +2017,7 @@ dont_forward: } #ifdef CONFIG_IP_PIMSM_V1 -/* - * Handle IGMP messages of PIMv1 - */ - +/* Handle IGMP messages of PIMv1 */ int pim_rcv_v1(struct sk_buff *skb) { struct igmphdr *pim; @@ -2406,9 +2342,8 @@ done: } #ifdef CONFIG_PROC_FS -/* - * The /proc interfaces to multicast routing : - * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif +/* The /proc interfaces to multicast routing : + * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif */ struct ipmr_vif_iter { struct seq_net_private p; @@ -2692,10 +2627,7 @@ static const struct net_protocol pim_protocol = { }; #endif - -/* - * Setup for IP multicast routing - */ +/* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { int err; -- cgit v1.2.3 From fe9ef3ce395d06e4f17e5995ab8455b9627f3306 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:28 +0100 Subject: net: ipmr: make ip_mroute_getsockopt more understandable Use a switch to determine if optname is correct and set val accordingly. This produces much more straightforward and readable code. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 286ede3716ee..694fecf7838e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1443,29 +1443,29 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int if (!mrt) return -ENOENT; - if (optname != MRT_VERSION && - optname != MRT_PIM && - optname != MRT_ASSERT) + switch (optname) { + case MRT_VERSION: + val = 0x0305; + break; + case MRT_PIM: + if (!pimsm_enabled()) + return -ENOPROTOOPT; + val = mrt->mroute_do_pim; + break; + case MRT_ASSERT: + val = mrt->mroute_do_assert; + break; + default: return -ENOPROTOOPT; + } if (get_user(olr, optlen)) return -EFAULT; - olr = min_t(unsigned int, olr, sizeof(int)); if (olr < 0) return -EINVAL; - if (put_user(olr, optlen)) return -EFAULT; - if (optname == MRT_VERSION) { - val = 0x0305; - } else if (optname == MRT_PIM) { - if (!pimsm_enabled()) - return -ENOPROTOOPT; - val = mrt->mroute_do_pim; - } else { - val = mrt->mroute_do_assert; - } if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; -- cgit v1.2.3 From 29c3f19739421cf749991cb8c693093b4ac58ad1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:29 +0100 Subject: net: ipmr: drop an instance of CONFIG_IP_MROUTE_MULTIPLE_TABLES Trivial replace of ifdef with IS_BUILTIN(). Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 694fecf7838e..a006d96d6cd9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1396,11 +1396,12 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; } -#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES case MRT_TABLE: { u32 v; + if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) + return -ENOPROTOOPT; if (optlen != sizeof(u32)) return -EINVAL; if (get_user(v, (u32 __user *)optval)) @@ -1420,7 +1421,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; } -#endif /* Spurious command, or MRT_VERSION which you cannot set. */ default: return -ENOPROTOOPT; -- cgit v1.2.3 From af623236a9f3a20aa2f15f03cf9fe7bfd13b8889 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:30 +0100 Subject: net: ipmr: drop ip_mr_init() mrt_cachep null check as we'll panic if it fails It's not necessary to check for null as SLAB_PANIC is used and we'll panic if the alloc fails, so just drop it. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a006d96d6cd9..50aec313119d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2677,8 +2677,6 @@ int __init ip_mr_init(void) sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - if (!mrt_cachep) - return -ENOMEM; err = register_pernet_subsys(&ipmr_net_ops); if (err) -- cgit v1.2.3 From 29e97d214509ef4977838e073d30f6b16f75c6d5 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:31 +0100 Subject: net: ipmr: rearrange and cleanup setsockopt Take rtnl in the beginning unconditionally as most options already need it (one exception - MRT_DONE, see the comment inside), make the lock/unlock places central and move out the switch() local variables. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 191 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 107 insertions(+), 84 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 50aec313119d..e384f39202cb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1276,38 +1276,45 @@ static void mrtsock_destruct(struct sock *sk) * MOSPF/PIM router set up we can clean this up. 
*/ -int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) +int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, + unsigned int optlen) { - int ret, parent = 0; - struct vifctl vif; - struct mfcctl mfc; struct net *net = sock_net(sk); + int val, ret = 0, parent = 0; struct mr_table *mrt; + struct vifctl vif; + struct mfcctl mfc; + u32 uval; + /* There's one exception to the lock - MRT_DONE which needs to unlock */ + rtnl_lock(); if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_IGMP) - return -EOPNOTSUPP; + inet_sk(sk)->inet_num != IPPROTO_IGMP) { + ret = -EOPNOTSUPP; + goto out_unlock; + } mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (!mrt) - return -ENOENT; - + if (!mrt) { + ret = -ENOENT; + goto out_unlock; + } if (optname != MRT_INIT) { if (sk != rcu_access_pointer(mrt->mroute_sk) && - !ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EACCES; + !ns_capable(net->user_ns, CAP_NET_ADMIN)) { + ret = -EACCES; + goto out_unlock; + } } switch (optname) { case MRT_INIT: if (optlen != sizeof(int)) - return -EINVAL; - - rtnl_lock(); - if (rtnl_dereference(mrt->mroute_sk)) { - rtnl_unlock(); - return -EADDRINUSE; - } + ret = -EINVAL; + if (rtnl_dereference(mrt->mroute_sk)) + ret = -EADDRINUSE; + if (ret) + break; ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { @@ -1317,30 +1324,41 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); } - rtnl_unlock(); - return ret; + break; case MRT_DONE: - if (sk != rcu_access_pointer(mrt->mroute_sk)) - return -EACCES; - return ip_ra_control(sk, 0, NULL); + if (sk != rcu_access_pointer(mrt->mroute_sk)) { + ret = -EACCES; + } else { + /* We need to unlock here because mrtsock_destruct takes + * care of rtnl itself and we can't change that due to + * the IP_ROUTER_ALERT setsockopt which runs without it. 
+ */ + rtnl_unlock(); + ret = ip_ra_control(sk, 0, NULL); + goto out; + } + break; case MRT_ADD_VIF: case MRT_DEL_VIF: - if (optlen != sizeof(vif)) - return -EINVAL; - if (copy_from_user(&vif, optval, sizeof(vif))) - return -EFAULT; - if (vif.vifc_vifi >= MAXVIFS) - return -ENFILE; - rtnl_lock(); + if (optlen != sizeof(vif)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&vif, optval, sizeof(vif))) { + ret = -EFAULT; + break; + } + if (vif.vifc_vifi >= MAXVIFS) { + ret = -ENFILE; + break; + } if (optname == MRT_ADD_VIF) { ret = vif_add(net, mrt, &vif, sk == rtnl_dereference(mrt->mroute_sk)); } else { ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); } - rtnl_unlock(); - return ret; - + break; /* Manipulate the forwarding caches. These live * in a sort of kernel/user symbiosis. */ @@ -1349,82 +1367,87 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi parent = -1; case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: - if (optlen != sizeof(mfc)) - return -EINVAL; - if (copy_from_user(&mfc, optval, sizeof(mfc))) - return -EFAULT; + if (optlen != sizeof(mfc)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&mfc, optval, sizeof(mfc))) { + ret = -EFAULT; + break; + } if (parent == 0) parent = mfc.mfcc_parent; - rtnl_lock(); if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); - rtnl_unlock(); - return ret; + break; /* Control PIM assert. 
*/ case MRT_ASSERT: - { - int v; - if (optlen != sizeof(v)) - return -EINVAL; - if (get_user(v, (int __user *)optval)) - return -EFAULT; - mrt->mroute_do_assert = v; - return 0; - } + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; + } + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } + mrt->mroute_do_assert = val; + break; case MRT_PIM: - { - int v; - - if (!pimsm_enabled()) - return -ENOPROTOOPT; - if (optlen != sizeof(v)) - return -EINVAL; - if (get_user(v, (int __user *)optval)) - return -EFAULT; - v = !!v; + if (!pimsm_enabled()) { + ret = -ENOPROTOOPT; + break; + } + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; + } + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } - rtnl_lock(); - ret = 0; - if (v != mrt->mroute_do_pim) { - mrt->mroute_do_pim = v; - mrt->mroute_do_assert = v; + val = !!val; + if (val != mrt->mroute_do_pim) { + mrt->mroute_do_pim = val; + mrt->mroute_do_assert = val; } - rtnl_unlock(); - return ret; - } + break; case MRT_TABLE: - { - u32 v; - - if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) - return -ENOPROTOOPT; - if (optlen != sizeof(u32)) - return -EINVAL; - if (get_user(v, (u32 __user *)optval)) - return -EFAULT; + if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { + ret = -ENOPROTOOPT; + break; + } + if (optlen != sizeof(uval)) { + ret = -EINVAL; + break; + } + if (get_user(uval, (u32 __user *)optval)) { + ret = -EFAULT; + break; + } - rtnl_lock(); - ret = 0; if (sk == rtnl_dereference(mrt->mroute_sk)) { ret = -EBUSY; } else { - mrt = ipmr_new_table(net, v); + mrt = ipmr_new_table(net, uval); if (IS_ERR(mrt)) ret = PTR_ERR(mrt); else - raw_sk(sk)->ipmr_table = v; + raw_sk(sk)->ipmr_table = uval; } - rtnl_unlock(); - return ret; - } + break; /* Spurious command, or MRT_VERSION which you cannot set. */ default: - return -ENOPROTOOPT; + ret = -ENOPROTOOPT; } +out_unlock: + rtnl_unlock(); +out: + return ret; } /* Getsock opt support for the multicast routing system. 
*/ -- cgit v1.2.3 From a0b477366a9550ae46f78caa9e55de34fac4ba9c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:32 +0100 Subject: net: ipmr: factor out common vif init code Factor out common vif init code used in both tunnel and pimreg initialization and create ipmr_init_vif_indev() function. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index e384f39202cb..a2d248d9c35c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -391,6 +391,23 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) } } +/* Initialize ipmr pimreg/tunnel in_device */ +static bool ipmr_init_vif_indev(const struct net_device *dev) +{ + struct in_device *in_dev; + + ASSERT_RTNL(); + + in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return false; + ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); + IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; + + return true; +} + static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { struct net_device *dev; @@ -402,7 +419,6 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) int err; struct ifreq ifr; struct ip_tunnel_parm p; - struct in_device *in_dev; memset(&p, 0, sizeof(p)); p.iph.daddr = v->vifc_rmt_addr.s_addr; @@ -427,15 +443,8 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) if (err == 0 && (dev = __dev_get_by_name(net, p.name)) != NULL) { dev->flags |= IFF_MULTICAST; - - in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) + if (!ipmr_init_vif_indev(dev)) goto failure; - - ipv4_devconf_setall(in_dev); - neigh_parms_data_state_setall(in_dev->arp_parms); - IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; - if (dev_open(dev)) goto failure; dev_hold(dev); @@ -502,7 +511,6 @@ static void reg_vif_setup(struct 
net_device *dev) static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; - struct in_device *in_dev; char name[IFNAMSIZ]; if (mrt->id == RT_TABLE_DEFAULT) @@ -522,18 +530,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) return NULL; } - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (!in_dev) { - rcu_read_unlock(); + if (!ipmr_init_vif_indev(dev)) goto failure; - } - - ipv4_devconf_setall(in_dev); - neigh_parms_data_state_setall(in_dev->arp_parms); - IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; - rcu_read_unlock(); - if (dev_open(dev)) goto failure; -- cgit v1.2.3 From 4dd191bb6195641edbc527a8495b7b1b816a41e6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 Nov 2015 18:28:05 +0100 Subject: net: atm: constify in_cache_ops and eg_cache_ops structures The in_cache_ops and eg_cache_ops structures are never modified, so declare them as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: David S. 
Miller --- net/atm/mpc.h | 4 ++-- net/atm/mpoa_caches.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/atm/mpc.h b/net/atm/mpc.h index 0919a88bbc70..cfc7b745aa91 100644 --- a/net/atm/mpc.h +++ b/net/atm/mpc.h @@ -21,11 +21,11 @@ struct mpoa_client { uint8_t our_ctrl_addr[ATM_ESA_LEN]; /* MPC's control ATM address */ rwlock_t ingress_lock; - struct in_cache_ops *in_ops; /* ingress cache operations */ + const struct in_cache_ops *in_ops; /* ingress cache operations */ in_cache_entry *in_cache; /* the ingress cache of this MPC */ rwlock_t egress_lock; - struct eg_cache_ops *eg_ops; /* egress cache operations */ + const struct eg_cache_ops *eg_ops; /* egress cache operations */ eg_cache_entry *eg_cache; /* the egress cache of this MPC */ uint8_t *mps_macs; /* array of MPS MAC addresses, >=1 */ diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index d1b2d9a03144..9e60e74c807d 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -534,7 +534,7 @@ static void eg_destroy_cache(struct mpoa_client *mpc) } -static struct in_cache_ops ingress_ops = { +static const struct in_cache_ops ingress_ops = { in_cache_add_entry, /* add_entry */ in_cache_get, /* get */ in_cache_get_with_mask, /* get_with_mask */ @@ -548,7 +548,7 @@ static struct in_cache_ops ingress_ops = { in_destroy_cache /* destroy_cache */ }; -static struct eg_cache_ops egress_ops = { +static const struct eg_cache_ops egress_ops = { eg_cache_add_entry, /* add_entry */ eg_cache_get_by_cache_id, /* get_by_cache_id */ eg_cache_get_by_tag, /* get_by_tag */ -- cgit v1.2.3 From 3b22dae38db1cea9ead3229f08cfb0b69aca5706 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 Nov 2015 18:39:17 +0100 Subject: VSOCK: constify vmci_transport_notify_ops structures The vmci_transport_notify_ops structures are never modified, so declare them as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: David S. 
Miller --- net/vmw_vsock/vmci_transport.h | 2 +- net/vmw_vsock/vmci_transport_notify.c | 2 +- net/vmw_vsock/vmci_transport_notify.h | 5 +++-- net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h index 2ad46f39649f..1820e74a5752 100644 --- a/net/vmw_vsock/vmci_transport.h +++ b/net/vmw_vsock/vmci_transport.h @@ -121,7 +121,7 @@ struct vmci_transport { u64 queue_pair_max_size; u32 detach_sub_id; union vmci_transport_notify notify; - struct vmci_transport_notify_ops *notify_ops; + const struct vmci_transport_notify_ops *notify_ops; struct list_head elem; struct sock *sk; spinlock_t lock; /* protects sk. */ diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index 9b7f207f2bee..fd8cf0214d51 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) } /* Socket control packet based operations. 
*/ -struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { +const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { vmci_transport_notify_pkt_socket_init, vmci_transport_notify_pkt_socket_destruct, vmci_transport_notify_pkt_poll_in, diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h index 7df793249b6c..3c464d394a8f 100644 --- a/net/vmw_vsock/vmci_transport_notify.h +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -77,7 +77,8 @@ struct vmci_transport_notify_ops { void (*process_negotiate) (struct sock *sk); }; -extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; -extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; +extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern const +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; #endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index dc9c7929a2f9..21e591dafb03 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue( } /* Socket always on control packet based operations. 
*/ -struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { +const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { vmci_transport_notify_pkt_socket_init, vmci_transport_notify_pkt_socket_destruct, vmci_transport_notify_pkt_poll_in, -- cgit v1.2.3 From 85beabfeca5343b86057c0d588e33f7975684d37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 24 Nov 2015 12:34:49 +0100 Subject: net: dsa: include gpio consumer header file After the introduction of the switch gpio reset API, I'm getting build errors in configurations that disable CONFIG_GPIOLIB: net/dsa/dsa.c:783:16: error: implicit declaration of function 'gpio_to_desc' [-Werror=implicit-function-declaration] The reason is that linux/gpio/consumer.h is not automatically included without gpiolib support. This adds an explicit #include statement to make it compile in all configurations. The reset functionality will not work without gpiolib, which is what you get when disabling the feature. As far as I can tell, gpiolib is supported on all architectures on which you can have DSA at the moment. Signed-off-by: Arnd Bergmann Fixes: cc30c16344fc ("net: dsa: Add support for a switch reset gpio") Acked-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/dsa/dsa.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0b5565f923cc..b7448c8490ac 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "dsa_priv.h" char dsa_driver_version[] = "0.1"; -- cgit v1.2.3 From 6c1c36b02c325ecebce6e3b34bce7f1dfe012cf9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Nov 2015 14:08:23 +0100 Subject: net/ipv4/ipconfig: Rejoin broken lines in console output Commit 09605cc12c078306 ("net ipv4: use preferred log methods") replaced a few calls of pr_cont() after a console print without a trailing newline by pr_info(), causing lines to be split during IP autoconfiguration, like: . , OK IP-Config: Got DHCP answer from 192.168.97.254, my address is 192.168.97.44 Convert these back to using pr_cont(), so it prints again: ., OK IP-Config: Got DHCP answer from 192.168.97.254, my address is 192.168.97.44 Absorb the printing of "my address ..." into the previous call to pr_info(), as there's no reason to use a continuation there. Convert one more pr_info() to print nameservers while we're at it. Fixes: 09605cc12c078306 ("net ipv4: use preferred log methods") Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e86e8a9738ea..67f7c9de0b16 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1239,13 +1239,13 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; - pr_notice(","); + pr_cont(","); continue; } #endif /* IPCONFIG_DHCP */ if (ic_got_reply) { - pr_notice(" OK\n"); + pr_cont(" OK\n"); break; } @@ -1253,7 +1253,7 @@ static int __init ic_dynamic(void) continue; if (! 
--retries) { - pr_notice(" timed out!\n"); + pr_cont(" timed out!\n"); break; } @@ -1263,7 +1263,7 @@ static int __init ic_dynamic(void) if (timeout > CONF_TIMEOUT_MAX) timeout = CONF_TIMEOUT_MAX; - pr_notice("."); + pr_cont("."); } #ifdef IPCONFIG_BOOTP @@ -1280,11 +1280,10 @@ static int __init ic_dynamic(void) return -1; } - pr_info("IP-Config: Got %s answer from %pI4, ", + pr_info("IP-Config: Got %s answer from %pI4, my address is %pI4\n", ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), - &ic_addrservaddr); - pr_info("my address is %pI4\n", &ic_myaddr); + &ic_addrservaddr, &ic_myaddr); return 0; } @@ -1527,14 +1526,14 @@ static int __init ip_auto_config(void) pr_cont(", mtu=%d", ic_dev_mtu); for (i = 0; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) { - pr_info(" nameserver%u=%pI4", + pr_cont(" nameserver%u=%pI4", i, &ic_nameservers[i]); break; } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_info(", nameserver%u=%pI4", i, &ic_nameservers[i]); - pr_info("\n"); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; -- cgit v1.2.3 From f13f2aeed154da8e48f90b85e720f8ba39b1e881 Mon Sep 17 00:00:00 2001 From: Philip Whineray Date: Sun, 22 Nov 2015 11:35:07 +0000 Subject: netfilter: Set /proc/net entries owner to root in namespace Various files are owned by root with 0440 permission. Reading them is impossible in an unprivileged user namespace, interfering with firewall tools. For instance, iptables-save relies on /proc/net/ip_tables_names contents to dump only loaded tables. 
This patch assigns ownership of the following files to root in the current namespace: - /proc/net/*_tables_names - /proc/net/*_tables_matches - /proc/net/*_tables_targets - /proc/net/nf_conntrack - /proc/net/nf_conntrack_expect - /proc/net/netfilter/nfnetlink_log A mapping for root must be available, so this order should be followed: unshare(CLONE_NEWUSER); /* Setup the mapping */ unshare(CLONE_NEWNET); Signed-off-by: Philip Whineray Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 7 +++++++ net/netfilter/nf_conntrack_standalone.c | 7 +++++++ net/netfilter/nfnetlink_log.c | 15 +++++++++++++-- net/netfilter/x_tables.c | 12 ++++++++++++ 4 files changed, 39 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index acf5c7b3f378..278927ab0948 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -596,11 +596,18 @@ static int exp_proc_init(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_PROCFS struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; proc = proc_create("nf_conntrack_expect", 0440, net->proc_net, &exp_file_ops); if (!proc) return -ENOMEM; + + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif /* CONFIG_NF_CONNTRACK_PROCFS */ return 0; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 1fb3cacc04e1..0f1a45bcacb2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -392,11 +392,18 @@ static const struct file_operations ct_cpu_seq_fops = { static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; + kuid_t root_uid; + kgid_t root_gid; pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops); if (!pde) goto out_nf_conntrack; + 
root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(pde, root_uid, root_gid); + pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat, &ct_cpu_seq_fops); if (!pde) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 740cce4685ac..dea467647c90 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1064,15 +1064,26 @@ static int __net_init nfnl_log_net_init(struct net *net) { unsigned int i; struct nfnl_log_net *log = nfnl_log_pernet(net); +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; +#endif for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&log->instance_table[i]); spin_lock_init(&log->instances_lock); #ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_log", 0440, - net->nf.proc_netfilter, &nful_file_ops)) + proc = proc_create("nfnetlink_log", 0440, + net->nf.proc_netfilter, &nful_file_ops); + if (!proc) return -ENOMEM; + + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif return 0; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d4aaad747ea9..c8a0b7da5ff4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1226,6 +1227,8 @@ int xt_proto_init(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; #endif if (af >= ARRAY_SIZE(xt_prefix)) @@ -1233,12 +1236,17 @@ int xt_proto_init(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); proc = 
proc_create_data(buf, 0440, net->proc_net, &xt_table_ops, (void *)(unsigned long)af); if (!proc) goto out; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); @@ -1246,6 +1254,8 @@ int xt_proto_init(struct net *net, u_int8_t af) (void *)(unsigned long)af); if (!proc) goto out_remove_tables; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); @@ -1253,6 +1263,8 @@ int xt_proto_init(struct net *net, u_int8_t af) (void *)(unsigned long)af); if (!proc) goto out_remove_matches; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif return 0; -- cgit v1.2.3 From 7ec3f7b47b8d9ad7ba425726f2c58f9ddce040df Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 24 Nov 2015 10:00:22 +0000 Subject: netfilter: nft_payload: add packet mangling support Add support for mangling packet payload. Checksum for the specified base header is updated automatically if requested, however no updates for any kind of pseudo headers are supported, meaning no stateless NAT is supported. For checksum updates different checksumming methods can be specified. The currently supported methods are NONE for no checksum updates, and INET for internet type checksums. 
Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 9 +++ include/uapi/linux/netfilter/nf_tables.h | 17 ++++ net/netfilter/nft_payload.c | 135 +++++++++++++++++++++++++++++-- 3 files changed, 155 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index c6f400cfaac8..4ff5424909aa 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -47,6 +47,15 @@ struct nft_payload { enum nft_registers dreg:8; }; +struct nft_payload_set { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + enum nft_registers sreg:8; + u8 csum_type; + u8 csum_offset; +}; + extern const struct nft_expr_ops nft_payload_fast_ops; int nft_payload_module_init(void); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index d8c8a7c9d88a..5f3ececf84b3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -597,6 +597,17 @@ enum nft_payload_bases { NFT_PAYLOAD_TRANSPORT_HEADER, }; +/** + * enum nft_payload_csum_types - nf_tables payload expression checksum types + * + * @NFT_PAYLOAD_CSUM_NONE: no checksumming + * @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791) + */ +enum nft_payload_csum_types { + NFT_PAYLOAD_CSUM_NONE, + NFT_PAYLOAD_CSUM_INET, +}; + /** * enum nft_payload_attributes - nf_tables payload expression netlink attributes * @@ -604,6 +615,9 @@ enum nft_payload_bases { * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases) * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32) * @NFTA_PAYLOAD_LEN: payload length (NLA_U32) + * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32) + * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32) */ enum nft_payload_attributes { NFTA_PAYLOAD_UNSPEC, @@ 
-611,6 +625,9 @@ enum nft_payload_attributes { NFTA_PAYLOAD_BASE, NFTA_PAYLOAD_OFFSET, NFTA_PAYLOAD_LEN, + NFTA_PAYLOAD_SREG, + NFTA_PAYLOAD_CSUM_TYPE, + NFTA_PAYLOAD_CSUM_OFFSET, __NFTA_PAYLOAD_MAX }; #define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 09b4b07eb676..12cd4bf16d17 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -107,10 +107,13 @@ err: } static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { - [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, @@ -160,6 +163,118 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; +static void nft_payload_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + const u32 *src = &regs->data[priv->sreg]; + int offset, csum_offset; + __wsum fsum, tsum; + __sum16 sum; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = pkt->xt.thoff; + break; + default: + BUG(); + } + + csum_offset = offset + priv->csum_offset; + offset += priv->offset; + + if (priv->csum_type == NFT_PAYLOAD_CSUM_INET && + (priv->base 
!= NFT_PAYLOAD_TRANSPORT_HEADER || + skb->ip_summed != CHECKSUM_PARTIAL)) { + if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + + fsum = skb_checksum(skb, offset, priv->len, 0); + tsum = csum_partial(src, priv->len, 0); + sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), + tsum)); + if (sum == 0) + sum = CSUM_MANGLED_0; + + if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || + skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + } + + if (!skb_make_writable(skb, max(offset + priv->len, 0)) || + skb_store_bits(skb, offset, src, priv->len) < 0) + goto err; + + return; +err: + regs->verdict.code = NFT_BREAK; +} + +static int nft_payload_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload_set *priv = nft_expr_priv(expr); + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]); + + if (tb[NFTA_PAYLOAD_CSUM_TYPE]) + priv->csum_type = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); + if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) + priv->csum_offset = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + + switch (priv->csum_type) { + case NFT_PAYLOAD_CSUM_NONE: + case NFT_PAYLOAD_CSUM_INET: + break; + default: + return -EOPNOTSUPP; + } + + return nft_validate_register_load(priv->sreg, priv->len); +} + +static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) || + 
nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET, + htonl(priv->csum_offset))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_expr_ops nft_payload_set_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)), + .eval = nft_payload_set_eval, + .init = nft_payload_set_init, + .dump = nft_payload_set_dump, +}; + static const struct nft_expr_ops * nft_payload_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -167,8 +282,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, enum nft_payload_bases base; unsigned int offset, len; - if (tb[NFTA_PAYLOAD_DREG] == NULL || - tb[NFTA_PAYLOAD_BASE] == NULL || + if (tb[NFTA_PAYLOAD_BASE] == NULL || tb[NFTA_PAYLOAD_OFFSET] == NULL || tb[NFTA_PAYLOAD_LEN] == NULL) return ERR_PTR(-EINVAL); @@ -183,6 +297,15 @@ nft_payload_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-EOPNOTSUPP); } + if (tb[NFTA_PAYLOAD_SREG] != NULL) { + if (tb[NFTA_PAYLOAD_DREG] != NULL) + return ERR_PTR(-EINVAL); + return &nft_payload_set_ops; + } + + if (tb[NFTA_PAYLOAD_DREG] == NULL) + return ERR_PTR(-EINVAL); + offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); -- cgit v1.2.3 From 1ce0bf50ae2233c7115a18c0c623662d177b434c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 26 Nov 2015 13:55:39 +0800 Subject: net: Generalise wq_has_sleeper helper The memory barrier in the helper wq_has_sleeper is needed by just about every user of waitqueue_active. This patch generalises it by making it take a wait_queue_head_t directly. The existing helper is renamed to skwq_has_sleeper. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- crypto/algif_aead.c | 4 ++-- crypto/algif_skcipher.c | 4 ++-- include/linux/wait.h | 21 +++++++++++++++++++++ include/net/sock.h | 15 +++++---------- net/atm/common.c | 4 ++-- net/core/sock.c | 8 ++++---- net/core/stream.c | 2 +- net/dccp/output.c | 2 +- net/iucv/af_iucv.c | 2 +- net/rxrpc/af_rxrpc.c | 2 +- net/sctp/socket.c | 2 +- net/tipc/socket.c | 4 ++-- net/unix/af_unix.c | 2 +- 13 files changed, 44 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 0aa6fdfb448a..fb99f30849d2 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -106,7 +106,7 @@ static void aead_wmem_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); @@ -157,7 +157,7 @@ static void aead_data_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLRDNORM | POLLRDBAND); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index af31a0ee4057..0e6702e41472 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -238,7 +238,7 @@ static void skcipher_wmem_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); @@ -288,7 +288,7 @@ static void skcipher_data_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLRDNORM | POLLRDBAND); diff --git a/include/linux/wait.h b/include/linux/wait.h index 1e1bf9f963a9..6aa09a875fbd 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -107,6 +107,27 @@ static inline int 
waitqueue_active(wait_queue_head_t *q) return !list_empty(&q->task_list); } +/** + * wq_has_sleeper - check if there are any waiting processes + * @wq: wait queue head + * + * Returns true if wq has waiting processes + * + * Please refer to the comment for waitqueue_active. + */ +static inline bool wq_has_sleeper(wait_queue_head_t *wq) +{ + /* + * We need to be sure we are in sync with the + * add_wait_queue modifications to the wait queue. + * + * This memory barrier should be paired with one on the + * waiting side. + */ + smp_mb(); + return waitqueue_active(wq); +} + extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); diff --git a/include/net/sock.h b/include/net/sock.h index 7f89e4ba18d1..62d35afcb3ac 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -1879,12 +1880,12 @@ static inline bool sk_has_allocations(const struct sock *sk) } /** - * wq_has_sleeper - check if there are any waiting processes + * skwq_has_sleeper - check if there are any waiting processes * @wq: struct socket_wq * * Returns true if socket_wq has waiting processes * - * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory + * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. * * Consider following tcp code paths: @@ -1910,15 +1911,9 @@ static inline bool sk_has_allocations(const struct sock *sk) * data on the socket. * */ -static inline bool wq_has_sleeper(struct socket_wq *wq) +static inline bool skwq_has_sleeper(struct socket_wq *wq) { - /* We need to be sure we are in sync with the - * add_wait_queue modifications to the wait queue. - * - * This memory barrier is paired in the sock_poll_wait. 
- */ - smp_mb(); - return wq && waitqueue_active(&wq->wait); + return wq && wq_has_sleeper(&wq->wait); } /** diff --git a/net/atm/common.c b/net/atm/common.c index 49a872db7e42..6dc12305799e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -96,7 +96,7 @@ static void vcc_def_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up(&wq->wait); rcu_read_unlock(); } @@ -117,7 +117,7 @@ static void vcc_write_space(struct sock *sk) if (vcc_writable(sk)) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); diff --git a/net/core/sock.c b/net/core/sock.c index 1e4dd54bfb5a..2769bd3a4d7c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2283,7 +2283,7 @@ static void sock_def_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); rcu_read_unlock(); } @@ -2294,7 +2294,7 @@ static void sock_def_error_report(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); rcu_read_unlock(); @@ -2306,7 +2306,7 @@ static void sock_def_readable(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); @@ -2324,7 +2324,7 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); diff --git a/net/core/stream.c 
b/net/core/stream.c index d70f77a0c889..8ff9d63b4265 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) diff --git a/net/dccp/output.c b/net/dccp/output.c index 4ce912e691d0..b66c84db0766 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -201,7 +201,7 @@ void dccp_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index fcb2752419c6..4f0aa91470c6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -303,7 +303,7 @@ static void iucv_sock_wake_msglim(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 1f8a144a5dc2..7e2d1057d8bc 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -67,7 +67,7 @@ static void rxrpc_write_space(struct sock *sk) if (rxrpc_writable(sk)) { struct socket_wq *wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 897c01c029ca..ec10b66354b8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6978,7 +6978,7 @@ void sctp_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) 
wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 552dbaba9cf3..525acf6dd1c6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1492,7 +1492,7 @@ static void tipc_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); rcu_read_unlock(); @@ -1509,7 +1509,7 @@ static void tipc_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); rcu_read_unlock(); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 955ec152cb71..efb706e1d1c0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -339,7 +339,7 @@ static void unix_write_space(struct sock *sk) rcu_read_lock(); if (unix_writable(sk)) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); -- cgit v1.2.3 From dfc3b0e89188e0dfe6eb12f9bb29c9dfc27bbda1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:44 +0100 Subject: net: remove unnecessary mroute.h includes It looks like many files are including mroute.h unnecessarily, so remove the include. Most importantly remove it from ipv6. CC: Hideaki YOSHIFUJI CC: Steffen Klassert CC: Herbert Xu Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/ipv4/ip_gre.c | 1 - net/ipv4/ip_output.c | 1 - net/ipv4/ip_tunnel.c | 1 - net/ipv4/ip_tunnel_core.c | 1 - net/ipv4/ip_vti.c | 1 - net/ipv4/ipip.c | 1 - net/ipv6/ip6_gre.c | 1 - 7 files changed, 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 614521437e30..04a48c0159cc 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -24,7 +24,6 @@ #include #include #include -#include <linux/mroute.h> #include #include #include diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4233cbe47052..e0b94cd843d7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -76,7 +76,6 @@ #include #include #include -#include <linux/mroute.h> #include #include diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cbb51f3fac06..0f6e9ee031c4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -30,7 +30,6 @@ #include #include #include -#include <linux/mroute.h> #include #include #include diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6cb9009c3d96..1db8418aa62e 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -24,7 +24,6 @@ #include #include #include -#include <linux/mroute.h> #include #include #include diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 4d8f0b698777..02d9c21e2953 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -30,7 +30,6 @@ #include #include #include -#include <linux/mroute.h> #include #include #include diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index f34c31defafe..1f067294cbc5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -103,7 +103,6 @@ #include #include #include -#include <linux/mroute.h> #include #include #include diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3c7b9310b33f..938d03ce5e4b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -24,7 +24,6 @@ #include #include #include -#include <linux/mroute.h> #include #include #include -- cgit v1.2.3 From 06bd6c0370bb88a2256c6763a32bc4e4ade06521 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:45 +0100 Subject: net: 
ipmr: remove unused MFC_NOTIFY flag and make the flags enum MFC_NOTIFY was introduced in kernel 2.1.68 but afaik it hasn't been used and I couldn't find any users currently so just remove it. Only MFC_STATIC is left, so move it into an enum, add a description and use BIT(). Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 10 +++++++--- net/ipv4/ipmr.c | 2 -- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 79aaa9fc1a15..fa66ebc1fed6 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -64,6 +64,13 @@ struct vif_device { #define VIFF_STATIC 0x8000 +/* mfc_flags: + * MFC_STATIC - the entry was added statically (not by a routing daemon) + */ +enum { + MFC_STATIC = BIT(0), +}; + struct mfc_cache { struct list_head list; __be32 mfc_mcastgrp; /* Group the entry belongs to */ @@ -89,9 +96,6 @@ struct mfc_cache { struct rcu_head rcu; }; -#define MFC_STATIC 1 -#define MFC_NOTIFY 2 - #define MFC_LINES 64 #ifdef __BIG_ENDIAN diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a2d248d9c35c..a74e61883b8f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2199,8 +2199,6 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } read_lock(&mrt_lock); - if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY)) - cache->mfc_flags |= MFC_NOTIFY; err = __ipmr_fill_mroute(mrt, skb, cache, rtm); read_unlock(&mrt_lock); rcu_read_unlock(); -- cgit v1.2.3 From 5ea1f13299d8b8edcb2969eda4c81f8e3264b706 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:47 +0100 Subject: net: ipmr: move struct mr_table and VIF_EXISTS to mroute.h Move the definitions of VIF_EXISTS() and struct mr_table to mroute.h Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/mroute.h | 21 +++++++++++++++++++-- net/ipv4/ipmr.c | 18 ------------------ 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 7c567a2679ce..bf9b322cb0b0 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -59,6 +59,25 @@ struct vif_device { #define VIFF_STATIC 0x8000 +#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) +#define MFC_LINES 64 + +struct mr_table { + struct list_head list; + possible_net_t net; + u32 id; + struct sock __rcu *mroute_sk; + struct timer_list ipmr_expire_timer; + struct list_head mfc_unres_queue; + struct list_head mfc_cache_array[MFC_LINES]; + struct vif_device vif_table[MAXVIFS]; + int maxvif; + atomic_t cache_resolve_queue_len; + bool mroute_do_assert; + bool mroute_do_pim; + int mroute_reg_vif_num; +}; + /* mfc_flags: * MFC_STATIC - the entry was added statically (not by a routing daemon) */ @@ -91,8 +110,6 @@ struct mfc_cache { struct rcu_head rcu; }; -#define MFC_LINES 64 - #ifdef __BIG_ENDIAN #define MFC_HASH(a,b) (((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1)) #else diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a74e61883b8f..ff3dbbb9f11c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,22 +67,6 @@ #include #include -struct mr_table { - struct list_head list; - possible_net_t net; - u32 id; - struct sock __rcu *mroute_sk; - struct timer_list ipmr_expire_timer; - struct list_head mfc_unres_queue; - struct list_head mfc_cache_array[MFC_LINES]; - struct vif_device vif_table[MAXVIFS]; - int maxvif; - atomic_t cache_resolve_queue_len; - bool mroute_do_assert; - bool mroute_do_pim; - int mroute_reg_vif_num; -}; - struct ipmr_rule { struct fib_rule common; }; @@ -104,8 +88,6 @@ static DEFINE_RWLOCK(mrt_lock); /* Multicast router control variables */ -#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) - /* Special spinlock for queue 
of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); -- cgit v1.2.3 From 1973a4ea6ceaa47671227c3077f90508ea30897b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:48 +0100 Subject: net: ipmr: move pimsm_enabled to pim.h and rename Move the inline pimsm_enabled() to pim.h and rename it to ipmr_pimsm_enabled to show it's for the ipv4 ipmr code since pim.h is used by IPv6 too. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/pim.h | 5 +++++ net/ipv4/ipmr.c | 11 +++-------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/linux/pim.h b/include/linux/pim.h index 252bf6644c51..e1d756f81348 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -13,6 +13,11 @@ #define PIM_NULL_REGISTER cpu_to_be32(0x40000000) +static inline bool ipmr_pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + /* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */ struct pimreghdr { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ff3dbbb9f11c..322fdc6ac75b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -75,11 +75,6 @@ struct ipmr_result { struct mr_table *mrt; }; -static inline bool pimsm_enabled(void) -{ - return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); -} - /* Big lock, protecting vif table, mrt cache and mroute socket state. * Note that the changes are semaphored via rtnl_lock. 
*/ @@ -751,7 +746,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, switch (vifc->vifc_flags) { case VIFF_REGISTER: - if (!pimsm_enabled()) + if (!ipmr_pimsm_enabled()) return -EINVAL; /* Special Purpose VIF in PIM * All the packets will be sent to the daemon @@ -1377,7 +1372,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, mrt->mroute_do_assert = val; break; case MRT_PIM: - if (!pimsm_enabled()) { + if (!ipmr_pimsm_enabled()) { ret = -ENOPROTOOPT; break; } @@ -1451,7 +1446,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int val = 0x0305; break; case MRT_PIM: - if (!pimsm_enabled()) + if (!ipmr_pimsm_enabled()) return -ENOPROTOOPT; val = mrt->mroute_do_pim; break; -- cgit v1.2.3 From 42e6b89ce4e8a4f02a1e906694d81acf60db6f4d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:49 +0100 Subject: net: ipmr: fix setsockopt error return We can have both errors and we'll return the second one, fix it to return an error at a time as it's normal. I've overlooked this in my previous set. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 322fdc6ac75b..6c24a16299c7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1284,12 +1284,14 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, switch (optname) { case MRT_INIT: - if (optlen != sizeof(int)) + if (optlen != sizeof(int)) { ret = -EINVAL; - if (rtnl_dereference(mrt->mroute_sk)) + break; + } + if (rtnl_dereference(mrt->mroute_sk)) { ret = -EADDRINUSE; - if (ret) break; + } ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { -- cgit v1.2.3 From ccbb0aa62da7f4b765b3e311caf25ea43cc3d0ad Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:50 +0100 Subject: net: ipmr: add mfc newroute/delroute netlink support This patch adds support to add and remove MFC entries. It uses the same attributes like the already present dump support in order to be consistent. There's one new entry - RTA_PREFSRC, it's used to denote an MFC_PROXY entry (see MRT_ADD_MFC vs MRT_ADD_MFC_PROXY). The already existing infrastructure is used to create and delete the entries, the netlink message gets converted internally to a struct mfcctl which is used with ipmr_mfc_add/delete. The other used attributes are: RTA_IIF - used for mfcc_parent (when adding it's required to be valid) RTA_SRC - used for mfcc_origin RTA_DST - used for mfcc_mcastgrp RTA_TABLE - the MRT table id RTA_MULTIPATH - the "oifs" ttl array Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6c24a16299c7..4c10ee771648 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -66,6 +66,7 @@ #include #include #include +#include struct ipmr_rule { struct fib_rule common; @@ -2339,6 +2340,130 @@ done: return skb->len; } +static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { + [RTA_SRC] = { .type = NLA_U32 }, + [RTA_DST] = { .type = NLA_U32 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, +}; + +static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) +{ + switch (rtm_protocol) { + case RTPROT_STATIC: + case RTPROT_MROUTED: + return true; + } + return false; +} + +static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) +{ + struct rtnexthop *rtnh = nla_data(nla); + int remaining = nla_len(nla), vifi = 0; + + while (rtnh_ok(rtnh, remaining)) { + mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; + if (++vifi == MAXVIFS) + break; + rtnh = rtnh_next(rtnh, &remaining); + } + + return remaining > 0 ? 
-EINVAL : vifi; +} + +/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ +static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, + struct mfcctl *mfcc, int *mrtsock, + struct mr_table **mrtret) +{ + struct net_device *dev = NULL; + u32 tblid = RT_TABLE_DEFAULT; + struct mr_table *mrt; + struct nlattr *attr; + struct rtmsg *rtm; + int ret, rem; + + ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy); + if (ret < 0) + goto out; + rtm = nlmsg_data(nlh); + + ret = -EINVAL; + if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || + rtm->rtm_type != RTN_MULTICAST || + rtm->rtm_scope != RT_SCOPE_UNIVERSE || + !ipmr_rtm_validate_proto(rtm->rtm_protocol)) + goto out; + + memset(mfcc, 0, sizeof(*mfcc)); + mfcc->mfcc_parent = -1; + ret = 0; + nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { + switch (nla_type(attr)) { + case RTA_SRC: + mfcc->mfcc_origin.s_addr = nla_get_be32(attr); + break; + case RTA_DST: + mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); + break; + case RTA_IIF: + dev = __dev_get_by_index(net, nla_get_u32(attr)); + if (!dev) { + ret = -ENODEV; + goto out; + } + break; + case RTA_MULTIPATH: + if (ipmr_nla_get_ttls(attr, mfcc) < 0) { + ret = -EINVAL; + goto out; + } + break; + case RTA_PREFSRC: + ret = 1; + break; + case RTA_TABLE: + tblid = nla_get_u32(attr); + break; + } + } + mrt = ipmr_get_table(net, tblid); + if (!mrt) { + ret = -ENOENT; + goto out; + } + *mrtret = mrt; + *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; + if (dev) + mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); + +out: + return ret; +} + +/* takes care of both newroute and delroute */ +static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + int ret, mrtsock, parent; + struct mr_table *tbl; + struct mfcctl mfcc; + + mrtsock = 0; + tbl = NULL; + ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl); + if (ret < 0) + return ret; + + parent = ret ? 
mfcc.mfcc_parent : -1; + if (nlh->nlmsg_type == RTM_NEWROUTE) + return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); + else + return ipmr_mfc_delete(tbl, &mfcc, parent); +} + #ifdef CONFIG_PROC_FS /* The /proc interfaces to multicast routing : * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif @@ -2692,6 +2817,10 @@ int __init ip_mr_init(void) #endif rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute, NULL); + rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, + ipmr_rtm_route, NULL, NULL); + rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, + ipmr_rtm_route, NULL, NULL); return 0; #ifdef CONFIG_IP_PIMSM_V2 -- cgit v1.2.3 From 77b75f4d8cf105b599beef38724f8171e557919d Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Thu, 26 Nov 2015 19:23:15 +0000 Subject: unix: use wq_has_sleeper in unix_dgram_recvmsg The current unix_dgram_recvmsg does a wake up for every received datagram. This seems wasteful as only SOCK_DGRAM client sockets in an n:1 association with a server socket will ever wait because of the associated condition. The patch below changes the function such that the wake up only happens if wq_has_sleeper indicates that someone actually wants to be notified. Testing with SOCK_SEQPACKET and SOCK_DGRAM socket seems to confirm that this is an improvement. Signed-Off-By: Rainer Weikusat Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index efb706e1d1c0..ac011b97097d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1914,8 +1914,10 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, goto out_unlock; } - wake_up_interruptible_sync_poll(&u->peer_wait, - POLLOUT | POLLWRNORM | POLLWRBAND); + if (wq_has_sleeper(&u->peer_wait)) + wake_up_interruptible_sync_poll(&u->peer_wait, + POLLOUT | POLLWRNORM | + POLLWRBAND); if (msg->msg_name) unix_copy_addr(msg, skb->sk); -- cgit v1.2.3 From 7450aaf61f0ae2ee6cc6491138d11df2c25e7609 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Nov 2015 08:57:28 -0800 Subject: tcp: suppress too verbose messages in tcp_send_ack() If tcp_send_ack() can not allocate skb, we properly handle this and setup a timer to try later. Use __GFP_NOWARN to avoid polluting syslog in the case host is under memory pressure, so that pertinent messages are not lost under a flood of useless information. sk_gfp_atomic() can use its gfp_mask argument (all callers currently were using GFP_ATOMIC before this patch) We rename sk_gfp_atomic() to sk_gfp_mask() to clearly express this function now takes into account its second argument (gfp_mask) Note that when tcp_transmit_skb() is called with clone_it set to false, we do not attempt memory allocations, so can pass a 0 gfp_mask, which most compilers can emit faster than a non zero or constant value. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 4 ++-- net/ipv4/tcp_output.c | 14 ++++++++------ net/ipv6/tcp_ipv6.c | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 62d35afcb3ac..9065f8b7e646 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -775,9 +775,9 @@ static inline int sk_memalloc_socks(void) #endif -static inline gfp_t sk_gfp_atomic(const struct sock *sk, gfp_t gfp_mask) +static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask) { - return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC); + return gfp_mask | (sk->sk_allocation & __GFP_MEMALLOC); } static inline void sk_acceptq_removed(struct sock *sk) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cb7ca569052c..a800cee88035 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2296,7 +2296,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, return; if (tcp_write_xmit(sk, cur_mss, nonagle, 0, - sk_gfp_atomic(sk, GFP_ATOMIC))) + sk_gfp_mask(sk, GFP_ATOMIC))) tcp_check_probe_timer(sk); } @@ -3352,8 +3352,9 @@ void tcp_send_ack(struct sock *sk) * tcp_transmit_skb() will set the ownership to this * sock. */ - buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); - if (!buff) { + buff = alloc_skb(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); + if (unlikely(!buff)) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, @@ -3375,7 +3376,7 @@ void tcp_send_ack(struct sock *sk) /* Send it off, this clears delayed acks for us. */ skb_mstamp_get(&buff->skb_mstamp); - tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); + tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0); } EXPORT_SYMBOL_GPL(tcp_send_ack); @@ -3396,7 +3397,8 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. 
*/ - skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = alloc_skb(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); if (!skb) return -1; @@ -3409,7 +3411,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); skb_mstamp_get(&skb->skb_mstamp); NET_INC_STATS(sock_net(sk), mib); - return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); + return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0); } void tcp_send_window_probe(struct sock *sk) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c5429a636f1a..41bcd59a2ac7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1130,7 +1130,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, AF_INET6, key->key, key->keylen, - sk_gfp_atomic(sk, GFP_ATOMIC)); + sk_gfp_mask(sk, GFP_ATOMIC)); } #endif @@ -1146,7 +1146,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_atomic(sk, GFP_ATOMIC)); + sk_gfp_mask(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); ireq->pktopts = NULL; if (newnp->pktoptions) @@ -1212,7 +1212,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); + opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst = sk->sk_rx_dst; -- cgit v1.2.3 From b03804e7c3ad41c265c0ca21ddb306b252b4f99f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Dec 2015 12:12:03 +0100 Subject: net: Check CHANGEUPPER notifier return value switchdev drivers reflect the newly requested topology to hardware when CHANGEUPPER is received, after software links were already 
formed. However, the operation can fail and user will not be notified, as the return value of the notifier is not checked. Add this check and rollback software links if necessary. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5df6cbce727c..939cd1b1da15 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5490,8 +5490,12 @@ static int __netdev_upper_dev_link(struct net_device *dev, goto rollback_lower_mesh; } - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, - &changeupper_info.info); + ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + &changeupper_info.info); + ret = notifier_to_errno(ret); + if (ret) + goto rollback_lower_mesh; + return 0; rollback_lower_mesh: -- cgit v1.2.3 From 6dffb0447c25476f499d205dfceb1972e8dae919 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:10 +0100 Subject: net: propagate upper priv via netdev_master_upper_dev_link Eliminate netdev_master_upper_dev_link_private and pass priv directly as a parameter of netdev_master_upper_dev_link. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/team/team.c | 2 +- drivers/net/vrf.c | 2 +- include/linux/netdevice.h | 6 ++---- net/batman-adv/hard-interface.c | 3 ++- net/bridge/br_if.c | 2 +- net/core/dev.c | 18 ++++++------------ net/openvswitch/vport-netdev.c | 2 +- 8 files changed, 15 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9e0f8a7ef8b1..924015729b2d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1204,7 +1204,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev, { int err; - err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); + err = netdev_master_upper_dev_link(slave_dev, bond_dev, slave); if (err) return err; slave_dev->flags |= IFF_SLAVE; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index d2f3ee832c47..b37f8d14dca0 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1083,7 +1083,7 @@ static int team_upper_dev_link(struct net_device *dev, { int err; - err = netdev_master_upper_dev_link(port_dev, dev); + err = netdev_master_upper_dev_link(port_dev, dev, NULL); if (err) return err; port_dev->priv_flags |= IFF_TEAM_PORT; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index c2d54c4ed556..59c5bddeaedd 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -624,7 +624,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) goto out_fail; } - ret = netdev_master_upper_dev_link(port_dev, dev); + ret = netdev_master_upper_dev_link(port_dev, dev, NULL); if (ret < 0) goto out_unregister; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1506be58c59a..939b8f3de810 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3619,10 +3619,8 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct 
net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); int netdev_master_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev); -int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private); + struct net_device *upper_dev, + void *upper_priv); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f11345e163d7..a7f4f1085dbb 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -464,7 +464,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); - ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface); + ret = netdev_master_upper_dev_link(hard_iface->net_dev, + soft_iface, NULL); if (ret) goto err_dev; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ec02f5869a78..781abc34667a 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev); + err = netdev_master_upper_dev_link(dev, br->dev, NULL); if (err) goto err5; diff --git a/net/core/dev.c b/net/core/dev.c index 939cd1b1da15..27d052bb78bc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5421,7 +5421,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *private) + void *upper_priv) { struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; @@ -5452,7 +5452,7 @@ static int __netdev_upper_dev_link(struct net_device 
*dev, if (ret) return ret; - ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv, master); if (ret) return ret; @@ -5557,6 +5557,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * netdev_master_upper_dev_link - Add a master link to the upper device * @dev: device * @upper_dev: new upper device + * @upper_priv: upper device private * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices @@ -5565,20 +5566,13 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * counts are adjusted and the function returns zero. */ int netdev_master_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + void *upper_priv) { - return __netdev_upper_dev_link(dev, upper_dev, true, NULL); + return __netdev_upper_dev_link(dev, upper_dev, true, upper_priv); } EXPORT_SYMBOL(netdev_master_upper_dev_link); -int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private) -{ - return __netdev_upper_dev_link(dev, upper_dev, true, private); -} -EXPORT_SYMBOL(netdev_master_upper_dev_link_private); - /** * netdev_upper_dev_unlink - Removes a link to upper device * @dev: device diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index b327368a3848..3ee3df1edeae 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp)); + get_dpdev(vport->dp), NULL); if (err) goto error_unlock; -- cgit v1.2.3 From 29bf24afb29042f568fa67b1b0eee46796725ed2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:11 +0100 Subject: net: add possibility to pass information about upper device via notifier Sometimes the drivers and other 
code would find it handy to know some internal information about upper device being changed. So allow upper-code to pass information down to notifier listeners during linking. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/team/team.c | 2 +- drivers/net/vrf.c | 2 +- include/linux/netdevice.h | 3 ++- net/batman-adv/hard-interface.c | 2 +- net/bridge/br_if.c | 2 +- net/core/dev.c | 11 +++++++---- net/openvswitch/vport-netdev.c | 2 +- 8 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 924015729b2d..fa3ed1d8a12d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1204,7 +1204,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev, { int err; - err = netdev_master_upper_dev_link(slave_dev, bond_dev, slave); + err = netdev_master_upper_dev_link(slave_dev, bond_dev, slave, NULL); if (err) return err; slave_dev->flags |= IFF_SLAVE; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b37f8d14dca0..f7b6ff7948b8 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1083,7 +1083,7 @@ static int team_upper_dev_link(struct net_device *dev, { int err; - err = netdev_master_upper_dev_link(port_dev, dev, NULL); + err = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (err) return err; port_dev->priv_flags |= IFF_TEAM_PORT; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 59c5bddeaedd..8944a49cda15 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -624,7 +624,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) goto out_fail; } - ret = netdev_master_upper_dev_link(port_dev, dev, NULL); + ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (ret < 0) goto out_unregister; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 
939b8f3de810..aea556c64f2c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2163,6 +2163,7 @@ struct netdev_notifier_changeupper_info { struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ bool linking; /* is the nofication for link or unlink */ + void *upper_info; /* upper dev info */ }; static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, @@ -3620,7 +3621,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv); + void *upper_priv, void *upper_info); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index a7f4f1085dbb..aa8867e1d983 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -465,7 +465,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, bat_priv = netdev_priv(hard_iface->soft_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, - soft_iface, NULL); + soft_iface, NULL, NULL); if (ret) goto err_dev; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 781abc34667a..8d1d4a22c50d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev, NULL); + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL); if (err) goto err5; diff --git a/net/core/dev.c b/net/core/dev.c index 27d052bb78bc..8ed886663c6d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5421,7 +5421,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device 
*dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *upper_priv) + void *upper_priv, void *upper_info) { struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; @@ -5445,6 +5445,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, changeupper_info.upper_dev = upper_dev; changeupper_info.master = master; changeupper_info.linking = true; + changeupper_info.upper_info = upper_info; ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, &changeupper_info.info); @@ -5549,7 +5550,7 @@ rollback_mesh: int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, false, NULL); + return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -5558,6 +5559,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * @dev: device * @upper_dev: new upper device * @upper_priv: upper device private + * @upper_info: upper info to be passed down via notifier * * Adds a link to device which is upper to this one. 
In this case, only * one master upper device can be linked, although other non-master devices @@ -5567,9 +5569,10 @@ EXPORT_SYMBOL(netdev_upper_dev_link); */ int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv) + void *upper_priv, void *upper_info) { - return __netdev_upper_dev_link(dev, upper_dev, true, upper_priv); + return __netdev_upper_dev_link(dev, upper_dev, true, + upper_priv, upper_info); } EXPORT_SYMBOL(netdev_master_upper_dev_link); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 3ee3df1edeae..8f4dd4c39bfe 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp), NULL); + get_dpdev(vport->dp), NULL, NULL); if (err) goto error_unlock; -- cgit v1.2.3 From 04d482660a07039fc4e9a42bb3517db236d98f96 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:15 +0100 Subject: net: introduce change lower state notifier When lower device like bonding slave, team/bridge port, etc changes its state, it is useful for others to notice this change. Currently this is implemented specifically for bonding as NETDEV_BONDING_INFO notifier. This patch aims to replace this specific usage and make this more generic to be used for all upper-lower devices. Introduce NETDEV_CHANGELOWERSTATE netdev notifier type and netdev_lower_state_changed() helper. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 8 ++++++++ net/core/dev.c | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ab90ea0ed03..ad69f237aa78 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2158,6 +2158,7 @@ struct netdev_lag_upper_info { #define NETDEV_CHANGEINFODATA 0x0018 #define NETDEV_BONDING_INFO 0x0019 #define NETDEV_PRECHANGEUPPER 0x001A +#define NETDEV_CHANGELOWERSTATE 0x001B int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -2179,6 +2180,11 @@ struct netdev_notifier_changeupper_info { void *upper_info; /* upper dev info */ }; +struct netdev_notifier_changelowerstate_info { + struct netdev_notifier_info info; /* must be first */ + void *lower_state_info; /* is lower dev state */ +}; + static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { @@ -3640,6 +3646,8 @@ void netdev_upper_dev_unlink(struct net_device *dev, void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); +void netdev_lower_state_changed(struct net_device *lower_dev, + void *lower_state_info); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 diff --git a/net/core/dev.c b/net/core/dev.c index 8ed886663c6d..d1706e88fbeb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5756,6 +5756,26 @@ int dev_get_nest_level(struct net_device *dev, } EXPORT_SYMBOL(dev_get_nest_level); +/** + * netdev_lower_state_changed - Dispatch event about lower device state change + * @lower_dev: device + * @lower_state_info: state to dispatch + * + * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info. + * The caller must hold the RTNL lock. 
+ */ +void netdev_lower_state_changed(struct net_device *lower_dev, + void *lower_state_info) +{ + struct netdev_notifier_changelowerstate_info changelowerstate_info; + + ASSERT_RTNL(); + changelowerstate_info.lower_state_info = lower_state_info; + call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, + &changelowerstate_info.info); +} +EXPORT_SYMBOL(netdev_lower_state_changed); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; -- cgit v1.2.3 From d6df198d924775e4751561cf60ef0294e95f74df Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 1 Dec 2015 22:45:15 +0100 Subject: net: ipv6: restrict hop_limit sysctl setting to range [1; 255] Setting a value bigger than 255 resulted in using only the lower eight bits of that value as it is assigned to the u8 header field. To avoid this unexpected result, reject such values. Setting a value of zero is technically possible, but hosts receiving such a packet have to treat it like hop_limit was set to one, according to RFC2460. Therefore I don't see a use-case for that. Setting a route's hop_limit to zero in iproute2 means to use the sysctl default, which is not the case here: Setting e.g. net.conf.eth0.hop_limit=0 will not make the kernel use net.conf.all.hop_limit for outgoing packets on eth0. To avoid these kinds of confusion, reject zero. Signed-off-by: Phil Sutter Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d84742f003a9..a5de1a616c12 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5199,6 +5199,20 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, return ret; } +static +int addrconf_sysctl_hop_limit(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table lctl; + int min_hl = 1, max_hl = 255; + + lctl = *ctl; + lctl.extra1 = &min_hl; + lctl.extra2 = &max_hl; + + return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); +} + static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -5454,7 +5468,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.hop_limit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_hop_limit, }, { .procname = "mtu", -- cgit v1.2.3 From c89359a42e2a49656451569c382eed63e781153c Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 1 Dec 2015 22:18:11 -0800 Subject: mpls: support for dead routes Adds support for RTNH_F_DEAD and RTNH_F_LINKDOWN flags on mpls routes due to link events. Also adds code to ignore dead routes during route selection. Unlike ip routes, mpls routes are not deleted when the route goes dead. This is current mpls behaviour and this patch does not change that. With this patch however, routes will be marked dead. dead routes are not notified to userspace (this is consistent with ipv4 routes). 
dead routes: ----------- $ip -f mpls route show 100 nexthop as to 200 via inet 10.1.1.2 dev swp1 nexthop as to 700 via inet 10.1.1.6 dev swp2 $ip link set dev swp1 down $ip link show dev swp1 4: swp1: mtu 1500 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 1000 link/ether 00:02:00:00:00:01 brd ff:ff:ff:ff:ff:ff $ip -f mpls route show 100 nexthop as to 200 via inet 10.1.1.2 dev swp1 dead linkdown nexthop as to 700 via inet 10.1.1.6 dev swp2 linkdown routes: ---------------- $ip -f mpls route show 100 nexthop as to 200 via inet 10.1.1.2 dev swp1 nexthop as to 700 via inet 10.1.1.6 dev swp2 $ip link show dev swp1 4: swp1: mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000 link/ether 00:02:00:00:00:01 brd ff:ff:ff:ff:ff:ff /* carrier goes down */ $ip link show dev swp1 4: swp1: mtu 1500 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 1000 link/ether 00:02:00:00:00:01 brd ff:ff:ff:ff:ff:ff $ip -f mpls route show 100 nexthop as to 200 via inet 10.1.1.2 dev swp1 linkdown nexthop as to 700 via inet 10.1.1.6 dev swp2 Signed-off-by: Roopa Prabhu Acked-by: Robert Shearman Signed-off-by: David S. 
Miller --- net/mpls/af_mpls.c | 185 ++++++++++++++++++++++++++++++++++++++++++++-------- net/mpls/internal.h | 2 + 2 files changed, 159 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c70d750148b6..4b3b9b310c3a 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -96,22 +96,15 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); -static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, - struct sk_buff *skb, bool bos) +static u32 mpls_multipath_hash(struct mpls_route *rt, + struct sk_buff *skb, bool bos) { struct mpls_entry_decoded dec; struct mpls_shim_hdr *hdr; bool eli_seen = false; int label_index; - int nh_index = 0; u32 hash = 0; - /* No need to look further into packet if there's only - * one path - */ - if (rt->rt_nhn == 1) - goto out; - for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; label_index++) { if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) @@ -165,7 +158,38 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, } } - nh_index = hash % rt->rt_nhn; + return hash; +} + +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, + struct sk_buff *skb, bool bos) +{ + int alive = ACCESS_ONCE(rt->rt_nhn_alive); + u32 hash = 0; + int nh_index = 0; + int n = 0; + + /* No need to look further into packet if there's only + * one path + */ + if (rt->rt_nhn == 1) + goto out; + + if (alive <= 0) + return NULL; + + hash = mpls_multipath_hash(rt, skb, bos); + nh_index = hash % alive; + if (alive == rt->rt_nhn) + goto out; + for_nexthops(rt) { + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + continue; + if (n == nh_index) + return nh; + n++; + } endfor_nexthops(rt); + out: return &rt->rt_nh[nh_index]; } @@ -365,6 +389,7 @@ static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) GFP_KERNEL); if (rt) { rt->rt_nhn = num_nh; + rt->rt_nhn_alive = num_nh; rt->rt_max_alen = 
max_alen_aligned; } @@ -536,6 +561,16 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, RCU_INIT_POINTER(nh->nh_dev, dev); + if (!(dev->flags & IFF_UP)) { + nh->nh_flags |= RTNH_F_DEAD; + } else { + unsigned int flags; + + flags = dev_get_flags(dev); + if (!(flags & (IFF_RUNNING | IFF_LOWER_UP))) + nh->nh_flags |= RTNH_F_LINKDOWN; + } + return 0; errout: @@ -570,6 +605,9 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, if (err) goto errout; + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + rt->rt_nhn_alive--; + return 0; errout: @@ -577,8 +615,8 @@ errout: } static int mpls_nh_build(struct net *net, struct mpls_route *rt, - struct mpls_nh *nh, int oif, - struct nlattr *via, struct nlattr *newdst) + struct mpls_nh *nh, int oif, struct nlattr *via, + struct nlattr *newdst) { int err = -ENOMEM; @@ -681,11 +719,13 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, goto errout; err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, - rtnh->rtnh_ifindex, nla_via, - nla_newdst); + rtnh->rtnh_ifindex, nla_via, nla_newdst); if (err) goto errout; + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + rt->rt_nhn_alive--; + rtnh = rtnh_next(rtnh, &remaining); nhs++; } endfor_nexthops(rt); @@ -875,34 +915,74 @@ free: return ERR_PTR(err); } -static void mpls_ifdown(struct net_device *dev) +static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - struct mpls_dev *mdev; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); + if (!rt) continue; - for_nexthops(rt) { + + change_nexthops(rt) { if (rtnl_dereference(nh->nh_dev) != dev) continue; - nh->nh_dev = NULL; + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + nh->nh_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: 
+ nh->nh_flags |= RTNH_F_LINKDOWN; + ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; + break; + } + if (event == NETDEV_UNREGISTER) + RCU_INIT_POINTER(nh->nh_dev, NULL); } endfor_nexthops(rt); } - mdev = mpls_dev_get(dev); - if (!mdev) - return; - mpls_dev_sysctl_unregister(mdev); + return; +} + +static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) +{ + struct mpls_route __rcu **platform_label; + struct net *net = dev_net(dev); + unsigned index; + int alive; + + platform_label = rtnl_dereference(net->mpls.platform_label); + for (index = 0; index < net->mpls.platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + + if (!rt) + continue; + + alive = 0; + change_nexthops(rt) { + struct net_device *nh_dev = + rtnl_dereference(nh->nh_dev); + + if (!(nh->nh_flags & nh_flags)) { + alive++; + continue; + } + if (nh_dev != dev) + continue; + alive++; + nh->nh_flags &= ~nh_flags; + } endfor_nexthops(rt); - RCU_INIT_POINTER(dev->mpls_ptr, NULL); + ACCESS_ONCE(rt->rt_nhn_alive) = alive; + } - kfree_rcu(mdev, rcu); + return; } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, @@ -910,9 +990,9 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpls_dev *mdev; + unsigned int flags; - switch(event) { - case NETDEV_REGISTER: + if (event == NETDEV_REGISTER) { /* For now just support ethernet devices */ if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK)) { @@ -920,10 +1000,39 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (IS_ERR(mdev)) return notifier_from_errno(PTR_ERR(mdev)); } - break; + return NOTIFY_OK; + } + mdev = mpls_dev_get(dev); + if (!mdev) + return NOTIFY_OK; + + switch (event) { + case NETDEV_DOWN: + mpls_ifdown(dev, event); + break; + case NETDEV_UP: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + 
mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); + else + mpls_ifup(dev, RTNH_F_DEAD); + break; + case NETDEV_CHANGE: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); + else + mpls_ifdown(dev, event); + break; case NETDEV_UNREGISTER: - mpls_ifdown(dev); + mpls_ifdown(dev, event); + mdev = mpls_dev_get(dev); + if (mdev) { + mpls_dev_sysctl_unregister(mdev); + RCU_INIT_POINTER(dev->mpls_ptr, NULL); + kfree_rcu(mdev, rcu); + } break; case NETDEV_CHANGENAME: mdev = mpls_dev_get(dev); @@ -1237,9 +1346,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, dev = rtnl_dereference(nh->nh_dev); if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; + if (nh->nh_flags & RTNH_F_LINKDOWN) + rtm->rtm_flags |= RTNH_F_LINKDOWN; + if (nh->nh_flags & RTNH_F_DEAD) + rtm->rtm_flags |= RTNH_F_DEAD; } else { struct rtnexthop *rtnh; struct nlattr *mp; + int dead = 0; + int linkdown = 0; mp = nla_nest_start(skb, RTA_MULTIPATH); if (!mp) @@ -1253,6 +1368,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, dev = rtnl_dereference(nh->nh_dev); if (dev) rtnh->rtnh_ifindex = dev->ifindex; + if (nh->nh_flags & RTNH_F_LINKDOWN) { + rtnh->rtnh_flags |= RTNH_F_LINKDOWN; + linkdown++; + } + if (nh->nh_flags & RTNH_F_DEAD) { + rtnh->rtnh_flags |= RTNH_F_DEAD; + dead++; + } + if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, nh->nh_label)) @@ -1266,6 +1390,11 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; } endfor_nexthops(rt); + if (linkdown == rt->rt_nhn) + rtm->rtm_flags |= RTNH_F_LINKDOWN; + if (dead == rt->rt_nhn) + rtm->rtm_flags |= RTNH_F_DEAD; + nla_nest_end(skb, mp); } diff --git a/net/mpls/internal.h b/net/mpls/internal.h index bde52ce88c94..732a5c17e986 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -41,6 +41,7 @@ 
enum mpls_payload_type { struct mpls_nh { /* next hop label forwarding entry */ struct net_device __rcu *nh_dev; + unsigned int nh_flags; u32 nh_label[MAX_NEW_LABELS]; u8 nh_labels; u8 nh_via_alen; @@ -74,6 +75,7 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_payload_type; u8 rt_max_alen; unsigned int rt_nhn; + unsigned int rt_nhn_alive; struct mpls_nh rt_nh[0]; }; -- cgit v1.2.3 From 357ab2234d57f6c74386f64ded42dff8e3c0500b Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 2 Dec 2015 14:43:59 +0800 Subject: VSOCK: Introduce vsock_find_unbound_socket and vsock_bind_dgram_generic Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/net/af_vsock.h | 2 ++ net/vmw_vsock/af_vsock.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'net') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index e9eb2d6791b3..a0c8fa2ababf 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -175,8 +175,10 @@ void vsock_insert_connected(struct vsock_sock *vsk); void vsock_remove_bound(struct vsock_sock *vsk); void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); +struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr); #endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7fd1220fbfa0..77247a2b670b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -223,6 +223,17 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) return NULL; } +static struct sock *__vsock_find_unbound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + 
list_for_each_entry(vsk, vsock_unbound_sockets, bound_table) + if (addr->svm_port == vsk->local_addr.svm_port) + return sk_vsock(vsk); + + return NULL; +} + static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -298,6 +309,21 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) } EXPORT_SYMBOL_GPL(vsock_find_bound_socket); +struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_unbound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_unbound_socket); + struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -532,6 +558,50 @@ static int __vsock_bind_stream(struct vsock_sock *vsk, return 0; } +int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_unbound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. 
+ */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_unbound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_bind_dgram_generic); + static int __vsock_bind_dgram(struct vsock_sock *vsk, struct sockaddr_vm *addr) { -- cgit v1.2.3 From 80a19e338d458abb5a700df3fd00795c51361f06 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 2 Dec 2015 14:44:00 +0800 Subject: VSOCK: Introduce virtio-vsock-common.ko This module contains the common code and header files for the following virtio-vsock and virtio-vhost kernel modules. Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 209 +++++ include/uapi/linux/virtio_ids.h | 1 + include/uapi/linux/virtio_vsock.h | 89 +++ net/vmw_vsock/virtio_transport_common.c | 1272 +++++++++++++++++++++++++++++++ 4 files changed, 1571 insertions(+) create mode 100644 include/linux/virtio_vsock.h create mode 100644 include/uapi/linux/virtio_vsock.h create mode 100644 net/vmw_vsock/virtio_transport_common.c (limited to 'net') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h new file mode 100644 index 000000000000..a5f3ecc038f7 --- /dev/null +++ b/include/linux/virtio_vsock.h @@ -0,0 +1,209 @@ +/* + * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * Copyright (C) Red Hat, Inc., 2013-2015 + * Copyright (C) Asias He , 2013 + * Copyright (C) Stefan Hajnoczi , 2015 + */ + +#ifndef _LINUX_VIRTIO_VSOCK_H +#define _LINUX_VIRTIO_VSOCK_H + +#include +#include +#include + +#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 +#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) +#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) +#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) +#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL +#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) +#define VIRTIO_VSOCK_MAX_TX_BUF_SIZE (1024 * 1024 * 16) +#define VIRTIO_VSOCK_MAX_DGRAM_SIZE (1024 * 64) + +struct vsock_transport_recv_notify_data; +struct vsock_transport_send_notify_data; +struct sockaddr_vm; +struct vsock_sock; + +enum { + VSOCK_VQ_CTRL = 0, + VSOCK_VQ_RX = 1, /* for host to guest data */ + VSOCK_VQ_TX = 2, /* for guest to host data */ + VSOCK_VQ_MAX = 3, +}; + +/* virtio transport socket state */ +struct virtio_transport { + struct virtio_transport_pkt_ops *ops; + struct vsock_sock *vsk; + + u32 buf_size; + u32 buf_size_min; + u32 buf_size_max; + + struct mutex tx_lock; + struct mutex rx_lock; + + struct list_head rx_queue; + u32 rx_bytes; + + /* Protected by trans->tx_lock */ + u32 tx_cnt; + u32 buf_alloc; + u32 peer_fwd_cnt; + u32 peer_buf_alloc; + /* Protected by trans->rx_lock */ + u32 fwd_cnt; + + /* Protected by sk_lock */ + u16 dgram_id; + struct list_head incomplete_dgrams; /* dgram fragments */ +}; + +struct virtio_vsock_pkt { + struct virtio_vsock_hdr hdr; + struct virtio_transport *trans; + struct work_struct work; + struct list_head list; + void *buf; + u32 len; + u32 off; +}; + +struct virtio_vsock_pkt_info { + u32 remote_cid, remote_port; + struct msghdr *msg; + u32 pkt_len; + u16 type; + u16 op; + u32 flags; + u16 dgram_id; + u16 dgram_len; +}; + +struct virtio_transport_pkt_ops { + int (*send_pkt)(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info); +}; + +void virtio_vsock_dumppkt(const char 
*func, + const struct virtio_vsock_pkt *pkt); + +struct sock * +virtio_transport_get_pending(struct sock *listener, + struct virtio_vsock_pkt *pkt); +struct virtio_vsock_pkt * +virtio_transport_alloc_pkt(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info, + size_t len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port); +ssize_t +virtio_transport_stream_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, + int type); +int +virtio_transport_dgram_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags); + +s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); +s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); + +int virtio_transport_do_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk); +u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); +u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); +u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); +void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); +void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); +void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val); +int +virtio_transport_notify_poll_in(struct vsock_sock *vsk, + size_t target, + bool *data_ready_now); +int +virtio_transport_notify_poll_out(struct vsock_sock *vsk, + size_t target, + bool *space_available_now); + +int virtio_transport_notify_recv_init(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data); +int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, + size_t target, ssize_t copied, bool data_read, + struct vsock_transport_recv_notify_data *data); +int 
virtio_transport_notify_send_init(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data); +int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data); +int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data); +int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, + ssize_t written, struct vsock_transport_send_notify_data *data); + +u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); +bool virtio_transport_stream_is_active(struct vsock_sock *vsk); +bool virtio_transport_stream_allow(u32 cid, u32 port); +int virtio_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr); +bool virtio_transport_dgram_allow(u32 cid, u32 port); + +int virtio_transport_connect(struct vsock_sock *vsk); + +int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); + +void virtio_transport_release(struct vsock_sock *vsk); + +ssize_t +virtio_transport_stream_enqueue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len); +int +virtio_transport_dgram_enqueue(struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct msghdr *msg, + size_t len); + +void virtio_transport_destruct(struct vsock_sock *vsk); + +void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); +void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); +void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt); +void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt); +u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 wanted); +void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit); +#endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 77925f587b15..16dcf5d06cd7 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -39,6 +39,7 @@ #define VIRTIO_ID_9P 9 /* 9p virtio console 
*/ #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_VSOCK 13 /* virtio vsock transport */ #define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_INPUT 18 /* virtio input */ diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h new file mode 100644 index 000000000000..8cf9b5682628 --- /dev/null +++ b/include/uapi/linux/virtio_vsock.h @@ -0,0 +1,89 @@ +/* + * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (C) Red Hat, Inc., 2013-2015 + * Copyright (C) Asias He , 2013 + * Copyright (C) Stefan Hajnoczi , 2015 + */ + +#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H +#define _UAPI_LINUX_VIRTIO_VSOCK_H + +#include +#include +#include + +struct virtio_vsock_config { + __le32 guest_cid; + __le32 max_virtqueue_pairs; +}; + +struct virtio_vsock_hdr { + __le32 src_cid; + __le32 src_port; + __le32 dst_cid; + __le32 dst_port; + __le32 len; + __le16 type; /* enum virtio_vsock_type */ + __le16 op; /* enum virtio_vsock_op */ + __le32 flags; + __le32 buf_alloc; + __le32 fwd_cnt; +}; + +enum virtio_vsock_type { + VIRTIO_VSOCK_TYPE_STREAM = 1, + VIRTIO_VSOCK_TYPE_DGRAM = 2, +}; + +enum virtio_vsock_op { + VIRTIO_VSOCK_OP_INVALID = 0, + + /* Connect operations */ + VIRTIO_VSOCK_OP_REQUEST = 1, + VIRTIO_VSOCK_OP_RESPONSE = 2, + VIRTIO_VSOCK_OP_ACK = 3, + VIRTIO_VSOCK_OP_RST = 4, + VIRTIO_VSOCK_OP_SHUTDOWN = 5, + + /* To send payload */ + VIRTIO_VSOCK_OP_RW = 6, + + /* Tell the peer our credit info */ + VIRTIO_VSOCK_OP_CREDIT_UPDATE = 7, + /* Request the peer to send the credit info to us */ + VIRTIO_VSOCK_OP_CREDIT_REQUEST = 8, +}; + +/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ +enum virtio_vsock_shutdown { + VIRTIO_VSOCK_SHUTDOWN_RCV = 1, + VIRTIO_VSOCK_SHUTDOWN_SEND = 2, +}; + +#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c new file mode 100644 index 000000000000..28f790da6f15
--- /dev/null +++ b/net/vmw_vsock/virtio_transport_common.c @@ -0,0 +1,1272 @@ +/* + * common code for virtio vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define COOKIEBITS 24 +#define COOKIEMASK (((u32)1 << COOKIEBITS) - 1) +#define VSOCK_TIMEOUT_INIT 4 + +#define SHA_MESSAGE_WORDS 16 +#define SHA_VSOCK_WORDS 5 + +static u32 vsockcookie_secret[2][SHA_MESSAGE_WORDS - SHA_VSOCK_WORDS + + SHA_DIGEST_WORDS]; + +static DEFINE_PER_CPU(__u32[SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS + + SHA_WORKSPACE_WORDS], vsock_cookie_scratch); + +static u32 cookie_hash(u32 saddr, u32 daddr, u16 sport, u16 dport, + u32 count, int c) +{ + __u32 *tmp = this_cpu_ptr(vsock_cookie_scratch); + + memcpy(tmp + SHA_VSOCK_WORDS, vsockcookie_secret[c], + sizeof(vsockcookie_secret[c])); + tmp[0] = saddr; + tmp[1] = daddr; + tmp[2] = sport; + tmp[3] = dport; + tmp[4] = count; + sha_transform(tmp + SHA_MESSAGE_WORDS, (__u8 *)tmp, + tmp + SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS); + + return tmp[17]; +} + +static u32 +virtio_vsock_secure_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, + u32 count) +{ + u32 h1, h2; + + h1 = cookie_hash(saddr, daddr, sport, dport, 0, 0); + h2 = cookie_hash(saddr, daddr, sport, dport, count, 1); + + return h1 + (count << COOKIEBITS) + (h2 & COOKIEMASK); +} + +static u32 +virtio_vsock_check_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, + u32 count, u32 cookie, u32 maxdiff) +{ + u32 diff; + u32 ret; + + cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0); + + diff = (count - (cookie >> COOKIEBITS)) & ((u32)-1 >> COOKIEBITS); + pr_debug("%s: diff=%x\n", __func__, diff); + if (diff >= maxdiff) + return (u32)-1; + + ret = (cookie - + cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) + & COOKIEMASK; + pr_debug("%s: ret=%x\n", 
__func__, ret); + + return ret; +} + +void virtio_vsock_dumppkt(const char *func, const struct virtio_vsock_pkt *pkt) +{ + pr_debug("%s: pkt=%p, op=%d, len=%d, %d:%d---%d:%d, len=%d\n", + func, pkt, + le16_to_cpu(pkt->hdr.op), + le32_to_cpu(pkt->hdr.len), + le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port), + le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port), + pkt->len); +} +EXPORT_SYMBOL_GPL(virtio_vsock_dumppkt); + +struct virtio_vsock_pkt * +virtio_transport_alloc_pkt(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info, + size_t len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt *pkt; + int err; + + BUG_ON(!trans); + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return NULL; + + pkt->hdr.type = cpu_to_le16(info->type); + pkt->hdr.op = cpu_to_le16(info->op); + pkt->hdr.src_cid = cpu_to_le32(src_cid); + pkt->hdr.src_port = cpu_to_le32(src_port); + pkt->hdr.dst_cid = cpu_to_le32(dst_cid); + pkt->hdr.dst_port = cpu_to_le32(dst_port); + pkt->hdr.flags = cpu_to_le32(info->flags); + pkt->len = len; + pkt->trans = trans; + if (info->type == VIRTIO_VSOCK_TYPE_DGRAM) + pkt->hdr.len = cpu_to_le32(len + (info->dgram_len << 16)); + else if (info->type == VIRTIO_VSOCK_TYPE_STREAM) + pkt->hdr.len = cpu_to_le32(len); + + if (info->msg && len > 0) { + pkt->buf = kmalloc(len, GFP_KERNEL); + if (!pkt->buf) + goto out_pkt; + err = memcpy_from_msg(pkt->buf, info->msg, len); + if (err) + goto out; + } + + return pkt; + +out: + kfree(pkt->buf); +out_pkt: + kfree(pkt); + return NULL; +} +EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); + +struct sock * +virtio_transport_get_pending(struct sock *listener, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sockaddr_vm src; + struct sockaddr_vm dst; + struct sock *pending; + + vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid),
le32_to_cpu(pkt->hdr.src_port)); + vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); + + vlistener = vsock_sk(listener); + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && + vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pending = sk_vsock(vpending); + sock_hold(pending); + return pending; + } + } + + return NULL; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_pending); + +static void virtio_transport_inc_rx_pkt(struct virtio_vsock_pkt *pkt) +{ + pkt->trans->rx_bytes += pkt->len; +} + +static void virtio_transport_dec_rx_pkt(struct virtio_vsock_pkt *pkt) +{ + pkt->trans->rx_bytes -= pkt->len; + pkt->trans->fwd_cnt += pkt->len; +} + +void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt) +{ + mutex_lock(&pkt->trans->tx_lock); + pkt->hdr.fwd_cnt = cpu_to_le32(pkt->trans->fwd_cnt); + pkt->hdr.buf_alloc = cpu_to_le32(pkt->trans->buf_alloc); + mutex_unlock(&pkt->trans->tx_lock); +} +EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); + +void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt) +{ +} +EXPORT_SYMBOL_GPL(virtio_transport_dec_tx_pkt); + +u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 credit) +{ + u32 ret; + + mutex_lock(&trans->tx_lock); + ret = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); + if (ret > credit) + ret = credit; + trans->tx_cnt += ret; + mutex_unlock(&trans->tx_lock); + + pr_debug("%s: ret=%d, buf_alloc=%d, peer_buf_alloc=%d," + "tx_cnt=%d, fwd_cnt=%d, peer_fwd_cnt=%d\n", __func__, + ret, trans->buf_alloc, trans->peer_buf_alloc, + trans->tx_cnt, trans->fwd_cnt, trans->peer_fwd_cnt); + + return ret; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_credit); + +void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit) +{ + mutex_lock(&trans->tx_lock); + trans->tx_cnt -= credit; + mutex_unlock(&trans->tx_lock); +} 
+EXPORT_SYMBOL_GPL(virtio_transport_put_credit); + +static int virtio_transport_send_credit_update(struct vsock_sock *vsk, int type, struct virtio_vsock_hdr *hdr) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, + .type = type, + }; + + if (hdr && type == VIRTIO_VSOCK_TYPE_DGRAM) { + info.remote_cid = le32_to_cpu(hdr->src_cid); + info.remote_port = le32_to_cpu(hdr->src_port); + } + + pr_debug("%s: sk=%p send_credit_update\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} + +static int virtio_transport_send_credit_request(struct vsock_sock *vsk, int type) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_CREDIT_REQUEST, + .type = type, + }; + + pr_debug("%s: sk=%p send_credit_request\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} + +static ssize_t +virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt *pkt; + size_t bytes, total = 0; + int err = -EFAULT; + + mutex_lock(&trans->rx_lock); + while (total < len && trans->rx_bytes > 0 && + !list_empty(&trans->rx_queue)) { + pkt = list_first_entry(&trans->rx_queue, + struct virtio_vsock_pkt, list); + + bytes = len - total; + if (bytes > pkt->len - pkt->off) + bytes = pkt->len - pkt->off; + + err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); + if (err) + goto out; + total += bytes; + pkt->off += bytes; + if (pkt->off == pkt->len) { + virtio_transport_dec_rx_pkt(pkt); + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + } + mutex_unlock(&trans->rx_lock); + + /* Send a credit pkt to peer */ + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, + NULL); + + return total; + +out: + mutex_unlock(&trans->rx_lock); + if (total) + err = total; + return err; +} + +ssize_t +virtio_transport_stream_dequeue(struct 
vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags) +{ + if (flags & MSG_PEEK) + return -EOPNOTSUPP; + + return virtio_transport_stream_do_dequeue(vsk, msg, len); +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); + +struct dgram_skb { + struct list_head list; + struct sk_buff *skb; + u16 id; +}; + +static struct dgram_skb *dgram_id_to_skb(struct virtio_transport *trans, + u16 id) +{ + struct dgram_skb *dgram_skb; + + list_for_each_entry(dgram_skb, &trans->incomplete_dgrams, list) { + if (dgram_skb->id == id) + return dgram_skb; + } + + return NULL; +} + +static void +virtio_transport_recv_dgram(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct sk_buff *skb = NULL; + struct vsock_sock *vsk; + struct virtio_transport *trans; + size_t size; + u16 dgram_id, pkt_off, dgram_len, pkt_len; + u32 flags, len; + struct dgram_skb *dgram_skb; + + vsk = vsock_sk(sk); + trans = vsk->trans; + + /* len: dgram_len | pkt_len */ + len = le32_to_cpu(pkt->hdr.len); + dgram_len = len >> 16; + pkt_len = len & 0xFFFF; + + /* flags: dgram_id | pkt_off */ + flags = le32_to_cpu(pkt->hdr.flags); + dgram_id = flags >> 16; + pkt_off = flags & 0xFFFF; + + pr_debug("%s: dgram_len=%d, pkt_len=%d, id=%d, off=%d\n", __func__, + dgram_len, pkt_len, dgram_id, pkt_off); + + dgram_skb = dgram_id_to_skb(trans, dgram_id); + if (dgram_skb) { + /* This pkt is for an existing dgram */ + skb = dgram_skb->skb; + pr_debug("%s:found skb\n", __func__); + } + + /* Packet payload must be within datagram bounds */ + if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) + goto drop; + if (pkt_len > dgram_len) + goto drop; + if (pkt_off > dgram_len) + goto drop; + if (dgram_len - pkt_off < pkt_len) + goto drop; + + if (!skb) { + /* This pkt is for a new dgram */ + pr_debug("%s:create skb\n", __func__); + + size = sizeof(pkt->hdr) + dgram_len; + /* Attach the packet to the socket's receive queue as an sk_buff.
*/ + dgram_skb = kzalloc(sizeof(struct dgram_skb), GFP_ATOMIC); + if (!dgram_skb) + goto drop; + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + kfree(dgram_skb); + dgram_skb = NULL; + goto drop; + } + dgram_skb->id = dgram_id; + dgram_skb->skb = skb; + list_add_tail(&dgram_skb->list, &trans->incomplete_dgrams); + + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, &pkt->hdr, sizeof(pkt->hdr)); + } + + memcpy(skb->data + sizeof(pkt->hdr) + pkt_off, pkt->buf, pkt_len); + + pr_debug("%s:C, off=%d, pkt_len=%d, dgram_len=%d\n", __func__, + pkt_off, pkt_len, dgram_len); + + /* We are done with this dgram */ + if (pkt_off + pkt_len == dgram_len) { + pr_debug("%s:dgram_id=%d is done\n", __func__, dgram_id); + list_del(&dgram_skb->list); + kfree(dgram_skb); + sk_receive_skb(sk, skb, 0); + } + virtio_transport_free_pkt(pkt); + return; + +drop: + if (dgram_skb) { + list_del(&dgram_skb->list); + kfree(dgram_skb); + kfree_skb(skb); + sock_put(sk); + } + virtio_transport_free_pkt(pkt); +} + +int +virtio_transport_dgram_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags) +{ + struct virtio_vsock_hdr *hdr; + struct sk_buff *skb; + int noblock; + int err; + int dgram_len; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + if (!skb) + return -EAGAIN; + + hdr = (struct virtio_vsock_hdr *)skb->data; + if (!hdr) + goto out; + + dgram_len = le32_to_cpu(hdr->len) >> 16; + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_msg(skb, sizeof(*hdr), msg, dgram_len); + if (err) + goto out; + + if (msg->msg_name) { + /* Provide the address of the sender. 
*/ + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); + vsock_addr_init(vm_addr, le32_to_cpu(hdr->src_cid), le32_to_cpu(hdr->src_port)); + msg->msg_namelen = sizeof(*vm_addr); + } + err = dgram_len; + + /* Send a credit pkt to peer */ + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, hdr); + + pr_debug("%s:done, recved =%d\n", __func__, dgram_len); +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); + +s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + s64 bytes; + + mutex_lock(&trans->rx_lock); + bytes = trans->rx_bytes; + mutex_unlock(&trans->rx_lock); + + return bytes; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); + +static s64 virtio_transport_has_space(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + s64 bytes; + + bytes = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); + if (bytes < 0) + bytes = 0; + + return bytes; +} + +s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + s64 bytes; + + mutex_lock(&trans->tx_lock); + bytes = virtio_transport_has_space(vsk); + mutex_unlock(&trans->tx_lock); + + pr_debug("%s: bytes=%lld\n", __func__, bytes); + + return bytes; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); + +int virtio_transport_do_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + struct virtio_transport *trans; + + trans = kzalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) + return -ENOMEM; + + vsk->trans = trans; + trans->vsk = vsk; + if (psk) { + struct virtio_transport *ptrans = psk->trans; + trans->buf_size = ptrans->buf_size; + trans->buf_size_min = ptrans->buf_size_min; + trans->buf_size_max = ptrans->buf_size_max; + trans->peer_buf_alloc = ptrans->peer_buf_alloc; + } else { + trans->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; + trans->buf_size_min = 
VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; + trans->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; + } + + trans->buf_alloc = trans->buf_size; + + pr_debug("%s: trans->buf_alloc=%d\n", __func__, trans->buf_alloc); + + mutex_init(&trans->rx_lock); + mutex_init(&trans->tx_lock); + INIT_LIST_HEAD(&trans->rx_queue); + INIT_LIST_HEAD(&trans->incomplete_dgrams); + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); + +u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); + +u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size_min; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); + +u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size_max; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); + +void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_transport *trans = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val < trans->buf_size_min) + trans->buf_size_min = val; + if (val > trans->buf_size_max) + trans->buf_size_max = val; + trans->buf_size = val; + trans->buf_alloc = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); + +void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_transport *trans = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val > trans->buf_size) + trans->buf_size = val; + trans->buf_size_min = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); + +void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_transport *trans = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val 
= VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val < trans->buf_size) + trans->buf_size = val; + trans->buf_size_max = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); + +int +virtio_transport_notify_poll_in(struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + if (vsock_stream_has_data(vsk)) + *data_ready_now = true; + else + *data_ready_now = false; + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); + +int +virtio_transport_notify_poll_out(struct vsock_sock *vsk, + size_t target, + bool *space_avail_now) +{ + s64 free_space; + + free_space = vsock_stream_has_space(vsk); + if (free_space > 0) + *space_avail_now = true; + else if (free_space == 0) + *space_avail_now = false; + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); + +int virtio_transport_notify_recv_init(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); + +int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); + +int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); + +int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, + size_t target, ssize_t copied, bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); + +int virtio_transport_notify_send_init(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); + +int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; 
+} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); + +int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); + +int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, + ssize_t written, struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); + +u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); + +bool virtio_transport_stream_is_active(struct vsock_sock *vsk) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); + +bool virtio_transport_stream_allow(u32 cid, u32 port) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); + +int virtio_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return vsock_bind_dgram_generic(vsk, addr); +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); + +bool virtio_transport_dgram_allow(u32 cid, u32 port) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); + +int virtio_transport_connect(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_REQUEST, + .type = VIRTIO_VSOCK_TYPE_STREAM, + }; + + pr_debug("%s: vsk=%p send_request\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_connect); + +int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_SHUTDOWN, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .flags = (mode & RCV_SHUTDOWN ? + VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | + (mode & SEND_SHUTDOWN ? 
+ VIRTIO_VSOCK_SHUTDOWN_SEND : 0), + }; + + pr_debug("%s: vsk=%p: send_shutdown\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_shutdown); + +void virtio_transport_release(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + struct sock *sk = &vsk->sk; + struct dgram_skb *dgram_skb; + struct dgram_skb *dgram_skb_tmp; + + pr_debug("%s: vsk=%p\n", __func__, vsk); + + /* Tell other side to terminate connection */ + if (sk->sk_type == SOCK_STREAM && sk->sk_state == SS_CONNECTED) { + virtio_transport_shutdown(vsk, SHUTDOWN_MASK); + } + + /* Free incomplete dgrams */ + lock_sock(sk); + list_for_each_entry_safe(dgram_skb, dgram_skb_tmp, + &trans->incomplete_dgrams, list) { + list_del(&dgram_skb->list); + kfree_skb(dgram_skb->skb); + kfree(dgram_skb); + sock_put(sk); /* held in virtio_transport_recv_dgram() */ + } + release_sock(sk); +} +EXPORT_SYMBOL_GPL(virtio_transport_release); + +int +virtio_transport_dgram_enqueue(struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct msghdr *msg, + size_t dgram_len) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RW, + .type = VIRTIO_VSOCK_TYPE_DGRAM, + .msg = msg, + }; + size_t total_written = 0, pkt_off = 0, written; + u16 dgram_id; + + /* The max size of a single dgram we support is 64KB */ + if (dgram_len > VIRTIO_VSOCK_MAX_DGRAM_SIZE) + return -EMSGSIZE; + + info.dgram_len = dgram_len; + vsk->remote_addr = *remote_addr; + + dgram_id = trans->dgram_id++; + + /* TODO: To optimize, if we have enough credit to send the pkt already, + * do not ask the peer to send credit to use */ + virtio_transport_send_credit_request(vsk, VIRTIO_VSOCK_TYPE_DGRAM); + + while (total_written < dgram_len) { + info.pkt_len = dgram_len - total_written; + info.flags = dgram_id << 16 | pkt_off; + written = trans->ops->send_pkt(vsk, &info); + if (written < 0) + return -ENOMEM; + if (written == 0) { + /* 
TODO: if written = 0, we need a sleep & wakeup + * instead of sleep */ + pr_debug("%s: SHOULD WAIT written==0", __func__); + msleep(10); + } + total_written += written; + pkt_off += written; + pr_debug("%s:id=%d, dgram_len=%zu, off=%zu, total_written=%zu, written=%zu\n", + __func__, dgram_id, dgram_len, pkt_off, total_written, written); + } + + return dgram_len; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); + +ssize_t +virtio_transport_stream_enqueue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RW, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .msg = msg, + .pkt_len = len, + }; + + return trans->ops->send_pkt(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); + +void virtio_transport_destruct(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + pr_debug("%s: vsk=%p\n", __func__, vsk); + kfree(trans); +} +EXPORT_SYMBOL_GPL(virtio_transport_destruct); + +static int virtio_transport_send_ack(struct vsock_sock *vsk, u32 cookie) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_ACK, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .flags = cpu_to_le32(cookie), + }; + + pr_debug("%s: sk=%p send_offer\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} + +static int virtio_transport_send_reset(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RST, + .type = VIRTIO_VSOCK_TYPE_STREAM, + }; + + pr_debug("%s\n", __func__); + + /* Send RST only if the original pkt is not a RST pkt */ + if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) + return 0; + + return trans->ops->send_pkt(vsk, &info); +} + +static int +virtio_transport_recv_connecting(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = 
vsock_sk(sk); + int err; + int skerr; + u32 cookie; + + pr_debug("%s: vsk=%p\n", __func__, vsk); + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_RESPONSE: + cookie = le32_to_cpu(pkt->hdr.flags); + pr_debug("%s: got RESPONSE and send ACK, cookie=%x\n", __func__, cookie); + err = virtio_transport_send_ack(vsk, cookie); + if (err < 0) { + skerr = -err; + goto destroy; + } + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + break; + case VIRTIO_VSOCK_OP_INVALID: + pr_debug("%s: got invalid\n", __func__); + break; + case VIRTIO_VSOCK_OP_RST: + pr_debug("%s: got rst\n", __func__); + skerr = ECONNRESET; + err = 0; + goto destroy; + default: + pr_debug("%s: got def\n", __func__); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + return 0; + +destroy: + virtio_transport_send_reset(vsk, pkt); + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int +virtio_transport_recv_connected(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct virtio_transport *trans = vsk->trans; + int err = 0; + + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_RW: + pkt->len = le32_to_cpu(pkt->hdr.len); + pkt->off = 0; + pkt->trans = trans; + + mutex_lock(&trans->rx_lock); + virtio_transport_inc_rx_pkt(pkt); + list_add_tail(&pkt->list, &trans->rx_queue); + mutex_unlock(&trans->rx_lock); + + sk->sk_data_ready(sk); + return err; + case VIRTIO_VSOCK_OP_CREDIT_UPDATE: + sk->sk_write_space(sk); + break; + case VIRTIO_VSOCK_OP_SHUTDOWN: + pr_debug("%s: got shutdown\n", __func__); + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) + vsk->peer_shutdown |= RCV_SHUTDOWN; + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) + vsk->peer_shutdown |= SEND_SHUTDOWN; + if (le32_to_cpu(pkt->hdr.flags)) + sk->sk_state_change(sk); + break; + case VIRTIO_VSOCK_OP_RST: + pr_debug("%s: 
got rst\n", __func__); + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + sk->sk_state_change(sk); + break; + default: + err = -EINVAL; + break; + } + + virtio_transport_free_pkt(pkt); + return err; +} + +static int +virtio_transport_send_response(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RESPONSE, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .remote_cid = le32_to_cpu(pkt->hdr.src_cid), + .remote_port = le32_to_cpu(pkt->hdr.src_port), + }; + u32 cookie; + + cookie = virtio_vsock_secure_cookie(le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.src_port), + le32_to_cpu(pkt->hdr.dst_port), + jiffies / (HZ * 60)); + info.flags = cpu_to_le32(cookie); + + pr_debug("%s: send_response, cookie=%x\n", __func__, le32_to_cpu(cookie)); + + return trans->ops->send_pkt(vsk, &info); +} + +/* Handle server socket */ +static int +virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct vsock_sock *vpending; + struct sock *pending; + int err; + u32 cookie; + + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_REQUEST: + err = virtio_transport_send_response(vsk, pkt); + if (err < 0) { + // FIXME vsk should be vpending + virtio_transport_send_reset(vsk, pkt); + return err; + } + break; + case VIRTIO_VSOCK_OP_ACK: + cookie = le32_to_cpu(pkt->hdr.flags); + err = virtio_vsock_check_cookie(le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.src_port), + le32_to_cpu(pkt->hdr.dst_port), + jiffies / (HZ * 60), + le32_to_cpu(pkt->hdr.flags), + VSOCK_TIMEOUT_INIT); + pr_debug("%s: cookie=%x, err=%d\n", __func__, cookie, err); + if (err) + return err; + + /* So no pending socket are responsible for this pkt, create one */ + 
pr_debug("%s: create pending\n", __func__); + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type, 0); + if (!pending) { + virtio_transport_send_reset(vsk, pkt); + return -ENOMEM; + } + sk->sk_ack_backlog++; + pending->sk_state = SS_CONNECTING; + + vpending = vsock_sk(pending); + vsock_addr_init(&vpending->local_addr, le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port)); + vsock_addr_init(&vpending->remote_addr, le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port)); + vsock_add_pending(sk, pending); + + pr_debug("%s: get pending\n", __func__); + pending = virtio_transport_get_pending(sk, pkt); + vpending = vsock_sk(pending); + lock_sock(pending); + switch (pending->sk_state) { + case SS_CONNECTING: + if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_ACK) { + pr_debug("%s: op=%d != OP_ACK\n", __func__, + le16_to_cpu(pkt->hdr.op)); + virtio_transport_send_reset(vpending, pkt); + pending->sk_err = EPROTO; + pending->sk_state = SS_UNCONNECTED; + sock_put(pending); + } else { + pending->sk_state = SS_CONNECTED; + vsock_insert_connected(vpending); + + vsock_remove_pending(sk, pending); + vsock_enqueue_accept(sk, pending); + + sk->sk_data_ready(sk); + } + err = 0; + break; + default: + pr_debug("%s: sk->sk_ack_backlog=%d\n", __func__, + sk->sk_ack_backlog); + virtio_transport_send_reset(vpending, pkt); + err = -EINVAL; + break; + } + if (err < 0) + vsock_remove_pending(sk, pending); + release_sock(pending); + + /* Release refcnt obtained in virtio_transport_get_pending */ + sock_put(pending); + break; + default: + break; + } + + return 0; +} + +static void virtio_transport_space_update(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct virtio_transport *trans = vsk->trans; + bool space_available; + + /* buf_alloc and fwd_cnt is always included in the hdr */ + mutex_lock(&trans->tx_lock); + trans->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); + trans->peer_fwd_cnt = 
le32_to_cpu(pkt->hdr.fwd_cnt); + space_available = virtio_transport_has_space(vsk); + mutex_unlock(&trans->tx_lock); + + if (space_available) + sk->sk_write_space(sk); +} + +/* We are under the virtio-vsock's vsock->rx_lock or + * vhost-vsock's vq->mutex lock */ +void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) +{ + struct virtio_transport *trans; + struct sockaddr_vm src, dst; + struct vsock_sock *vsk; + struct sock *sk; + + vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); + vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); + + virtio_vsock_dumppkt(__func__, pkt); + + if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_DGRAM) { + sk = vsock_find_unbound_socket(&dst); + if (!sk) + goto free_pkt; + + vsk = vsock_sk(sk); + trans = vsk->trans; + BUG_ON(!trans); + + virtio_transport_space_update(sk, pkt); + + lock_sock(sk); + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_CREDIT_UPDATE: + virtio_transport_free_pkt(pkt); + break; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, + &pkt->hdr); + virtio_transport_free_pkt(pkt); + break; + case VIRTIO_VSOCK_OP_RW: + virtio_transport_recv_dgram(sk, pkt); + break; + default: + virtio_transport_free_pkt(pkt); + break; + } + release_sock(sk); + + /* Release refcnt obtained when we fetched this socket out of + * the unbound list. 
+ */ + sock_put(sk); + return; + } else if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) { + /* The socket must be in connected or bound table + * otherwise send reset back + */ + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + pr_debug("%s: can not find bound_socket\n", __func__); + virtio_vsock_dumppkt(__func__, pkt); + /* Ignore this pkt instead of sending reset back */ + /* TODO send a RST unless this packet is a RST (to avoid infinite loops) */ + goto free_pkt; + } + } + + vsk = vsock_sk(sk); + trans = vsk->trans; + BUG_ON(!trans); + + virtio_transport_space_update(sk, pkt); + + lock_sock(sk); + switch (sk->sk_state) { + case VSOCK_SS_LISTEN: + virtio_transport_recv_listen(sk, pkt); + virtio_transport_free_pkt(pkt); + break; + case SS_CONNECTING: + virtio_transport_recv_connecting(sk, pkt); + virtio_transport_free_pkt(pkt); + break; + case SS_CONNECTED: + virtio_transport_recv_connected(sk, pkt); + break; + default: + virtio_transport_free_pkt(pkt); + break; + } + release_sock(sk); + + /* Release refcnt obtained when we fetched this socket out of the + * bound or connected list. 
+ */ + sock_put(sk); + } + return; + +free_pkt: + virtio_transport_free_pkt(pkt); +} +EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); + +void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) +{ + kfree(pkt->buf); + kfree(pkt); +} +EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); + +static int __init virtio_vsock_common_init(void) +{ + get_random_bytes(vsockcookie_secret, sizeof(vsockcookie_secret)); + return 0; +} + +static void __exit virtio_vsock_common_exit(void) +{ +} + +module_init(virtio_vsock_common_init); +module_exit(virtio_vsock_common_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); +MODULE_DESCRIPTION("common code for virtio vsock"); -- cgit v1.2.3 From 32e61b06b6946ba137723c5b1de2a1fdb2e0e0a5 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 2 Dec 2015 14:44:01 +0800 Subject: VSOCK: Introduce virtio-vsock.ko VM sockets virtio transport implementation. This module runs in guest kernel. Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport.c | 466 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 466 insertions(+) create mode 100644 net/vmw_vsock/virtio_transport.c (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c new file mode 100644 index 000000000000..df65dca55fa1 --- /dev/null +++ b/net/vmw_vsock/virtio_transport.c @@ -0,0 +1,466 @@ +/* + * virtio transport for vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He + * Stefan Hajnoczi + * + * Some of the code is taken from Gerd Hoffmann's + * early virtio-vsock proof-of-concept bits. + * + * This work is licensed under the terms of the GNU GPL, version 2. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct workqueue_struct *virtio_vsock_workqueue; +static struct virtio_vsock *the_virtio_vsock; +static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ +static void virtio_vsock_rx_fill(struct virtio_vsock *vsock); + +struct virtio_vsock { + /* Virtio device */ + struct virtio_device *vdev; + /* Virtio virtqueue */ + struct virtqueue *vqs[VSOCK_VQ_MAX]; + /* Wait queue for send pkt */ + wait_queue_head_t queue_wait; + /* Work item to send pkt */ + struct work_struct tx_work; + /* Work item to recv pkt */ + struct work_struct rx_work; + /* Mutex to protect send pkt*/ + struct mutex tx_lock; + /* Mutex to protect recv pkt*/ + struct mutex rx_lock; + /* Number of recv buffers */ + int rx_buf_nr; + /* Number of max recv buffers */ + int rx_buf_max_nr; + /* Used for global tx buf limitation */ + u32 total_tx_buf; + /* Guest context id, just like guest ip address */ + u32 guest_cid; +}; + +static struct virtio_vsock *virtio_vsock_get(void) +{ + return the_virtio_vsock; +} + +static u32 virtio_transport_get_local_cid(void) +{ + struct virtio_vsock *vsock = virtio_vsock_get(); + + return vsock->guest_cid; +} + +static int +virtio_transport_send_pkt(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info) +{ + u32 src_cid, src_port, dst_cid, dst_port; + int ret, in_sg = 0, out_sg = 0; + struct virtio_transport *trans; + struct virtio_vsock_pkt *pkt; + struct virtio_vsock *vsock; + struct scatterlist hdr, buf, *sgs[2]; + struct virtqueue *vq; + u32 pkt_len = info->pkt_len; + DEFINE_WAIT(wait); + + vsock = virtio_vsock_get(); + if (!vsock) + return -ENODEV; + + src_cid = virtio_transport_get_local_cid(); + src_port = vsk->local_addr.svm_port; + if (!info->remote_cid) { + dst_cid = vsk->remote_addr.svm_cid; + dst_port = vsk->remote_addr.svm_port; + } else { + dst_cid = info->remote_cid; + dst_port = info->remote_port; + } + + trans = 
vsk->trans; + vq = vsock->vqs[VSOCK_VQ_TX]; + + if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) + pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; + pkt_len = virtio_transport_get_credit(trans, pkt_len); + /* Do not send zero length OP_RW pkt*/ + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) + return pkt_len; + + /* Respect global tx buf limitation */ + mutex_lock(&vsock->tx_lock); + while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { + prepare_to_wait_exclusive(&vsock->queue_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&vsock->tx_lock); + schedule(); + mutex_lock(&vsock->tx_lock); + finish_wait(&vsock->queue_wait, &wait); + } + vsock->total_tx_buf += pkt_len; + mutex_unlock(&vsock->tx_lock); + + pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len, + src_cid, src_port, + dst_cid, dst_port); + if (!pkt) { + mutex_lock(&vsock->tx_lock); + vsock->total_tx_buf -= pkt_len; + mutex_unlock(&vsock->tx_lock); + virtio_transport_put_credit(trans, pkt_len); + return -ENOMEM; + } + + pr_debug("%s:info->pkt_len= %d\n", __func__, info->pkt_len); + + /* Will be released in virtio_transport_send_pkt_work */ + sock_hold(&trans->vsk->sk); + virtio_transport_inc_tx_pkt(pkt); + + /* Put pkt in the virtqueue */ + sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); + sgs[out_sg++] = &hdr; + if (info->msg && info->pkt_len > 0) { + sg_init_one(&buf, pkt->buf, pkt->len); + sgs[out_sg++] = &buf; + } + + mutex_lock(&vsock->tx_lock); + while ((ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, + GFP_KERNEL)) < 0) { + prepare_to_wait_exclusive(&vsock->queue_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&vsock->tx_lock); + schedule(); + mutex_lock(&vsock->tx_lock); + finish_wait(&vsock->queue_wait, &wait); + } + virtqueue_kick(vq); + mutex_unlock(&vsock->tx_lock); + + return pkt_len; +} + +static struct virtio_transport_pkt_ops virtio_ops = { + .send_pkt = virtio_transport_send_pkt, +}; + +static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) +{ + int 
buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; + struct virtio_vsock_pkt *pkt; + struct scatterlist hdr, buf, *sgs[2]; + struct virtqueue *vq; + int ret; + + vq = vsock->vqs[VSOCK_VQ_RX]; + + do { + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) { + pr_debug("%s: fail to allocate pkt\n", __func__); + goto out; + } + + /* TODO: use mergeable rx buffer */ + pkt->buf = kmalloc(buf_len, GFP_KERNEL); + if (!pkt->buf) { + pr_debug("%s: fail to allocate pkt->buf\n", __func__); + goto err; + } + + sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); + sgs[0] = &hdr; + + sg_init_one(&buf, pkt->buf, buf_len); + sgs[1] = &buf; + ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); + if (ret) + goto err; + vsock->rx_buf_nr++; + } while (vq->num_free); + if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) + vsock->rx_buf_max_nr = vsock->rx_buf_nr; +out: + virtqueue_kick(vq); + return; +err: + virtqueue_kick(vq); + virtio_transport_free_pkt(pkt); + return; +} + +static void virtio_transport_send_pkt_work(struct work_struct *work) +{ + struct virtio_vsock *vsock = + container_of(work, struct virtio_vsock, tx_work); + struct virtio_vsock_pkt *pkt; + bool added = false; + struct virtqueue *vq; + unsigned int len; + struct sock *sk; + + vq = vsock->vqs[VSOCK_VQ_TX]; + mutex_lock(&vsock->tx_lock); + do { + virtqueue_disable_cb(vq); + while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { + sk = &pkt->trans->vsk->sk; + virtio_transport_dec_tx_pkt(pkt); + /* Release refcnt taken in virtio_transport_send_pkt */ + sock_put(sk); + vsock->total_tx_buf -= pkt->len; + virtio_transport_free_pkt(pkt); + added = true; + } + } while (!virtqueue_enable_cb(vq)); + mutex_unlock(&vsock->tx_lock); + + if (added) + wake_up(&vsock->queue_wait); +} + +static void virtio_transport_recv_pkt_work(struct work_struct *work) +{ + struct virtio_vsock *vsock = + container_of(work, struct virtio_vsock, rx_work); + struct virtio_vsock_pkt *pkt; + struct virtqueue *vq; + unsigned int len; + + vq = 
vsock->vqs[VSOCK_VQ_RX]; + mutex_lock(&vsock->rx_lock); + do { + virtqueue_disable_cb(vq); + while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { + pkt->len = len; + virtio_transport_recv_pkt(pkt); + vsock->rx_buf_nr--; + } + } while (!virtqueue_enable_cb(vq)); + + if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) + virtio_vsock_rx_fill(vsock); + mutex_unlock(&vsock->rx_lock); +} + +static void virtio_vsock_ctrl_done(struct virtqueue *vq) +{ +} + +static void virtio_vsock_tx_done(struct virtqueue *vq) +{ + struct virtio_vsock *vsock = vq->vdev->priv; + + if (!vsock) + return; + queue_work(virtio_vsock_workqueue, &vsock->tx_work); +} + +static void virtio_vsock_rx_done(struct virtqueue *vq) +{ + struct virtio_vsock *vsock = vq->vdev->priv; + + if (!vsock) + return; + queue_work(virtio_vsock_workqueue, &vsock->rx_work); +} + +static int +virtio_transport_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk) +{ + struct virtio_transport *trans; + int ret; + + ret = virtio_transport_do_socket_init(vsk, psk); + if (ret) + return ret; + + trans = vsk->trans; + trans->ops = &virtio_ops; + return ret; +} + +static struct vsock_transport virtio_transport = { + .get_local_cid = virtio_transport_get_local_cid, + + .init = virtio_transport_socket_init, + .destruct = virtio_transport_destruct, + .release = virtio_transport_release, + .connect = virtio_transport_connect, + .shutdown = virtio_transport_shutdown, + + .dgram_bind = virtio_transport_dgram_bind, + .dgram_dequeue = virtio_transport_dgram_dequeue, + .dgram_enqueue = virtio_transport_dgram_enqueue, + .dgram_allow = virtio_transport_dgram_allow, + + .stream_dequeue = virtio_transport_stream_dequeue, + .stream_enqueue = virtio_transport_stream_enqueue, + .stream_has_data = virtio_transport_stream_has_data, + .stream_has_space = virtio_transport_stream_has_space, + .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, + .stream_is_active = virtio_transport_stream_is_active, + .stream_allow = 
virtio_transport_stream_allow, + + .notify_poll_in = virtio_transport_notify_poll_in, + .notify_poll_out = virtio_transport_notify_poll_out, + .notify_recv_init = virtio_transport_notify_recv_init, + .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, + .notify_send_init = virtio_transport_notify_send_init, + .notify_send_pre_block = virtio_transport_notify_send_pre_block, + .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, + + .set_buffer_size = virtio_transport_set_buffer_size, + .set_min_buffer_size = virtio_transport_set_min_buffer_size, + .set_max_buffer_size = virtio_transport_set_max_buffer_size, + .get_buffer_size = virtio_transport_get_buffer_size, + .get_min_buffer_size = virtio_transport_get_min_buffer_size, + .get_max_buffer_size = virtio_transport_get_max_buffer_size, +}; + +static int virtio_vsock_probe(struct virtio_device *vdev) +{ + vq_callback_t *callbacks[] = { + virtio_vsock_ctrl_done, + virtio_vsock_rx_done, + virtio_vsock_tx_done, + }; + const char *names[] = { + "ctrl", + "rx", + "tx", + }; + struct virtio_vsock *vsock = NULL; + u32 guest_cid; + int ret; + + ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); + if (ret) + return ret; + + /* Only one virtio-vsock device per guest is supported */ + if (the_virtio_vsock) { + ret = -EBUSY; + goto out; + } + + vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); + if (!vsock) { + ret = -ENOMEM; + goto out; + } + + vsock->vdev = vdev; + + ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, + vsock->vqs, callbacks, names); + if (ret < 0) + goto out; + + vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), + &guest_cid, sizeof(guest_cid)); + vsock->guest_cid = le32_to_cpu(guest_cid); + pr_debug("%s:guest_cid=%d\n", __func__, 
vsock->guest_cid); + + ret = vsock_core_init(&virtio_transport); + if (ret < 0) + goto out_vqs; + + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + + vdev->priv = the_virtio_vsock = vsock; + init_waitqueue_head(&vsock->queue_wait); + mutex_init(&vsock->tx_lock); + mutex_init(&vsock->rx_lock); + INIT_WORK(&vsock->rx_work, virtio_transport_recv_pkt_work); + INIT_WORK(&vsock->tx_work, virtio_transport_send_pkt_work); + + mutex_lock(&vsock->rx_lock); + virtio_vsock_rx_fill(vsock); + mutex_unlock(&vsock->rx_lock); + + mutex_unlock(&the_virtio_vsock_mutex); + return 0; + +out_vqs: + vsock->vdev->config->del_vqs(vsock->vdev); +out: + kfree(vsock); + mutex_unlock(&the_virtio_vsock_mutex); + return ret; +} + +static void virtio_vsock_remove(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + + mutex_lock(&the_virtio_vsock_mutex); + the_virtio_vsock = NULL; + vsock_core_exit(); + mutex_unlock(&the_virtio_vsock_mutex); + + kfree(vsock); +} + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static unsigned int features[] = { +}; + +static struct virtio_driver virtio_vsock_driver = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtio_vsock_probe, + .remove = virtio_vsock_remove, +}; + +static int __init virtio_vsock_init(void) +{ + int ret; + + virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); + if (!virtio_vsock_workqueue) + return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); + if (ret) + destroy_workqueue(virtio_vsock_workqueue); + return ret; +} + +static void __exit virtio_vsock_exit(void) +{ + unregister_virtio_driver(&virtio_vsock_driver); + destroy_workqueue(virtio_vsock_workqueue); +} + +module_init(virtio_vsock_init); +module_exit(virtio_vsock_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); 
+MODULE_DESCRIPTION("virtio transport for vsock"); +MODULE_DEVICE_TABLE(virtio, id_table); -- cgit v1.2.3 From 8a2a2029893b4c35d1aba2932111a1a164b9c948 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 2 Dec 2015 14:44:03 +0800 Subject: VSOCK: Add Makefile and Kconfig Enable virtio-vsock and vhost-vsock. Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- drivers/vhost/Kconfig | 4 ++++ drivers/vhost/Kconfig.vsock | 7 +++++++ drivers/vhost/Makefile | 4 ++++ net/vmw_vsock/Kconfig | 18 ++++++++++++++++++ net/vmw_vsock/Makefile | 2 ++ 5 files changed, 35 insertions(+) create mode 100644 drivers/vhost/Kconfig.vsock (limited to 'net') diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 533eaf04f12f..81449bfc8d3b 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -47,3 +47,7 @@ config VHOST_CROSS_ENDIAN_LEGACY adds some overhead, it is disabled by default. If unsure, say "N". + +if STAGING +source "drivers/vhost/Kconfig.vsock" +endif diff --git a/drivers/vhost/Kconfig.vsock b/drivers/vhost/Kconfig.vsock new file mode 100644 index 000000000000..3491865d3eb9 --- /dev/null +++ b/drivers/vhost/Kconfig.vsock @@ -0,0 +1,7 @@ +config VHOST_VSOCK + tristate "vhost virtio-vsock driver" + depends on VSOCKETS && EVENTFD + select VIRTIO_VSOCKETS_COMMON + default n + ---help--- + Say M here to enable the vhost-vsock for virtio-vsock guests diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index e0441c34db1c..6b012b986b57 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -4,5 +4,9 @@ vhost_net-y := net.o obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o vhost_scsi-y := scsi.o +obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o +vhost_vsock-y := vsock.o + obj-$(CONFIG_VHOST_RING) += vringh.o + obj-$(CONFIG_VHOST) += vhost.o diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 14810abedc2e..74e0bc887a33 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -26,3 +26,21 @@ config 
VMWARE_VMCI_VSOCKETS To compile this driver as a module, choose M here: the module will be called vmw_vsock_vmci_transport. If unsure, say N. + +config VIRTIO_VSOCKETS + tristate "virtio transport for Virtual Sockets" + depends on VSOCKETS && VIRTIO + select VIRTIO_VSOCKETS_COMMON + help + This module implements a virtio transport for Virtual Sockets. + + Enable this transport if your Virtual Machine runs on Qemu/KVM. + + To compile this driver as a module, choose M here: the module + will be called virtio_vsock_transport. If unsure, say N. + +config VIRTIO_VSOCKETS_COMMON + tristate + ---help--- + This option is selected by any driver which needs to access + the virtio_vsock. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index 2ce52d70f224..cf4c29439081 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_VSOCKETS) += vsock.o obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o +obj-$(CONFIG_VIRTIO_VSOCKETS) += virtio_transport.o +obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += virtio_transport_common.o vsock-y += af_vsock.o vsock_addr.o -- cgit v1.2.3 From 681b4d88ad8e5b67c34f4d0a40448efb94e2b227 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 2 Dec 2015 16:27:39 +0100 Subject: pppox: use standard module auto-loading feature * Register PF_PPPOX with pppox module rather than with pppoe, so that pppoe doesn't get loaded for any PF_PPPOX socket. * Register PX_PROTO_* with standard MODULE_ALIAS_NET_PF_PROTO() instead of using pppox's own naming scheme. * While there, add auto-loading feature for pptp. Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller --- drivers/net/ppp/pppoe.c | 2 +- drivers/net/ppp/pppox.c | 3 ++- drivers/net/ppp/pptp.c | 1 + net/l2tp/l2tp_ppp.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 277e6827d7cd..b8da2eabac3e 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1202,4 +1202,4 @@ module_exit(pppoe_exit); MODULE_AUTHOR("Michal Ostrowski "); MODULE_DESCRIPTION("PPP over Ethernet driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_PPPOX); +MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OE); diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 0200de74eebc..b9c8be6283d3 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -113,7 +113,7 @@ static int pppox_create(struct net *net, struct socket *sock, int protocol, rc = -EPROTONOSUPPORT; if (!pppox_protos[protocol]) - request_module("pppox-proto-%d", protocol); + request_module("net-pf-%d-proto-%d", PF_PPPOX, protocol); if (!pppox_protos[protocol] || !try_module_get(pppox_protos[protocol]->owner)) goto out; @@ -147,3 +147,4 @@ module_exit(pppox_exit); MODULE_AUTHOR("Michal Ostrowski "); MODULE_DESCRIPTION("PPP over Ethernet driver (generic socket layer)"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_PPPOX); diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index fc69e41d0950..e18e0980bc61 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -718,3 +718,4 @@ module_exit(pptp_exit_module); MODULE_DESCRIPTION("Point-to-Point Tunneling Protocol"); MODULE_AUTHOR("D. 
Kozlov (xeb@mail.ru)"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_PPTP); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 1ad18c55064c..d93f113cb522 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1862,5 +1862,5 @@ MODULE_AUTHOR("James Chapman "); MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); -MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP)); +MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP); MODULE_ALIAS_L2TP_PWTYPE(11); -- cgit v1.2.3 From dc8d1eb305984b1182f5e85de3c3a1f8592b83af Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 2 Dec 2015 15:19:37 -0500 Subject: tipc: fix node reference count bug Commit 5405ff6e15f40f2f ("tipc: convert node lock to rwlock") introduced a bug to the node reference counter handling. When a message is successfully sent in the function tipc_node_xmit(), we return directly after releasing the node lock, instead of continuing and decrementing the node reference counter as we should do. This commit fixes this bug. Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/node.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/tipc/node.c b/net/tipc/node.c index 3f7a4ed71990..fa97d9649a28 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1189,20 +1189,19 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, spin_unlock_bh(&le->lock); } tipc_node_read_unlock(n); - if (likely(!skb_queue_empty(&xmitq))) { + if (likely(!rc)) tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); - return 0; - } - if (unlikely(rc == -ENOBUFS)) + else if (rc == -ENOBUFS) tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); return rc; } - if (unlikely(!in_own_node(net, dnode))) - return rc; - tipc_sk_rcv(net, list); - return 0; + if (likely(in_own_node(net, dnode))) { + tipc_sk_rcv(net, list); + return 0; + } + return rc; } /* tipc_node_xmit_skb(): send single buffer to destination -- cgit v1.2.3 From 3110489117581a980537b6d999a3724214ba772c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Oct 2015 17:35:19 +0200 Subject: mac80211: allow driver to prevent two stations w/ same address Some devices or drivers cannot deal with having the same station address for different virtual interfaces, say as a client to two virtual AP interfaces. Rather than requiring each driver with a limitation like that to enforce it, add a hardware flag for it. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/debugfs.c | 1 + net/mac80211/sta_info.c | 18 ++++++++++++++++-- 3 files changed, 23 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 760bc4d5a2cf..8628118214cc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1901,6 +1901,11 @@ struct ieee80211_txq { * @IEEE80211_HW_BEACON_TX_STATUS: The device/driver provides TX status * for sent beacons. 
* + * @IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR: Hardware (or driver) requires that each + * station has a unique address, i.e. each station entry can be identified + * by just its MAC address; this prevents, for example, the same station + * from connecting to two virtual AP interfaces at the same time. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -1936,6 +1941,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TDLS_WIDER_BW, IEEE80211_HW_SUPPORTS_AMSDU_IN_AMPDU, IEEE80211_HW_BEACON_TX_STATUS, + IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 4d2aaebd4f97..abbdff03ce92 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -125,6 +125,7 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = { FLAG(TDLS_WIDER_BW), FLAG(SUPPORTS_AMSDU_IN_AMPDU), FLAG(BEACON_TX_STATUS), + FLAG(NEEDS_UNIQUE_STA_ADDR), /* keep last for the build bug below */ (void *)0x1 diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f91d1873218c..8f630f51d9bd 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -435,6 +435,19 @@ static int sta_info_insert_check(struct sta_info *sta) is_multicast_ether_addr(sta->sta.addr))) return -EINVAL; + /* Strictly speaking this isn't necessary as we hold the mutex, but + * the rhashtable code can't really deal with that distinction. We + * do require the mutex for correctness though. 
+ */ + rcu_read_lock(); + lockdep_assert_held(&sdata->local->sta_mtx); + if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) && + ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) { + rcu_read_unlock(); + return -ENOTUNIQ; + } + rcu_read_unlock(); + return 0; } @@ -554,14 +567,15 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) might_sleep(); + mutex_lock(&local->sta_mtx); + err = sta_info_insert_check(sta); if (err) { + mutex_unlock(&local->sta_mtx); rcu_read_lock(); goto out_free; } - mutex_lock(&local->sta_mtx); - err = sta_info_insert_finish(sta); if (err) goto out_free; -- cgit v1.2.3 From a1056b1baaa887de52a76a5fcf5aeb4327c96c8a Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 22 Oct 2015 22:27:46 +0300 Subject: cfg80211: Add missing tracing to cfg80211 Add missing tracing for: 1. start_radar_detection() 2. set_mcast_rates() 3. set_coalesce() Signed-off-by: Ilan Peer Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 9 ++++---- net/wireless/rdev-ops.h | 43 ++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c71e274c810a..41e57d0c4d43 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6507,8 +6507,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (WARN_ON(!cac_time_ms)) cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; - err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef, - cac_time_ms); + err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms); if (!err) { wdev->chandef = chandef; wdev->cac_started = true; @@ -7571,7 +7570,7 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info) if (!nl80211_parse_mcast_rate(rdev, mcast_rate, nla_rate)) return -EINVAL; - err = 
rdev->ops->set_mcast_rate(&rdev->wiphy, dev, mcast_rate); + err = rdev_set_mcast_rate(rdev, dev, mcast_rate); return err; } @@ -9716,7 +9715,7 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) { cfg80211_rdev_free_coalesce(rdev); - rdev->ops->set_coalesce(&rdev->wiphy, NULL); + rdev_set_coalesce(rdev, NULL); return 0; } @@ -9744,7 +9743,7 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) i++; } - err = rdev->ops->set_coalesce(&rdev->wiphy, &new_coalesce); + err = rdev_set_coalesce(rdev, &new_coalesce); if (err) goto error; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index c23516d0f807..b8cc594d409d 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1020,4 +1020,47 @@ rdev_tdls_cancel_channel_switch(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static inline int +rdev_start_radar_detection(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_chan_def *chandef, + u32 cac_time_ms) +{ + int ret = -ENOTSUPP; + + trace_rdev_start_radar_detection(&rdev->wiphy, dev, chandef, + cac_time_ms); + if (rdev->ops->start_radar_detection) + ret = rdev->ops->start_radar_detection(&rdev->wiphy, dev, + chandef, cac_time_ms); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int +rdev_set_mcast_rate(struct cfg80211_registered_device *rdev, + struct net_device *dev, + int mcast_rate[IEEE80211_NUM_BANDS]) +{ + int ret = -ENOTSUPP; + + trace_rdev_set_mcast_rate(&rdev->wiphy, dev, mcast_rate); + if (rdev->ops->set_mcast_rate) + ret = rdev->ops->set_mcast_rate(&rdev->wiphy, dev, mcast_rate); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int +rdev_set_coalesce(struct cfg80211_registered_device *rdev, + struct cfg80211_coalesce *coalesce) +{ + int ret = -ENOTSUPP; + + trace_rdev_set_coalesce(&rdev->wiphy, coalesce); + if 
(rdev->ops->set_coalesce) + ret = rdev->ops->set_coalesce(&rdev->wiphy, coalesce); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 0c392d36781b..62d9b961ce64 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2818,6 +2818,67 @@ TRACE_EVENT(cfg80211_stop_iface, WIPHY_PR_ARG, WDEV_PR_ARG) ); +TRACE_EVENT(rdev_start_radar_detection, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_chan_def *chandef, + u32 cac_time_ms), + TP_ARGS(wiphy, netdev, chandef, cac_time_ms), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + CHAN_DEF_ENTRY + __field(u32, cac_time_ms) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + __entry->cac_time_ms = cac_time_ms; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT + ", cac_time_ms=%u", + WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, + __entry->cac_time_ms) +); + +TRACE_EVENT(rdev_set_mcast_rate, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + int mcast_rate[IEEE80211_NUM_BANDS]), + TP_ARGS(wiphy, netdev, mcast_rate), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __array(int, mcast_rate, IEEE80211_NUM_BANDS) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + memcpy(__entry->mcast_rate, mcast_rate, + sizeof(int) * IEEE80211_NUM_BANDS); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " + "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]", + WIPHY_PR_ARG, NETDEV_PR_ARG, + __entry->mcast_rate[IEEE80211_BAND_2GHZ], + __entry->mcast_rate[IEEE80211_BAND_5GHZ], + __entry->mcast_rate[IEEE80211_BAND_60GHZ]) +); + +TRACE_EVENT(rdev_set_coalesce, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_coalesce *coalesce), + TP_ARGS(wiphy, coalesce), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(int, n_rules) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->n_rules = coalesce ? 
coalesce->n_rules : 0; + ), + TP_printk(WIPHY_PR_FMT ", n_rules=%d", + WIPHY_PR_ARG, __entry->n_rules) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 6e045905d1786f62cb3f7ddc6c987f7dc3ad8ed6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Oct 2015 22:27:47 +0300 Subject: cfg80211: add complete data to station add/change tracing Complete the tracepoint with the missing data - it's not printed by default (a lot of it is dynamic arrays) but will be recorded and be available during post-processing. Signed-off-by: Johannes Berg --- net/wireless/trace.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net') diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 62d9b961ce64..5b9139e53199 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -623,12 +623,24 @@ DECLARE_EVENT_CLASS(station_add_change, __field(u32, sta_flags_set) __field(u32, sta_modify_mask) __field(int, listen_interval) + __field(u16, capability) __field(u16, aid) __field(u8, plink_action) __field(u8, plink_state) __field(u8, uapsd_queues) + __field(u8, max_sp) + __field(u8, opmode_notif) + __field(bool, opmode_notif_used) __array(u8, ht_capa, (int)sizeof(struct ieee80211_ht_cap)) + __array(u8, vht_capa, (int)sizeof(struct ieee80211_vht_cap)) __array(char, vlan, IFNAMSIZ) + __dynamic_array(u8, supported_rates, + params->supported_rates_len) + __dynamic_array(u8, ext_capab, params->ext_capab_len) + __dynamic_array(u8, supported_channels, + params->supported_channels_len) + __dynamic_array(u8, supported_oper_classes, + params->supported_oper_classes_len) ), TP_fast_assign( WIPHY_ASSIGN; @@ -646,9 +658,35 @@ DECLARE_EVENT_CLASS(station_add_change, if (params->ht_capa) memcpy(__entry->ht_capa, params->ht_capa, sizeof(struct ieee80211_ht_cap)); + memset(__entry->vht_capa, 0, sizeof(struct ieee80211_vht_cap)); + if (params->vht_capa) + memcpy(__entry->vht_capa, params->vht_capa, + 
sizeof(struct ieee80211_vht_cap)); memset(__entry->vlan, 0, sizeof(__entry->vlan)); if (params->vlan) memcpy(__entry->vlan, params->vlan->name, IFNAMSIZ); + if (params->supported_rates && params->supported_rates_len) + memcpy(__get_dynamic_array(supported_rates), + params->supported_rates, + params->supported_rates_len); + if (params->ext_capab && params->ext_capab_len) + memcpy(__get_dynamic_array(ext_capab), + params->ext_capab, + params->ext_capab_len); + if (params->supported_channels && + params->supported_channels_len) + memcpy(__get_dynamic_array(supported_channels), + params->supported_channels, + params->supported_channels_len); + if (params->supported_oper_classes && + params->supported_oper_classes_len) + memcpy(__get_dynamic_array(supported_oper_classes), + params->supported_oper_classes, + params->supported_oper_classes_len); + __entry->max_sp = params->max_sp; + __entry->capability = params->capability; + __entry->opmode_notif = params->opmode_notif; + __entry->opmode_notif_used = params->opmode_notif_used; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT ", station flags mask: %u, station flags set: %u, " -- cgit v1.2.3 From 0483eeac59876ac37d4edbabd48727a468416d5b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 23 Oct 2015 09:50:03 +0200 Subject: cfg80211: replace ieee80211_ie_split() with an inline The function is a very simple wrapper around another one, just adds a few default parameters, so replace it with a static inline instead of using EXPORT_SYMBOL, reducing the module size slightly. 
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 +++++-- net/wireless/util.c | 7 ------- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2c7bdb81d30c..e568872203a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5173,8 +5173,11 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, * buffer starts, which may be @ielen if the entire (remainder) * of the buffer should be used. */ -size_t ieee80211_ie_split(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, size_t offset); +static inline size_t ieee80211_ie_split(const u8 *ies, size_t ielen, + const u8 *ids, int n_ids, size_t offset) +{ + return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); +} /** * cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN diff --git a/net/wireless/util.c b/net/wireless/util.c index baf7218cec15..010a3c75a677 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1325,13 +1325,6 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, } EXPORT_SYMBOL(ieee80211_ie_split_ric); -size_t ieee80211_ie_split(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, size_t offset) -{ - return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); -} -EXPORT_SYMBOL(ieee80211_ie_split); - bool ieee80211_operating_class_to_band(u8 operating_class, enum ieee80211_band *band) { -- cgit v1.2.3 From d671b2a077a92ff71ad76fba0e8bfd1b7c5ca820 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Nov 2015 11:30:46 +0100 Subject: mac80211: mesh: print MAC address instead of pointer There's no point in printing the mpath pointer since it can't be used for anything - print the MAC address instead (like in the forwarding case.) 
Signed-off-by: Johannes Berg Acked-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index b3b44a5dd375..dadf8dc6f1cf 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -968,8 +968,8 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) copy = true; } else { mpath_dbg(sdata, - "Not forwarding %p (flags %#x)\n", - gate->mpath, gate->mpath->flags); + "Not forwarding to %pM (flags %#x)\n", + gate->mpath->dst, gate->mpath->flags); } } -- cgit v1.2.3 From 996bf99c71944590e4f56504d5ec99ddd0d85e9c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Nov 2015 12:02:31 +0100 Subject: lib80211: ratelimit key index mismatch This indicates a driver key selection issue, but even then there's no point in printing it all the time, so ratelimit it. Also remove the priv pointer from it -- people debugging will only have a single device anyway and it's useless as anything but a cookie. 
Signed-off-by: Johannes Berg --- net/wireless/lib80211_crypt_ccmp.c | 4 ++-- net/wireless/lib80211_crypt_tkip.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index dc0e59e53dbf..6beab0cfcb99 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -311,8 +311,8 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } keyidx >>= 6; if (key->key_idx != keyidx) { - printk(KERN_DEBUG "CCMP: RX tkey->key_idx=%d frame " - "keyidx=%d priv=%p\n", key->key_idx, keyidx, priv); + net_dbg_ratelimited("CCMP: RX tkey->key_idx=%d frame keyidx=%d\n", + key->key_idx, keyidx); return -6; } if (!key->key_set) { diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 8c90ba79e56e..3cd819539241 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -434,8 +434,8 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } keyidx >>= 6; if (tkey->key_idx != keyidx) { - printk(KERN_DEBUG "TKIP: RX tkey->key_idx=%d frame " - "keyidx=%d priv=%p\n", tkey->key_idx, keyidx, priv); + net_dbg_ratelimited("TKIP: RX tkey->key_idx=%d frame keyidx=%d\n", + tkey->key_idx, keyidx); return -6; } if (!tkey->key_set) { -- cgit v1.2.3 From 441275e1038a803d61df41eae9a44d26486d8301 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Nov 2015 12:34:24 +0100 Subject: mac80211: remove string from unaligned packet warning This really should never happen except very early in the process of bringing up a new driver, at which point you'll have to add more debugging in the driver and this string isn't useful. Remove it and save some size (when it's even compiled in.) 
Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8bae5de0dc44..1f827539d828 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -661,8 +661,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - WARN_ONCE((unsigned long)rx->skb->data & 1, - "unaligned packet at 0x%p\n", rx->skb->data); + WARN_ON_ONCE((unsigned long)rx->skb->data & 1); #endif } -- cgit v1.2.3 From 1b9df2d20eee9f3a675d1a3a7aa3640e6d8d7e94 Mon Sep 17 00:00:00 2001 From: Ola Olsson Date: Mon, 9 Nov 2015 22:02:09 +0100 Subject: cfg80211: ocb: Fix null pointer deref if join_ocb is unimplemented Signed-off-by: Ola Olsson Signed-off-by: Johannes Berg --- net/wireless/ocb.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c index c00d4a792319..e64dbf16330c 100644 --- a/net/wireless/ocb.c +++ b/net/wireless/ocb.c @@ -29,6 +29,9 @@ int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB) return -EOPNOTSUPP; + if (!rdev->ops->join_ocb) + return -EOPNOTSUPP; + if (WARN_ON(!setup->chandef.chan)) return -EINVAL; -- cgit v1.2.3 From 0ead2510f8cec11ce96308d79a1b4ee272fb5238 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 17 Nov 2015 10:24:36 +0200 Subject: mac80211: allow the driver to send EOSP when needed This can happen when the driver needs to send less frames than expected and then needs to close the SP. Mac80211 still needs to set the more_data properly based on its buffer state (ps_tx_buffer and buffered frames on other TIDs). 
To that end, refactor the code that delivers frames upon uAPSD trigger frames to be able to get only the more_data bit without actually delivering those frames in case the driver is just asking to set a NDP with EOSP and MORE_DATA bit properly set. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 22 ++++++++ net/mac80211/sta_info.c | 144 +++++++++++++++++++++++++++++++++--------------- net/mac80211/trace.h | 25 +++++++++ 3 files changed, 148 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8628118214cc..18ac733afc91 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4868,6 +4868,28 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, */ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta); +/** + * ieee80211_send_eosp_nullfunc - ask mac80211 to send NDP with EOSP + * @pubsta: the station + * @tid: the tid of the NDP + * + * Sometimes the device understands that it needs to close + * the Service Period unexpectedly. This can happen when + * sending frames that are filling holes in the BA window. + * In this case, the device can ask mac80211 to send a + * Nullfunc frame with EOSP set. When that happens, the + * driver must have called ieee80211_sta_set_buffered() to + * let mac80211 know that there are no buffered frames any + * more, otherwise mac80211 will get the more_data bit wrong. + * The low level driver must have made sure that the frame + * will be sent despite the station being in power-save. + * Mac80211 won't call allow_buffered_frames(). + * Note that calling this function, doesn't exempt the driver + * from closing the EOSP properly, it will still have to call + * ieee80211_sta_eosp when the NDP is sent. 
+ */ +void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid); + /** * ieee80211_iter_keys - iterate keys programmed into the device * @hw: pointer obtained from ieee80211_alloc_hw() diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8f630f51d9bd..723fa30aafc5 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2,6 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright (C) 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1244,11 +1245,11 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) ieee80211_check_fast_xmit(sta); } -static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, int tid, +static void ieee80211_send_null_response(struct sta_info *sta, int tid, enum ieee80211_frame_release_type reason, - bool call_driver) + bool call_driver, bool more_data) { + struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; @@ -1288,9 +1289,13 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, if (qos) { nullfunc->qos_ctrl = cpu_to_le16(tid); - if (reason == IEEE80211_FRAME_RELEASE_UAPSD) + if (reason == IEEE80211_FRAME_RELEASE_UAPSD) { nullfunc->qos_ctrl |= cpu_to_le16(IEEE80211_QOS_CTL_EOSP); + if (more_data) + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + } } info = IEEE80211_SKB_CB(skb); @@ -1337,22 +1342,48 @@ static int find_highest_prio_tid(unsigned long tids) return fls(tids) - 1; } +/* Indicates if the MORE_DATA bit should be set in the last + * frame obtained by ieee80211_sta_ps_get_frames. 
+ * Note that driver_release_tids is relevant only if + * reason = IEEE80211_FRAME_RELEASE_PSPOLL + */ +static bool +ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs, + enum ieee80211_frame_release_type reason, + unsigned long driver_release_tids) +{ + int ac; + + /* If the driver has data on more than one TID then + * certainly there's more data if we release just a + * single frame now (from a single TID). This will + * only happen for PS-Poll. + */ + if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && + hweight16(driver_release_tids) > 1) + return true; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + if (ignored_acs & BIT(ac)) + continue; + + if (!skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac])) + return true; + } + + return false; +} + static void -ieee80211_sta_ps_deliver_response(struct sta_info *sta, - int n_frames, u8 ignored_acs, - enum ieee80211_frame_release_type reason) +ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs, + enum ieee80211_frame_release_type reason, + struct sk_buff_head *frames, + unsigned long *driver_release_tids) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - bool more_data = false; int ac; - unsigned long driver_release_tids = 0; - struct sk_buff_head frames; - - /* Service or PS-Poll period starts */ - set_sta_flag(sta, WLAN_STA_SP); - - __skb_queue_head_init(&frames); /* Get response frame(s) and more data bit for the last one. 
*/ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { @@ -1366,26 +1397,13 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* if we already have frames from software, then we can't also * release from hardware queues */ - if (skb_queue_empty(&frames)) { - driver_release_tids |= sta->driver_buffered_tids & tids; - driver_release_tids |= sta->txq_buffered_tids & tids; + if (skb_queue_empty(frames)) { + *driver_release_tids |= + sta->driver_buffered_tids & tids; + *driver_release_tids |= sta->txq_buffered_tids & tids; } - if (driver_release_tids) { - /* If the driver has data on more than one TID then - * certainly there's more data if we release just a - * single frame now (from a single TID). This will - * only happen for PS-Poll. - */ - if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && - hweight16(driver_release_tids) > 1) { - more_data = true; - driver_release_tids = - BIT(find_highest_prio_tid( - driver_release_tids)); - break; - } - } else { + if (!*driver_release_tids) { struct sk_buff *skb; while (n_frames > 0) { @@ -1399,20 +1417,44 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, if (!skb) break; n_frames--; - __skb_queue_tail(&frames, skb); + __skb_queue_tail(frames, skb); } } - /* If we have more frames buffered on this AC, then set the - * more-data bit and abort the loop since we can't send more - * data from other ACs before the buffered frames from this. + /* If we have more frames buffered on this AC, then abort the + * loop since we can't send more data from other ACs before + * the buffered frames from this. 
*/ if (!skb_queue_empty(&sta->tx_filtered[ac]) || - !skb_queue_empty(&sta->ps_tx_buf[ac])) { - more_data = true; + !skb_queue_empty(&sta->ps_tx_buf[ac])) break; - } } +} + +static void +ieee80211_sta_ps_deliver_response(struct sta_info *sta, + int n_frames, u8 ignored_acs, + enum ieee80211_frame_release_type reason) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + unsigned long driver_release_tids = 0; + struct sk_buff_head frames; + bool more_data; + + /* Service or PS-Poll period starts */ + set_sta_flag(sta, WLAN_STA_SP); + + __skb_queue_head_init(&frames); + + ieee80211_sta_ps_get_frames(sta, n_frames, ignored_acs, reason, + &frames, &driver_release_tids); + + more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids); + + if (reason == IEEE80211_FRAME_RELEASE_PSPOLL) + driver_release_tids = + BIT(find_highest_prio_tid(driver_release_tids)); if (skb_queue_empty(&frames) && !driver_release_tids) { int tid; @@ -1435,7 +1477,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* This will evaluate to 1, 3, 5 or 7. 
*/ tid = 7 - ((ffs(~ignored_acs) - 1) << 1); - ieee80211_send_null_response(sdata, sta, tid, reason, true); + ieee80211_send_null_response(sta, tid, reason, true, false); } else if (!driver_release_tids) { struct sk_buff_head pending; struct sk_buff *skb; @@ -1535,8 +1577,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, if (need_null) ieee80211_send_null_response( - sdata, sta, find_highest_prio_tid(tids), - reason, false); + sta, find_highest_prio_tid(tids), + reason, false, false); sta_info_recalc_tim(sta); } else { @@ -1674,6 +1716,22 @@ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta) } EXPORT_SYMBOL(ieee80211_sta_eosp); +void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + enum ieee80211_frame_release_type reason; + bool more_data; + + trace_api_send_eosp_nullfunc(sta->local, pubsta, tid); + + reason = IEEE80211_FRAME_RELEASE_UAPSD; + more_data = ieee80211_sta_ps_more_data(sta, ~sta->sta.uapsd_queues, + reason, 0); + + ieee80211_send_null_response(sta, tid, reason, false, more_data); +} +EXPORT_SYMBOL(ieee80211_send_eosp_nullfunc); + void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, u8 tid, bool buffered) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 56c6d6cfa5a1..a6b4442776a0 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2027,6 +2027,31 @@ TRACE_EVENT(api_eosp, ) ); +TRACE_EVENT(api_send_eosp_nullfunc, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + u8 tid), + + TP_ARGS(local, sta, tid), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(u8, tid) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->tid = tid; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT " tid:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->tid + ) +); + TRACE_EVENT(api_sta_set_buffered, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, -- cgit v1.2.3 From 
ef044763a3ca6b9e0bb65a9ce0cb38c0eca62756 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Tue, 17 Nov 2015 10:24:37 +0200 Subject: mac80211: add atomic uploaded keys iterator add ieee80211_iter_keys_rcu() to iterate over uploaded keys in atomic context (when rcu is locked) The station removal code removes the keys only after calling synchronize_net(), so it's not safe to iterate the keys at this point (and postponing the actual key deletion with call_rcu() might result in some badly-ordered ops calls). Add a flag to indicate a station is being removed, and skip the configured keys if it's set. Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 24 +++++++++++++++++++++ net/mac80211/key.c | 56 +++++++++++++++++++++++++++++++++++++++++++++---- net/mac80211/sta_info.c | 1 + net/mac80211/sta_info.h | 2 ++ 4 files changed, 79 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 18ac733afc91..a68051c41ac3 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4917,6 +4917,30 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, void *data), void *iter_data); +/** + * ieee80211_iter_keys_rcu - iterate keys programmed into the device + * @hw: pointer obtained from ieee80211_alloc_hw() + * @vif: virtual interface to iterate, may be %NULL for all + * @iter: iterator function that will be called for each key + * @iter_data: custom data to pass to the iterator function + * + * This function can be used to iterate all the keys known to + * mac80211, even those that weren't previously programmed into + * the device. Note that due to locking reasons, keys of station + * in removal process will be skipped. + * + * This function requires being called in an RCU critical section, + * and thus iter must be atomic. 
+ */ +void ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + void (*iter)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, + void *data), + void *iter_data); + /** * ieee80211_iter_chan_contexts_atomic - iterate channel contexts * @hw: pointre obtained from ieee80211_alloc_hw(). diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 44388d6a1d8e..5e5bc599da4c 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2008 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -320,7 +321,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, return; if (new) - list_add_tail(&new->list, &sdata->key_list); + list_add_tail_rcu(&new->list, &sdata->key_list); WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); @@ -368,7 +369,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } if (old) - list_del(&old->list); + list_del_rcu(&old->list); } struct ieee80211_key * @@ -592,8 +593,8 @@ static void ieee80211_key_destroy(struct ieee80211_key *key, return; /* - * Synchronize so the TX path can no longer be using - * this key before we free/remove it. + * Synchronize so the TX path and rcu key iterators + * can no longer be using this key before we free/remove it. 
*/ synchronize_net(); @@ -744,6 +745,53 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_iter_keys); +static void +_ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, + struct ieee80211_sub_if_data *sdata, + void (*iter)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, + void *data), + void *iter_data) +{ + struct ieee80211_key *key; + + list_for_each_entry_rcu(key, &sdata->key_list, list) { + /* skip keys of station in removal process */ + if (key->sta && key->sta->removed) + continue; + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + continue; + + iter(hw, &sdata->vif, + key->sta ? &key->sta->sta : NULL, + &key->conf, iter_data); + } +} + +void ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + void (*iter)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, + void *data), + void *iter_data) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + + if (vif) { + sdata = vif_to_sdata(vif); + _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data); + } else { + list_for_each_entry_rcu(sdata, &local->interfaces, list) + _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data); + } +} +EXPORT_SYMBOL(ieee80211_iter_keys_rcu); + static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, struct list_head *keys) { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 723fa30aafc5..4402ad5b27d1 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -883,6 +883,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) } list_del_rcu(&sta->list); + sta->removed = true; drv_sta_pre_rcu_remove(local, sta->sdata, sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2cafb21b422f..d6051629ed15 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -367,6 
+367,7 @@ DECLARE_EWMA(signal, 1024, 8) * @mesh: mesh STA information * @debugfs: debug filesystem info * @dead: set to true when sta is unlinked + * @removed: set to true when sta is being removed from sta_list * @uploaded: set to true when sta is uploaded to the driver * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) @@ -412,6 +413,7 @@ struct sta_info { u16 listen_interval; bool dead; + bool removed; bool uploaded; -- cgit v1.2.3 From 491728746b500b22f384cb1d0aba76f7c55a9269 Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Mon, 23 Nov 2015 19:27:14 +0100 Subject: cfg80211: reg: Remove unused function parameter Signed-off-by: Michal Sojka Signed-off-by: Johannes Berg --- net/wireless/reg.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2e8d6f39ed56..43b3e577b2ea 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1052,7 +1052,7 @@ static u32 map_regdom_flags(u32 rd_flags) } static const struct ieee80211_reg_rule * -freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, +freq_reg_info_regd(u32 center_freq, const struct ieee80211_regdomain *regd, u32 bw) { int i; @@ -1097,7 +1097,7 @@ __freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 min_bw) u32 bw; for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) { - reg_rule = freq_reg_info_regd(wiphy, center_freq, regd, bw); + reg_rule = freq_reg_info_regd(center_freq, regd, bw); if (!IS_ERR(reg_rule)) return reg_rule; } @@ -1765,8 +1765,7 @@ static void handle_channel_custom(struct wiphy *wiphy, u32 bw; for (bw = MHZ_TO_KHZ(20); bw >= MHZ_TO_KHZ(5); bw = bw / 2) { - reg_rule = freq_reg_info_regd(wiphy, - MHZ_TO_KHZ(chan->center_freq), + reg_rule = freq_reg_info_regd(MHZ_TO_KHZ(chan->center_freq), regd, bw); if (!IS_ERR(reg_rule)) break; -- cgit v1.2.3 From c781944b71f87aa4d30eaaafb4e7573ce94bdcfd Mon Sep 17 00:00:00 2001 From: Michal Sojka 
Date: Mon, 23 Nov 2015 19:27:15 +0100 Subject: cfg80211: Remove unused cfg80211_can_use_iftype_chan() Last caller of this function was removed in 3.17 in commit 97dc94f1d933c9df2c0b327066ea130c0e92083f. Signed-off-by: Michal Sojka Signed-off-by: Johannes Berg --- net/wireless/core.h | 7 ---- net/wireless/util.c | 114 ---------------------------------------------------- 2 files changed, 121 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index a618b4b86fa4..022ccad06cbe 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -416,13 +416,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); void cfg80211_process_wdev_events(struct wireless_dev *wdev); -int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - enum nl80211_iftype iftype, - struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode, - u8 radar_detect); - /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable * @wiphy: the wiphy to validate against diff --git a/net/wireless/util.c b/net/wireless/util.c index 010a3c75a677..92770427b211 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1613,120 +1613,6 @@ int cfg80211_check_combinations(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_check_combinations); -int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - enum nl80211_iftype iftype, - struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode, - u8 radar_detect) -{ - struct wireless_dev *wdev_iter; - int num[NUM_NL80211_IFTYPES]; - struct ieee80211_channel - *used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS]; - struct ieee80211_channel *ch; - enum cfg80211_chan_mode chmode; - int num_different_channels = 0; - int total = 1; - int i; - - ASSERT_RTNL(); - - if (WARN_ON(hweight32(radar_detect) > 1)) - return -EINVAL; - - if 
(WARN_ON(iftype >= NUM_NL80211_IFTYPES)) - return -EINVAL; - - /* Always allow software iftypes */ - if (rdev->wiphy.software_iftypes & BIT(iftype)) { - if (radar_detect) - return -EINVAL; - return 0; - } - - memset(num, 0, sizeof(num)); - memset(used_channels, 0, sizeof(used_channels)); - - num[iftype] = 1; - - /* TODO: We'll probably not need this anymore, since this - * should only be called with CHAN_MODE_UNDEFINED. There are - * still a couple of pending calls where other chanmodes are - * used, but we should get rid of them. - */ - switch (chanmode) { - case CHAN_MODE_UNDEFINED: - break; - case CHAN_MODE_SHARED: - WARN_ON(!chan); - used_channels[0] = chan; - num_different_channels++; - break; - case CHAN_MODE_EXCLUSIVE: - num_different_channels++; - break; - } - - list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { - if (wdev_iter == wdev) - continue; - if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) { - if (!wdev_iter->p2p_started) - continue; - } else if (wdev_iter->netdev) { - if (!netif_running(wdev_iter->netdev)) - continue; - } else { - WARN_ON(1); - } - - if (rdev->wiphy.software_iftypes & BIT(wdev_iter->iftype)) - continue; - - /* - * We may be holding the "wdev" mutex, but now need to lock - * wdev_iter. This is OK because once we get here wdev_iter - * is not wdev (tested above), but we need to use the nested - * locking for lockdep. 
- */ - mutex_lock_nested(&wdev_iter->mtx, 1); - __acquire(wdev_iter->mtx); - cfg80211_get_chan_state(wdev_iter, &ch, &chmode, &radar_detect); - wdev_unlock(wdev_iter); - - switch (chmode) { - case CHAN_MODE_UNDEFINED: - break; - case CHAN_MODE_SHARED: - for (i = 0; i < CFG80211_MAX_NUM_DIFFERENT_CHANNELS; i++) - if (!used_channels[i] || used_channels[i] == ch) - break; - - if (i == CFG80211_MAX_NUM_DIFFERENT_CHANNELS) - return -EBUSY; - - if (used_channels[i] == NULL) { - used_channels[i] = ch; - num_different_channels++; - } - break; - case CHAN_MODE_EXCLUSIVE: - num_different_channels++; - break; - } - - num[wdev_iter->iftype]++; - total++; - } - - if (total == 1 && !radar_detect) - return 0; - - return cfg80211_check_combinations(&rdev->wiphy, num_different_channels, - radar_detect, num); -} - int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, const u8 *rates, unsigned int n_rates, u32 *mask) -- cgit v1.2.3 From b115b972997428b9134aba377721fea6486adbd0 Mon Sep 17 00:00:00 2001 From: "Janusz.Dziedzic@tieto.com" Date: Tue, 27 Oct 2015 08:38:40 +0100 Subject: mac80211: add new IEEE80211_VIF_GET_NOA_UPDATE flag Add new VIF flag, that will allow get NOA update notification when driver will request this, even this is not pure P2P vif (eg. STA vif). Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/mlme.c | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a68051c41ac3..7c30faff245f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1321,11 +1321,15 @@ struct ieee80211_channel_switch { * interface. This flag should be set during interface addition, * but may be set/cleared as late as authentication to an AP. It is * only valid for managed/station mode interfaces. 
+ * @IEEE80211_VIF_GET_NOA_UPDATE: request to handle NOA attributes + * and send P2P_PS notification to the driver if NOA changed, even + * this is not pure P2P vif. */ enum ieee80211_vif_flags { IEEE80211_VIF_BEACON_FILTER = BIT(0), IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1), IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2), + IEEE80211_VIF_GET_NOA_UPDATE = BIT(3), }; /** diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b140cc6651f4..123b26d177e8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1930,7 +1930,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE; - if (sdata->vif.p2p) { + if (sdata->vif.p2p || + sdata->vif.driver_flags & IEEE80211_VIF_GET_NOA_UPDATE) { const struct cfg80211_bss_ies *ies; rcu_read_lock(); @@ -3458,7 +3459,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } } - if (sdata->vif.p2p) { + if (sdata->vif.p2p || + sdata->vif.driver_flags & IEEE80211_VIF_GET_NOA_UPDATE) { struct ieee80211_p2p_noa_attr noa = {}; int ret; -- cgit v1.2.3 From 91d3ab46730379e89e1e908c6f62fbcadb3d8f08 Mon Sep 17 00:00:00 2001 From: Vidyullatha Kanchanapally Date: Fri, 30 Oct 2015 19:14:49 +0530 Subject: cfg80211: Add support for aborting an ongoing scan Implement new functionality for aborting an ongoing scan. Add NL80211_CMD_ABORT_SCAN to the nl80211 interface. After aborting the scan, driver shall provide the scan status by calling cfg80211_scan_done(). 
Reviewed-by: Jouni Malinen Signed-off-by: Vidyullatha Kanchanapally Signed-off-by: Sunil Dutt [change command to take wdev instead of netdev so that it can be used on p2p-device scans] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/nl80211.c | 26 ++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 8 ++++++++ net/wireless/trace.h | 4 ++++ 5 files changed, 47 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e568872203a5..9bcaaf7cd15a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2321,6 +2321,8 @@ struct cfg80211_qos_map { * the driver, and will be valid until passed to cfg80211_scan_done(). * For scan results, call cfg80211_inform_bss(); you can call this outside * the scan/scan_done bracket too. + * @abort_scan: Tell the driver to abort an ongoing scan. The driver shall + * indicate the status of the scan through cfg80211_scan_done(). * * @auth: Request to authenticate with the specified peer * (invoked with the wireless_dev mutex held) @@ -2593,6 +2595,7 @@ struct cfg80211_ops { int (*scan)(struct wiphy *wiphy, struct cfg80211_scan_request *request); + void (*abort_scan)(struct wiphy *wiphy, struct wireless_dev *wdev); int (*auth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_auth_request *req); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 07099cb14778..5b7b5ebe7ca8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -820,6 +820,10 @@ * as an event to indicate changes for devices with wiphy-specific regdom * management. * + * @NL80211_CMD_ABORT_SCAN: Stop an ongoing scan. Returns -ENOENT if a scan is + * not running. The driver indicates the status of the scan through + * cfg80211_scan_done(). 
+ * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1006,6 +1010,8 @@ enum nl80211_commands { NL80211_CMD_WIPHY_REG_CHANGE, + NL80211_CMD_ABORT_SCAN, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 41e57d0c4d43..67e7b531db79 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5997,6 +5997,24 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_abort_scan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (!rdev->ops->abort_scan) + return -EOPNOTSUPP; + + if (rdev->scan_msg) + return 0; + + if (!rdev->scan_req) + return -ENOENT; + + rdev_abort_scan(rdev, wdev); + return 0; +} + static int nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, struct cfg80211_sched_scan_request *request, @@ -10944,6 +10962,14 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_ABORT_SCAN, + .doit = nl80211_abort_scan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_GET_SCAN, .policy = nl80211_policy, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index b8cc594d409d..8ae0c04f9fc7 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -427,6 +427,14 @@ static inline int rdev_scan(struct cfg80211_registered_device *rdev, return ret; } +static inline void rdev_abort_scan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + trace_rdev_abort_scan(&rdev->wiphy, wdev); + rdev->ops->abort_scan(&rdev->wiphy, wdev); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_auth(struct 
cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_auth_request *req) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5b9139e53199..09b242b09bed 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2917,6 +2917,10 @@ TRACE_EVENT(rdev_set_coalesce, WIPHY_PR_ARG, __entry->n_rules) ); +DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 91f123f20d64c99db0ce8d2bbc5bb82012d3cc1a Mon Sep 17 00:00:00 2001 From: Vidyullatha Kanchanapally Date: Fri, 30 Oct 2015 19:14:50 +0530 Subject: mac80211: Add support for aborting an ongoing scan This commit adds implementation for abort scan in mac80211. Reviewed-by: Jouni Malinen Signed-off-by: Vidyullatha Kanchanapally Signed-off-by: Sunil Dutt [adjust to wdev change in previous patch and clean up code a bit] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index da471eef07bb..763f2eb9b887 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1994,6 +1994,11 @@ static int ieee80211_scan(struct wiphy *wiphy, return ieee80211_request_scan(sdata, req); } +static void ieee80211_abort_scan(struct wiphy *wiphy, struct wireless_dev *wdev) +{ + ieee80211_scan_cancel(wiphy_priv(wiphy)); +} + static int ieee80211_sched_scan_start(struct wiphy *wiphy, struct net_device *dev, @@ -3842,6 +3847,7 @@ const struct cfg80211_ops mac80211_config_ops = { .suspend = ieee80211_suspend, .resume = ieee80211_resume, .scan = ieee80211_scan, + .abort_scan = ieee80211_abort_scan, .sched_scan_start = ieee80211_sched_scan_start, .sched_scan_stop = ieee80211_sched_scan_stop, .auth = ieee80211_auth, -- cgit v1.2.3 From a9bc31e418733e4c476f4322c90b7c09aab31002 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: 
Thu, 26 Nov 2015 16:26:12 +0100 Subject: cfg80211: use NL80211_ATTR_STA_AID in nl82011_set_station Fix nl80211_set_station() to use the value of NL80211_ATTR_STA_AID attribute instead of NL80211_ATTR_PEER_AID attribute. Signed-off-by: Ayala Beker Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 67e7b531db79..f4afa995b867 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4256,8 +4256,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) * station. Include these parameters here and will check them in * cfg80211_check_station_change(). */ - if (info->attrs[NL80211_ATTR_PEER_AID]) - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); + if (info->attrs[NL80211_ATTR_STA_AID]) + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) params.listen_interval = -- cgit v1.2.3 From bda95eb1d1581cfd79e9717ebda4b7ccd2265351 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Nov 2015 16:26:13 +0100 Subject: cfg80211: handle add_station auth/assoc flag quirks When a new station is added to AP/GO interfaces the default behaviour is for it to be added authenticated and associated, due to backwards compatibility. To prevent that, the driver must be able to do that (setting the NL80211_FEATURE_FULL_AP_CLIENT_STATE feature flag) and userspace must set the flag mask to auth|assoc and clear the set. Handle this quirk in the API entirely in nl80211, and always push the full flags to the drivers. NL80211_FEATURE_FULL_AP_CLIENT_STATE is still required for userspace to be allowed to set the mask including those bits, but after checking that add both flags to the mask and set in case userspace didn't set them otherwise. This obsoletes the mac80211 code handling this difference, no other driver is currently using these flags. 
Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 10 ---------- net/wireless/nl80211.c | 23 +++++++++++++++++++---- 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 763f2eb9b887..1df92fed74c2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1216,16 +1216,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!sta) return -ENOMEM; - /* - * defaults -- if userspace wants something else we'll - * change it accordingly in sta_apply_parameters() - */ - if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) && - !(params->sta_flags_set & (BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED)))) { - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); - } if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f4afa995b867..72de6989dd12 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4359,6 +4359,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct station_parameters params; u8 *mac_addr = NULL; + u32 auth_assoc = BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED); memset(¶ms, 0, sizeof(params)); @@ -4470,10 +4472,23 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* allow authenticated/associated only if driver handles it */ if (!(rdev->wiphy.features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) && - params.sta_flags_mask & - (BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED))) - return -EINVAL; + params.sta_flags_mask & auth_assoc) + return -EINVAL; + + /* Older userspace, or userspace wanting to be compatible with + * !NL80211_FEATURE_FULL_AP_CLIENT_STATE, will not set the auth + * and assoc flags in the 
mask, but assumes the station will be + * added as associated anyway since this was the required driver + * behaviour before NL80211_FEATURE_FULL_AP_CLIENT_STATE was + * introduced. + * In order to not bother drivers with this quirk in the API + * set the flags in both the mask and set for new stations in + * this case. + */ + if (!(params.sta_flags_mask & auth_assoc)) { + params.sta_flags_mask |= auth_assoc; + params.sta_flags_set |= auth_assoc; + } /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); -- cgit v1.2.3 From 90f9ba9b89d88072324251da011b2a59992ad3e1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Nov 2015 16:26:14 +0100 Subject: Revert "mac80211: don't advertise NL80211_FEATURE_FULL_AP_CLIENT_STATE" This reverts commit 45bb780a2147b9995f3d288c44ecb87ca8a330e2, the previous two patches fixed the functionality. Signed-off-by: Johannes Berg --- net/mac80211/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 175ffcf7fb06..858f6b1cb149 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -541,7 +541,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, NL80211_FEATURE_HT_IBSS | NL80211_FEATURE_VIF_TXPOWER | NL80211_FEATURE_MAC_ON_CREATE | - NL80211_FEATURE_USERSPACE_MPM; + NL80211_FEATURE_USERSPACE_MPM | + NL80211_FEATURE_FULL_AP_CLIENT_STATE; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | -- cgit v1.2.3 From 86c7ec9eb154020797c39e1cc7dafa92da02f603 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Nov 2015 15:38:43 +0100 Subject: mac80211: properly free skb when r-o-c for TX fails When freeing the TX skb for an off-channel TX, use the correct API to also free the ACK skb that might have been allocated. 
Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1df92fed74c2..6bcdbab65a8c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3474,7 +3474,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, params->wait, cookie, skb, IEEE80211_ROC_TYPE_MGMT_TX); if (ret) - kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); out_unlock: mutex_unlock(&local->mtx); return ret; -- cgit v1.2.3 From 63b4d8b3736b83126ea531c536eff9f76e4cd739 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Nov 2015 15:41:50 +0100 Subject: mac80211: properly free TX skbs when monitor TX fails We need to free all skbs here, not just the one we peeked from the list. Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bdc224d5053a..3311ce0f3d6c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1431,7 +1431,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; } else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) { - dev_kfree_skb(skb); + ieee80211_purge_tx_queue(&local->hw, skbs); return true; } else vif = NULL; -- cgit v1.2.3 From 856142cdaa483099f50cac70a16898ead8e4094d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Nov 2015 15:29:53 +0100 Subject: mac80211: catch queue stop underflow If some code stops the queues more times than having started (for when refcounting is used), warn on and reset the counter to 0 to avoid blocking forever. 
Signed-off-by: Johannes Berg --- net/mac80211/util.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 74058020b7d6..08af2b307945 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -288,10 +288,13 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, if (!test_bit(reason, &local->queue_stop_reasons[queue])) return; - if (!refcounted) + if (!refcounted) { local->q_stop_reasons[queue][reason] = 0; - else + } else { local->q_stop_reasons[queue][reason]--; + if (WARN_ON(local->q_stop_reasons[queue][reason] < 0)) + local->q_stop_reasons[queue][reason] = 0; + } if (local->q_stop_reasons[queue][reason] == 0) __clear_bit(reason, &local->queue_stop_reasons[queue]); -- cgit v1.2.3 From e673a65952b4ab045a3e3eb200fdf408004fb4fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Nov 2015 20:28:27 +0100 Subject: mac80211: fix mgmt-tx abort cookie and leak If a mgmt-tx operation is aborted before it runs, the wrong cookie is reported back to userspace, and the ack_skb gets leaked since the frame is freed directly instead of freeing it using ieee80211_free_txskb(). Fix that. 
Fixes: 3b79af973cf4 ("mac80211: stop using pointers as userspace cookies") Signed-off-by: Johannes Berg --- net/mac80211/offchannel.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 04401037140e..0fe9f746cd7e 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -308,11 +308,10 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) /* was never transmitted */ if (roc->frame) { - cfg80211_mgmt_tx_status(&roc->sdata->wdev, - (unsigned long)roc->frame, + cfg80211_mgmt_tx_status(&roc->sdata->wdev, roc->mgmt_tx_cookie, roc->frame->data, roc->frame->len, false, GFP_KERNEL); - kfree_skb(roc->frame); + ieee80211_free_txskb(&roc->sdata->local->hw, roc->frame); } if (!roc->mgmt_tx_cookie) -- cgit v1.2.3 From a2fcfccbad43e413de7e7ac39879ba91548f06c1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Nov 2015 17:18:35 +0100 Subject: mac80211: move off-channel/mgmt-tx code to offchannel.c This is quite a bit of code that logically depends here since it has to deal with all the remain-on-channel logic. 
Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 504 ++------------------------------------------- net/mac80211/ieee80211_i.h | 19 +- net/mac80211/offchannel.c | 474 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 502 insertions(+), 495 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6bcdbab65a8c..b8ef33e62851 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2504,294 +2504,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return 0; } -static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local, - struct ieee80211_roc_work *new_roc, - struct ieee80211_roc_work *cur_roc) -{ - unsigned long now = jiffies; - unsigned long remaining = cur_roc->hw_start_time + - msecs_to_jiffies(cur_roc->duration) - - now; - - if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun)) - return false; - - /* if it doesn't fit entirely, schedule a new one */ - if (new_roc->duration > jiffies_to_msecs(remaining)) - return false; - - ieee80211_handle_roc_started(new_roc); - - /* add to dependents so we send the expired event properly */ - list_add_tail(&new_roc->list, &cur_roc->dependents); - return true; -} - -static u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) -{ - lockdep_assert_held(&local->mtx); - - local->roc_cookie_counter++; - - /* wow, you wrapped 64 bits ... 
more likely a bug */ - if (WARN_ON(local->roc_cookie_counter == 0)) - local->roc_cookie_counter++; - - return local->roc_cookie_counter; -} - -static int ieee80211_start_roc_work(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *channel, - unsigned int duration, u64 *cookie, - struct sk_buff *txskb, - enum ieee80211_roc_type type) -{ - struct ieee80211_roc_work *roc, *tmp; - bool queued = false; - int ret; - - lockdep_assert_held(&local->mtx); - - if (local->use_chanctx && !local->ops->remain_on_channel) - return -EOPNOTSUPP; - - roc = kzalloc(sizeof(*roc), GFP_KERNEL); - if (!roc) - return -ENOMEM; - - /* - * If the duration is zero, then the driver - * wouldn't actually do anything. Set it to - * 10 for now. - * - * TODO: cancel the off-channel operation - * when we get the SKB's TX status and - * the wait time was zero before. - */ - if (!duration) - duration = 10; - - roc->chan = channel; - roc->duration = duration; - roc->req_duration = duration; - roc->frame = txskb; - roc->type = type; - roc->sdata = sdata; - INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); - INIT_LIST_HEAD(&roc->dependents); - - /* - * cookie is either the roc cookie (for normal roc) - * or the SKB (for mgmt TX) - */ - if (!txskb) { - roc->cookie = ieee80211_mgmt_tx_cookie(local); - *cookie = roc->cookie; - } else { - roc->mgmt_tx_cookie = *cookie; - } - - /* if there's one pending or we're scanning, queue this one */ - if (!list_empty(&local->roc_list) || - local->scanning || ieee80211_is_radar_required(local)) - goto out_check_combine; - - /* if not HW assist, just queue & schedule work */ - if (!local->ops->remain_on_channel) { - ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); - goto out_queue; - } - - /* otherwise actually kick it off here (for error handling) */ - - ret = drv_remain_on_channel(local, sdata, channel, duration, type); - if (ret) { - kfree(roc); - return ret; - } - - roc->started = true; - goto out_queue; - - 
out_check_combine: - list_for_each_entry(tmp, &local->roc_list, list) { - if (tmp->chan != channel || tmp->sdata != sdata) - continue; - - /* - * Extend this ROC if possible: - * - * If it hasn't started yet, just increase the duration - * and add the new one to the list of dependents. - * If the type of the new ROC has higher priority, modify the - * type of the previous one to match that of the new one. - */ - if (!tmp->started) { - list_add_tail(&roc->list, &tmp->dependents); - tmp->duration = max(tmp->duration, roc->duration); - tmp->type = max(tmp->type, roc->type); - queued = true; - break; - } - - /* If it has already started, it's more difficult ... */ - if (local->ops->remain_on_channel) { - /* - * In the offloaded ROC case, if it hasn't begun, add - * this new one to the dependent list to be handled - * when the master one begins. If it has begun, - * check if it fits entirely within the existing one, - * in which case it will just be dependent as well. - * Otherwise, schedule it by itself. - */ - if (!tmp->hw_begun) { - list_add_tail(&roc->list, &tmp->dependents); - queued = true; - break; - } - - if (ieee80211_coalesce_started_roc(local, roc, tmp)) - queued = true; - } else if (del_timer_sync(&tmp->work.timer)) { - unsigned long new_end; - - /* - * In the software ROC case, cancel the timer, if - * that fails then the finish work is already - * queued/pending and thus we queue the new ROC - * normally, if that succeeds then we can extend - * the timer duration and TX the frame (if any.) 
- */ - - list_add_tail(&roc->list, &tmp->dependents); - queued = true; - - new_end = jiffies + msecs_to_jiffies(roc->duration); - - /* ok, it was started & we canceled timer */ - if (time_after(new_end, tmp->work.timer.expires)) - mod_timer(&tmp->work.timer, new_end); - else - add_timer(&tmp->work.timer); - - ieee80211_handle_roc_started(roc); - } - break; - } - - out_queue: - if (!queued) - list_add_tail(&roc->list, &local->roc_list); - - return 0; -} - -static int ieee80211_remain_on_channel(struct wiphy *wiphy, - struct wireless_dev *wdev, - struct ieee80211_channel *chan, - unsigned int duration, - u64 *cookie) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - struct ieee80211_local *local = sdata->local; - int ret; - - mutex_lock(&local->mtx); - ret = ieee80211_start_roc_work(local, sdata, chan, - duration, cookie, NULL, - IEEE80211_ROC_TYPE_NORMAL); - mutex_unlock(&local->mtx); - - return ret; -} - -static int ieee80211_cancel_roc(struct ieee80211_local *local, - u64 cookie, bool mgmt_tx) -{ - struct ieee80211_roc_work *roc, *tmp, *found = NULL; - int ret; - - mutex_lock(&local->mtx); - list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { - struct ieee80211_roc_work *dep, *tmp2; - - list_for_each_entry_safe(dep, tmp2, &roc->dependents, list) { - if (!mgmt_tx && dep->cookie != cookie) - continue; - else if (mgmt_tx && dep->mgmt_tx_cookie != cookie) - continue; - /* found dependent item -- just remove it */ - list_del(&dep->list); - mutex_unlock(&local->mtx); - - ieee80211_roc_notify_destroy(dep, true); - return 0; - } - - if (!mgmt_tx && roc->cookie != cookie) - continue; - else if (mgmt_tx && roc->mgmt_tx_cookie != cookie) - continue; - - found = roc; - break; - } - - if (!found) { - mutex_unlock(&local->mtx); - return -ENOENT; - } - - /* - * We found the item to cancel, so do that. Note that it - * may have dependents, which we also cancel (and send - * the expired signal for.) 
Not doing so would be quite - * tricky here, but we may need to fix it later. - */ - - if (local->ops->remain_on_channel) { - if (found->started) { - ret = drv_cancel_remain_on_channel(local); - if (WARN_ON_ONCE(ret)) { - mutex_unlock(&local->mtx); - return ret; - } - } - - list_del(&found->list); - - if (found->started) - ieee80211_start_next_roc(local); - mutex_unlock(&local->mtx); - - ieee80211_roc_notify_destroy(found, true); - } else { - /* work may be pending so use it all the time */ - found->abort = true; - ieee80211_queue_delayed_work(&local->hw, &found->work, 0); - - mutex_unlock(&local->mtx); - - /* work will clean up etc */ - flush_delayed_work(&found->work); - WARN_ON(!found->to_be_freed); - kfree(found); - } - - return 0; -} - -static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, - struct wireless_dev *wdev, - u64 cookie) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - struct ieee80211_local *local = sdata->local; - - return ieee80211_cancel_roc(local, cookie, false); -} - static int ieee80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef, @@ -3262,9 +2974,22 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, return err; } -static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, - struct sk_buff *skb, u64 *cookie, - gfp_t gfp) +u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) +{ + lockdep_assert_held(&local->mtx); + + local->roc_cookie_counter++; + + /* wow, you wrapped 64 bits ... 
more likely a bug */ + if (WARN_ON(local->roc_cookie_counter == 0)) + local->roc_cookie_counter++; + + return local->roc_cookie_counter; +} + +struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, + struct sk_buff *skb, u64 *cookie, + gfp_t gfp) { unsigned long spin_flags; struct sk_buff *ack_skb; @@ -3292,203 +3017,6 @@ static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, return ack_skb; } -static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, - struct cfg80211_mgmt_tx_params *params, - u64 *cookie) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - struct ieee80211_local *local = sdata->local; - struct sk_buff *skb, *ack_skb; - struct sta_info *sta; - const struct ieee80211_mgmt *mgmt = (void *)params->buf; - bool need_offchan = false; - u32 flags; - int ret; - u8 *data; - - if (params->dont_wait_for_ack) - flags = IEEE80211_TX_CTL_NO_ACK; - else - flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | - IEEE80211_TX_CTL_REQ_TX_STATUS; - - if (params->no_cck) - flags |= IEEE80211_TX_CTL_NO_CCK_RATE; - - switch (sdata->vif.type) { - case NL80211_IFTYPE_ADHOC: - if (!sdata->vif.bss_conf.ibss_joined) - need_offchan = true; - /* fall through */ -#ifdef CONFIG_MAC80211_MESH - case NL80211_IFTYPE_MESH_POINT: - if (ieee80211_vif_is_mesh(&sdata->vif) && - !sdata->u.mesh.mesh_id_len) - need_offchan = true; - /* fall through */ -#endif - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_P2P_GO: - if (sdata->vif.type != NL80211_IFTYPE_ADHOC && - !ieee80211_vif_is_mesh(&sdata->vif) && - !rcu_access_pointer(sdata->bss->beacon)) - need_offchan = true; - if (!ieee80211_is_action(mgmt->frame_control) || - mgmt->u.action.category == WLAN_CATEGORY_PUBLIC || - mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED || - mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) - break; - rcu_read_lock(); - sta = sta_info_get(sdata, mgmt->da); - rcu_read_unlock(); - if (!sta) - 
return -ENOLINK; - break; - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_CLIENT: - sdata_lock(sdata); - if (!sdata->u.mgd.associated || - (params->offchan && params->wait && - local->ops->remain_on_channel && - memcmp(sdata->u.mgd.associated->bssid, - mgmt->bssid, ETH_ALEN))) - need_offchan = true; - sdata_unlock(sdata); - break; - case NL80211_IFTYPE_P2P_DEVICE: - need_offchan = true; - break; - default: - return -EOPNOTSUPP; - } - - /* configurations requiring offchan cannot work if no channel has been - * specified - */ - if (need_offchan && !params->chan) - return -EINVAL; - - mutex_lock(&local->mtx); - - /* Check if the operating channel is the requested channel */ - if (!need_offchan) { - struct ieee80211_chanctx_conf *chanctx_conf; - - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - - if (chanctx_conf) { - need_offchan = params->chan && - (params->chan != - chanctx_conf->def.chan); - } else if (!params->chan) { - ret = -EINVAL; - rcu_read_unlock(); - goto out_unlock; - } else { - need_offchan = true; - } - rcu_read_unlock(); - } - - if (need_offchan && !params->offchan) { - ret = -EBUSY; - goto out_unlock; - } - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + params->len); - if (!skb) { - ret = -ENOMEM; - goto out_unlock; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - - data = skb_put(skb, params->len); - memcpy(data, params->buf, params->len); - - /* Update CSA counters */ - if (sdata->vif.csa_active && - (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_ADHOC) && - params->n_csa_offsets) { - int i; - struct beacon_data *beacon = NULL; - - rcu_read_lock(); - - if (sdata->vif.type == NL80211_IFTYPE_AP) - beacon = rcu_dereference(sdata->u.ap.beacon); - else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - beacon = rcu_dereference(sdata->u.ibss.presp); - else if (ieee80211_vif_is_mesh(&sdata->vif)) - beacon = 
rcu_dereference(sdata->u.mesh.beacon); - - if (beacon) - for (i = 0; i < params->n_csa_offsets; i++) - data[params->csa_offsets[i]] = - beacon->csa_current_counter; - - rcu_read_unlock(); - } - - IEEE80211_SKB_CB(skb)->flags = flags; - - skb->dev = sdata->dev; - - if (!params->dont_wait_for_ack) { - /* make a copy to preserve the frame contents - * in case of encryption. - */ - ack_skb = ieee80211_make_ack_skb(local, skb, cookie, - GFP_KERNEL); - if (IS_ERR(ack_skb)) { - ret = PTR_ERR(ack_skb); - kfree_skb(skb); - goto out_unlock; - } - } else { - /* Assign a dummy non-zero cookie, it's not sent to - * userspace in this case but we rely on its value - * internally in the need_offchan case to distinguish - * mgmt-tx from remain-on-channel. - */ - *cookie = 0xffffffff; - } - - if (!need_offchan) { - ieee80211_tx_skb(sdata, skb); - ret = 0; - goto out_unlock; - } - - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN | - IEEE80211_TX_INTFL_OFFCHAN_TX_OK; - if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) - IEEE80211_SKB_CB(skb)->hw_queue = - local->hw.offchannel_tx_hw_queue; - - /* This will handle all kinds of coalescing and immediate TX */ - ret = ieee80211_start_roc_work(local, sdata, params->chan, - params->wait, cookie, skb, - IEEE80211_ROC_TYPE_MGMT_TX); - if (ret) - ieee80211_free_txskb(&local->hw, skb); - out_unlock: - mutex_unlock(&local->mtx); - return ret; -} - -static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, - struct wireless_dev *wdev, - u64 cookie) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - - return ieee80211_cancel_roc(local, cookie, true); -} - static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, struct wireless_dev *wdev, u16 frame_type, bool reg) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d832bd59236b..b03d5410a2e9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1483,6 +1483,11 @@ void ieee80211_bss_info_change_notify(struct 
ieee80211_sub_if_data *sdata, void ieee80211_configure_filter(struct ieee80211_local *local); u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); +u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local); +struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, + struct sk_buff *skb, u64 *cookie, + gfp_t gfp); + /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, @@ -1577,16 +1582,22 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_local *local); void ieee80211_sched_scan_end(struct ieee80211_local *local); void ieee80211_sched_scan_stopped_work(struct work_struct *work); -/* off-channel helpers */ +/* off-channel/mgmt-tx */ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local); void ieee80211_offchannel_return(struct ieee80211_local *local); void ieee80211_roc_setup(struct ieee80211_local *local); void ieee80211_start_next_roc(struct ieee80211_local *local); void ieee80211_roc_purge(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free); -void ieee80211_sw_roc_work(struct work_struct *work); -void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); +int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + struct ieee80211_channel *chan, + unsigned int duration, u64 *cookie); +int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie); +int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_mgmt_tx_params *params, u64 *cookie); +int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie); /* channel switch handling */ void ieee80211_csa_finalize_work(struct work_struct *work); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 0fe9f746cd7e..b737437c9ac6 100644 --- 
a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -187,7 +187,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) false); } -void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) +static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) { if (roc->notified) return; @@ -299,7 +299,8 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) } } -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) +static void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, + bool free) { struct ieee80211_roc_work *dep, *tmp; @@ -328,7 +329,7 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) roc->to_be_freed = true; } -void ieee80211_sw_roc_work(struct work_struct *work) +static void ieee80211_sw_roc_work(struct work_struct *work) { struct ieee80211_roc_work *roc = container_of(work, struct ieee80211_roc_work, work.work); @@ -455,6 +456,473 @@ void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired); +static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local, + struct ieee80211_roc_work *new_roc, + struct ieee80211_roc_work *cur_roc) +{ + unsigned long now = jiffies; + unsigned long remaining = cur_roc->hw_start_time + + msecs_to_jiffies(cur_roc->duration) - + now; + + if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun)) + return false; + + /* if it doesn't fit entirely, schedule a new one */ + if (new_roc->duration > jiffies_to_msecs(remaining)) + return false; + + ieee80211_handle_roc_started(new_roc); + + /* add to dependents so we send the expired event properly */ + list_add_tail(&new_roc->list, &cur_roc->dependents); + return true; +} + +static int ieee80211_start_roc_work(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, + unsigned int duration, u64 *cookie, + struct sk_buff *txskb, + enum ieee80211_roc_type 
type) +{ + struct ieee80211_roc_work *roc, *tmp; + bool queued = false; + int ret; + + lockdep_assert_held(&local->mtx); + + if (local->use_chanctx && !local->ops->remain_on_channel) + return -EOPNOTSUPP; + + roc = kzalloc(sizeof(*roc), GFP_KERNEL); + if (!roc) + return -ENOMEM; + + /* + * If the duration is zero, then the driver + * wouldn't actually do anything. Set it to + * 10 for now. + * + * TODO: cancel the off-channel operation + * when we get the SKB's TX status and + * the wait time was zero before. + */ + if (!duration) + duration = 10; + + roc->chan = channel; + roc->duration = duration; + roc->req_duration = duration; + roc->frame = txskb; + roc->type = type; + roc->sdata = sdata; + INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); + INIT_LIST_HEAD(&roc->dependents); + + /* + * cookie is either the roc cookie (for normal roc) + * or the SKB (for mgmt TX) + */ + if (!txskb) { + roc->cookie = ieee80211_mgmt_tx_cookie(local); + *cookie = roc->cookie; + } else { + roc->mgmt_tx_cookie = *cookie; + } + + /* if there's one pending or we're scanning, queue this one */ + if (!list_empty(&local->roc_list) || + local->scanning || ieee80211_is_radar_required(local)) + goto out_check_combine; + + /* if not HW assist, just queue & schedule work */ + if (!local->ops->remain_on_channel) { + ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); + goto out_queue; + } + + /* otherwise actually kick it off here (for error handling) */ + + ret = drv_remain_on_channel(local, sdata, channel, duration, type); + if (ret) { + kfree(roc); + return ret; + } + + roc->started = true; + goto out_queue; + + out_check_combine: + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp->chan != channel || tmp->sdata != sdata) + continue; + + /* + * Extend this ROC if possible: + * + * If it hasn't started yet, just increase the duration + * and add the new one to the list of dependents. 
+ * If the type of the new ROC has higher priority, modify the + * type of the previous one to match that of the new one. + */ + if (!tmp->started) { + list_add_tail(&roc->list, &tmp->dependents); + tmp->duration = max(tmp->duration, roc->duration); + tmp->type = max(tmp->type, roc->type); + queued = true; + break; + } + + /* If it has already started, it's more difficult ... */ + if (local->ops->remain_on_channel) { + /* + * In the offloaded ROC case, if it hasn't begun, add + * this new one to the dependent list to be handled + * when the master one begins. If it has begun, + * check if it fits entirely within the existing one, + * in which case it will just be dependent as well. + * Otherwise, schedule it by itself. + */ + if (!tmp->hw_begun) { + list_add_tail(&roc->list, &tmp->dependents); + queued = true; + break; + } + + if (ieee80211_coalesce_started_roc(local, roc, tmp)) + queued = true; + } else if (del_timer_sync(&tmp->work.timer)) { + unsigned long new_end; + + /* + * In the software ROC case, cancel the timer, if + * that fails then the finish work is already + * queued/pending and thus we queue the new ROC + * normally, if that succeeds then we can extend + * the timer duration and TX the frame (if any.) 
+ */ + + list_add_tail(&roc->list, &tmp->dependents); + queued = true; + + new_end = jiffies + msecs_to_jiffies(roc->duration); + + /* ok, it was started & we canceled timer */ + if (time_after(new_end, tmp->work.timer.expires)) + mod_timer(&tmp->work.timer, new_end); + else + add_timer(&tmp->work.timer); + + ieee80211_handle_roc_started(roc); + } + break; + } + + out_queue: + if (!queued) + list_add_tail(&roc->list, &local->roc_list); + + return 0; +} + +int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + struct ieee80211_channel *chan, + unsigned int duration, u64 *cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_local *local = sdata->local; + int ret; + + mutex_lock(&local->mtx); + ret = ieee80211_start_roc_work(local, sdata, chan, + duration, cookie, NULL, + IEEE80211_ROC_TYPE_NORMAL); + mutex_unlock(&local->mtx); + + return ret; +} + +static int ieee80211_cancel_roc(struct ieee80211_local *local, + u64 cookie, bool mgmt_tx) +{ + struct ieee80211_roc_work *roc, *tmp, *found = NULL; + int ret; + + mutex_lock(&local->mtx); + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + struct ieee80211_roc_work *dep, *tmp2; + + list_for_each_entry_safe(dep, tmp2, &roc->dependents, list) { + if (!mgmt_tx && dep->cookie != cookie) + continue; + else if (mgmt_tx && dep->mgmt_tx_cookie != cookie) + continue; + /* found dependent item -- just remove it */ + list_del(&dep->list); + mutex_unlock(&local->mtx); + + ieee80211_roc_notify_destroy(dep, true); + return 0; + } + + if (!mgmt_tx && roc->cookie != cookie) + continue; + else if (mgmt_tx && roc->mgmt_tx_cookie != cookie) + continue; + + found = roc; + break; + } + + if (!found) { + mutex_unlock(&local->mtx); + return -ENOENT; + } + + /* + * We found the item to cancel, so do that. Note that it + * may have dependents, which we also cancel (and send + * the expired signal for.) 
Not doing so would be quite + * tricky here, but we may need to fix it later. + */ + + if (local->ops->remain_on_channel) { + if (found->started) { + ret = drv_cancel_remain_on_channel(local); + if (WARN_ON_ONCE(ret)) { + mutex_unlock(&local->mtx); + return ret; + } + } + + list_del(&found->list); + + if (found->started) + ieee80211_start_next_roc(local); + mutex_unlock(&local->mtx); + + ieee80211_roc_notify_destroy(found, true); + } else { + /* work may be pending so use it all the time */ + found->abort = true; + ieee80211_queue_delayed_work(&local->hw, &found->work, 0); + + mutex_unlock(&local->mtx); + + /* work will clean up etc */ + flush_delayed_work(&found->work); + WARN_ON(!found->to_be_freed); + kfree(found); + } + + return 0; +} + +int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_local *local = sdata->local; + + return ieee80211_cancel_roc(local, cookie, false); +} + +int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_mgmt_tx_params *params, u64 *cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb, *ack_skb; + struct sta_info *sta; + const struct ieee80211_mgmt *mgmt = (void *)params->buf; + bool need_offchan = false; + u32 flags; + int ret; + u8 *data; + + if (params->dont_wait_for_ack) + flags = IEEE80211_TX_CTL_NO_ACK; + else + flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + if (params->no_cck) + flags |= IEEE80211_TX_CTL_NO_CCK_RATE; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_ADHOC: + if (!sdata->vif.bss_conf.ibss_joined) + need_offchan = true; + /* fall through */ +#ifdef CONFIG_MAC80211_MESH + case NL80211_IFTYPE_MESH_POINT: + if (ieee80211_vif_is_mesh(&sdata->vif) && + !sdata->u.mesh.mesh_id_len) + need_offchan = true; + /* 
fall through */ +#endif + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_GO: + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + !ieee80211_vif_is_mesh(&sdata->vif) && + !rcu_access_pointer(sdata->bss->beacon)) + need_offchan = true; + if (!ieee80211_is_action(mgmt->frame_control) || + mgmt->u.action.category == WLAN_CATEGORY_PUBLIC || + mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED || + mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) + break; + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->da); + rcu_read_unlock(); + if (!sta) + return -ENOLINK; + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + sdata_lock(sdata); + if (!sdata->u.mgd.associated || + (params->offchan && params->wait && + local->ops->remain_on_channel && + memcmp(sdata->u.mgd.associated->bssid, + mgmt->bssid, ETH_ALEN))) + need_offchan = true; + sdata_unlock(sdata); + break; + case NL80211_IFTYPE_P2P_DEVICE: + need_offchan = true; + break; + default: + return -EOPNOTSUPP; + } + + /* configurations requiring offchan cannot work if no channel has been + * specified + */ + if (need_offchan && !params->chan) + return -EINVAL; + + mutex_lock(&local->mtx); + + /* Check if the operating channel is the requested channel */ + if (!need_offchan) { + struct ieee80211_chanctx_conf *chanctx_conf; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + + if (chanctx_conf) { + need_offchan = params->chan && + (params->chan != + chanctx_conf->def.chan); + } else if (!params->chan) { + ret = -EINVAL; + rcu_read_unlock(); + goto out_unlock; + } else { + need_offchan = true; + } + rcu_read_unlock(); + } + + if (need_offchan && !params->offchan) { + ret = -EBUSY; + goto out_unlock; + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + params->len); + if (!skb) { + ret = -ENOMEM; + goto out_unlock; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + data = skb_put(skb, params->len); + memcpy(data, 
params->buf, params->len); + + /* Update CSA counters */ + if (sdata->vif.csa_active && + (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT || + sdata->vif.type == NL80211_IFTYPE_ADHOC) && + params->n_csa_offsets) { + int i; + struct beacon_data *beacon = NULL; + + rcu_read_lock(); + + if (sdata->vif.type == NL80211_IFTYPE_AP) + beacon = rcu_dereference(sdata->u.ap.beacon); + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + beacon = rcu_dereference(sdata->u.ibss.presp); + else if (ieee80211_vif_is_mesh(&sdata->vif)) + beacon = rcu_dereference(sdata->u.mesh.beacon); + + if (beacon) + for (i = 0; i < params->n_csa_offsets; i++) + data[params->csa_offsets[i]] = + beacon->csa_current_counter; + + rcu_read_unlock(); + } + + IEEE80211_SKB_CB(skb)->flags = flags; + + skb->dev = sdata->dev; + + if (!params->dont_wait_for_ack) { + /* make a copy to preserve the frame contents + * in case of encryption. + */ + ack_skb = ieee80211_make_ack_skb(local, skb, cookie, + GFP_KERNEL); + if (IS_ERR(ack_skb)) { + ret = PTR_ERR(ack_skb); + kfree_skb(skb); + goto out_unlock; + } + } else { + /* Assign a dummy non-zero cookie, it's not sent to + * userspace in this case but we rely on its value + * internally in the need_offchan case to distinguish + * mgmt-tx from remain-on-channel. 
+ */ + *cookie = 0xffffffff; + } + + if (!need_offchan) { + ieee80211_tx_skb(sdata, skb); + ret = 0; + goto out_unlock; + } + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK; + if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) + IEEE80211_SKB_CB(skb)->hw_queue = + local->hw.offchannel_tx_hw_queue; + + /* This will handle all kinds of coalescing and immediate TX */ + ret = ieee80211_start_roc_work(local, sdata, params->chan, + params->wait, cookie, skb, + IEEE80211_ROC_TYPE_MGMT_TX); + if (ret) + ieee80211_free_txskb(&local->hw, skb); + out_unlock: + mutex_unlock(&local->mtx); + return ret; +} + +int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + return ieee80211_cancel_roc(local, cookie, true); +} + void ieee80211_roc_setup(struct ieee80211_local *local) { INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start); -- cgit v1.2.3 From 5ee00dbd52c57f37d74306ce6e8db26171f599b3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Nov 2015 14:25:49 +0100 Subject: mac80211: simplify ack_skb handling Since the cookie is assigned inside ieee80211_make_ack_skb() now, we no longer need to return the ack_skb as the cookie and can simplify the function's return and the callers. Also rename it to ieee80211_attach_ack_skb() to more accurately reflect its purpose. 
Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 18 ++++++++---------- net/mac80211/ieee80211_i.h | 5 ++--- net/mac80211/offchannel.c | 8 +++----- 3 files changed, 13 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b8ef33e62851..2d1c4c35186d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2987,9 +2987,8 @@ u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) return local->roc_cookie_counter; } -struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, - struct sk_buff *skb, u64 *cookie, - gfp_t gfp) +int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, + u64 *cookie, gfp_t gfp) { unsigned long spin_flags; struct sk_buff *ack_skb; @@ -2997,7 +2996,7 @@ struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, ack_skb = skb_copy(skb, gfp); if (!ack_skb) - return ERR_PTR(-ENOMEM); + return -ENOMEM; spin_lock_irqsave(&local->ack_status_lock, spin_flags); id = idr_alloc(&local->ack_status_frames, ack_skb, @@ -3006,7 +3005,7 @@ struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, if (id < 0) { kfree_skb(ack_skb); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } IEEE80211_SKB_CB(skb)->ack_frame_id = id; @@ -3014,7 +3013,7 @@ struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, *cookie = ieee80211_mgmt_tx_cookie(local); IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie; - return ack_skb; + return 0; } static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, @@ -3092,7 +3091,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; - struct sk_buff *skb, *ack_skb; + struct sk_buff *skb; int size = sizeof(*nullfunc); __le16 fc; bool qos; @@ -3160,10 +3159,9 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device 
*dev, if (qos) nullfunc->qos_ctrl = cpu_to_le16(7); - ack_skb = ieee80211_make_ack_skb(local, skb, cookie, GFP_ATOMIC); - if (IS_ERR(ack_skb)) { + ret = ieee80211_attach_ack_skb(local, skb, cookie, GFP_ATOMIC); + if (ret) { kfree_skb(skb); - ret = PTR_ERR(ack_skb); goto unlock; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b03d5410a2e9..0c50031fadac 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1484,9 +1484,8 @@ void ieee80211_configure_filter(struct ieee80211_local *local); u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local); -struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, - struct sk_buff *skb, u64 *cookie, - gfp_t gfp); +int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, + u64 *cookie, gfp_t gfp); /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index b737437c9ac6..6a8178f4a675 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -733,7 +733,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; - struct sk_buff *skb, *ack_skb; + struct sk_buff *skb; struct sta_info *sta; const struct ieee80211_mgmt *mgmt = (void *)params->buf; bool need_offchan = false; @@ -876,10 +876,8 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, /* make a copy to preserve the frame contents * in case of encryption. 
*/ - ack_skb = ieee80211_make_ack_skb(local, skb, cookie, - GFP_KERNEL); - if (IS_ERR(ack_skb)) { - ret = PTR_ERR(ack_skb); + ret = ieee80211_attach_ack_skb(local, skb, cookie, GFP_KERNEL); + if (ret) { kfree_skb(skb); goto out_unlock; } -- cgit v1.2.3 From aaa016ccd5df89d73483d0d51ee1f692978ccc35 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Nov 2015 23:53:51 +0100 Subject: mac80211: rewrite remain-on-channel logic Jouni found a bug in the remain-on-channel logic: when a short item is queued, a long item is combined with it extending the original one, and then the long item is deleted, the timeout doesn't go back to the short one, and the short item ends up taking a long time. In this case, this showed as blocking scan when running two test cases back to back - the scan from the second was delayed even though all the remain-on-channel items should long have been gone. Fixing this with the current data structures turns out to be a bit complicated, we just remove the long item from the dependents list right now and don't recalculate the timeouts. There's a somewhat similar bug where we delete the short item and all the dependents go with it; to fix this we'd have to move them from the dependents to the real list. Instead of trying to do that, rewrite the code to not have all this complexity in the data structures: use a single list and allow more than one entry in it being marked as started. This makes the code a bit more complex, the worker needs to understand that it might need to just remove one of the started items, while keeping the device off-channel, but that's not more complicated than the nested data structures. This then fixes both issues described, and makes it easier to also limit the overall off-channel time when combining. TODO: as before, with hardware remain-on-channel, deleting an item after combining results in cancelling them all - we can keep track of the time elapsed and only cancel after that to fix this. 
Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 7 +- net/mac80211/main.c | 1 + net/mac80211/offchannel.c | 599 +++++++++++++++++++++++---------------------- 3 files changed, 316 insertions(+), 291 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0c50031fadac..c30b6842ed9f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -325,19 +325,15 @@ struct mesh_preq_queue { struct ieee80211_roc_work { struct list_head list; - struct list_head dependents; - - struct delayed_work work; struct ieee80211_sub_if_data *sdata; struct ieee80211_channel *chan; bool started, abort, hw_begun, notified; - bool to_be_freed; bool on_channel; - unsigned long hw_start_time; + unsigned long start_time; u32 duration, req_duration; struct sk_buff *frame; @@ -1335,6 +1331,7 @@ struct ieee80211_local { /* * Remain-on-channel support */ + struct delayed_work roc_work; struct list_head roc_list; struct work_struct hw_roc_start, hw_roc_done; unsigned long hw_roc_start_time; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 858f6b1cb149..6bcf0faa4a89 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1149,6 +1149,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) rtnl_unlock(); + cancel_delayed_work_sync(&local->roc_work); cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); cancel_work_sync(&local->tdls_chsw_work); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 6a8178f4a675..cfd3356e26fd 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -187,11 +187,76 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) false); } -static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) +static void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) { - if (roc->notified) + /* was never transmitted */ + if (roc->frame) { + cfg80211_mgmt_tx_status(&roc->sdata->wdev, 
roc->mgmt_tx_cookie, + roc->frame->data, roc->frame->len, + false, GFP_KERNEL); + ieee80211_free_txskb(&roc->sdata->local->hw, roc->frame); + } + + if (!roc->mgmt_tx_cookie) + cfg80211_remain_on_channel_expired(&roc->sdata->wdev, + roc->cookie, roc->chan, + GFP_KERNEL); + + list_del(&roc->list); + kfree(roc); +} + +static unsigned long ieee80211_end_finished_rocs(struct ieee80211_local *local, + unsigned long now) +{ + struct ieee80211_roc_work *roc, *tmp; + long remaining_dur_min = LONG_MAX; + + lockdep_assert_held(&local->mtx); + + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + long remaining; + + if (!roc->started) + break; + + remaining = roc->start_time + + msecs_to_jiffies(roc->duration) - + now; + + if (roc->abort || remaining <= 0) + ieee80211_roc_notify_destroy(roc); + else + remaining_dur_min = min(remaining_dur_min, remaining); + } + + return remaining_dur_min; +} + +static bool ieee80211_recalc_sw_work(struct ieee80211_local *local, + unsigned long now) +{ + long dur = ieee80211_end_finished_rocs(local, now); + + if (dur == LONG_MAX) + return false; + + mod_delayed_work(local->workqueue, &local->roc_work, dur); + return true; +} + +static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc, + unsigned long start_time) +{ + struct ieee80211_local *local = roc->sdata->local; + + if (WARN_ON(roc->notified)) return; + roc->start_time = start_time; + roc->started = true; + roc->hw_begun = true; + if (roc->mgmt_tx_cookie) { if (!WARN_ON(!roc->frame)) { ieee80211_tx_skb_tid_band(roc->sdata, roc->frame, 7, @@ -205,40 +270,26 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) } roc->notified = true; + + if (!local->ops->remain_on_channel) + ieee80211_recalc_sw_work(local, start_time); } static void ieee80211_hw_roc_start(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, hw_roc_start); - struct ieee80211_roc_work *roc, *dep, *tmp; + struct ieee80211_roc_work 
*roc; mutex_lock(&local->mtx); - if (list_empty(&local->roc_list)) - goto out_unlock; - - roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, - list); - - if (!roc->started) - goto out_unlock; - - roc->hw_begun = true; - roc->hw_start_time = local->hw_roc_start_time; - - ieee80211_handle_roc_started(roc); - list_for_each_entry_safe(dep, tmp, &roc->dependents, list) { - ieee80211_handle_roc_started(dep); + list_for_each_entry(roc, &local->roc_list, list) { + if (!roc->started) + break; - if (dep->duration > roc->duration) { - u32 dur = dep->duration; - dep->duration = dur - roc->duration; - roc->duration = dur; - list_move(&dep->list, &roc->list); - } + ieee80211_handle_roc_started(roc, local->hw_roc_start_time); } - out_unlock: + mutex_unlock(&local->mtx); } @@ -254,34 +305,40 @@ void ieee80211_ready_on_channel(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(ieee80211_ready_on_channel); -void ieee80211_start_next_roc(struct ieee80211_local *local) +static void _ieee80211_start_next_roc(struct ieee80211_local *local) { - struct ieee80211_roc_work *roc; + struct ieee80211_roc_work *roc, *tmp; + enum ieee80211_roc_type type; + u32 min_dur, max_dur; lockdep_assert_held(&local->mtx); - if (list_empty(&local->roc_list)) { - ieee80211_run_deferred_scan(local); + if (WARN_ON(list_empty(&local->roc_list))) return; - } roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, list); - if (WARN_ON_ONCE(roc->started)) + if (WARN_ON(roc->started)) return; - if (local->ops->remain_on_channel) { - int ret, duration = roc->duration; - - /* XXX: duplicated, see ieee80211_start_roc_work() */ - if (!duration) - duration = 10; + min_dur = roc->duration; + max_dur = roc->duration; + type = roc->type; - ret = drv_remain_on_channel(local, roc->sdata, roc->chan, - duration, roc->type); + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp == roc) + continue; + if (tmp->sdata != roc->sdata || tmp->chan != roc->chan) + break; + max_dur = max(tmp->duration, 
max_dur); + min_dur = min(tmp->duration, min_dur); + type = max(tmp->type, type); + } - roc->started = true; + if (local->ops->remain_on_channel) { + int ret = drv_remain_on_channel(local, roc->sdata, roc->chan, + max_dur, type); if (ret) { wiphy_warn(local->hw.wiphy, @@ -290,74 +347,24 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) * queue the work struct again to avoid recursion * when multiple failures occur */ - ieee80211_remain_on_channel_expired(&local->hw); + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp->sdata != roc->sdata || + tmp->chan != roc->chan) + break; + tmp->started = true; + tmp->abort = true; + } + ieee80211_queue_work(&local->hw, &local->hw_roc_done); + return; } - } else { - /* delay it a bit */ - ieee80211_queue_delayed_work(&local->hw, &roc->work, - round_jiffies_relative(HZ/2)); - } -} - -static void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, - bool free) -{ - struct ieee80211_roc_work *dep, *tmp; - - if (WARN_ON(roc->to_be_freed)) - return; - - /* was never transmitted */ - if (roc->frame) { - cfg80211_mgmt_tx_status(&roc->sdata->wdev, roc->mgmt_tx_cookie, - roc->frame->data, roc->frame->len, - false, GFP_KERNEL); - ieee80211_free_txskb(&roc->sdata->local->hw, roc->frame); - } - - if (!roc->mgmt_tx_cookie) - cfg80211_remain_on_channel_expired(&roc->sdata->wdev, - roc->cookie, roc->chan, - GFP_KERNEL); - - list_for_each_entry_safe(dep, tmp, &roc->dependents, list) - ieee80211_roc_notify_destroy(dep, true); - - if (free) - kfree(roc); - else - roc->to_be_freed = true; -} - -static void ieee80211_sw_roc_work(struct work_struct *work) -{ - struct ieee80211_roc_work *roc = - container_of(work, struct ieee80211_roc_work, work.work); - struct ieee80211_sub_if_data *sdata = roc->sdata; - struct ieee80211_local *local = sdata->local; - bool started, on_channel; - - mutex_lock(&local->mtx); - - if (roc->to_be_freed) - goto out_unlock; - - if (roc->abort) - goto finish; - - if 
(WARN_ON(list_empty(&local->roc_list))) - goto out_unlock; - - if (WARN_ON(roc != list_first_entry(&local->roc_list, - struct ieee80211_roc_work, - list))) - goto out_unlock; - - if (!roc->started) { - struct ieee80211_roc_work *dep; - - WARN_ON(local->use_chanctx); + /* we'll notify about the start once the HW calls back */ + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp->sdata != roc->sdata || tmp->chan != roc->chan) + break; + tmp->started = true; + } + } else { /* If actually operating on the desired channel (with at least * 20 MHz channel width) don't stop all the operations but still * treat it as though the ROC operation started properly, so @@ -377,27 +384,72 @@ static void ieee80211_sw_roc_work(struct work_struct *work) ieee80211_hw_config(local, 0); } - /* tell userspace or send frame */ - ieee80211_handle_roc_started(roc); - list_for_each_entry(dep, &roc->dependents, list) - ieee80211_handle_roc_started(dep); + ieee80211_queue_delayed_work(&local->hw, &local->roc_work, + msecs_to_jiffies(min_dur)); + + /* tell userspace or send frame(s) */ + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp->sdata != roc->sdata || tmp->chan != roc->chan) + break; + + tmp->on_channel = roc->on_channel; + ieee80211_handle_roc_started(tmp, jiffies); + } + } +} + +void ieee80211_start_next_roc(struct ieee80211_local *local) +{ + struct ieee80211_roc_work *roc; + + lockdep_assert_held(&local->mtx); + + if (list_empty(&local->roc_list)) { + ieee80211_run_deferred_scan(local); + return; + } + + roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, + list); + + if (WARN_ON_ONCE(roc->started)) + return; + + if (local->ops->remain_on_channel) { + _ieee80211_start_next_roc(local); + } else { + /* delay it a bit */ + ieee80211_queue_delayed_work(&local->hw, &local->roc_work, + round_jiffies_relative(HZ/2)); + } +} + +static void __ieee80211_roc_work(struct ieee80211_local *local) +{ + struct ieee80211_roc_work *roc; + bool on_channel; + + 
lockdep_assert_held(&local->mtx); + + if (WARN_ON(local->ops->remain_on_channel)) + return; - /* if it was pure TX, just finish right away */ - if (!roc->duration) - goto finish; + roc = list_first_entry_or_null(&local->roc_list, + struct ieee80211_roc_work, list); + if (!roc) + return; - roc->started = true; - ieee80211_queue_delayed_work(&local->hw, &roc->work, - msecs_to_jiffies(roc->duration)); + if (!roc->started) { + WARN_ON(local->use_chanctx); + _ieee80211_start_next_roc(local); } else { - /* finish this ROC */ - finish: - list_del(&roc->list); - started = roc->started; on_channel = roc->on_channel; - ieee80211_roc_notify_destroy(roc, !roc->abort); + if (ieee80211_recalc_sw_work(local, jiffies)) + return; - if (started && !on_channel) { + /* careful - roc pointer became invalid during recalc */ + + if (!on_channel) { ieee80211_flush_queues(local, NULL, false); local->tmp_channel = NULL; @@ -407,14 +459,17 @@ static void ieee80211_sw_roc_work(struct work_struct *work) } ieee80211_recalc_idle(local); - - if (started) - ieee80211_start_next_roc(local); - else if (list_empty(&local->roc_list)) - ieee80211_run_deferred_scan(local); + ieee80211_start_next_roc(local); } +} - out_unlock: +static void ieee80211_roc_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, roc_work.work); + + mutex_lock(&local->mtx); + __ieee80211_roc_work(local); mutex_unlock(&local->mtx); } @@ -422,27 +477,14 @@ static void ieee80211_hw_roc_done(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, hw_roc_done); - struct ieee80211_roc_work *roc; mutex_lock(&local->mtx); - if (list_empty(&local->roc_list)) - goto out_unlock; - - roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, - list); - - if (!roc->started) - goto out_unlock; - - list_del(&roc->list); - - ieee80211_roc_notify_destroy(roc, true); + ieee80211_end_finished_rocs(local, jiffies); /* if there's 
another roc, start it now */ ieee80211_start_next_roc(local); - out_unlock: mutex_unlock(&local->mtx); } @@ -456,26 +498,41 @@ void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired); -static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local, - struct ieee80211_roc_work *new_roc, - struct ieee80211_roc_work *cur_roc) +static bool +ieee80211_coalesce_hw_started_roc(struct ieee80211_local *local, + struct ieee80211_roc_work *new_roc, + struct ieee80211_roc_work *cur_roc) { unsigned long now = jiffies; - unsigned long remaining = cur_roc->hw_start_time + - msecs_to_jiffies(cur_roc->duration) - - now; + unsigned long remaining; + + if (WARN_ON(!cur_roc->started)) + return false; - if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun)) + /* if it was scheduled in the hardware, but not started yet, + * we can only combine if the older one had a longer duration + */ + if (!cur_roc->hw_begun && new_roc->duration > cur_roc->duration) return false; + remaining = cur_roc->start_time + + msecs_to_jiffies(cur_roc->duration) - + now; + /* if it doesn't fit entirely, schedule a new one */ if (new_roc->duration > jiffies_to_msecs(remaining)) return false; - ieee80211_handle_roc_started(new_roc); + /* add just after the current one so we combine their finish later */ + list_add(&new_roc->list, &cur_roc->list); + + /* if the existing one has already begun then let this one also + * begin, otherwise they'll both be marked properly by the work + * struct that runs once the driver notifies us of the beginning + */ + if (cur_roc->hw_begun) + ieee80211_handle_roc_started(new_roc, now); - /* add to dependents so we send the expired event properly */ - list_add_tail(&new_roc->list, &cur_roc->dependents); return true; } @@ -487,7 +544,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, enum ieee80211_roc_type type) { struct ieee80211_roc_work *roc, *tmp; - bool queued = false; + bool queued 
= false, combine_started = true; int ret; lockdep_assert_held(&local->mtx); @@ -517,8 +574,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, roc->frame = txskb; roc->type = type; roc->sdata = sdata; - INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); - INIT_LIST_HEAD(&roc->dependents); /* * cookie is either the roc cookie (for normal roc) @@ -531,95 +586,88 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, roc->mgmt_tx_cookie = *cookie; } - /* if there's one pending or we're scanning, queue this one */ - if (!list_empty(&local->roc_list) || - local->scanning || ieee80211_is_radar_required(local)) - goto out_check_combine; - - /* if not HW assist, just queue & schedule work */ - if (!local->ops->remain_on_channel) { - ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); - goto out_queue; - } - - /* otherwise actually kick it off here (for error handling) */ + /* if there's no need to queue, handle it immediately */ + if (list_empty(&local->roc_list) && + !local->scanning && !ieee80211_is_radar_required(local)) { + /* if not HW assist, just queue & schedule work */ + if (!local->ops->remain_on_channel) { + list_add_tail(&roc->list, &local->roc_list); + ieee80211_queue_delayed_work(&local->hw, + &local->roc_work, 0); + } else { + /* otherwise actually kick it off here + * (for error handling) + */ + ret = drv_remain_on_channel(local, sdata, channel, + duration, type); + if (ret) { + kfree(roc); + return ret; + } + roc->started = true; + list_add_tail(&roc->list, &local->roc_list); + } - ret = drv_remain_on_channel(local, sdata, channel, duration, type); - if (ret) { - kfree(roc); - return ret; + return 0; } - roc->started = true; - goto out_queue; + /* otherwise handle queueing */ - out_check_combine: list_for_each_entry(tmp, &local->roc_list, list) { if (tmp->chan != channel || tmp->sdata != sdata) continue; /* - * Extend this ROC if possible: - * - * If it hasn't started yet, just increase the duration - * and add 
the new one to the list of dependents. - * If the type of the new ROC has higher priority, modify the - * type of the previous one to match that of the new one. + * Extend this ROC if possible: If it hasn't started, add + * just after the new one to combine. */ if (!tmp->started) { - list_add_tail(&roc->list, &tmp->dependents); - tmp->duration = max(tmp->duration, roc->duration); - tmp->type = max(tmp->type, roc->type); + list_add(&roc->list, &tmp->list); queued = true; break; } - /* If it has already started, it's more difficult ... */ - if (local->ops->remain_on_channel) { - /* - * In the offloaded ROC case, if it hasn't begun, add - * this new one to the dependent list to be handled - * when the master one begins. If it has begun, - * check if it fits entirely within the existing one, - * in which case it will just be dependent as well. - * Otherwise, schedule it by itself. - */ - if (!tmp->hw_begun) { - list_add_tail(&roc->list, &tmp->dependents); - queued = true; - break; - } - - if (ieee80211_coalesce_started_roc(local, roc, tmp)) - queued = true; - } else if (del_timer_sync(&tmp->work.timer)) { - unsigned long new_end; + if (!combine_started) + continue; - /* - * In the software ROC case, cancel the timer, if - * that fails then the finish work is already - * queued/pending and thus we queue the new ROC - * normally, if that succeeds then we can extend - * the timer duration and TX the frame (if any.) + if (!local->ops->remain_on_channel) { + /* If there's no hardware remain-on-channel, and + * doing so won't push us over the maximum r-o-c + * we allow, then we can just add the new one to + * the list and mark it as having started now. + * If it would push over the limit, don't try to + * combine with other started ones (that haven't + * been running as long) but potentially sort it + * with others that had the same fate. 
*/ + unsigned long now = jiffies; + u32 elapsed = jiffies_to_msecs(now - tmp->start_time); + struct wiphy *wiphy = local->hw.wiphy; + u32 max_roc = wiphy->max_remain_on_channel_duration; - list_add_tail(&roc->list, &tmp->dependents); - queued = true; - - new_end = jiffies + msecs_to_jiffies(roc->duration); - - /* ok, it was started & we canceled timer */ - if (time_after(new_end, tmp->work.timer.expires)) - mod_timer(&tmp->work.timer, new_end); - else - add_timer(&tmp->work.timer); + if (elapsed + roc->duration > max_roc) { + combine_started = false; + continue; + } - ieee80211_handle_roc_started(roc); + list_add(&roc->list, &tmp->list); + queued = true; + roc->on_channel = tmp->on_channel; + ieee80211_handle_roc_started(roc, now); + break; } - break; + + queued = ieee80211_coalesce_hw_started_roc(local, roc, tmp); + if (queued) + break; + /* if it wasn't queued, perhaps it can be combined with + * another that also couldn't get combined previously, + * but no need to check for already started ones, since + * that can't work. + */ + combine_started = false; } - out_queue: if (!queued) list_add_tail(&roc->list, &local->roc_list); @@ -651,21 +699,6 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { - struct ieee80211_roc_work *dep, *tmp2; - - list_for_each_entry_safe(dep, tmp2, &roc->dependents, list) { - if (!mgmt_tx && dep->cookie != cookie) - continue; - else if (mgmt_tx && dep->mgmt_tx_cookie != cookie) - continue; - /* found dependent item -- just remove it */ - list_del(&dep->list); - mutex_unlock(&local->mtx); - - ieee80211_roc_notify_destroy(dep, true); - return 0; - } - if (!mgmt_tx && roc->cookie != cookie) continue; else if (mgmt_tx && roc->mgmt_tx_cookie != cookie) @@ -680,42 +713,44 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, return -ENOENT; } - /* - * We found the item to cancel, so do that. 
Note that it - * may have dependents, which we also cancel (and send - * the expired signal for.) Not doing so would be quite - * tricky here, but we may need to fix it later. - */ + if (!found->started) { + ieee80211_roc_notify_destroy(found); + goto out_unlock; + } if (local->ops->remain_on_channel) { - if (found->started) { - ret = drv_cancel_remain_on_channel(local); - if (WARN_ON_ONCE(ret)) { - mutex_unlock(&local->mtx); - return ret; - } + ret = drv_cancel_remain_on_channel(local); + if (WARN_ON_ONCE(ret)) { + mutex_unlock(&local->mtx); + return ret; } - list_del(&found->list); + /* TODO: + * if multiple items were combined here then we really shouldn't + * cancel them all - we should wait for as much time as needed + * for the longest remaining one, and only then cancel ... + */ + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + if (!roc->started) + break; + if (roc == found) + found = NULL; + ieee80211_roc_notify_destroy(roc); + } - if (found->started) - ieee80211_start_next_roc(local); - mutex_unlock(&local->mtx); + /* that really must not happen - it was started */ + WARN_ON(found); - ieee80211_roc_notify_destroy(found, true); + ieee80211_start_next_roc(local); } else { - /* work may be pending so use it all the time */ + /* go through work struct to return to the operating channel */ found->abort = true; - ieee80211_queue_delayed_work(&local->hw, &found->work, 0); - - mutex_unlock(&local->mtx); - - /* work will clean up etc */ - flush_delayed_work(&found->work); - WARN_ON(!found->to_be_freed); - kfree(found); + mod_delayed_work(local->workqueue, &local->roc_work, 0); } + out_unlock: + mutex_unlock(&local->mtx); + return 0; } @@ -925,6 +960,7 @@ void ieee80211_roc_setup(struct ieee80211_local *local) { INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start); INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done); + INIT_DELAYED_WORK(&local->roc_work, ieee80211_roc_work); INIT_LIST_HEAD(&local->roc_list); } @@ -932,36 +968,27 @@ void 
ieee80211_roc_purge(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { struct ieee80211_roc_work *roc, *tmp; - LIST_HEAD(tmp_list); + bool work_to_do = false; mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { if (sdata && roc->sdata != sdata) continue; - if (roc->started && local->ops->remain_on_channel) { - /* can race, so ignore return value */ - drv_cancel_remain_on_channel(local); - } - - list_move_tail(&roc->list, &tmp_list); - roc->abort = true; - } - mutex_unlock(&local->mtx); - - list_for_each_entry_safe(roc, tmp, &tmp_list, list) { - if (local->ops->remain_on_channel) { - list_del(&roc->list); - ieee80211_roc_notify_destroy(roc, true); + if (roc->started) { + if (local->ops->remain_on_channel) { + /* can race, so ignore return value */ + drv_cancel_remain_on_channel(local); + ieee80211_roc_notify_destroy(roc); + } else { + roc->abort = true; + work_to_do = true; + } } else { - ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); - - /* work will clean up etc */ - flush_delayed_work(&roc->work); - WARN_ON(!roc->to_be_freed); - kfree(roc); + ieee80211_roc_notify_destroy(roc); } } - - WARN_ON_ONCE(!list_empty(&tmp_list)); + if (work_to_do) + __ieee80211_roc_work(local); + mutex_unlock(&local->mtx); } -- cgit v1.2.3 From 1aeb135f84fe40cf6ba1e3610ad2ca4cb9628089 Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Mon, 23 Nov 2015 19:27:16 +0100 Subject: cfg80211: reg: Refactor calculation of bandwidth flags The same piece of code appears at two places. Make a function from it. 
Signed-off-by: Michal Sojka Signed-off-by: Johannes Berg --- net/wireless/reg.c | 91 ++++++++++++++++++++++-------------------------------- 1 file changed, 37 insertions(+), 54 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 43b3e577b2ea..0a4f5481ab83 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1166,6 +1166,41 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, #endif } +static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd, + const struct ieee80211_reg_rule *reg_rule, + const struct ieee80211_channel *chan) +{ + const struct ieee80211_freq_range *freq_range = NULL; + u32 max_bandwidth_khz, bw_flags = 0; + + freq_range = &reg_rule->freq_range; + + max_bandwidth_khz = freq_range->max_bandwidth_khz; + /* Check if auto calculation requested */ + if (reg_rule->flags & NL80211_RRF_AUTO_BW) + max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); + + /* If we get a reg_rule we can assume that at least 5Mhz fit */ + if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), + MHZ_TO_KHZ(10))) + bw_flags |= IEEE80211_CHAN_NO_10MHZ; + if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), + MHZ_TO_KHZ(20))) + bw_flags |= IEEE80211_CHAN_NO_20MHZ; + + if (max_bandwidth_khz < MHZ_TO_KHZ(10)) + bw_flags |= IEEE80211_CHAN_NO_10MHZ; + if (max_bandwidth_khz < MHZ_TO_KHZ(20)) + bw_flags |= IEEE80211_CHAN_NO_20MHZ; + if (max_bandwidth_khz < MHZ_TO_KHZ(40)) + bw_flags |= IEEE80211_CHAN_NO_HT40; + if (max_bandwidth_khz < MHZ_TO_KHZ(80)) + bw_flags |= IEEE80211_CHAN_NO_80MHZ; + if (max_bandwidth_khz < MHZ_TO_KHZ(160)) + bw_flags |= IEEE80211_CHAN_NO_160MHZ; + return bw_flags; +} + /* * Note that right now we assume the desired channel bandwidth * is always 20 MHz for each individual channel (HT40 uses 20 MHz @@ -1178,11 +1213,9 @@ static void handle_channel(struct wiphy *wiphy, u32 flags, bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const
struct ieee80211_power_rule *power_rule = NULL; - const struct ieee80211_freq_range *freq_range = NULL; struct wiphy *request_wiphy = NULL; struct regulatory_request *lr = get_last_request(); const struct ieee80211_regdomain *regd; - u32 max_bandwidth_khz; request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); @@ -1223,31 +1256,7 @@ static void handle_channel(struct wiphy *wiphy, chan_reg_rule_print_dbg(regd, chan, reg_rule); power_rule = &reg_rule->power_rule; - freq_range = &reg_rule->freq_range; - - max_bandwidth_khz = freq_range->max_bandwidth_khz; - /* Check if auto calculation requested */ - if (reg_rule->flags & NL80211_RRF_AUTO_BW) - max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); - - /* If we get a reg_rule we can assume that at least 5Mhz fit */ - if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), - MHZ_TO_KHZ(10))) - bw_flags |= IEEE80211_CHAN_NO_10MHZ; - if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), - MHZ_TO_KHZ(20))) - bw_flags |= IEEE80211_CHAN_NO_20MHZ; - - if (max_bandwidth_khz < MHZ_TO_KHZ(10)) - bw_flags |= IEEE80211_CHAN_NO_10MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(20)) - bw_flags |= IEEE80211_CHAN_NO_20MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(40)) - bw_flags |= IEEE80211_CHAN_NO_HT40; - if (max_bandwidth_khz < MHZ_TO_KHZ(80)) - bw_flags |= IEEE80211_CHAN_NO_80MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(160)) - bw_flags |= IEEE80211_CHAN_NO_160MHZ; + bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan); if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && @@ -1760,8 +1769,6 @@ static void handle_channel_custom(struct wiphy *wiphy, u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; - const struct ieee80211_freq_range *freq_range = NULL; - u32 max_bandwidth_khz; u32 bw; for (bw = MHZ_TO_KHZ(20); bw >= MHZ_TO_KHZ(5); bw = bw / 2) { @@ -1786,31 +1793,7 @@ static void handle_channel_custom(struct
wiphy *wiphy, chan_reg_rule_print_dbg(regd, chan, reg_rule); power_rule = &reg_rule->power_rule; - freq_range = &reg_rule->freq_range; - - max_bandwidth_khz = freq_range->max_bandwidth_khz; - /* Check if auto calculation requested */ - if (reg_rule->flags & NL80211_RRF_AUTO_BW) - max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); - - /* If we get a reg_rule we can assume that at least 5Mhz fit */ - if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), - MHZ_TO_KHZ(10))) - bw_flags |= IEEE80211_CHAN_NO_10MHZ; - if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), - MHZ_TO_KHZ(20))) - bw_flags |= IEEE80211_CHAN_NO_20MHZ; - - if (max_bandwidth_khz < MHZ_TO_KHZ(10)) - bw_flags |= IEEE80211_CHAN_NO_10MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(20)) - bw_flags |= IEEE80211_CHAN_NO_20MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(40)) - bw_flags |= IEEE80211_CHAN_NO_HT40; - if (max_bandwidth_khz < MHZ_TO_KHZ(80)) - bw_flags |= IEEE80211_CHAN_NO_80MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(160)) - bw_flags |= IEEE80211_CHAN_NO_160MHZ; + bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan); chan->dfs_state_entered = jiffies; chan->dfs_state = NL80211_DFS_USABLE; -- cgit v1.2.3 From c39b336deb2ec92751488531a05fd14bbe7fa89e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 26 Nov 2015 20:49:38 +0200 Subject: mac80211: Allow a STA to join an IBSS with 80+80 MHz channel While it was possible to create an IBSS with 80+80 MHz channel, joining such an IBSS resulted in falling back to 20 MHz channel with VHT disabled due to a missing switch case for 80+80.
Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 337bb5d78003..f7fc0e00497f 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -428,6 +428,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, chandef.width = sdata->u.ibss.chandef.width; break; case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_160: chandef = sdata->u.ibss.chandef; chandef.chan = cbss->channel; -- cgit v1.2.3 From 7d37fcd409199f76da522e6f6670a354ac468002 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Dec 2015 23:15:26 +0100 Subject: mac80211: reject zero cookie in mgmt-tx/roc cancel When cancelling, you can cancel "any" (first in list) mgmt-tx or remain-on-channel operation by using the value 0 for the cookie along with the *opposite* operation, i.e. * cancel the first mgmt-tx by cancelling roc with 0 cookie * cancel the first roc by cancelling mgmt-tx with 0 cookie This isn't really that bad since userspace should only pass cookies that we gave it, but could lead to hard-to-debug issues so better prevent it and reject zero values since we never hand those out. 
Signed-off-by: Johannes Berg --- net/mac80211/offchannel.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index cfd3356e26fd..6719b27aad66 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -697,6 +697,9 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, struct ieee80211_roc_work *roc, *tmp, *found = NULL; int ret; + if (!cookie) + return -ENOENT; + mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { if (!mgmt_tx && roc->cookie != cookie) -- cgit v1.2.3 From 3ef0952ca85e28226b09a6d833c30e3e604a63c8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 3 Dec 2015 21:12:32 +0100 Subject: ipv6: Only act upon NETDEV_*_TYPE_CHANGE if we have ipv6 addresses An interface changing type may not have IPv6 addresses. Don't call the address configuration type change in this case. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6936d0d8c6b1..5e9111da449d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3287,7 +3287,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_PRE_TYPE_CHANGE: case NETDEV_POST_TYPE_CHANGE: - addrconf_type_change(dev, event); + if (idev) + addrconf_type_change(dev, event); break; } -- cgit v1.2.3 From a1a66b1100373ead1fa2383bc3dee42c508bb504 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 3 Dec 2015 21:12:33 +0100 Subject: batman-adv: Act on NETDEV_*_TYPE_CHANGE events A network interface can change type. It may change from a type which batman does not support, e.g. hdlc, to one it does, e.g. hdlc-eth. When an interface changes type, it sends two notifications. Handle these notifications. Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/batman-adv/hard-interface.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index aa8867e1d983..a58184fdf5fd 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -709,7 +709,8 @@ static int batadv_hard_if_event(struct notifier_block *this, } hard_iface = batadv_hardif_get_by_netdev(net_dev); - if (!hard_iface && event == NETDEV_REGISTER) + if (!hard_iface && (event == NETDEV_REGISTER || + event == NETDEV_POST_TYPE_CHANGE)) hard_iface = batadv_hardif_add_interface(net_dev); if (!hard_iface) @@ -724,6 +725,7 @@ static int batadv_hard_if_event(struct notifier_block *this, batadv_hardif_deactivate_interface(hard_iface); break; case NETDEV_UNREGISTER: + case NETDEV_PRE_TYPE_CHANGE: list_del_rcu(&hard_iface->list); batadv_hardif_remove_interface(hard_iface); -- cgit v1.2.3 From b618aaa91b5870e7bd139987ac4b7bf0851142d0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Dec 2015 15:01:31 +0100 Subject: net: constify netif_is_* helpers net_device param As suggested by Eric, these helpers should have const dev param. Suggested-by: Eric Dumazet Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 2 +- include/linux/netdevice.h | 22 +++++++++++----------- net/core/dev.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 67ce5bd3b56a..05f5879821b8 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -73,7 +73,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); -static inline bool is_vlan_dev(struct net_device *dev) +static inline bool is_vlan_dev(const struct net_device *dev) { return dev->priv_flags & IFF_802_1Q_VLAN; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3efe017fe419..1bb21ff0fa64 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3661,7 +3661,7 @@ extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(struct net_device *dev)); + bool (*type_check)(const struct net_device *dev)); int skb_checksum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); @@ -3858,32 +3858,32 @@ static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, skb->mac_len = mac_len; } -static inline bool netif_is_macvlan(struct net_device *dev) +static inline bool netif_is_macvlan(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; } -static inline bool netif_is_macvlan_port(struct net_device *dev) +static inline bool netif_is_macvlan_port(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN_PORT; } -static inline bool netif_is_ipvlan(struct net_device *dev) +static inline bool netif_is_ipvlan(const struct net_device *dev) { return dev->priv_flags & IFF_IPVLAN_SLAVE; } -static inline bool netif_is_ipvlan_port(struct net_device 
*dev) +static inline bool netif_is_ipvlan_port(const struct net_device *dev) { return dev->priv_flags & IFF_IPVLAN_MASTER; } -static inline bool netif_is_bond_master(struct net_device *dev) +static inline bool netif_is_bond_master(const struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; } -static inline bool netif_is_bond_slave(struct net_device *dev) +static inline bool netif_is_bond_slave(const struct net_device *dev) { return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; } @@ -3918,22 +3918,22 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } -static inline bool netif_is_team_master(struct net_device *dev) +static inline bool netif_is_team_master(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM; } -static inline bool netif_is_team_port(struct net_device *dev) +static inline bool netif_is_team_port(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM_PORT; } -static inline bool netif_is_lag_master(struct net_device *dev) +static inline bool netif_is_lag_master(const struct net_device *dev) { return netif_is_bond_master(dev) || netif_is_team_master(dev); } -static inline bool netif_is_lag_port(struct net_device *dev) +static inline bool netif_is_lag_port(const struct net_device *dev) { return netif_is_bond_slave(dev) || netif_is_team_port(dev); } diff --git a/net/core/dev.c b/net/core/dev.c index d1706e88fbeb..e5c395473eba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5734,7 +5734,7 @@ EXPORT_SYMBOL(netdev_lower_dev_get_private); int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(struct net_device *dev)) + bool (*type_check)(const struct net_device *dev)) { struct net_device *lower = NULL; struct list_head *iter; -- cgit v1.2.3 From ea3793ee29d3621faf857fa8ef5425e9ff9a756d Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Sun, 6 Dec 2015 21:11:34 +0000 Subject: core: enable more fine-grained 
datagram reception control The __skb_recv_datagram routine in core/datagram.c provides a general skb reception facility supposed to be utilized by protocol modules providing datagram sockets. It encompasses both the actual recvmsg code and a surrounding 'sleep until data is available' loop. This is inconvenient if a protocol module has to use additional locking in order to maintain some per-socket state the generic datagram socket code is unaware of (as the af_unix code does). The patch below moves the recvmsg proper code into a new __skb_try_recv_datagram routine which doesn't sleep and renames wait_for_more_packets to __skb_wait_for_more_packets, both routines being exported interfaces. The original __skb_recv_datagram routine is reimplemented on top of these two functions such that its user-visible behaviour remains unchanged. Signed-off-by: Rainer Weikusat Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++ net/core/datagram.c | 77 +++++++++++++++++++++++++++++++------------------- 2 files changed, 54 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c9c394bf0771..9b9b9ead7bb3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2785,6 +2785,12 @@ static inline void skb_frag_list_init(struct sk_buff *skb) #define skb_walk_frags(skb, iter) \ for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) + +int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, + const struct sk_buff *skb); +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, + int *peeked, int *off, int *err, + struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, diff --git a/net/core/datagram.c b/net/core/datagram.c index d62af69ad844..7daff66d3d0b 100644 --- a/net/core/datagram.c +++ 
b/net/core/datagram.c @@ -83,8 +83,8 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn /* * Wait for the last received packet to be different from skb */ -static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, - const struct sk_buff *skb) +int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, + const struct sk_buff *skb) { int error; DEFINE_WAIT_FUNC(wait, receiver_wake_function); @@ -130,6 +130,7 @@ out_noerr: error = 1; goto out; } +EXPORT_SYMBOL(__skb_wait_for_more_packets); static struct sk_buff *skb_set_peeked(struct sk_buff *skb) { @@ -161,13 +162,15 @@ done: } /** - * __skb_recv_datagram - Receive a datagram skbuff + * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts * @err: error code returned + * @last: set to last peeked message to inform the wait function + * what to look for when peeking * * Get a datagram skbuff, understands the peeking, nonblocking wakeups * and possible races. This replaces identical code in packet, raw and @@ -175,9 +178,11 @@ done: * the long standing peek and read race for datagram sockets. If you * alter this routine remember it must be re-entrant. * - * This function will lock the socket if a skb is returned, so the caller - * needs to unlock the socket in that case (usually by calling - * skb_free_datagram) + * This function will lock the socket if a skb is returned, so + * the caller needs to unlock the socket in that case (usually by + * calling skb_free_datagram). Returns NULL with *err set to + * -EAGAIN if no data was available or to some other value if an + * error was detected. * * * It does not lock socket since today. This function is * * free of race conditions. 
This measure should/can improve @@ -191,13 +196,13 @@ done: * quite explicitly by POSIX 1003.1g, don't change them without having * the standard around please. */ -struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, - int *peeked, int *off, int *err) +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, + int *peeked, int *off, int *err, + struct sk_buff **last) { struct sk_buff_head *queue = &sk->sk_receive_queue; - struct sk_buff *skb, *last; + struct sk_buff *skb; unsigned long cpu_flags; - long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() */ @@ -206,8 +211,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, if (error) goto no_packet; - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - do { /* Again only user level code calls this function, so nothing * interrupt level will suddenly eat the receive_queue. @@ -217,10 +220,10 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, */ int _off = *off; - last = (struct sk_buff *)queue; + *last = (struct sk_buff *)queue; spin_lock_irqsave(&queue->lock, cpu_flags); skb_queue_walk(queue, skb) { - last = skb; + *last = skb; *peeked = skb->peeked; if (flags & MSG_PEEK) { if (_off >= skb->len && (skb->len || _off || @@ -231,8 +234,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, skb = skb_set_peeked(skb); error = PTR_ERR(skb); - if (IS_ERR(skb)) - goto unlock_err; + if (IS_ERR(skb)) { + spin_unlock_irqrestore(&queue->lock, + cpu_flags); + goto no_packet; + } atomic_inc(&skb->users); } else @@ -242,25 +248,38 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, *off = _off; return skb; } + spin_unlock_irqrestore(&queue->lock, cpu_flags); + } while (sk_can_busy_loop(sk) && + sk_busy_loop(sk, flags & MSG_DONTWAIT)); - if (sk_can_busy_loop(sk) && - sk_busy_loop(sk, flags & MSG_DONTWAIT)) - continue; + error = -EAGAIN; - /* User doesn't want to wait 
*/ - error = -EAGAIN; - if (!timeo) - goto no_packet; +no_packet: + *err = error; + return NULL; +} +EXPORT_SYMBOL(__skb_try_recv_datagram); - } while (!wait_for_more_packets(sk, err, &timeo, last)); +struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, + int *peeked, int *off, int *err) +{ + struct sk_buff *skb, *last; + long timeo; - return NULL; + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + do { + skb = __skb_try_recv_datagram(sk, flags, peeked, off, err, + &last); + if (skb) + return skb; + + if (*err != EAGAIN) + break; + } while (timeo && + !__skb_wait_for_more_packets(sk, err, &timeo, last)); -unlock_err: - spin_unlock_irqrestore(&queue->lock, cpu_flags); -no_packet: - *err = error; return NULL; } EXPORT_SYMBOL(__skb_recv_datagram); -- cgit v1.2.3 From 64874280889e7c0b2c9266705363627d4c92cf01 Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Sun, 6 Dec 2015 21:11:38 +0000 Subject: af_unix: fix unix_dgram_recvmsg entry locking The current unix_dgram_recvmsg code acquires the u->readlock mutex in order to protect access to the peek offset prior to calling __skb_recv_datagram for actually receiving data. This implies that a blocking reader will go to sleep with this mutex held if there's presently no data to return to userspace. Two non-desirable side effects of this are that a later non-blocking read call on the same socket will block on the ->readlock mutex until the earlier blocking call releases it (or the reader is interrupted) and that later blocking read calls will wait longer than the effective socket read timeout says they should: The timeout will only start 'ticking' once such a reader hits the schedule_timeout in wait_for_more_packets (core.c) while the time it already had to wait until it could acquire the mutex is unaccounted for. 
The patch avoids both by using the __skb_try_recv_datagram and __skb_wait_for_more_packets functions created by the first patch to implement a unix_dgram_recvmsg read loop which releases the readlock mutex prior to going to sleep and reacquires it as needed afterwards. Non-blocking readers will thus immediately return with -EAGAIN if there's no data available regardless of any concurrent blocking readers and all blocking readers will end up sleeping via schedule_timeout, thus honouring the configured socket receive timeout. Signed-off-by: Rainer Weikusat Signed-off-by: David S. Miller --- net/unix/af_unix.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 502e572af3fd..1c3c1f3a3ec4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2078,8 +2078,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); - int noblock = flags & MSG_DONTWAIT; - struct sk_buff *skb; + struct sk_buff *skb, *last; + long timeo; int err; int peeked, skip; @@ -2087,26 +2087,32 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, if (flags&MSG_OOB) goto out; - err = mutex_lock_interruptible(&u->readlock); - if (unlikely(err)) { - /* recvmsg() in non blocking mode is supposed to return -EAGAIN - * sk_rcvtimeo is not honored by mutex_lock_interruptible() - */ - err = noblock ? 
-EAGAIN : -ERESTARTSYS; - goto out; - } + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - skip = sk_peek_offset(sk, flags); + do { + mutex_lock(&u->readlock); - skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err); - if (!skb) { + skip = sk_peek_offset(sk, flags); + skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, + &last); + if (skb) + break; + + mutex_unlock(&u->readlock); + + if (err != -EAGAIN) + break; + } while (timeo && + !__skb_wait_for_more_packets(sk, &err, &timeo, last)); + + if (!skb) { /* implies readlock unlocked */ unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN)) err = 0; unix_state_unlock(sk); - goto out_unlock; + goto out; } if (wq_has_sleeper(&u->peer_wait)) @@ -2164,7 +2170,6 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, out_free: skb_free_datagram(sk, skb); -out_unlock: mutex_unlock(&u->readlock); out: return err; -- cgit v1.2.3 From 1b894521e60c1b91db1e8ba1278660e5c89f1b5f Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 6 Dec 2015 21:19:15 +0200 Subject: mac80211: handle HW ROC expired properly In case of HW ROC, when the driver reports that the ROC expired, it is not sufficient to purge the ROCs based on the remaining time, as it is possible that the device finished the ROC session before the actual requested duration. To handle such cases, in case of ROC expired notification from the driver, complete all the ROCs which are marked with hw_begun, regardless of the remaining duration. 
Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- net/mac80211/offchannel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 6719b27aad66..8b2f4eaac2ba 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -224,7 +224,11 @@ static unsigned long ieee80211_end_finished_rocs(struct ieee80211_local *local, msecs_to_jiffies(roc->duration) - now; - if (roc->abort || remaining <= 0) + /* In case of HW ROC, it is possible that the HW finished the + * ROC session before the actual requested time. In such a case + * end the ROC session (disregarding the remaining time). + */ + if (roc->abort || roc->hw_begun || remaining <= 0) ieee80211_roc_notify_destroy(roc); else remaining_dur_min = min(remaining_dur_min, remaining); -- cgit v1.2.3 From 4baee937b8d551c89f61542a575378e407b63415 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Dec 2015 13:57:32 +0100 Subject: net: dsa: remove DSA link polling Since no more DSA driver uses the polling callback, and since the phylib handles the link detection, remove the link polling work and timer code. Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- include/net/dsa.h | 12 ------------ net/dsa/dsa.c | 43 ------------------------------------------- 2 files changed, 55 deletions(-) (limited to 'net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 3f23dd9d6a69..26a0e86e611e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -116,13 +116,6 @@ struct dsa_switch_tree { s8 cpu_switch; s8 cpu_port; - /* - * Link state polling. - */ - int link_poll_needed; - struct work_struct link_poll_work; - struct timer_list link_poll_timer; - /* * Data for the individual switch chips. */ @@ -231,11 +224,6 @@ struct dsa_switch_driver { int (*phy_write)(struct dsa_switch *ds, int port, int regnum, u16 val); - /* - * Link state polling and IRQ handling. 
- */ - void (*poll_link)(struct dsa_switch *ds); - /* * Link state adjustment (called from libphy) */ diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index b7448c8490ac..0f41f71efac1 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -508,33 +508,6 @@ static int dsa_switch_resume(struct dsa_switch *ds) } #endif - -/* link polling *************************************************************/ -static void dsa_link_poll_work(struct work_struct *ugly) -{ - struct dsa_switch_tree *dst; - int i; - - dst = container_of(ugly, struct dsa_switch_tree, link_poll_work); - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds != NULL && ds->drv->poll_link != NULL) - ds->drv->poll_link(ds); - } - - mod_timer(&dst->link_poll_timer, round_jiffies(jiffies + HZ)); -} - -static void dsa_link_poll_timer(unsigned long _dst) -{ - struct dsa_switch_tree *dst = (void *)_dst; - - schedule_work(&dst->link_poll_work); -} - - /* platform driver init and cleanup *****************************************/ static int dev_is_class(struct device *dev, void *class) { @@ -877,8 +850,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, } dst->ds[i] = ds; - if (ds->drv->poll_link != NULL) - dst->link_poll_needed = 1; ++configured; } @@ -897,15 +868,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, wmb(); dev->dsa_ptr = (void *)dst; - if (dst->link_poll_needed) { - INIT_WORK(&dst->link_poll_work, dsa_link_poll_work); - init_timer(&dst->link_poll_timer); - dst->link_poll_timer.data = (unsigned long)dst; - dst->link_poll_timer.function = dsa_link_poll_timer; - dst->link_poll_timer.expires = round_jiffies(jiffies + HZ); - add_timer(&dst->link_poll_timer); - } - return 0; } @@ -972,11 +934,6 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - if (dst->link_poll_needed) - del_timer_sync(&dst->link_poll_timer); - - flush_work(&dst->link_poll_work); - for (i = 0; i < dst->pd->nr_chips; i++) { 
struct dsa_switch *ds = dst->ds[i]; -- cgit v1.2.3 From b0dc635d923cd5aafa4e99973f529bf68c582738 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Dec 2015 13:57:33 +0100 Subject: net: dsa: cleanup resources upon module removal Make sure that we unassign the master_netdev dsa_ptr to make the packet processing go through the regular Ethernet receive path. Suggested-by: Florian Fainelli Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0f41f71efac1..d9e0172116b6 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -985,6 +985,14 @@ static int dsa_suspend(struct device *d) struct dsa_switch_tree *dst = platform_get_drvdata(pdev); int i, ret = 0; + dst->master_netdev->dsa_ptr = NULL; + + /* If we used a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point get sent + * without the tag and go through the regular receive path. + */ + wmb(); + for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; -- cgit v1.2.3 From 679fb46c57859b59a70257477bfbdfc7edfac4f5 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Dec 2015 13:57:34 +0100 Subject: net: dsa: Add missing master netdev dev_put() calls Upon probe failure or unbinding, add missing dev_put() calls. Signed-off-by: Neil Armstrong Signed-off-by: David S. 
Miller --- net/dsa/dsa.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index d9e0172116b6..d22d303efd5c 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -919,8 +919,10 @@ static int dsa_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dst); ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); - if (ret) + if (ret) { + dev_put(dev); goto out; + } return 0; @@ -940,6 +942,8 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) if (ds) dsa_switch_destroy(ds); } + + dev_put(dst->master_netdev); } static int dsa_remove(struct platform_device *pdev) -- cgit v1.2.3 From cda5c15b23fb9d683a491e8bd137d11d8552ac02 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Dec 2015 13:57:35 +0100 Subject: net: dsa: move dsa slave destroy code to slave.c Move dsa slave dedicated code from dsa_switch_destroy to a new dsa_slave_destroy function in slave.c. Add the netif_carrier_off and phy_disconnect calls in order to correctly cleanup the netdev state and PHY state machine. Signed-off-by: Frode Isaksen Signed-off-by: Neil Armstrong Signed-off-by: David S. 
Miller --- net/dsa/dsa.c | 3 +-- net/dsa/dsa_priv.h | 1 + net/dsa/slave.c | 11 +++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index d22d303efd5c..208d1b257194 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -456,8 +456,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (!ds->ports[port]) continue; - unregister_netdev(ds->ports[port]); - free_netdev(ds->ports[port]); + dsa_slave_destroy(ds->ports[port]); } mdiobus_unregister(ds->slave_mii_bus); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 311796c809af..1d1a54687e4a 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -61,6 +61,7 @@ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); int dsa_slave_create(struct dsa_switch *ds, struct device *parent, int port, char *name); +void dsa_slave_destroy(struct net_device *slave_dev); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_netdevice_event(struct notifier_block *unused, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 7bc787b095c8..1e9e9424a33d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1212,6 +1212,17 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, return 0; } +void dsa_slave_destroy(struct net_device *slave_dev) +{ + struct dsa_slave_priv *p = netdev_priv(slave_dev); + + netif_carrier_off(slave_dev); + if (p->phy) + phy_disconnect(p->phy); + unregister_netdev(slave_dev); + free_netdev(slave_dev); +} + static bool dsa_slave_dev_check(struct net_device *dev) { return dev->netdev_ops == &dsa_slave_netdev_ops; -- cgit v1.2.3 From 760a4322470e3990b14e09bfe80c9c75c77f33dd Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Tue, 8 Dec 2015 14:47:56 +0000 Subject: net: Fix inverted test in __skb_recv_datagram As the kernel generally uses negated error numbers, *err needs to be compared with -EAGAIN 
(d'oh). Signed-off-by: Rainer Weikusat Fixes: ea3793ee29d3 ("core: enable more fine-grained datagram reception control") Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 7daff66d3d0b..fa9dc6450b08 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -275,7 +275,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, if (skb) return skb; - if (*err != EAGAIN) + if (*err != -EAGAIN) break; } while (timeo && !__skb_wait_for_more_packets(sk, err, &timeo, last)); -- cgit v1.2.3 From 8ac2837c89c8c0fcad557e4380aeef80580390f9 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Dec 2015 10:51:12 +0800 Subject: Revert "Merge branch 'vsock-virtio'" This reverts commit 0d76d6e8b2507983a2cae4c09880798079007421 and merge commit c402293bd76fbc93e52ef8c0947ab81eea3ae019, reversing changes made to c89359a42e2a49656451569c382eed63e781153c. The virtio-vsock device specification is not finalized yet. Michael Tsirkin voiced concern about merging this code when the hardware interface (and possibly the userspace interface) could still change. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. 
Miller --- drivers/vhost/Kconfig | 4 - drivers/vhost/Kconfig.vsock | 7 - drivers/vhost/Makefile | 4 - drivers/vhost/vsock.c | 630 --------------- drivers/vhost/vsock.h | 4 - include/linux/virtio_vsock.h | 209 ----- include/net/af_vsock.h | 2 - include/uapi/linux/virtio_ids.h | 1 - include/uapi/linux/virtio_vsock.h | 89 --- net/vmw_vsock/Kconfig | 18 - net/vmw_vsock/Makefile | 2 - net/vmw_vsock/af_vsock.c | 70 -- net/vmw_vsock/virtio_transport.c | 466 ----------- net/vmw_vsock/virtio_transport_common.c | 1272 ------------------------------- 14 files changed, 2778 deletions(-) delete mode 100644 drivers/vhost/Kconfig.vsock delete mode 100644 drivers/vhost/vsock.c delete mode 100644 drivers/vhost/vsock.h delete mode 100644 include/linux/virtio_vsock.h delete mode 100644 include/uapi/linux/virtio_vsock.h delete mode 100644 net/vmw_vsock/virtio_transport.c delete mode 100644 net/vmw_vsock/virtio_transport_common.c (limited to 'net') diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 81449bfc8d3b..533eaf04f12f 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -47,7 +47,3 @@ config VHOST_CROSS_ENDIAN_LEGACY adds some overhead, it is disabled by default. If unsure, say "N". 
- -if STAGING -source "drivers/vhost/Kconfig.vsock" -endif diff --git a/drivers/vhost/Kconfig.vsock b/drivers/vhost/Kconfig.vsock deleted file mode 100644 index 3491865d3eb9..000000000000 --- a/drivers/vhost/Kconfig.vsock +++ /dev/null @@ -1,7 +0,0 @@ -config VHOST_VSOCK - tristate "vhost virtio-vsock driver" - depends on VSOCKETS && EVENTFD - select VIRTIO_VSOCKETS_COMMON - default n - ---help--- - Say M here to enable the vhost-vsock for virtio-vsock guests diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index 6b012b986b57..e0441c34db1c 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -4,9 +4,5 @@ vhost_net-y := net.o obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o vhost_scsi-y := scsi.o -obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o -vhost_vsock-y := vsock.o - obj-$(CONFIG_VHOST_RING) += vringh.o - obj-$(CONFIG_VHOST) += vhost.o diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c deleted file mode 100644 index 64bcb10bb901..000000000000 --- a/drivers/vhost/vsock.c +++ /dev/null @@ -1,630 +0,0 @@ -/* - * vhost transport for vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ -#include -#include -#include -#include -#include -#include - -#include -#include "vhost.h" -#include "vsock.h" - -#define VHOST_VSOCK_DEFAULT_HOST_CID 2 - -static int vhost_transport_socket_init(struct vsock_sock *vsk, - struct vsock_sock *psk); - -enum { - VHOST_VSOCK_FEATURES = VHOST_FEATURES, -}; - -/* Used to track all the vhost_vsock instances on the system. 
*/ -static LIST_HEAD(vhost_vsock_list); -static DEFINE_MUTEX(vhost_vsock_mutex); - -struct vhost_vsock_virtqueue { - struct vhost_virtqueue vq; -}; - -struct vhost_vsock { - /* Vhost device */ - struct vhost_dev dev; - /* Vhost vsock virtqueue*/ - struct vhost_vsock_virtqueue vqs[VSOCK_VQ_MAX]; - /* Link to global vhost_vsock_list*/ - struct list_head list; - /* Head for pkt from host to guest */ - struct list_head send_pkt_list; - /* Work item to send pkt */ - struct vhost_work send_pkt_work; - /* Wait queue for send pkt */ - wait_queue_head_t queue_wait; - /* Used for global tx buf limitation */ - u32 total_tx_buf; - /* Guest contex id this vhost_vsock instance handles */ - u32 guest_cid; -}; - -static u32 vhost_transport_get_local_cid(void) -{ - return VHOST_VSOCK_DEFAULT_HOST_CID; -} - -static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) -{ - struct vhost_vsock *vsock; - - mutex_lock(&vhost_vsock_mutex); - list_for_each_entry(vsock, &vhost_vsock_list, list) { - if (vsock->guest_cid == guest_cid) { - mutex_unlock(&vhost_vsock_mutex); - return vsock; - } - } - mutex_unlock(&vhost_vsock_mutex); - - return NULL; -} - -static void -vhost_transport_do_send_pkt(struct vhost_vsock *vsock, - struct vhost_virtqueue *vq) -{ - bool added = false; - - mutex_lock(&vq->mutex); - vhost_disable_notify(&vsock->dev, vq); - for (;;) { - struct virtio_vsock_pkt *pkt; - struct iov_iter iov_iter; - unsigned out, in; - struct sock *sk; - size_t nbytes; - size_t len; - int head; - - if (list_empty(&vsock->send_pkt_list)) { - vhost_enable_notify(&vsock->dev, vq); - break; - } - - head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - &out, &in, NULL, NULL); - pr_debug("%s: head = %d\n", __func__, head); - if (head < 0) - break; - - if (head == vq->num) { - if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { - vhost_disable_notify(&vsock->dev, vq); - continue; - } - break; - } - - pkt = list_first_entry(&vsock->send_pkt_list, - struct virtio_vsock_pkt, list); - 
list_del_init(&pkt->list); - - if (out) { - virtio_transport_free_pkt(pkt); - vq_err(vq, "Expected 0 output buffers, got %u\n", out); - break; - } - - len = iov_length(&vq->iov[out], in); - iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); - - nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); - if (nbytes != sizeof(pkt->hdr)) { - virtio_transport_free_pkt(pkt); - vq_err(vq, "Faulted on copying pkt hdr\n"); - break; - } - - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { - virtio_transport_free_pkt(pkt); - vq_err(vq, "Faulted on copying pkt buf\n"); - break; - } - - vhost_add_used(vq, head, pkt->len); /* TODO should this be sizeof(pkt->hdr) + pkt->len? */ - added = true; - - virtio_transport_dec_tx_pkt(pkt); - vsock->total_tx_buf -= pkt->len; - - sk = sk_vsock(pkt->trans->vsk); - /* Release refcnt taken in vhost_transport_send_pkt */ - sock_put(sk); - - virtio_transport_free_pkt(pkt); - } - if (added) - vhost_signal(&vsock->dev, vq); - mutex_unlock(&vq->mutex); - - if (added) - wake_up(&vsock->queue_wait); -} - -static void vhost_transport_send_pkt_work(struct vhost_work *work) -{ - struct vhost_virtqueue *vq; - struct vhost_vsock *vsock; - - vsock = container_of(work, struct vhost_vsock, send_pkt_work); - vq = &vsock->vqs[VSOCK_VQ_RX].vq; - - vhost_transport_do_send_pkt(vsock, vq); -} - -static int -vhost_transport_send_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info) -{ - u32 src_cid, src_port, dst_cid, dst_port; - struct virtio_transport *trans; - struct virtio_vsock_pkt *pkt; - struct vhost_virtqueue *vq; - struct vhost_vsock *vsock; - u32 pkt_len = info->pkt_len; - DEFINE_WAIT(wait); - - src_cid = vhost_transport_get_local_cid(); - src_port = vsk->local_addr.svm_port; - if (!info->remote_cid) { - dst_cid = vsk->remote_addr.svm_cid; - dst_port = vsk->remote_addr.svm_port; - } else { - dst_cid = info->remote_cid; - dst_port = info->remote_port; - } - - /* Find the vhost_vsock according to guest 
context id */ - vsock = vhost_vsock_get(dst_cid); - if (!vsock) - return -ENODEV; - - trans = vsk->trans; - vq = &vsock->vqs[VSOCK_VQ_RX].vq; - - /* we can send less than pkt_len bytes */ - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - - /* virtio_transport_get_credit might return less than pkt_len credit */ - pkt_len = virtio_transport_get_credit(trans, pkt_len); - - /* Do not send zero length OP_RW pkt*/ - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) - return pkt_len; - - /* Respect global tx buf limitation */ - mutex_lock(&vq->mutex); - while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { - prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vq->mutex); - schedule(); - mutex_lock(&vq->mutex); - finish_wait(&vsock->queue_wait, &wait); - } - vsock->total_tx_buf += pkt_len; - mutex_unlock(&vq->mutex); - - pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len, - src_cid, src_port, - dst_cid, dst_port); - if (!pkt) { - mutex_lock(&vq->mutex); - vsock->total_tx_buf -= pkt_len; - mutex_unlock(&vq->mutex); - virtio_transport_put_credit(trans, pkt_len); - return -ENOMEM; - } - - pr_debug("%s:info->pkt_len= %d\n", __func__, pkt_len); - /* Released in vhost_transport_do_send_pkt */ - sock_hold(&trans->vsk->sk); - virtio_transport_inc_tx_pkt(pkt); - - /* Queue it up in vhost work */ - mutex_lock(&vq->mutex); - list_add_tail(&pkt->list, &vsock->send_pkt_list); - vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); - mutex_unlock(&vq->mutex); - - return pkt_len; -} - -static struct virtio_transport_pkt_ops vhost_ops = { - .send_pkt = vhost_transport_send_pkt, -}; - -static struct virtio_vsock_pkt * -vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, - unsigned int out, unsigned int in) -{ - struct virtio_vsock_pkt *pkt; - struct iov_iter iov_iter; - size_t nbytes; - size_t len; - - if (in != 0) { - vq_err(vq, "Expected 0 input buffers, got %u\n", in); - return 
NULL; - } - - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return NULL; - - len = iov_length(vq->iov, out); - iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); - - nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); - if (nbytes != sizeof(pkt->hdr)) { - vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", - sizeof(pkt->hdr), nbytes); - kfree(pkt); - return NULL; - } - - if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_DGRAM) - pkt->len = le32_to_cpu(pkt->hdr.len) & 0XFFFF; - else if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) - pkt->len = le32_to_cpu(pkt->hdr.len); - - /* No payload */ - if (!pkt->len) - return pkt; - - /* The pkt is too big */ - if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { - kfree(pkt); - return NULL; - } - - pkt->buf = kmalloc(pkt->len, GFP_KERNEL); - if (!pkt->buf) { - kfree(pkt); - return NULL; - } - - nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { - vq_err(vq, "Expected %u byte payload, got %zu bytes\n", - pkt->len, nbytes); - virtio_transport_free_pkt(pkt); - return NULL; - } - - return pkt; -} - -static void vhost_vsock_handle_ctl_kick(struct vhost_work *work) -{ - struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, - poll.work); - struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, - dev); - - pr_debug("%s vq=%p, vsock=%p\n", __func__, vq, vsock); -} - -static void vhost_vsock_handle_tx_kick(struct vhost_work *work) -{ - struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, - poll.work); - struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, - dev); - struct virtio_vsock_pkt *pkt; - int head; - unsigned int out, in; - bool added = false; - u32 len; - - mutex_lock(&vq->mutex); - vhost_disable_notify(&vsock->dev, vq); - for (;;) { - head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - &out, &in, NULL, NULL); - if (head < 0) - break; - - if (head == vq->num) { - if 
(unlikely(vhost_enable_notify(&vsock->dev, vq))) { - vhost_disable_notify(&vsock->dev, vq); - continue; - } - break; - } - - pkt = vhost_vsock_alloc_pkt(vq, out, in); - if (!pkt) { - vq_err(vq, "Faulted on pkt\n"); - continue; - } - - len = pkt->len; - - /* Only accept correctly addressed packets */ - if (le32_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid && - le32_to_cpu(pkt->hdr.dst_cid) == vhost_transport_get_local_cid()) - virtio_transport_recv_pkt(pkt); - else - virtio_transport_free_pkt(pkt); - - vhost_add_used(vq, head, len); - added = true; - } - if (added) - vhost_signal(&vsock->dev, vq); - mutex_unlock(&vq->mutex); -} - -static void vhost_vsock_handle_rx_kick(struct vhost_work *work) -{ - struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, - poll.work); - struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, - dev); - - vhost_transport_do_send_pkt(vsock, vq); -} - -static int vhost_vsock_dev_open(struct inode *inode, struct file *file) -{ - struct vhost_virtqueue **vqs; - struct vhost_vsock *vsock; - int ret; - - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); - if (!vsock) - return -ENOMEM; - - pr_debug("%s:vsock=%p\n", __func__, vsock); - - vqs = kmalloc(VSOCK_VQ_MAX * sizeof(*vqs), GFP_KERNEL); - if (!vqs) { - ret = -ENOMEM; - goto out; - } - - vqs[VSOCK_VQ_CTRL] = &vsock->vqs[VSOCK_VQ_CTRL].vq; - vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX].vq; - vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX].vq; - vsock->vqs[VSOCK_VQ_CTRL].vq.handle_kick = vhost_vsock_handle_ctl_kick; - vsock->vqs[VSOCK_VQ_TX].vq.handle_kick = vhost_vsock_handle_tx_kick; - vsock->vqs[VSOCK_VQ_RX].vq.handle_kick = vhost_vsock_handle_rx_kick; - - vhost_dev_init(&vsock->dev, vqs, VSOCK_VQ_MAX); - - file->private_data = vsock; - init_waitqueue_head(&vsock->queue_wait); - INIT_LIST_HEAD(&vsock->send_pkt_list); - vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); - - mutex_lock(&vhost_vsock_mutex); - list_add_tail(&vsock->list, 
&vhost_vsock_list); - mutex_unlock(&vhost_vsock_mutex); - return 0; - -out: - kfree(vsock); - return ret; -} - -static void vhost_vsock_flush(struct vhost_vsock *vsock) -{ - int i; - - for (i = 0; i < VSOCK_VQ_MAX; i++) - vhost_poll_flush(&vsock->vqs[i].vq.poll); - vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); -} - -static int vhost_vsock_dev_release(struct inode *inode, struct file *file) -{ - struct vhost_vsock *vsock = file->private_data; - - mutex_lock(&vhost_vsock_mutex); - list_del(&vsock->list); - mutex_unlock(&vhost_vsock_mutex); - - vhost_dev_stop(&vsock->dev); - vhost_vsock_flush(vsock); - vhost_dev_cleanup(&vsock->dev, false); - kfree(vsock->dev.vqs); - kfree(vsock); - return 0; -} - -static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u32 guest_cid) -{ - struct vhost_vsock *other; - - /* Refuse reserved CIDs */ - if (guest_cid <= VMADDR_CID_HOST) { - return -EINVAL; - } - - /* Refuse if CID is already in use */ - other = vhost_vsock_get(guest_cid); - if (other && other != vsock) { - return -EADDRINUSE; - } - - mutex_lock(&vhost_vsock_mutex); - vsock->guest_cid = guest_cid; - pr_debug("%s:guest_cid=%d\n", __func__, guest_cid); - mutex_unlock(&vhost_vsock_mutex); - - return 0; -} - -static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) -{ - struct vhost_virtqueue *vq; - int i; - - if (features & ~VHOST_VSOCK_FEATURES) - return -EOPNOTSUPP; - - mutex_lock(&vsock->dev.mutex); - if ((features & (1 << VHOST_F_LOG_ALL)) && - !vhost_log_access_ok(&vsock->dev)) { - mutex_unlock(&vsock->dev.mutex); - return -EFAULT; - } - - for (i = 0; i < VSOCK_VQ_MAX; i++) { - vq = &vsock->vqs[i].vq; - mutex_lock(&vq->mutex); - vq->acked_features = features; - mutex_unlock(&vq->mutex); - } - mutex_unlock(&vsock->dev.mutex); - return 0; -} - -static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, - unsigned long arg) -{ - struct vhost_vsock *vsock = f->private_data; - void __user *argp = (void __user *)arg; - u64 __user 
*featurep = argp; - u32 __user *cidp = argp; - u32 guest_cid; - u64 features; - int r; - - switch (ioctl) { - case VHOST_VSOCK_SET_GUEST_CID: - if (get_user(guest_cid, cidp)) - return -EFAULT; - return vhost_vsock_set_cid(vsock, guest_cid); - case VHOST_GET_FEATURES: - features = VHOST_VSOCK_FEATURES; - if (copy_to_user(featurep, &features, sizeof(features))) - return -EFAULT; - return 0; - case VHOST_SET_FEATURES: - if (copy_from_user(&features, featurep, sizeof(features))) - return -EFAULT; - return vhost_vsock_set_features(vsock, features); - default: - mutex_lock(&vsock->dev.mutex); - r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); - if (r == -ENOIOCTLCMD) - r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); - else - vhost_vsock_flush(vsock); - mutex_unlock(&vsock->dev.mutex); - return r; - } -} - -static const struct file_operations vhost_vsock_fops = { - .owner = THIS_MODULE, - .open = vhost_vsock_dev_open, - .release = vhost_vsock_dev_release, - .llseek = noop_llseek, - .unlocked_ioctl = vhost_vsock_dev_ioctl, -}; - -static struct miscdevice vhost_vsock_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = "vhost-vsock", - .fops = &vhost_vsock_fops, -}; - -static int -vhost_transport_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk) -{ - struct virtio_transport *trans; - int ret; - - ret = virtio_transport_do_socket_init(vsk, psk); - if (ret) - return ret; - - trans = vsk->trans; - trans->ops = &vhost_ops; - - return ret; -} - -static struct vsock_transport vhost_transport = { - .get_local_cid = vhost_transport_get_local_cid, - - .init = vhost_transport_socket_init, - .destruct = virtio_transport_destruct, - .release = virtio_transport_release, - .connect = virtio_transport_connect, - .shutdown = virtio_transport_shutdown, - - .dgram_enqueue = virtio_transport_dgram_enqueue, - .dgram_dequeue = virtio_transport_dgram_dequeue, - .dgram_bind = virtio_transport_dgram_bind, - .dgram_allow = virtio_transport_dgram_allow, - - .stream_enqueue = 
virtio_transport_stream_enqueue, - .stream_dequeue = virtio_transport_stream_dequeue, - .stream_has_data = virtio_transport_stream_has_data, - .stream_has_space = virtio_transport_stream_has_space, - .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, - .stream_is_active = virtio_transport_stream_is_active, - .stream_allow = virtio_transport_stream_allow, - - .notify_poll_in = virtio_transport_notify_poll_in, - .notify_poll_out = virtio_transport_notify_poll_out, - .notify_recv_init = virtio_transport_notify_recv_init, - .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, - .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, - .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, - .notify_send_init = virtio_transport_notify_send_init, - .notify_send_pre_block = virtio_transport_notify_send_pre_block, - .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, - .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, - - .set_buffer_size = virtio_transport_set_buffer_size, - .set_min_buffer_size = virtio_transport_set_min_buffer_size, - .set_max_buffer_size = virtio_transport_set_max_buffer_size, - .get_buffer_size = virtio_transport_get_buffer_size, - .get_min_buffer_size = virtio_transport_get_min_buffer_size, - .get_max_buffer_size = virtio_transport_get_max_buffer_size, -}; - -static int __init vhost_vsock_init(void) -{ - int ret; - - ret = vsock_core_init(&vhost_transport); - if (ret < 0) - return ret; - return misc_register(&vhost_vsock_misc); -}; - -static void __exit vhost_vsock_exit(void) -{ - misc_deregister(&vhost_vsock_misc); - vsock_core_exit(); -}; - -module_init(vhost_vsock_init); -module_exit(vhost_vsock_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); -MODULE_DESCRIPTION("vhost transport for vsock "); diff --git a/drivers/vhost/vsock.h b/drivers/vhost/vsock.h deleted file mode 100644 index 0ddb107b86ca..000000000000 --- a/drivers/vhost/vsock.h +++ /dev/null @@ 
-1,4 +0,0 @@ -#ifndef VHOST_VSOCK_H -#define VHOST_VSOCK_H -#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u32) -#endif diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h deleted file mode 100644 index a5f3ecc038f7..000000000000 --- a/include/linux/virtio_vsock.h +++ /dev/null @@ -1,209 +0,0 @@ -/* - * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers: - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * Copyright (C) Red Hat, Inc., 2013-2015 - * Copyright (C) Asias He , 2013 - * Copyright (C) Stefan Hajnoczi , 2015 - */ - -#ifndef _LINUX_VIRTIO_VSOCK_H -#define _LINUX_VIRTIO_VSOCK_H - -#include -#include -#include - -#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 -#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) -#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) -#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) -#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL -#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) -#define VIRTIO_VSOCK_MAX_TX_BUF_SIZE (1024 * 1024 * 16) -#define VIRTIO_VSOCK_MAX_DGRAM_SIZE (1024 * 64) - -struct vsock_transport_recv_notify_data; -struct vsock_transport_send_notify_data; -struct sockaddr_vm; -struct vsock_sock; - -enum { - VSOCK_VQ_CTRL = 0, - VSOCK_VQ_RX = 1, /* for host to guest data */ - VSOCK_VQ_TX = 2, /* for guest to host data */ - VSOCK_VQ_MAX = 3, -}; - -/* virtio transport socket state */ -struct virtio_transport { - struct virtio_transport_pkt_ops *ops; - struct vsock_sock *vsk; - - u32 buf_size; - u32 buf_size_min; - u32 buf_size_max; - - struct mutex tx_lock; - struct mutex rx_lock; - - struct list_head rx_queue; - u32 rx_bytes; - - /* Protected by trans->tx_lock */ - u32 tx_cnt; - u32 buf_alloc; - u32 peer_fwd_cnt; - u32 peer_buf_alloc; - /* Protected by trans->rx_lock */ - u32 fwd_cnt; - - /* Protected by sk_lock */ - u16 dgram_id; - struct list_head incomplete_dgrams; /* dgram fragments */ -}; - -struct virtio_vsock_pkt { - struct virtio_vsock_hdr hdr; - struct virtio_transport *trans; - struct work_struct work; - struct list_head list; - void *buf; - u32 len; - u32 off; -}; - -struct virtio_vsock_pkt_info { - u32 remote_cid, remote_port; - struct msghdr *msg; - u32 pkt_len; - u16 type; - u16 op; - u32 flags; - u16 dgram_id; - u16 dgram_len; -}; - -struct virtio_transport_pkt_ops { - int (*send_pkt)(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info); -}; - -void virtio_vsock_dumppkt(const char 
*func, - const struct virtio_vsock_pkt *pkt); - -struct sock * -virtio_transport_get_pending(struct sock *listener, - struct virtio_vsock_pkt *pkt); -struct virtio_vsock_pkt * -virtio_transport_alloc_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port); -ssize_t -virtio_transport_stream_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, - int type); -int -virtio_transport_dgram_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags); - -s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); -s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); - -int virtio_transport_do_socket_init(struct vsock_sock *vsk, - struct vsock_sock *psk); -u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); -u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); -u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); -void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); -void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); -void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val); -int -virtio_transport_notify_poll_in(struct vsock_sock *vsk, - size_t target, - bool *data_ready_now); -int -virtio_transport_notify_poll_out(struct vsock_sock *vsk, - size_t target, - bool *space_available_now); - -int virtio_transport_notify_recv_init(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, - size_t target, ssize_t copied, bool data_read, - struct vsock_transport_recv_notify_data *data); -int 
virtio_transport_notify_send_init(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data); -int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data); -int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data); -int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, - ssize_t written, struct vsock_transport_send_notify_data *data); - -u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); -bool virtio_transport_stream_is_active(struct vsock_sock *vsk); -bool virtio_transport_stream_allow(u32 cid, u32 port); -int virtio_transport_dgram_bind(struct vsock_sock *vsk, - struct sockaddr_vm *addr); -bool virtio_transport_dgram_allow(u32 cid, u32 port); - -int virtio_transport_connect(struct vsock_sock *vsk); - -int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); - -void virtio_transport_release(struct vsock_sock *vsk); - -ssize_t -virtio_transport_stream_enqueue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len); -int -virtio_transport_dgram_enqueue(struct vsock_sock *vsk, - struct sockaddr_vm *remote_addr, - struct msghdr *msg, - size_t len); - -void virtio_transport_destruct(struct vsock_sock *vsk); - -void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); -void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); -void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt); -void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt); -u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 wanted); -void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit); -#endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index a0c8fa2ababf..e9eb2d6791b3 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -175,10 +175,8 @@ void vsock_insert_connected(struct vsock_sock *vsk); void 
vsock_remove_bound(struct vsock_sock *vsk); void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); -struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); -int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr); #endif /* __AF_VSOCK_H__ */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 16dcf5d06cd7..77925f587b15 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -39,7 +39,6 @@ #define VIRTIO_ID_9P 9 /* 9p virtio console */ #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */ -#define VIRTIO_ID_VSOCK 13 /* virtio vsock transport */ #define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_INPUT 18 /* virtio input */ diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h deleted file mode 100644 index 8cf9b5682628..000000000000 --- a/include/uapi/linux/virtio_vsock.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers: - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Copyright (C) Red Hat, Inc., 2013-2015 - * Copyright (C) Asias He , 2013 - * Copyright (C) Stefan Hajnoczi , 2015 - */ - -#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H -#define _UAPI_LINUX_VIRTIO_VOSCK_H - -#include -#include -#include - -struct virtio_vsock_config { - __le32 guest_cid; - __le32 max_virtqueue_pairs; -}; - -struct virtio_vsock_hdr { - __le32 src_cid; - __le32 src_port; - __le32 dst_cid; - __le32 dst_port; - __le32 len; - __le16 type; /* enum virtio_vsock_type */ - __le16 op; /* enum virtio_vsock_op */ - __le32 flags; - __le32 buf_alloc; - __le32 fwd_cnt; -}; - -enum virtio_vsock_type { - VIRTIO_VSOCK_TYPE_STREAM = 1, - VIRTIO_VSOCK_TYPE_DGRAM = 2, -}; - -enum virtio_vsock_op { - VIRTIO_VSOCK_OP_INVALID = 0, - - /* Connect operations */ - VIRTIO_VSOCK_OP_REQUEST = 1, - VIRTIO_VSOCK_OP_RESPONSE = 2, - VIRTIO_VSOCK_OP_ACK = 3, - VIRTIO_VSOCK_OP_RST = 4, - VIRTIO_VSOCK_OP_SHUTDOWN = 5, - - /* To send payload */ - VIRTIO_VSOCK_OP_RW = 6, - - /* Tell the peer our credit info */ - VIRTIO_VSOCK_OP_CREDIT_UPDATE = 
7, - /* Request the peer to send the credit info to us */ - VIRTIO_VSOCK_OP_CREDIT_REQUEST = 8, -}; - -/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ -enum virtio_vsock_shutdown { - VIRTIO_VSOCK_SHUTDOWN_RCV = 1, - VIRTIO_VSOCK_SHUTDOWN_SEND = 2, -}; - -#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 74e0bc887a33..14810abedc2e 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -26,21 +26,3 @@ config VMWARE_VMCI_VSOCKETS To compile this driver as a module, choose M here: the module will be called vmw_vsock_vmci_transport. If unsure, say N. - -config VIRTIO_VSOCKETS - tristate "virtio transport for Virtual Sockets" - depends on VSOCKETS && VIRTIO - select VIRTIO_VSOCKETS_COMMON - help - This module implements a virtio transport for Virtual Sockets. - - Enable this transport if your Virtual Machine runs on Qemu/KVM. - - To compile this driver as a module, choose M here: the module - will be called virtio_vsock_transport. If unsure, say N. - -config VIRTIO_VSOCKETS_COMMON - tristate - ---help--- - This option is selected by any driver which needs to access - the virtio_vsock. 
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index cf4c29439081..2ce52d70f224 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -1,7 +1,5 @@ obj-$(CONFIG_VSOCKETS) += vsock.o obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o -obj-$(CONFIG_VIRTIO_VSOCKETS) += virtio_transport.o -obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += virtio_transport_common.o vsock-y += af_vsock.o vsock_addr.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 77247a2b670b..7fd1220fbfa0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -223,17 +223,6 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) return NULL; } -static struct sock *__vsock_find_unbound_socket(struct sockaddr_vm *addr) -{ - struct vsock_sock *vsk; - - list_for_each_entry(vsk, vsock_unbound_sockets, bound_table) - if (addr->svm_port == vsk->local_addr.svm_port) - return sk_vsock(vsk); - - return NULL; -} - static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -309,21 +298,6 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) } EXPORT_SYMBOL_GPL(vsock_find_bound_socket); -struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr) -{ - struct sock *sk; - - spin_lock_bh(&vsock_table_lock); - sk = __vsock_find_unbound_socket(addr); - if (sk) - sock_hold(sk); - - spin_unlock_bh(&vsock_table_lock); - - return sk; -} -EXPORT_SYMBOL_GPL(vsock_find_unbound_socket); - struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -558,50 +532,6 @@ static int __vsock_bind_stream(struct vsock_sock *vsk, return 0; } -int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr) -{ - static u32 port = LAST_RESERVED_PORT + 1; - struct sockaddr_vm new_addr; - - vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); - - if (addr->svm_port == VMADDR_PORT_ANY) { - bool found = false; - unsigned int i; - - for (i 
= 0; i < MAX_PORT_RETRIES; i++) { - if (port <= LAST_RESERVED_PORT) - port = LAST_RESERVED_PORT + 1; - - new_addr.svm_port = port++; - - if (!__vsock_find_unbound_socket(&new_addr)) { - found = true; - break; - } - } - - if (!found) - return -EADDRNOTAVAIL; - } else { - /* If port is in reserved range, ensure caller - * has necessary privileges. - */ - if (addr->svm_port <= LAST_RESERVED_PORT && - !capable(CAP_NET_BIND_SERVICE)) { - return -EACCES; - } - - if (__vsock_find_unbound_socket(&new_addr)) - return -EADDRINUSE; - } - - vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); - - return 0; -} -EXPORT_SYMBOL_GPL(vsock_bind_dgram_generic); - static int __vsock_bind_dgram(struct vsock_sock *vsk, struct sockaddr_vm *addr) { diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c deleted file mode 100644 index df65dca55fa1..000000000000 --- a/net/vmw_vsock/virtio_transport.c +++ /dev/null @@ -1,466 +0,0 @@ -/* - * virtio transport for vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * Some of the code is take from Gerd Hoffmann 's - * early virtio-vsock proof-of-concept bits. - * - * This work is licensed under the terms of the GNU GPL, version 2. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct workqueue_struct *virtio_vsock_workqueue; -static struct virtio_vsock *the_virtio_vsock; -static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ -static void virtio_vsock_rx_fill(struct virtio_vsock *vsock); - -struct virtio_vsock { - /* Virtio device */ - struct virtio_device *vdev; - /* Virtio virtqueue */ - struct virtqueue *vqs[VSOCK_VQ_MAX]; - /* Wait queue for send pkt */ - wait_queue_head_t queue_wait; - /* Work item to send pkt */ - struct work_struct tx_work; - /* Work item to recv pkt */ - struct work_struct rx_work; - /* Mutex to protect send pkt*/ - struct mutex tx_lock; - /* Mutex to protect recv pkt*/ - struct mutex rx_lock; - /* Number of recv buffers */ - int rx_buf_nr; - /* Number of max recv buffers */ - int rx_buf_max_nr; - /* Used for global tx buf limitation */ - u32 total_tx_buf; - /* Guest context id, just like guest ip address */ - u32 guest_cid; -}; - -static struct virtio_vsock *virtio_vsock_get(void) -{ - return the_virtio_vsock; -} - -static u32 virtio_transport_get_local_cid(void) -{ - struct virtio_vsock *vsock = virtio_vsock_get(); - - return vsock->guest_cid; -} - -static int -virtio_transport_send_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info) -{ - u32 src_cid, src_port, dst_cid, dst_port; - int ret, in_sg = 0, out_sg = 0; - struct virtio_transport *trans; - struct virtio_vsock_pkt *pkt; - struct virtio_vsock *vsock; - struct scatterlist hdr, buf, *sgs[2]; - struct virtqueue *vq; - u32 pkt_len = info->pkt_len; - DEFINE_WAIT(wait); - - vsock = virtio_vsock_get(); - if (!vsock) - return -ENODEV; - - src_cid = virtio_transport_get_local_cid(); - src_port = vsk->local_addr.svm_port; - if (!info->remote_cid) { - dst_cid = vsk->remote_addr.svm_cid; - dst_port = vsk->remote_addr.svm_port; - } else { - dst_cid = info->remote_cid; - dst_port = info->remote_port; - } - - trans = 
vsk->trans; - vq = vsock->vqs[VSOCK_VQ_TX]; - - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - pkt_len = virtio_transport_get_credit(trans, pkt_len); - /* Do not send zero length OP_RW pkt*/ - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) - return pkt_len; - - /* Respect global tx buf limitation */ - mutex_lock(&vsock->tx_lock); - while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { - prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vsock->tx_lock); - schedule(); - mutex_lock(&vsock->tx_lock); - finish_wait(&vsock->queue_wait, &wait); - } - vsock->total_tx_buf += pkt_len; - mutex_unlock(&vsock->tx_lock); - - pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len, - src_cid, src_port, - dst_cid, dst_port); - if (!pkt) { - mutex_lock(&vsock->tx_lock); - vsock->total_tx_buf -= pkt_len; - mutex_unlock(&vsock->tx_lock); - virtio_transport_put_credit(trans, pkt_len); - return -ENOMEM; - } - - pr_debug("%s:info->pkt_len= %d\n", __func__, info->pkt_len); - - /* Will be released in virtio_transport_send_pkt_work */ - sock_hold(&trans->vsk->sk); - virtio_transport_inc_tx_pkt(pkt); - - /* Put pkt in the virtqueue */ - sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); - sgs[out_sg++] = &hdr; - if (info->msg && info->pkt_len > 0) { - sg_init_one(&buf, pkt->buf, pkt->len); - sgs[out_sg++] = &buf; - } - - mutex_lock(&vsock->tx_lock); - while ((ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, - GFP_KERNEL)) < 0) { - prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vsock->tx_lock); - schedule(); - mutex_lock(&vsock->tx_lock); - finish_wait(&vsock->queue_wait, &wait); - } - virtqueue_kick(vq); - mutex_unlock(&vsock->tx_lock); - - return pkt_len; -} - -static struct virtio_transport_pkt_ops virtio_ops = { - .send_pkt = virtio_transport_send_pkt, -}; - -static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) -{ - int 
buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - struct virtio_vsock_pkt *pkt; - struct scatterlist hdr, buf, *sgs[2]; - struct virtqueue *vq; - int ret; - - vq = vsock->vqs[VSOCK_VQ_RX]; - - do { - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) { - pr_debug("%s: fail to allocate pkt\n", __func__); - goto out; - } - - /* TODO: use mergeable rx buffer */ - pkt->buf = kmalloc(buf_len, GFP_KERNEL); - if (!pkt->buf) { - pr_debug("%s: fail to allocate pkt->buf\n", __func__); - goto err; - } - - sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); - sgs[0] = &hdr; - - sg_init_one(&buf, pkt->buf, buf_len); - sgs[1] = &buf; - ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); - if (ret) - goto err; - vsock->rx_buf_nr++; - } while (vq->num_free); - if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) - vsock->rx_buf_max_nr = vsock->rx_buf_nr; -out: - virtqueue_kick(vq); - return; -err: - virtqueue_kick(vq); - virtio_transport_free_pkt(pkt); - return; -} - -static void virtio_transport_send_pkt_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, tx_work); - struct virtio_vsock_pkt *pkt; - bool added = false; - struct virtqueue *vq; - unsigned int len; - struct sock *sk; - - vq = vsock->vqs[VSOCK_VQ_TX]; - mutex_lock(&vsock->tx_lock); - do { - virtqueue_disable_cb(vq); - while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { - sk = &pkt->trans->vsk->sk; - virtio_transport_dec_tx_pkt(pkt); - /* Release refcnt taken in virtio_transport_send_pkt */ - sock_put(sk); - vsock->total_tx_buf -= pkt->len; - virtio_transport_free_pkt(pkt); - added = true; - } - } while (!virtqueue_enable_cb(vq)); - mutex_unlock(&vsock->tx_lock); - - if (added) - wake_up(&vsock->queue_wait); -} - -static void virtio_transport_recv_pkt_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, rx_work); - struct virtio_vsock_pkt *pkt; - struct virtqueue *vq; - unsigned int len; - - vq = 
vsock->vqs[VSOCK_VQ_RX]; - mutex_lock(&vsock->rx_lock); - do { - virtqueue_disable_cb(vq); - while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { - pkt->len = len; - virtio_transport_recv_pkt(pkt); - vsock->rx_buf_nr--; - } - } while (!virtqueue_enable_cb(vq)); - - if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) - virtio_vsock_rx_fill(vsock); - mutex_unlock(&vsock->rx_lock); -} - -static void virtio_vsock_ctrl_done(struct virtqueue *vq) -{ -} - -static void virtio_vsock_tx_done(struct virtqueue *vq) -{ - struct virtio_vsock *vsock = vq->vdev->priv; - - if (!vsock) - return; - queue_work(virtio_vsock_workqueue, &vsock->tx_work); -} - -static void virtio_vsock_rx_done(struct virtqueue *vq) -{ - struct virtio_vsock *vsock = vq->vdev->priv; - - if (!vsock) - return; - queue_work(virtio_vsock_workqueue, &vsock->rx_work); -} - -static int -virtio_transport_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk) -{ - struct virtio_transport *trans; - int ret; - - ret = virtio_transport_do_socket_init(vsk, psk); - if (ret) - return ret; - - trans = vsk->trans; - trans->ops = &virtio_ops; - return ret; -} - -static struct vsock_transport virtio_transport = { - .get_local_cid = virtio_transport_get_local_cid, - - .init = virtio_transport_socket_init, - .destruct = virtio_transport_destruct, - .release = virtio_transport_release, - .connect = virtio_transport_connect, - .shutdown = virtio_transport_shutdown, - - .dgram_bind = virtio_transport_dgram_bind, - .dgram_dequeue = virtio_transport_dgram_dequeue, - .dgram_enqueue = virtio_transport_dgram_enqueue, - .dgram_allow = virtio_transport_dgram_allow, - - .stream_dequeue = virtio_transport_stream_dequeue, - .stream_enqueue = virtio_transport_stream_enqueue, - .stream_has_data = virtio_transport_stream_has_data, - .stream_has_space = virtio_transport_stream_has_space, - .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, - .stream_is_active = virtio_transport_stream_is_active, - .stream_allow = 
virtio_transport_stream_allow, - - .notify_poll_in = virtio_transport_notify_poll_in, - .notify_poll_out = virtio_transport_notify_poll_out, - .notify_recv_init = virtio_transport_notify_recv_init, - .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, - .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, - .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, - .notify_send_init = virtio_transport_notify_send_init, - .notify_send_pre_block = virtio_transport_notify_send_pre_block, - .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, - .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, - - .set_buffer_size = virtio_transport_set_buffer_size, - .set_min_buffer_size = virtio_transport_set_min_buffer_size, - .set_max_buffer_size = virtio_transport_set_max_buffer_size, - .get_buffer_size = virtio_transport_get_buffer_size, - .get_min_buffer_size = virtio_transport_get_min_buffer_size, - .get_max_buffer_size = virtio_transport_get_max_buffer_size, -}; - -static int virtio_vsock_probe(struct virtio_device *vdev) -{ - vq_callback_t *callbacks[] = { - virtio_vsock_ctrl_done, - virtio_vsock_rx_done, - virtio_vsock_tx_done, - }; - const char *names[] = { - "ctrl", - "rx", - "tx", - }; - struct virtio_vsock *vsock = NULL; - u32 guest_cid; - int ret; - - ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); - if (ret) - return ret; - - /* Only one virtio-vsock device per guest is supported */ - if (the_virtio_vsock) { - ret = -EBUSY; - goto out; - } - - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); - if (!vsock) { - ret = -ENOMEM; - goto out; - } - - vsock->vdev = vdev; - - ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names); - if (ret < 0) - goto out; - - vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), - &guest_cid, sizeof(guest_cid)); - vsock->guest_cid = le32_to_cpu(guest_cid); - pr_debug("%s:guest_cid=%d\n", __func__, 
vsock->guest_cid); - - ret = vsock_core_init(&virtio_transport); - if (ret < 0) - goto out_vqs; - - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - - vdev->priv = the_virtio_vsock = vsock; - init_waitqueue_head(&vsock->queue_wait); - mutex_init(&vsock->tx_lock); - mutex_init(&vsock->rx_lock); - INIT_WORK(&vsock->rx_work, virtio_transport_recv_pkt_work); - INIT_WORK(&vsock->tx_work, virtio_transport_send_pkt_work); - - mutex_lock(&vsock->rx_lock); - virtio_vsock_rx_fill(vsock); - mutex_unlock(&vsock->rx_lock); - - mutex_unlock(&the_virtio_vsock_mutex); - return 0; - -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); -out: - kfree(vsock); - mutex_unlock(&the_virtio_vsock_mutex); - return ret; -} - -static void virtio_vsock_remove(struct virtio_device *vdev) -{ - struct virtio_vsock *vsock = vdev->priv; - - mutex_lock(&the_virtio_vsock_mutex); - the_virtio_vsock = NULL; - vsock_core_exit(); - mutex_unlock(&the_virtio_vsock_mutex); - - kfree(vsock); -} - -static struct virtio_device_id id_table[] = { - { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, - { 0 }, -}; - -static unsigned int features[] = { -}; - -static struct virtio_driver virtio_vsock_driver = { - .feature_table = features, - .feature_table_size = ARRAY_SIZE(features), - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = virtio_vsock_probe, - .remove = virtio_vsock_remove, -}; - -static int __init virtio_vsock_init(void) -{ - int ret; - - virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); - if (!virtio_vsock_workqueue) - return -ENOMEM; - ret = register_virtio_driver(&virtio_vsock_driver); - if (ret) - destroy_workqueue(virtio_vsock_workqueue); - return ret; -} - -static void __exit virtio_vsock_exit(void) -{ - unregister_virtio_driver(&virtio_vsock_driver); - destroy_workqueue(virtio_vsock_workqueue); -} - -module_init(virtio_vsock_init); -module_exit(virtio_vsock_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); 
-MODULE_DESCRIPTION("virtio transport for vsock"); -MODULE_DEVICE_TABLE(virtio, id_table); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c deleted file mode 100644 index 28f790da6f15..000000000000 --- a/net/vmw_vsock/virtio_transport_common.c +++ /dev/null @@ -1,1272 +0,0 @@ -/* - * common code for virtio vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define COOKIEBITS 24 -#define COOKIEMASK (((u32)1 << COOKIEBITS) - 1) -#define VSOCK_TIMEOUT_INIT 4 - -#define SHA_MESSAGE_WORDS 16 -#define SHA_VSOCK_WORDS 5 - -static u32 vsockcookie_secret[2][SHA_MESSAGE_WORDS - SHA_VSOCK_WORDS + - SHA_DIGEST_WORDS]; - -static DEFINE_PER_CPU(__u32[SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS + - SHA_WORKSPACE_WORDS], vsock_cookie_scratch); - -static u32 cookie_hash(u32 saddr, u32 daddr, u16 sport, u16 dport, - u32 count, int c) -{ - __u32 *tmp = this_cpu_ptr(vsock_cookie_scratch); - - memcpy(tmp + SHA_VSOCK_WORDS, vsockcookie_secret[c], - sizeof(vsockcookie_secret[c])); - tmp[0] = saddr; - tmp[1] = daddr; - tmp[2] = sport; - tmp[3] = dport; - tmp[4] = count; - sha_transform(tmp + SHA_MESSAGE_WORDS, (__u8 *)tmp, - tmp + SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS); - - return tmp[17]; -} - -static u32 -virtio_vsock_secure_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, - u32 count) -{ - u32 h1, h2; - - h1 = cookie_hash(saddr, daddr, sport, dport, 0, 0); - h2 = cookie_hash(saddr, daddr, sport, dport, count, 1); - - return h1 + (count << COOKIEBITS) + (h2 & COOKIEMASK); -} - -static u32 -virtio_vsock_check_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, - u32 count, u32 cookie, u32 maxdiff) -{ - u32 diff; - u32 ret; - - cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0); - - diff = (count - (cookie >> 
COOKIEBITS)) & ((u32)-1 >> COOKIEBITS); - pr_debug("%s: diff=%x\n", __func__, diff); - if (diff >= maxdiff) - return (u32)-1; - - ret = (cookie - - cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) - & COOKIEMASK; - pr_debug("%s: ret=%x\n", __func__, diff); - - return ret; -} - -void virtio_vsock_dumppkt(const char *func, const struct virtio_vsock_pkt *pkt) -{ - pr_debug("%s: pkt=%p, op=%d, len=%d, %d:%d---%d:%d, len=%d\n", - func, pkt, - le16_to_cpu(pkt->hdr.op), - le32_to_cpu(pkt->hdr.len), - le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.dst_port), - pkt->len); -} -EXPORT_SYMBOL_GPL(virtio_vsock_dumppkt); - -struct virtio_vsock_pkt * -virtio_transport_alloc_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt *pkt; - int err; - - BUG_ON(!trans); - - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return NULL; - - pkt->hdr.type = cpu_to_le16(info->type); - pkt->hdr.op = cpu_to_le16(info->op); - pkt->hdr.src_cid = cpu_to_le32(src_cid); - pkt->hdr.src_port = cpu_to_le32(src_port); - pkt->hdr.dst_cid = cpu_to_le32(dst_cid); - pkt->hdr.dst_port = cpu_to_le32(dst_port); - pkt->hdr.flags = cpu_to_le32(info->flags); - pkt->len = len; - pkt->trans = trans; - if (info->type == VIRTIO_VSOCK_TYPE_DGRAM) - pkt->hdr.len = cpu_to_le32(len + (info->dgram_len << 16)); - else if (info->type == VIRTIO_VSOCK_TYPE_STREAM) - pkt->hdr.len = cpu_to_le32(len); - - if (info->msg && len > 0) { - pkt->buf = kmalloc(len, GFP_KERNEL); - if (!pkt->buf) - goto out_pkt; - err = memcpy_from_msg(pkt->buf, info->msg, len); - if (err) - goto out; - } - - return pkt; - -out: - kfree(pkt->buf); -out_pkt: - kfree(pkt); - return NULL; -} -EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); - -struct sock * -virtio_transport_get_pending(struct sock *listener, 
- struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vlistener; - struct vsock_sock *vpending; - struct sockaddr_vm src; - struct sockaddr_vm dst; - struct sock *pending; - - vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); - vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); - - vlistener = vsock_sk(listener); - list_for_each_entry(vpending, &vlistener->pending_links, - pending_links) { - if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && - vsock_addr_equals_addr(&dst, &vpending->local_addr)) { - pending = sk_vsock(vpending); - sock_hold(pending); - return pending; - } - } - - return NULL; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_pending); - -static void virtio_transport_inc_rx_pkt(struct virtio_vsock_pkt *pkt) -{ - pkt->trans->rx_bytes += pkt->len; -} - -static void virtio_transport_dec_rx_pkt(struct virtio_vsock_pkt *pkt) -{ - pkt->trans->rx_bytes -= pkt->len; - pkt->trans->fwd_cnt += pkt->len; -} - -void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt) -{ - mutex_lock(&pkt->trans->tx_lock); - pkt->hdr.fwd_cnt = cpu_to_le32(pkt->trans->fwd_cnt); - pkt->hdr.buf_alloc = cpu_to_le32(pkt->trans->buf_alloc); - mutex_unlock(&pkt->trans->tx_lock); -} -EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); - -void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt) -{ -} -EXPORT_SYMBOL_GPL(virtio_transport_dec_tx_pkt); - -u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 credit) -{ - u32 ret; - - mutex_lock(&trans->tx_lock); - ret = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); - if (ret > credit) - ret = credit; - trans->tx_cnt += ret; - mutex_unlock(&trans->tx_lock); - - pr_debug("%s: ret=%d, buf_alloc=%d, peer_buf_alloc=%d," - "tx_cnt=%d, fwd_cnt=%d, peer_fwd_cnt=%d\n", __func__, - ret, trans->buf_alloc, trans->peer_buf_alloc, - trans->tx_cnt, trans->fwd_cnt, trans->peer_fwd_cnt); - - return ret; -} 
-EXPORT_SYMBOL_GPL(virtio_transport_get_credit); - -void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit) -{ - mutex_lock(&trans->tx_lock); - trans->tx_cnt -= credit; - mutex_unlock(&trans->tx_lock); -} -EXPORT_SYMBOL_GPL(virtio_transport_put_credit); - -static int virtio_transport_send_credit_update(struct vsock_sock *vsk, int type, struct virtio_vsock_hdr *hdr) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, - .type = type, - }; - - if (hdr && type == VIRTIO_VSOCK_TYPE_DGRAM) { - info.remote_cid = le32_to_cpu(hdr->src_cid); - info.remote_port = le32_to_cpu(hdr->src_port); - } - - pr_debug("%s: sk=%p send_credit_update\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static int virtio_transport_send_credit_request(struct vsock_sock *vsk, int type) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_CREDIT_REQUEST, - .type = type, - }; - - pr_debug("%s: sk=%p send_credit_request\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static ssize_t -virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt *pkt; - size_t bytes, total = 0; - int err = -EFAULT; - - mutex_lock(&trans->rx_lock); - while (total < len && trans->rx_bytes > 0 && - !list_empty(&trans->rx_queue)) { - pkt = list_first_entry(&trans->rx_queue, - struct virtio_vsock_pkt, list); - - bytes = len - total; - if (bytes > pkt->len - pkt->off) - bytes = pkt->len - pkt->off; - - err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); - if (err) - goto out; - total += bytes; - pkt->off += bytes; - if (pkt->off == pkt->len) { - virtio_transport_dec_rx_pkt(pkt); - list_del(&pkt->list); - virtio_transport_free_pkt(pkt); - } - } - mutex_unlock(&trans->rx_lock); - - /* Send a credit pkt to peer */ - 
virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, - NULL); - - return total; - -out: - mutex_unlock(&trans->rx_lock); - if (total) - err = total; - return err; -} - -ssize_t -virtio_transport_stream_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags) -{ - if (flags & MSG_PEEK) - return -EOPNOTSUPP; - - return virtio_transport_stream_do_dequeue(vsk, msg, len); -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); - -struct dgram_skb { - struct list_head list; - struct sk_buff *skb; - u16 id; -}; - -static struct dgram_skb *dgram_id_to_skb(struct virtio_transport *trans, - u16 id) -{ - struct dgram_skb *dgram_skb; - - list_for_each_entry(dgram_skb, &trans->incomplete_dgrams, list) { - if (dgram_skb->id == id) - return dgram_skb; - } - - return NULL; -} - -static void -virtio_transport_recv_dgram(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct sk_buff *skb = NULL; - struct vsock_sock *vsk; - struct virtio_transport *trans; - size_t size; - u16 dgram_id, pkt_off, dgram_len, pkt_len; - u32 flags, len; - struct dgram_skb *dgram_skb; - - vsk = vsock_sk(sk); - trans = vsk->trans; - - /* len: dgram_len | pkt_len */ - len = le32_to_cpu(pkt->hdr.len); - dgram_len = len >> 16; - pkt_len = len & 0xFFFF; - - /* flags: dgram_id | pkt_off */ - flags = le32_to_cpu(pkt->hdr.flags); - dgram_id = flags >> 16; - pkt_off = flags & 0xFFFF; - - pr_debug("%s: dgram_len=%d, pkt_len=%d, id=%d, off=%d\n", __func__, - dgram_len, pkt_len, dgram_id, pkt_off); - - dgram_skb = dgram_id_to_skb(trans, dgram_id); - if (dgram_skb) { - /* This pkt is for a existing dgram */ - skb = dgram_skb->skb; - pr_debug("%s:found skb\n", __func__); - } - - /* Packet payload must be within datagram bounds */ - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - goto drop; - if (pkt_len > dgram_len) - goto drop; - if (pkt_off > dgram_len) - goto drop; - if (dgram_len - pkt_off < pkt_len) - goto drop; - - if (!skb) { - /* This pkt is for a new dgram */ - 
pr_debug("%s:create skb\n", __func__); - - size = sizeof(pkt->hdr) + dgram_len; - /* Attach the packet to the socket's receive queue as an sk_buff. */ - dgram_skb = kzalloc(sizeof(struct dgram_skb), GFP_ATOMIC); - if (!dgram_skb) - goto drop; - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - kfree(dgram_skb); - dgram_skb = NULL; - goto drop; - } - dgram_skb->id = dgram_id; - dgram_skb->skb = skb; - list_add_tail(&dgram_skb->list, &trans->incomplete_dgrams); - - /* sk_receive_skb() will do a sock_put(), so hold here. */ - sock_hold(sk); - skb_put(skb, size); - memcpy(skb->data, &pkt->hdr, sizeof(pkt->hdr)); - } - - memcpy(skb->data + sizeof(pkt->hdr) + pkt_off, pkt->buf, pkt_len); - - pr_debug("%s:C, off=%d, pkt_len=%d, dgram_len=%d\n", __func__, - pkt_off, pkt_len, dgram_len); - - /* We are done with this dgram */ - if (pkt_off + pkt_len == dgram_len) { - pr_debug("%s:dgram_id=%d is done\n", __func__, dgram_id); - list_del(&dgram_skb->list); - kfree(dgram_skb); - sk_receive_skb(sk, skb, 0); - } - virtio_transport_free_pkt(pkt); - return; - -drop: - if (dgram_skb) { - list_del(&dgram_skb->list); - kfree(dgram_skb); - kfree_skb(skb); - sock_put(sk); - } - virtio_transport_free_pkt(pkt); -} - -int -virtio_transport_dgram_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags) -{ - struct virtio_vsock_hdr *hdr; - struct sk_buff *skb; - int noblock; - int err; - int dgram_len; - - noblock = flags & MSG_DONTWAIT; - - if (flags & MSG_OOB || flags & MSG_ERRQUEUE) - return -EOPNOTSUPP; - - /* Retrieve the head sk_buff from the socket's receive queue. */ - err = 0; - skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); - if (err) - return err; - if (!skb) - return -EAGAIN; - - hdr = (struct virtio_vsock_hdr *)skb->data; - if (!hdr) - goto out; - - dgram_len = le32_to_cpu(hdr->len) >> 16; - /* Place the datagram payload in the user's iovec. 
*/ - err = skb_copy_datagram_msg(skb, sizeof(*hdr), msg, dgram_len); - if (err) - goto out; - - if (msg->msg_name) { - /* Provide the address of the sender. */ - DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); - vsock_addr_init(vm_addr, le32_to_cpu(hdr->src_cid), le32_to_cpu(hdr->src_port)); - msg->msg_namelen = sizeof(*vm_addr); - } - err = dgram_len; - - /* Send a credit pkt to peer */ - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, hdr); - - pr_debug("%s:done, recved =%d\n", __func__, dgram_len); -out: - skb_free_datagram(&vsk->sk, skb); - return err; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); - -s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - mutex_lock(&trans->rx_lock); - bytes = trans->rx_bytes; - mutex_unlock(&trans->rx_lock); - - return bytes; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); - -static s64 virtio_transport_has_space(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - bytes = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); - if (bytes < 0) - bytes = 0; - - return bytes; -} - -s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - mutex_lock(&trans->tx_lock); - bytes = virtio_transport_has_space(vsk); - mutex_unlock(&trans->tx_lock); - - pr_debug("%s: bytes=%lld\n", __func__, bytes); - - return bytes; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); - -int virtio_transport_do_socket_init(struct vsock_sock *vsk, - struct vsock_sock *psk) -{ - struct virtio_transport *trans; - - trans = kzalloc(sizeof(*trans), GFP_KERNEL); - if (!trans) - return -ENOMEM; - - vsk->trans = trans; - trans->vsk = vsk; - if (psk) { - struct virtio_transport *ptrans = psk->trans; - trans->buf_size = ptrans->buf_size; - trans->buf_size_min = ptrans->buf_size_min; - trans->buf_size_max = 
ptrans->buf_size_max; - trans->peer_buf_alloc = ptrans->peer_buf_alloc; - } else { - trans->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; - trans->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; - trans->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; - } - - trans->buf_alloc = trans->buf_size; - - pr_debug("%s: trans->buf_alloc=%d\n", __func__, trans->buf_alloc); - - mutex_init(&trans->rx_lock); - mutex_init(&trans->tx_lock); - INIT_LIST_HEAD(&trans->rx_queue); - INIT_LIST_HEAD(&trans->incomplete_dgrams); - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); - -u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); - -u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size_min; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); - -u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size_max; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); - -void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val < trans->buf_size_min) - trans->buf_size_min = val; - if (val > trans->buf_size_max) - trans->buf_size_max = val; - trans->buf_size = val; - trans->buf_alloc = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); - -void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val > trans->buf_size) - trans->buf_size = val; - trans->buf_size_min = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); - -void 
virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val < trans->buf_size) - trans->buf_size = val; - trans->buf_size_max = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); - -int -virtio_transport_notify_poll_in(struct vsock_sock *vsk, - size_t target, - bool *data_ready_now) -{ - if (vsock_stream_has_data(vsk)) - *data_ready_now = true; - else - *data_ready_now = false; - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); - -int -virtio_transport_notify_poll_out(struct vsock_sock *vsk, - size_t target, - bool *space_avail_now) -{ - s64 free_space; - - free_space = vsock_stream_has_space(vsk); - if (free_space > 0) - *space_avail_now = true; - else if (free_space == 0) - *space_avail_now = false; - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); - -int virtio_transport_notify_recv_init(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); - -int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); - -int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); - -int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, - size_t target, ssize_t copied, bool data_read, - struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); - -int virtio_transport_notify_send_init(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} 
-EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); - -int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); - -int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); - -int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, - ssize_t written, struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); - -u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); - -bool virtio_transport_stream_is_active(struct vsock_sock *vsk) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); - -bool virtio_transport_stream_allow(u32 cid, u32 port) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); - -int virtio_transport_dgram_bind(struct vsock_sock *vsk, - struct sockaddr_vm *addr) -{ - return vsock_bind_dgram_generic(vsk, addr); -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); - -bool virtio_transport_dgram_allow(u32 cid, u32 port) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); - -int virtio_transport_connect(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_REQUEST, - .type = VIRTIO_VSOCK_TYPE_STREAM, - }; - - pr_debug("%s: vsk=%p send_request\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_connect); - -int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = 
VIRTIO_VSOCK_OP_SHUTDOWN, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .flags = (mode & RCV_SHUTDOWN ? - VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | - (mode & SEND_SHUTDOWN ? - VIRTIO_VSOCK_SHUTDOWN_SEND : 0), - }; - - pr_debug("%s: vsk=%p: send_shutdown\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_shutdown); - -void virtio_transport_release(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - struct sock *sk = &vsk->sk; - struct dgram_skb *dgram_skb; - struct dgram_skb *dgram_skb_tmp; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - - /* Tell other side to terminate connection */ - if (sk->sk_type == SOCK_STREAM && sk->sk_state == SS_CONNECTED) { - virtio_transport_shutdown(vsk, SHUTDOWN_MASK); - } - - /* Free incomplete dgrams */ - lock_sock(sk); - list_for_each_entry_safe(dgram_skb, dgram_skb_tmp, - &trans->incomplete_dgrams, list) { - list_del(&dgram_skb->list); - kfree_skb(dgram_skb->skb); - kfree(dgram_skb); - sock_put(sk); /* held in virtio_transport_recv_dgram() */ - } - release_sock(sk); -} -EXPORT_SYMBOL_GPL(virtio_transport_release); - -int -virtio_transport_dgram_enqueue(struct vsock_sock *vsk, - struct sockaddr_vm *remote_addr, - struct msghdr *msg, - size_t dgram_len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RW, - .type = VIRTIO_VSOCK_TYPE_DGRAM, - .msg = msg, - }; - size_t total_written = 0, pkt_off = 0, written; - u16 dgram_id; - - /* The max size of a single dgram we support is 64KB */ - if (dgram_len > VIRTIO_VSOCK_MAX_DGRAM_SIZE) - return -EMSGSIZE; - - info.dgram_len = dgram_len; - vsk->remote_addr = *remote_addr; - - dgram_id = trans->dgram_id++; - - /* TODO: To optimize, if we have enough credit to send the pkt already, - * do not ask the peer to send credit to use */ - virtio_transport_send_credit_request(vsk, VIRTIO_VSOCK_TYPE_DGRAM); - - while (total_written < dgram_len) { - info.pkt_len = dgram_len - 
total_written; - info.flags = dgram_id << 16 | pkt_off; - written = trans->ops->send_pkt(vsk, &info); - if (written < 0) - return -ENOMEM; - if (written == 0) { - /* TODO: if written = 0, we need a sleep & wakeup - * instead of sleep */ - pr_debug("%s: SHOULD WAIT written==0", __func__); - msleep(10); - } - total_written += written; - pkt_off += written; - pr_debug("%s:id=%d, dgram_len=%zu, off=%zu, total_written=%zu, written=%zu\n", - __func__, dgram_id, dgram_len, pkt_off, total_written, written); - } - - return dgram_len; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); - -ssize_t -virtio_transport_stream_enqueue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RW, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .msg = msg, - .pkt_len = len, - }; - - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); - -void virtio_transport_destruct(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - kfree(trans); -} -EXPORT_SYMBOL_GPL(virtio_transport_destruct); - -static int virtio_transport_send_ack(struct vsock_sock *vsk, u32 cookie) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_ACK, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .flags = cpu_to_le32(cookie), - }; - - pr_debug("%s: sk=%p send_offer\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static int virtio_transport_send_reset(struct vsock_sock *vsk, - struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RST, - .type = VIRTIO_VSOCK_TYPE_STREAM, - }; - - pr_debug("%s\n", __func__); - - /* Send RST only if the original pkt is not a RST pkt */ - if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) - return 0; - - return 
trans->ops->send_pkt(vsk, &info); -} - -static int -virtio_transport_recv_connecting(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - int err; - int skerr; - u32 cookie; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_RESPONSE: - cookie = le32_to_cpu(pkt->hdr.flags); - pr_debug("%s: got RESPONSE and send ACK, cookie=%x\n", __func__, cookie); - err = virtio_transport_send_ack(vsk, cookie); - if (err < 0) { - skerr = -err; - goto destroy; - } - sk->sk_state = SS_CONNECTED; - sk->sk_socket->state = SS_CONNECTED; - vsock_insert_connected(vsk); - sk->sk_state_change(sk); - break; - case VIRTIO_VSOCK_OP_INVALID: - pr_debug("%s: got invalid\n", __func__); - break; - case VIRTIO_VSOCK_OP_RST: - pr_debug("%s: got rst\n", __func__); - skerr = ECONNRESET; - err = 0; - goto destroy; - default: - pr_debug("%s: got def\n", __func__); - skerr = EPROTO; - err = -EINVAL; - goto destroy; - } - return 0; - -destroy: - virtio_transport_send_reset(vsk, pkt); - sk->sk_state = SS_UNCONNECTED; - sk->sk_err = skerr; - sk->sk_error_report(sk); - return err; -} - -static int -virtio_transport_recv_connected(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_transport *trans = vsk->trans; - int err = 0; - - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_RW: - pkt->len = le32_to_cpu(pkt->hdr.len); - pkt->off = 0; - pkt->trans = trans; - - mutex_lock(&trans->rx_lock); - virtio_transport_inc_rx_pkt(pkt); - list_add_tail(&pkt->list, &trans->rx_queue); - mutex_unlock(&trans->rx_lock); - - sk->sk_data_ready(sk); - return err; - case VIRTIO_VSOCK_OP_CREDIT_UPDATE: - sk->sk_write_space(sk); - break; - case VIRTIO_VSOCK_OP_SHUTDOWN: - pr_debug("%s: got shutdown\n", __func__); - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) - vsk->peer_shutdown |= RCV_SHUTDOWN; - if (le32_to_cpu(pkt->hdr.flags) & 
VIRTIO_VSOCK_SHUTDOWN_SEND) - vsk->peer_shutdown |= SEND_SHUTDOWN; - if (le32_to_cpu(pkt->hdr.flags)) - sk->sk_state_change(sk); - break; - case VIRTIO_VSOCK_OP_RST: - pr_debug("%s: got rst\n", __func__); - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; - sk->sk_state_change(sk); - break; - default: - err = -EINVAL; - break; - } - - virtio_transport_free_pkt(pkt); - return err; -} - -static int -virtio_transport_send_response(struct vsock_sock *vsk, - struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RESPONSE, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .remote_cid = le32_to_cpu(pkt->hdr.src_cid), - .remote_port = le32_to_cpu(pkt->hdr.src_port), - }; - u32 cookie; - - cookie = virtio_vsock_secure_cookie(le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_port), - jiffies / (HZ * 60)); - info.flags = cpu_to_le32(cookie); - - pr_debug("%s: send_response, cookie=%x\n", __func__, le32_to_cpu(cookie)); - - return trans->ops->send_pkt(vsk, &info); -} - -/* Handle server socket */ -static int -virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct vsock_sock *vpending; - struct sock *pending; - int err; - u32 cookie; - - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_REQUEST: - err = virtio_transport_send_response(vsk, pkt); - if (err < 0) { - // FIXME vsk should be vpending - virtio_transport_send_reset(vsk, pkt); - return err; - } - break; - case VIRTIO_VSOCK_OP_ACK: - cookie = le32_to_cpu(pkt->hdr.flags); - err = virtio_vsock_check_cookie(le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_port), - jiffies / (HZ * 60), - le32_to_cpu(pkt->hdr.flags), - 
VSOCK_TIMEOUT_INIT); - pr_debug("%s: cookie=%x, err=%d\n", __func__, cookie, err); - if (err) - return err; - - /* So no pending socket are responsible for this pkt, create one */ - pr_debug("%s: create pending\n", __func__); - pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, - sk->sk_type, 0); - if (!pending) { - virtio_transport_send_reset(vsk, pkt); - return -ENOMEM; - } - sk->sk_ack_backlog++; - pending->sk_state = SS_CONNECTING; - - vpending = vsock_sk(pending); - vsock_addr_init(&vpending->local_addr, le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.dst_port)); - vsock_addr_init(&vpending->remote_addr, le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.src_port)); - vsock_add_pending(sk, pending); - - pr_debug("%s: get pending\n", __func__); - pending = virtio_transport_get_pending(sk, pkt); - vpending = vsock_sk(pending); - lock_sock(pending); - switch (pending->sk_state) { - case SS_CONNECTING: - if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_ACK) { - pr_debug("%s: op=%d != OP_ACK\n", __func__, - le16_to_cpu(pkt->hdr.op)); - virtio_transport_send_reset(vpending, pkt); - pending->sk_err = EPROTO; - pending->sk_state = SS_UNCONNECTED; - sock_put(pending); - } else { - pending->sk_state = SS_CONNECTED; - vsock_insert_connected(vpending); - - vsock_remove_pending(sk, pending); - vsock_enqueue_accept(sk, pending); - - sk->sk_data_ready(sk); - } - err = 0; - break; - default: - pr_debug("%s: sk->sk_ack_backlog=%d\n", __func__, - sk->sk_ack_backlog); - virtio_transport_send_reset(vpending, pkt); - err = -EINVAL; - break; - } - if (err < 0) - vsock_remove_pending(sk, pending); - release_sock(pending); - - /* Release refcnt obtained in virtio_transport_get_pending */ - sock_put(pending); - break; - default: - break; - } - - return 0; -} - -static void virtio_transport_space_update(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_transport *trans = vsk->trans; - bool space_available; - - 
/* buf_alloc and fwd_cnt is always included in the hdr */ - mutex_lock(&trans->tx_lock); - trans->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); - trans->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); - space_available = virtio_transport_has_space(vsk); - mutex_unlock(&trans->tx_lock); - - if (space_available) - sk->sk_write_space(sk); -} - -/* We are under the virtio-vsock's vsock->rx_lock or - * vhost-vsock's vq->mutex lock */ -void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans; - struct sockaddr_vm src, dst; - struct vsock_sock *vsk; - struct sock *sk; - - vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); - vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); - - virtio_vsock_dumppkt(__func__, pkt); - - if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_DGRAM) { - sk = vsock_find_unbound_socket(&dst); - if (!sk) - goto free_pkt; - - vsk = vsock_sk(sk); - trans = vsk->trans; - BUG_ON(!trans); - - virtio_transport_space_update(sk, pkt); - - lock_sock(sk); - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_CREDIT_UPDATE: - virtio_transport_free_pkt(pkt); - break; - case VIRTIO_VSOCK_OP_CREDIT_REQUEST: - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, - &pkt->hdr); - virtio_transport_free_pkt(pkt); - break; - case VIRTIO_VSOCK_OP_RW: - virtio_transport_recv_dgram(sk, pkt); - break; - default: - virtio_transport_free_pkt(pkt); - break; - } - release_sock(sk); - - /* Release refcnt obtained when we fetched this socket out of - * the unbound list. 
- */ - sock_put(sk); - return; - } else if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) { - /* The socket must be in connected or bound table - * otherwise send reset back - */ - sk = vsock_find_connected_socket(&src, &dst); - if (!sk) { - sk = vsock_find_bound_socket(&dst); - if (!sk) { - pr_debug("%s: can not find bound_socket\n", __func__); - virtio_vsock_dumppkt(__func__, pkt); - /* Ignore this pkt instead of sending reset back */ - /* TODO send a RST unless this packet is a RST (to avoid infinite loops) */ - goto free_pkt; - } - } - - vsk = vsock_sk(sk); - trans = vsk->trans; - BUG_ON(!trans); - - virtio_transport_space_update(sk, pkt); - - lock_sock(sk); - switch (sk->sk_state) { - case VSOCK_SS_LISTEN: - virtio_transport_recv_listen(sk, pkt); - virtio_transport_free_pkt(pkt); - break; - case SS_CONNECTING: - virtio_transport_recv_connecting(sk, pkt); - virtio_transport_free_pkt(pkt); - break; - case SS_CONNECTED: - virtio_transport_recv_connected(sk, pkt); - break; - default: - virtio_transport_free_pkt(pkt); - break; - } - release_sock(sk); - - /* Release refcnt obtained when we fetched this socket out of the - * bound or connected list. 
- */ - sock_put(sk); - } - return; - -free_pkt: - virtio_transport_free_pkt(pkt); -} -EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); - -void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) -{ - kfree(pkt->buf); - kfree(pkt); -} -EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); - -static int __init virtio_vsock_common_init(void) -{ - get_random_bytes(vsockcookie_secret, sizeof(vsockcookie_secret)); - return 0; -} - -static void __exit virtio_vsock_common_exit(void) -{ -} - -module_init(virtio_vsock_common_init); -module_exit(virtio_vsock_common_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); -MODULE_DESCRIPTION("common code for virtio vsock"); -- cgit v1.2.3 From 297dbde19cf6a0ccb6fd4396c6220a5912ed61e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:51 -0500 Subject: netprio_cgroup: limit the maximum css->id to USHRT_MAX netprio builds per-netdev contiguous priomap array which is indexed by css->id. The array is allocated using kzalloc() effectively limiting the maximum ID supported to some thousand range. This patch caps the maximum supported css->id to USHRT_MAX which should be way above what is actually useable. This allows reducing sock->sk_cgrp_prioidx to u16 from u32. The freed up part will be used to overload the cgroup related fields. sock->sk_cgrp_prioidx's position is swapped with sk_mark so that the two cgroup related fields are adjacent. Signed-off-by: Tejun Heo Acked-by: Daniel Wagner Cc: Daniel Borkmann CC: Neil Horman Signed-off-by: David S. 
Miller --- include/net/sock.h | 10 +++++----- net/core/netprio_cgroup.c | 9 +++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 6f58b84fc742..a95bcf7d6efa 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -288,7 +288,6 @@ struct cg_proto; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_priority: %SO_PRIORITY setting - * @sk_cgrp_prioidx: socket group's priority map index * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family * @sk_peer_pid: &struct pid for this socket's peer @@ -309,6 +308,7 @@ struct cg_proto; * @sk_send_head: front of stuff to transmit * @sk_security: used by security modules * @sk_mark: generic packet mark + * @sk_cgrp_prioidx: socket group's priority map index * @sk_classid: this socket's cgroup classid * @sk_cgrp: this socket's cgroup-specific proto data * @sk_write_pending: a write to stream socket waits to start @@ -425,9 +425,7 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; __u32 sk_priority; -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) - __u32 sk_cgrp_prioidx; -#endif + __u32 sk_mark; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; @@ -445,7 +443,9 @@ struct sock { #ifdef CONFIG_SECURITY void *sk_security; #endif - __u32 sk_mark; +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) + u16 sk_cgrp_prioidx; +#endif #ifdef CONFIG_CGROUP_NET_CLASSID u32 sk_classid; #endif diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index cbd0a199bf52..2b9159b7a28a 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -27,6 +27,12 @@ #include +/* + * netprio allocates per-net_device priomap array which is indexed by + * css->id. Limiting css ID to 16bits doesn't lose anything. 
+ */ +#define NETPRIO_ID_MAX USHRT_MAX + #define PRIOMAP_MIN_SZ 128 /* @@ -144,6 +150,9 @@ static int cgrp_css_online(struct cgroup_subsys_state *css) struct net_device *dev; int ret = 0; + if (css->id > NETPRIO_ID_MAX) + return -ENOSPC; + if (!parent_css) return 0; -- cgit v1.2.3 From 2a56a1fec290bf0bc4676bbf4efdb3744953a3e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:52 -0500 Subject: net: wrap sock->sk_cgrp_prioidx and ->sk_classid inside a struct Introduce sock->sk_cgrp_data which is a struct sock_cgroup_data. ->sk_cgrp_prioidx and ->sk_classid are moved into it. The struct and its accessors are defined in cgroup-defs.h. This is to prepare for overloading the fields with a cgroup pointer. This patch mostly performs equivalent conversions but the following are noteworthy. * Equality test before updating classid is removed from sock_update_classid(). This shouldn't make any noticeable difference and a similar test will be implemented on the helper side later. * sock_update_netprioidx() now takes struct sock_cgroup_data and can be moved to netprio_cgroup.h without causing an include dependency loop. Moved. * The dummy version of sock_update_netprioidx() converted to a static inline function while at it. Signed-off-by: Tejun Heo Signed-off-by: David S.
Miller --- include/linux/cgroup-defs.h | 36 ++++++++++++++++++++++++++++++++++++ include/net/cls_cgroup.h | 11 +++++------ include/net/netprio_cgroup.h | 16 +++++++++++++--- include/net/sock.h | 11 +++-------- net/Kconfig | 6 ++++++ net/core/dev.c | 3 ++- net/core/netclassid_cgroup.c | 4 ++-- net/core/netprio_cgroup.c | 3 ++- net/core/scm.c | 4 ++-- net/core/sock.c | 15 ++------------- net/netfilter/nft_meta.c | 2 +- net/netfilter/xt_cgroup.c | 3 ++- 12 files changed, 76 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 504d8591b6d3..ed128fed0335 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -542,4 +542,40 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +struct sock_cgroup_data { + u16 prioidx; + u32 classid; +}; + +static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) +{ + return skcd->prioidx; +} + +static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) +{ + return skcd->classid; +} + +static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, + u16 prioidx) +{ + skcd->prioidx = prioidx; +} + +static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, + u32 classid) +{ + skcd->classid = classid; +} + +#else /* CONFIG_SOCK_CGROUP_DATA */ + +struct sock_cgroup_data { +}; + +#endif /* CONFIG_SOCK_CGROUP_DATA */ + #endif /* _LINUX_CGROUP_DEFS_H */ diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index ccd6d8bffa4d..c0a92e2c286d 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -41,13 +41,12 @@ static inline u32 task_cls_classid(struct task_struct *p) return classid; } -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock_cgroup_data *skcd) { u32 classid; classid = task_cls_classid(current); - if (classid != 
sk->sk_classid) - sk->sk_classid = classid; + sock_cgroup_set_classid(skcd, classid); } static inline u32 task_get_classid(const struct sk_buff *skb) @@ -64,17 +63,17 @@ static inline u32 task_get_classid(const struct sk_buff *skb) * softirqs always disables bh. */ if (in_serving_softirq()) { - /* If there is an sk_classid we'll use that. */ + /* If there is an sock_cgroup_classid we'll use that. */ if (!skb->sk) return 0; - classid = skb->sk->sk_classid; + classid = sock_cgroup_classid(&skb->sk->sk_cgrp_data); } return classid; } #else /* !CONFIG_CGROUP_NET_CLASSID */ -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock_cgroup_data *skcd) { } diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index f2a9597ff53c..604190596cde 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -25,8 +25,6 @@ struct netprio_map { u32 priomap[]; }; -void sock_update_netprioidx(struct sock *sk); - static inline u32 task_netprioidx(struct task_struct *p) { struct cgroup_subsys_state *css; @@ -38,13 +36,25 @@ static inline u32 task_netprioidx(struct task_struct *p) rcu_read_unlock(); return idx; } + +static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd) +{ + if (in_interrupt()) + return; + + sock_cgroup_set_prioidx(skcd, task_netprioidx(current)); +} + #else /* !CONFIG_CGROUP_NET_PRIO */ + static inline u32 task_netprioidx(struct task_struct *p) { return 0; } -#define sock_update_netprioidx(sk) +static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd) +{ +} #endif /* CONFIG_CGROUP_NET_PRIO */ #endif /* _NET_CLS_CGROUP_H */ diff --git a/include/net/sock.h b/include/net/sock.h index a95bcf7d6efa..0ca22b014de1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -308,8 +309,7 @@ struct cg_proto; * @sk_send_head: front of stuff to transmit * @sk_security: used by security 
modules * @sk_mark: generic packet mark - * @sk_cgrp_prioidx: socket group's priority map index - * @sk_classid: this socket's cgroup classid + * @sk_cgrp_data: cgroup data for this socket * @sk_cgrp: this socket's cgroup-specific proto data * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock @@ -443,12 +443,7 @@ struct sock { #ifdef CONFIG_SECURITY void *sk_security; #endif -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) - u16 sk_cgrp_prioidx; -#endif -#ifdef CONFIG_CGROUP_NET_CLASSID - u32 sk_classid; -#endif + struct sock_cgroup_data sk_cgrp_data; struct cg_proto *sk_cgrp; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk); diff --git a/net/Kconfig b/net/Kconfig index 127da94ae25e..11f8c22af34d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -250,9 +250,14 @@ config XPS depends on SMP default y +config SOCK_CGROUP_DATA + bool + default n + config CGROUP_NET_PRIO bool "Network priority cgroup" depends on CGROUPS + select SOCK_CGROUP_DATA ---help--- Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis. @@ -260,6 +265,7 @@ config CGROUP_NET_PRIO config CGROUP_NET_CLASSID bool "Network classid cgroup" depends on CGROUPS + select SOCK_CGROUP_DATA ---help--- Cgroup subsystem for use as general purpose socket classid marker that is being used in cls_cgroup and for netfilter matching.
diff --git a/net/core/dev.c b/net/core/dev.c index e5c395473eba..8f705fcedb94 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2929,7 +2929,8 @@ static void skb_update_prio(struct sk_buff *skb) struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); if (!skb->priority && skb->sk && map) { - unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + unsigned int prioidx = + sock_cgroup_prioidx(&skb->sk->sk_cgrp_data); if (prioidx < map->priomap_len) skb->priority = map->priomap[prioidx]; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 2e4df84c34a1..e60ded46b3ac 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -62,8 +62,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) struct socket *sock = sock_from_file(file, &err); if (sock) - sock->sk->sk_classid = (u32)(unsigned long)v; - + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, + (unsigned long)v); return 0; } diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 2b9159b7a28a..de42aa7f6c77 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -223,7 +223,8 @@ static int update_netprio(const void *v, struct file *file, unsigned n) int err; struct socket *sock = sock_from_file(file, &err); if (sock) - sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; + sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, + (unsigned long)v); return 0; } diff --git a/net/core/scm.c b/net/core/scm.c index 8a1741b14302..14596fb37172 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -289,8 +289,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) /* Bump the usage count and install the file. 
*/ sock = sock_from_file(fp[i], &err); if (sock) { - sock_update_netprioidx(sock->sk); - sock_update_classid(sock->sk); + sock_update_netprioidx(&sock->sk->sk_cgrp_data); + sock_update_classid(&sock->sk->sk_cgrp_data); } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/sock.c b/net/core/sock.c index 7965ef487375..947741dc43fa 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1393,17 +1393,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) -void sock_update_netprioidx(struct sock *sk) -{ - if (in_interrupt()) - return; - - sk->sk_cgrp_prioidx = task_netprioidx(current); -} -EXPORT_SYMBOL_GPL(sock_update_netprioidx); -#endif - /** * sk_alloc - All socket objects are allocated here * @net: the applicable net namespace @@ -1432,8 +1421,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk); - sock_update_netprioidx(sk); + sock_update_classid(&sk->sk_cgrp_data); + sock_update_netprioidx(&sk->sk_cgrp_data); } return sk; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9dfaf4d55ee0..1915cab7f32d 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -174,7 +174,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) goto err; - *dest = sk->sk_classid; + *dest = sock_cgroup_classid(&sk->sk_cgrp_data); break; #endif default: diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index a1d126f29463..54eaeb45ce99 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -42,7 +42,8 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; - return (info->id == skb->sk->sk_classid) ^ info->invert; + return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^ + info->invert; } static 
struct xt_match cgroup_mt_reg __read_mostly = { -- cgit v1.2.3 From bd1060a1d67128bb8fbe2e1384c518912cbe54e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:53 -0500 Subject: sock, cgroup: add sock->sk_cgroup In cgroup v1, dealing with cgroup membership was difficult because the number of membership associations was unbounded. As a result, cgroup v1 grew several controllers whose primary purpose is either tagging membership or pulling in configuration knobs from other subsystems so that cgroup membership test can be avoided. net_cls and net_prio controllers are examples of the latter. They allow configuring network-specific attributes from cgroup side so that network subsystem can avoid testing cgroup membership; unfortunately, these are not only cumbersome but also problematic. Both net_cls and net_prio aren't properly hierarchical. Both inherit configuration from the parent on creation but there's no interaction afterwards. An ancestor doesn't restrict the behavior in its subtree in any way and configuration changes aren't propagated downwards. Especially when combined with cgroup delegation, this is problematic because delegatees can mess up whatever network configuration is implemented at the system level. net_prio would allow the delegatees to set whatever priority value regardless of CAP_NET_ADMIN and net_cls the same for classid. While it is possible to solve these issues from controller side by implementing hierarchical allowable ranges in both controllers, it would involve quite a bit of complexity in the controllers and further obfuscate network configuration as it becomes even more difficult to tell what's actually being configured looking from the network side. While not much can be done for v1 at this point, as membership handling is sane on cgroup v2, it'd be better to make cgroup matching behave like other network matches and classifiers than introducing further complications.
In preparation, this patch updates sock->sk_cgrp_data handling so that it points to the v2 cgroup that sock was created in until either net_prio or net_cls is used. Once either of the two is used, sock->sk_cgrp_data reverts to its previous role of carrying prioidx and classid. This is to avoid adding yet another cgroup related field to struct sock. As the mode switching can happen at most once per boot, the switching mechanism is aimed at lowering hot path overhead. It may leak a finite, likely small, number of cgroup refs and report spurious prioidx or classid on switching; however, dynamic updates of prioidx and classid have always been racy and lossy - socks between creation and fd installation are never updated, config changes don't update existing sockets at all, and prioidx may index with dead and recycled cgroup IDs. Non-critical inaccuracies from small race windows won't make any noticeable difference. This patch doesn't make use of the pointer yet. The following patch will implement netfilter match for cgroup2 membership. v2: Use sock_cgroup_data to avoid inflating struct sock w/ another cgroup specific field. v3: Add comments explaining why sock_data_prioidx() and sock_data_classid() use different fallback values. Signed-off-by: Tejun Heo Cc: Daniel Borkmann Cc: Daniel Wagner CC: Neil Horman Signed-off-by: David S. 
Miller --- include/linux/cgroup-defs.h | 88 +++++++++++++++++++++++++++++++++++++++++--- include/linux/cgroup.h | 41 +++++++++++++++++++++ kernel/cgroup.c | 55 ++++++++++++++++++++++++++- net/core/netclassid_cgroup.c | 7 +++- net/core/netprio_cgroup.c | 7 +++- net/core/sock.c | 2 + 6 files changed, 191 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ed128fed0335..9dc226345e4e 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -544,31 +544,107 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #ifdef CONFIG_SOCK_CGROUP_DATA +/* + * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains + * per-socket cgroup information except for memcg association. + * + * On legacy hierarchies, net_prio and net_cls controllers directly set + * attributes on each sock which can then be tested by the network layer. + * On the default hierarchy, each sock is associated with the cgroup it was + * created in and the networking layer can match the cgroup directly. + * + * To avoid carrying all three cgroup related fields separately in sock, + * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer. + * On boot, sock_cgroup_data records the cgroup that the sock was created + * in so that cgroup2 matches can be made; however, once either net_prio or + * net_cls starts being used, the area is overridden to carry prioidx and/or + * classid. The two modes are distinguished by whether the lowest bit is + * set. Clear bit indicates cgroup pointer while set bit prioidx and + * classid. + * + * While userland may start using net_prio or net_cls at any time, once + * either is used, cgroup2 matching no longer works. There is no reason to + * mix the two and this is in line with how legacy and v2 compatibility is + * handled. On mode switch, cgroup references which are already being + * pointed to by socks may be leaked.
While this can be remedied by adding + * synchronization around sock_cgroup_data, given that the number of leaked + * cgroups is bound and highly unlikely to be high, this seems to be the + * better trade-off. + */ struct sock_cgroup_data { - u16 prioidx; - u32 classid; + union { +#ifdef __LITTLE_ENDIAN + struct { + u8 is_data; + u8 padding; + u16 prioidx; + u32 classid; + } __packed; +#else + struct { + u32 classid; + u16 prioidx; + u8 padding; + u8 is_data; + } __packed; +#endif + u64 val; + }; }; +/* + * There's a theoretical window where the following accessors race with + * updaters and return part of the previous pointer as the prioidx or + * classid. Such races are short-lived and the result isn't critical. + */ static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) { - return skcd->prioidx; + /* fallback to 1 which is always the ID of the root cgroup */ + return (skcd->is_data & 1) ? skcd->prioidx : 1; } static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) { - return skcd->classid; + /* fallback to 0 which is the unconfigured default classid */ + return (skcd->is_data & 1) ? skcd->classid : 0; } +/* + * If invoked concurrently, the updaters may clobber each other. The + * caller is responsible for synchronization. 
+ */ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, u16 prioidx) { - skcd->prioidx = prioidx; + struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) }; + + if (sock_cgroup_prioidx(&skcd_buf) == prioidx) + return; + + if (!(skcd_buf.is_data & 1)) { + skcd_buf.val = 0; + skcd_buf.is_data = 1; + } + + skcd_buf.prioidx = prioidx; + WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ } static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, u32 classid) { - skcd->classid = classid; + struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) }; + + if (sock_cgroup_classid(&skcd_buf) == classid) + return; + + if (!(skcd_buf.is_data & 1)) { + skcd_buf.val = 0; + skcd_buf.is_data = 1; + } + + skcd_buf.classid = classid; + WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ } #else /* CONFIG_SOCK_CGROUP_DATA */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4c3ffab81ba7..a8ba1ea0ea5a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,4 +578,45 @@ static inline int cgroup_init(void) { return 0; } #endif /* !CONFIG_CGROUPS */ +/* + * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data + * definition in cgroup-defs.h. + */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) +extern spinlock_t cgroup_sk_update_lock; +#endif + +void cgroup_sk_alloc_disable(void); +void cgroup_sk_alloc(struct sock_cgroup_data *skcd); +void cgroup_sk_free(struct sock_cgroup_data *skcd); + +static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) +{ +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) + unsigned long v; + + /* + * @skcd->val is 64bit but the following is safe on 32bit too as we + * just need the lower ulong to be written and read atomically. 
+ */ + v = READ_ONCE(skcd->val); + + if (v & 1) + return &cgrp_dfl_root.cgrp; + + return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; +#else + return (struct cgroup *)(unsigned long)skcd->val; +#endif +} + +#else /* CONFIG_SOCK_CGROUP_DATA */ + +static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} +static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} + +#endif /* CONFIG_SOCK_CGROUP_DATA */ + #endif /* _LINUX_CGROUP_H */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3db5e8f5b702..4f8f7927b422 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -57,8 +57,8 @@ #include /* TODO: replace with more sophisticated array */ #include #include - #include +#include /* * pidlists linger the following amount before being destroyed. The goal @@ -5782,6 +5782,59 @@ struct cgroup *cgroup_get_from_path(const char *path) } EXPORT_SYMBOL_GPL(cgroup_get_from_path); +/* + * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data + * definition in cgroup-defs.h.
+ */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) + +spinlock_t cgroup_sk_update_lock; +static bool cgroup_sk_alloc_disabled __read_mostly; + +void cgroup_sk_alloc_disable(void) +{ + if (cgroup_sk_alloc_disabled) + return; + pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n"); + cgroup_sk_alloc_disabled = true; +} + +#else + +#define cgroup_sk_alloc_disabled false + +#endif + +void cgroup_sk_alloc(struct sock_cgroup_data *skcd) +{ + if (cgroup_sk_alloc_disabled) + return; + + rcu_read_lock(); + + while (true) { + struct css_set *cset; + + cset = task_css_set(current); + if (likely(cgroup_tryget(cset->dfl_cgrp))) { + skcd->val = (unsigned long)cset->dfl_cgrp; + break; + } + cpu_relax(); + } + + rcu_read_unlock(); +} + +void cgroup_sk_free(struct sock_cgroup_data *skcd) +{ + cgroup_put(sock_cgroup_ptr(skcd)); +} + +#endif /* CONFIG_SOCK_CGROUP_DATA */ + #ifdef CONFIG_CGROUP_DEBUG static struct cgroup_subsys_state * debug_css_alloc(struct cgroup_subsys_state *parent_css) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index e60ded46b3ac..04257a0e3534 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -61,9 +61,12 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) int err; struct socket *sock = sock_from_file(file, &err); - if (sock) + if (sock) { + spin_lock(&cgroup_sk_update_lock); sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, (unsigned long)v); + spin_unlock(&cgroup_sk_update_lock); + } return 0; } @@ -98,6 +101,8 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, { struct cgroup_cls_state *cs = css_cls_state(css); + cgroup_sk_alloc_disable(); + cs->classid = (u32)value; update_classid(css, (void *)(unsigned long)cs->classid); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index de42aa7f6c77..053d60c33395 100644 --- 
a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -209,6 +209,8 @@ static ssize_t write_priomap(struct kernfs_open_file *of, if (!dev) return -ENODEV; + cgroup_sk_alloc_disable(); + rtnl_lock(); ret = netprio_set_prio(of_css(of), dev, prio); @@ -222,9 +224,12 @@ static int update_netprio(const void *v, struct file *file, unsigned n) { int err; struct socket *sock = sock_from_file(file, &err); - if (sock) + if (sock) { + spin_lock(&cgroup_sk_update_lock); sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, (unsigned long)v); + spin_unlock(&cgroup_sk_update_lock); + } return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 947741dc43fa..1278d7b7bd9a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1363,6 +1363,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; sk_tx_queue_clear(sk); + cgroup_sk_alloc(&sk->sk_cgrp_data); } return sk; @@ -1385,6 +1386,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) owner = prot->owner; slab = prot->slab; + cgroup_sk_free(&sk->sk_cgrp_data); security_sk_free(sk); if (slab != NULL) kmem_cache_free(slab, sk); -- cgit v1.2.3 From 33d5a7b14bfd02e60af9d223db8dfff0cbcabe6b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Nov 2015 21:53:04 +0100 Subject: netfilter: nf_tables: extend tracing infrastructure nft monitor mode can then decode and display this trace data. Parts of LL/Network/Transport headers are provided as separate attributes. Otherwise, printing IP address data becomes virtually impossible for userspace since in the case of the netdev family we really don't want userspace to have to know all the possible link layer types and/or sizes just to display/print an ip address. We also don't want userspace to have to follow ipv6 header chains to get the s/dport info, the kernel already did this work for us. To avoid bloating nft_do_chain all data required for tracing is encapsulated in nft_traceinfo. 
The structure is initialized unconditionally(!) for each nft_do_chain invocation. This unconditional call will be moved under a static key in a followup patch. With lots of help from Patrick McHardy and Pablo Neira. Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 32 ++++ include/uapi/linux/netfilter/nf_tables.h | 52 ++++++ include/uapi/linux/netfilter/nfnetlink.h | 2 + net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_api.c | 12 +- net/netfilter/nf_tables_core.c | 45 +++-- net/netfilter/nf_tables_trace.c | 271 +++++++++++++++++++++++++++++++ net/netfilter/nfnetlink.c | 1 + 8 files changed, 398 insertions(+), 19 deletions(-) create mode 100644 net/netfilter/nf_tables_trace.c (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 101d7d7ec243..b313cda49194 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -888,6 +888,38 @@ void nft_unregister_chain_type(const struct nf_chain_type *); int nft_register_expr(struct nft_expr_type *); void nft_unregister_expr(struct nft_expr_type *); +int nft_verdict_dump(struct sk_buff *skb, int type, + const struct nft_verdict *v); + +/** + * struct nft_traceinfo - nft tracing information and state + * + * @pkt: pktinfo currently processed + * @basechain: base chain currently processed + * @chain: chain currently processed + * @rule: rule that was evaluated + * @verdict: verdict given by rule + * @type: event type (enum nft_trace_types) + * @packet_dumped: packet headers sent in a previous traceinfo message + * @trace: other struct members are initialised + */ +struct nft_traceinfo { + const struct nft_pktinfo *pkt; + const struct nft_base_chain *basechain; + const struct nft_chain *chain; + const struct nft_rule *rule; + const struct nft_verdict *verdict; + enum nft_trace_types type; + bool packet_dumped; + bool trace; +}; + +void nft_trace_init(struct
nft_traceinfo *info, const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_chain *basechain); + +void nft_trace_notify(struct nft_traceinfo *info); + #define nft_dereference(p) \ nfnl_dereference(p, NFNL_SUBSYS_NFTABLES) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 5f3ececf84b3..b48a3ab761f8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -83,6 +83,7 @@ enum nft_verdicts { * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) + * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -102,6 +103,7 @@ enum nf_tables_msg_types { NFT_MSG_DELSETELEM, NFT_MSG_NEWGEN, NFT_MSG_GETGEN, + NFT_MSG_TRACE, NFT_MSG_MAX, }; @@ -987,4 +989,54 @@ enum nft_gen_attributes { }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) +/** + * enum nft_trace_attributes - nf_tables trace netlink attributes + * + * @NFTA_TRACE_TABLE: name of the table (NLA_STRING) + * @NFTA_TRACE_CHAIN: name of the chain (NLA_STRING) + * @NFTA_TRACE_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_TRACE_TYPE: type of the event (NLA_U32: nft_trace_types) + * @NFTA_TRACE_VERDICT: verdict returned by hook (NLA_NESTED: nft_verdicts) + * @NFTA_TRACE_ID: pseudo-id, same for each skb traced (NLA_U32) + * @NFTA_TRACE_LL_HEADER: linklayer header (NLA_BINARY) + * @NFTA_TRACE_NETWORK_HEADER: network header (NLA_BINARY) + * @NFTA_TRACE_TRANSPORT_HEADER: transport header (NLA_BINARY) + * @NFTA_TRACE_IIF: indev ifindex (NLA_U32) + * @NFTA_TRACE_IIFTYPE: netdev->type of indev (NLA_U16) + * @NFTA_TRACE_OIF: outdev ifindex (NLA_U32) + * @NFTA_TRACE_OIFTYPE: netdev->type of outdev (NLA_U16) + * @NFTA_TRACE_MARK: nfmark (NLA_U32) + * 
@NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32) + * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32) + */ +enum nft_trace_attibutes { + NFTA_TRACE_UNSPEC, + NFTA_TRACE_TABLE, + NFTA_TRACE_CHAIN, + NFTA_TRACE_RULE_HANDLE, + NFTA_TRACE_TYPE, + NFTA_TRACE_VERDICT, + NFTA_TRACE_ID, + NFTA_TRACE_LL_HEADER, + NFTA_TRACE_NETWORK_HEADER, + NFTA_TRACE_TRANSPORT_HEADER, + NFTA_TRACE_IIF, + NFTA_TRACE_IIFTYPE, + NFTA_TRACE_OIF, + NFTA_TRACE_OIFTYPE, + NFTA_TRACE_MARK, + NFTA_TRACE_NFPROTO, + NFTA_TRACE_POLICY, + __NFTA_TRACE_MAX +}; +#define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1) + +enum nft_trace_types { + NFT_TRACETYPE_UNSPEC, + NFT_TRACETYPE_POLICY, + NFT_TRACETYPE_RETURN, + NFT_TRACETYPE_RULE, + __NFT_TRACETYPE_MAX +}; +#define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1) #endif /* _LINUX_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 354a7e5e50f2..4bb8cb7730e7 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -22,6 +22,8 @@ enum nfnetlink_groups { #define NFNLGRP_NFTABLES NFNLGRP_NFTABLES NFNLGRP_ACCT_QUOTA, #define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA + NFNLGRP_NFTRACE, +#define NFNLGRP_NFTRACE NFNLGRP_NFTRACE __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7638c36b498c..22934846b5d1 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -67,7 +67,7 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # nf_tables -nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 93cc4737018f..c4969a0d54ba 100644 --- 
a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4446,22 +4446,22 @@ static void nft_verdict_uninit(const struct nft_data *data) } } -static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) { struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + nest = nla_nest_start(skb, type); if (!nest) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code))) + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code))) goto nla_put_failure; - switch (data->verdict.code) { + switch (v->code) { case NFT_JUMP: case NFT_GOTO: if (nla_put_string(skb, NFTA_VERDICT_CHAIN, - data->verdict.chain->name)) + v->chain->name)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -4572,7 +4572,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, err = nft_value_dump(skb, data, len); break; case NFT_DATA_VERDICT: - err = nft_verdict_dump(skb, data); + err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict); break; default: err = -EINVAL; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f3695a497408..2395de7c8ab2 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -44,22 +44,36 @@ static struct nf_loginfo trace_loginfo = { }, }; -static void __nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) +static noinline void __nft_trace_packet(struct nft_traceinfo *info, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) { + const struct nft_pktinfo *pkt = info->pkt; + + if (!pkt->skb->nf_trace) + return; + + info->chain = chain; + info->type = type; + + nft_trace_notify(info); + nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); 
} -static inline void nft_trace_packet(const struct nft_pktinfo *pkt, +static inline void nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - int rulenum, enum nft_trace type) + const struct nft_rule *rule, + int rulenum, + enum nft_trace_types type) { - if (unlikely(pkt->skb->nf_trace)) - __nft_trace_packet(pkt, chain, rulenum, type); + if (unlikely(info->trace)) { + info->rule = rule; + __nft_trace_packet(info, chain, rulenum, type); + } } static void nft_cmp_fast_eval(const struct nft_expr *expr, @@ -121,7 +135,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_stats *stats; int rulenum; unsigned int gencursor = nft_genmask_cur(net); + struct nft_traceinfo info; + nft_trace_init(&info, pkt, &regs.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); @@ -151,7 +167,8 @@ next_rule: regs.verdict.code = NFT_CONTINUE; continue; case NFT_CONTINUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); continue; } break; @@ -161,7 +178,8 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); return regs.verdict.code; } @@ -174,7 +192,8 @@ next_rule: stackptr++; /* fall through */ case NFT_GOTO: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); chain = regs.verdict.chain; goto do_chain; @@ -182,7 +201,8 @@ next_rule: rulenum++; /* fall through */ case NFT_RETURN: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RETURN); break; default: WARN_ON(1); @@ -196,7 +216,8 @@ next_rule: goto next_rule; } - nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); + nft_trace_packet(&info, basechain, NULL, -1, + NFT_TRACETYPE_POLICY);
rcu_read_lock_bh(); stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c new file mode 100644 index 000000000000..36fd7ad6729a --- /dev/null +++ b/net/netfilter/nf_tables_trace.c @@ -0,0 +1,271 @@ +/* + * (C) 2015 Red Hat GmbH + * Author: Florian Westphal + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFT_TRACETYPE_LL_HSIZE 20 +#define NFT_TRACETYPE_NETWORK_HSIZE 40 +#define NFT_TRACETYPE_TRANSPORT_HSIZE 20 + +static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) +{ + __be32 id; + + /* using skb address as ID results in a limited number of + * values (and quick reuse). + * + * So we attempt to use as many skb members that will not + * change while skb is with netfilter. 
+ */ + id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), + skb->skb_iif); + + return nla_put_be32(nlskb, NFTA_TRACE_ID, id); +} + +static int trace_fill_header(struct sk_buff *nlskb, u16 type, + const struct sk_buff *skb, + int off, unsigned int len) +{ + struct nlattr *nla; + + if (len == 0) + return 0; + + nla = nla_reserve(nlskb, type, len); + if (!nla || skb_copy_bits(skb, off, nla_data(nla), len)) + return -1; + + return 0; +} + +static int nf_trace_fill_ll_header(struct sk_buff *nlskb, + const struct sk_buff *skb) +{ + struct vlan_ethhdr veth; + int off; + + BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE); + + off = skb_mac_header(skb) - skb->data; + if (off != -ETH_HLEN) + return -1; + + if (skb_copy_bits(skb, off, &veth, ETH_HLEN)) + return -1; + + veth.h_vlan_proto = skb->vlan_proto; + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth.h_vlan_encapsulated_proto = skb->protocol; + + return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth); +} + +static int nf_trace_fill_dev_info(struct sk_buff *nlskb, + const struct net_device *indev, + const struct net_device *outdev) +{ + if (indev) { + if (nla_put_be32(nlskb, NFTA_TRACE_IIF, + htonl(indev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE, + htons(indev->type))) + return -1; + } + + if (outdev) { + if (nla_put_be32(nlskb, NFTA_TRACE_OIF, + htonl(outdev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE, + htons(outdev->type))) + return -1; + } + + return 0; +} + +static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, + const struct nft_pktinfo *pkt) +{ + const struct sk_buff *skb = pkt->skb; + unsigned int len = min_t(unsigned int, + pkt->xt.thoff - skb_network_offset(skb), + NFT_TRACETYPE_NETWORK_HSIZE); + int off = skb_network_offset(skb); + + if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) + return -1; + + len = min_t(unsigned int, skb->len - pkt->xt.thoff, + NFT_TRACETYPE_TRANSPORT_HSIZE); + + if 
(trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, + pkt->xt.thoff, len)) + return -1; + + if (!skb_mac_header_was_set(skb)) + return 0; + + if (skb_vlan_tag_get(skb)) + return nf_trace_fill_ll_header(nlskb, skb); + + off = skb_mac_header(skb) - skb->data; + len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE); + return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER, + skb, off, len); +} + +static int nf_trace_fill_rule_info(struct sk_buff *nlskb, + const struct nft_traceinfo *info) +{ + if (!info->rule) + return 0; + + /* a continue verdict with ->type == RETURN means that this is + * an implicit return (end of chain reached). + * + * Since no rule matched, the ->rule pointer is invalid. + */ + if (info->type == NFT_TRACETYPE_RETURN && + info->verdict->code == NFT_CONTINUE) + return 0; + + return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE, + cpu_to_be64(info->rule->handle)); +} + +void nft_trace_notify(struct nft_traceinfo *info) +{ + const struct nft_pktinfo *pkt = info->pkt; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct sk_buff *skb; + unsigned int size; + int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; + + if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE)) + return; + + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + + nla_total_size(NFT_TABLE_MAXNAMELEN) + + nla_total_size(NFT_CHAIN_MAXNAMELEN) + + nla_total_size(sizeof(__be64)) + /* rule handle */ + nla_total_size(sizeof(__be32)) + /* trace type */ + nla_total_size(0) + /* VERDICT, nested */ + nla_total_size(sizeof(u32)) + /* verdict code */ + nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */ + nla_total_size(sizeof(u32)) + /* id */ + nla_total_size(NFT_TRACETYPE_LL_HSIZE) + + nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + + nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) + + nla_total_size(sizeof(u32)) + /* iif */ + nla_total_size(sizeof(__be16)) + /* iiftype */ + nla_total_size(sizeof(u32)) + /* oif */ + nla_total_size(sizeof(__be16)) + /* oiftype */ + 
nla_total_size(sizeof(u32)) + /* mark */ + nla_total_size(sizeof(u32)) + /* nfproto */ + nla_total_size(sizeof(u32)); /* policy */ + + skb = nlmsg_new(size, GFP_ATOMIC); + if (!skb) + return; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); + if (!nlh) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = info->basechain->type->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) + goto nla_put_failure; + + if (trace_fill_id(skb, pkt->skb)) + goto nla_put_failure; + + if (info->chain) { + if (nla_put_string(skb, NFTA_TRACE_CHAIN, + info->chain->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_TRACE_TABLE, + info->chain->table->name)) + goto nla_put_failure; + } + + if (nf_trace_fill_rule_info(skb, info)) + goto nla_put_failure; + + switch (info->type) { + case NFT_TRACETYPE_UNSPEC: + case __NFT_TRACETYPE_MAX: + break; + case NFT_TRACETYPE_RETURN: + case NFT_TRACETYPE_RULE: + if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) + goto nla_put_failure; + break; + case NFT_TRACETYPE_POLICY: + if (nla_put_be32(skb, NFTA_TRACE_POLICY, + info->basechain->policy)) + goto nla_put_failure; + break; + } + + if (pkt->skb->mark && + nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + goto nla_put_failure; + + if (!info->packet_dumped) { + if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out)) + goto nla_put_failure; + + if (nf_trace_fill_pkt_info(skb, pkt)) + goto nla_put_failure; + info->packet_dumped = true; + } + + nlmsg_end(skb, nlh); + nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); + return; + + nla_put_failure: + WARN_ON_ONCE(1); + kfree_skb(skb); +} + +void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_chain *chain) +{ + info->basechain = 
nft_base_chain(chain); + info->trace = true; + info->packet_dumped = false; + info->pkt = pkt; + info->verdict = verdict; +} diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 46453ab318db..28591fa94ba5 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -49,6 +49,7 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, + [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, }; void nfnl_lock(__u8 subsys_id) -- cgit v1.2.3 From e639f7ab079b5256660018511d87aa34b54f1a9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Nov 2015 21:53:05 +0100 Subject: netfilter: nf_tables: wrap tracing with a static key Only needed when meta nftrace rule(s) were added. The assumption is that no such rules are active, so the call to nft_trace_init is "never" needed. When nftrace rules are active, we always call the nft_trace_* functions, but will only send netlink messages when all of the following are true: - traceinfo structure was initialised - skb->nf_trace == 1 - at least one subscriber to trace group. Adding an extra conditional (static_branch ... && skb->nf_trace) nft_trace_init( ..) Is possible but results in a larger nft_do_chain footprint. 
Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 1 + include/net/netfilter/nft_meta.h | 3 +++ net/bridge/netfilter/nft_meta_bridge.c | 1 + net/netfilter/nf_tables_core.c | 9 ++++++--- net/netfilter/nf_tables_trace.c | 4 ++++ net/netfilter/nft_meta.c | 16 ++++++++++++++++ 6 files changed, 31 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 4ff5424909aa..a9060dd99db7 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -57,6 +57,7 @@ struct nft_payload_set { }; extern const struct nft_expr_ops nft_payload_fast_ops; +extern struct static_key_false nft_trace_enabled; int nft_payload_module_init(void); void nft_payload_module_exit(void); diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 711887a09e91..d27588c8dbd9 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -33,4 +33,7 @@ void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr); + #endif diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index a21269b83f16..4b901d9f2e7c 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -84,6 +84,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 2395de7c8ab2..67fa41d317f6 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -16,6 +16,7 @@ #include #include #include 
+#include #include #include #include @@ -50,7 +51,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, { const struct nft_pktinfo *pkt = info->pkt; - if (!pkt->skb->nf_trace) + if (!info->trace || !pkt->skb->nf_trace) return; info->chain = chain; @@ -70,7 +71,7 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, int rulenum, enum nft_trace_types type) { - if (unlikely(info->trace)) { + if (static_branch_unlikely(&nft_trace_enabled)) { info->rule = rule; __nft_trace_packet(info, chain, rulenum, type); } @@ -137,7 +138,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) unsigned int gencursor = nft_genmask_cur(net); struct nft_traceinfo info; - nft_trace_init(&info, pkt, &regs.verdict, basechain); + info.trace = false; + if (static_branch_unlikely(&nft_trace_enabled)) + nft_trace_init(&info, pkt, &regs.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 36fd7ad6729a..e9e959f65d91 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -24,6 +25,9 @@ #define NFT_TRACETYPE_NETWORK_HSIZE 40 #define NFT_TRACETYPE_TRANSPORT_HSIZE 20 +DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); +EXPORT_SYMBOL_GPL(nft_trace_enabled); + static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) { __be32 id; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9dfaf4d55ee0..85a465b773e5 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -18,10 +18,12 @@ #include #include #include +#include #include #include #include /* for TCP_TIME_WAIT */ #include +#include #include void nft_meta_get_eval(const struct nft_expr *expr, @@ -297,6 +299,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (priv->key == NFT_META_NFTRACE) + static_branch_inc(&nft_trace_enabled); +
return 0; } EXPORT_SYMBOL_GPL(nft_meta_set_init); @@ -334,6 +339,16 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_meta_set_dump); +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (priv->key == NFT_META_NFTRACE) + static_branch_dec(&nft_trace_enabled); +} +EXPORT_SYMBOL_GPL(nft_meta_set_destroy); + static struct nft_expr_type nft_meta_type; static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, @@ -348,6 +363,7 @@ static const struct nft_expr_ops nft_meta_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; -- cgit v1.2.3 From e97ac12859dbf4d3ee0eddb9798867541d1d1e1e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Dec 2015 23:35:19 +0100 Subject: netfilter: ipv6: nf_defrag: fix NULL deref panic Valdis reports NULL deref in nf_ct_frag6_gather. Problem is bogus use of skb_queue_walk() -- we miss first skb in the list since we start with head->next instead of head. In case the element we're looking for was head->next we won't find a result and then trip over NULL iter. (defrag uses plain NULL-terminated list rather than one terminated by head-of-list-pointer, which is what skb_queue_walk expects). 
Fixes: 029f7f3b8701cc7a ("netfilter: ipv6: nf_defrag: avoid/free clone operations") Reported-by: Valdis Kletnieks Tested-by: Valdis Kletnieks Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_reasm.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 912bc3afc183..6e5f0e0d49e0 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -441,11 +441,14 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic return false; fp->next = prev->next; - skb_queue_walk(head, iter) { - if (iter->next != prev) - continue; - iter->next = fp; - break; + + iter = head; + while (iter) { + if (iter->next == prev) { + iter->next = fp; + break; + } + iter = iter->next; } skb_morph(prev, head); -- cgit v1.2.3 From 23509fcd4ec5eadcca7a958b354f79dedc2765cc Mon Sep 17 00:00:00 2001 From: "Rosen, Rami" Date: Tue, 8 Dec 2015 07:09:24 -0500 Subject: netfilter: nfnetlink_log: Change setter functions to be void Change return type of nfulnl_set_timeout() and nfulnl_set_qthresh() to be void. This patch changes the return type of the static methods nfulnl_set_timeout() and nfulnl_set_qthresh() to be void, as there is no justification and no need for these methods to return int. 
Signed-off-by: Rami Rosen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index dea467647c90..70b6bd3b781e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -293,24 +293,20 @@ nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz) return status; } -static int +static void nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout) { spin_lock_bh(&inst->lock); inst->flushtimeout = timeout; spin_unlock_bh(&inst->lock); - - return 0; } -static int +static void nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh) { spin_lock_bh(&inst->lock); inst->qthreshold = qthresh; spin_unlock_bh(&inst->lock); - - return 0; } static int -- cgit v1.2.3 From 9fb0b519c7e094e741a3fc3fd4d854a8bc74b6dc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Dec 2015 16:31:21 +0100 Subject: netfilter: nf_tables: fix nf_log_trace based tracing nf_log_trace() outputs bogus 'TRACE:' strings because I forgot to update the comments array. 
Fixes: 33d5a7b14bfd0 ("netfilter: nf_tables: extend tracing infrastructure") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 67fa41d317f6..e9f8dffcc244 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -23,16 +23,10 @@ #include #include -enum nft_trace { - NFT_TRACE_RULE, - NFT_TRACE_RETURN, - NFT_TRACE_POLICY, -}; - -static const char *const comments[] = { - [NFT_TRACE_RULE] = "rule", - [NFT_TRACE_RETURN] = "return", - [NFT_TRACE_POLICY] = "policy", +static const char *const comments[__NFT_TRACETYPE_MAX] = { + [NFT_TRACETYPE_POLICY] = "policy", + [NFT_TRACETYPE_RETURN] = "return", + [NFT_TRACETYPE_RULE] = "rule", }; static struct nf_loginfo trace_loginfo = { @@ -47,7 +41,7 @@ static struct nf_loginfo trace_loginfo = { static noinline void __nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - int rulenum, enum nft_trace type) + int rulenum, enum nft_trace_types type) { const struct nft_pktinfo *pkt = info->pkt; -- cgit v1.2.3 From 01b1cb87d37fb19cdaa5e7002416fdde156873d0 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 16 Nov 2015 12:52:21 +0200 Subject: Bluetooth: Run page scan updates through hdev->req_workqueue Since Add/Remove Device perform the page scan updates independently from the HCI command completion we've introduced a potential race when multiple mgmt commands are queued. Doing the page scan updates through the req_workqueue ensures that the state changes are performed in a race-free manner. At the same time, to make the request helper more widely usable, extend it to also cover Inquiry Scan changes since those are behind the same HCI command. This is also reflected in the new name of the API as well as the work struct name. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_event.c | 4 ++-- net/bluetooth/hci_request.c | 27 ++++++++++++++++++--------- net/bluetooth/hci_request.h | 8 ++++++-- net/bluetooth/mgmt.c | 16 ++++++++-------- 5 files changed, 35 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 55ce209157b1..eda809a5c3df 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -329,6 +329,7 @@ struct hci_dev { struct work_struct discov_update; struct work_struct bg_scan_update; + struct work_struct scan_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d57c11c1c6b5..703e37f1a955 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2176,7 +2176,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); } /* Set packet type for incoming connection */ @@ -2362,7 +2362,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); } params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e639671f54bd..78c026b4ffa1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -637,7 +637,7 @@ static bool disconnected_whitelist_entries(struct hci_dev *hdev) return false; } -void __hci_update_page_scan(struct hci_request *req) +void __hci_req_update_scan(struct hci_request *req) { struct hci_dev *hdev = req->hdev; u8 scan; @@ -657,22 +657,29 @@ void 
__hci_update_page_scan(struct hci_request *req) else scan = SCAN_DISABLED; - if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) - return; - if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) scan |= SCAN_INQUIRY; + if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE) && + test_bit(HCI_ISCAN, &hdev->flags) == !!(scan & SCAN_INQUIRY)) + return; + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -void hci_update_page_scan(struct hci_dev *hdev) +static int update_scan(struct hci_request *req, unsigned long opt) { - struct hci_request req; + hci_dev_lock(req->hdev); + __hci_req_update_scan(req); + hci_dev_unlock(req->hdev); + return 0; +} - hci_req_init(&req, hdev); - __hci_update_page_scan(&req); - hci_req_run(&req, NULL); +static void scan_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, scan_update); + + hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL); } /* This function controls the background scanning based on hdev->pend_le_conns @@ -1270,6 +1277,7 @@ void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->discov_update, discov_update); INIT_WORK(&hdev->bg_scan_update, bg_scan_update); + INIT_WORK(&hdev->scan_update, scan_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); } @@ -1280,6 +1288,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); + cancel_work_sync(&hdev->scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 6b9e59f7f7a9..cc8275520fb2 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -61,8 +61,12 @@ void hci_req_add_le_passive_scan(struct hci_request *req); /* Returns true if HCI commands were queued */ bool 
hci_req_stop_discovery(struct hci_request *req); -void hci_update_page_scan(struct hci_dev *hdev); -void __hci_update_page_scan(struct hci_request *req); +static inline void hci_req_update_scan(struct hci_dev *hdev) +{ + queue_work(hdev->req_workqueue, &hdev->scan_update); +} + +void __hci_req_update_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, u8 *own_addr_type); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3d9d2e4839c5..0d20e1328528 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1810,7 +1810,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, * entries. */ hci_req_init(&req, hdev); - __hci_update_page_scan(&req); + __hci_req_update_scan(&req); update_class(&req); hci_req_run(&req, NULL); @@ -2058,7 +2058,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, if (conn_changed || discov_changed) { new_settings(hdev, cmd->sk); - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); if (discov_changed) mgmt_update_adv_data(hdev); hci_update_background_scan(hdev); @@ -2092,7 +2092,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev, return err; if (changed) { - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); hci_update_background_scan(hdev); return new_settings(hdev, sk); } @@ -5041,7 +5041,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); write_fast_connectable(&req, false); - __hci_update_page_scan(&req); + __hci_req_update_scan(&req); /* Since only the advertising data flags will change, there * is no need to update the scan response data. 
@@ -5927,7 +5927,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); goto added; } @@ -6024,7 +6024,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -6089,7 +6089,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -7397,7 +7397,7 @@ static int powered_update_hci(struct hci_dev *hdev) write_fast_connectable(&req, true); else write_fast_connectable(&req, false); - __hci_update_page_scan(&req); + __hci_req_update_scan(&req); update_class(&req); update_name(&req); update_eir(&req); -- cgit v1.2.3 From 196a5e97d13092f783e41001c1112d7f31518ea2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 22 Nov 2015 18:55:44 +0200 Subject: Bluetooth: Move __hci_update_background_scan up in hci_request.c This way we avoid the need to do a forward declaration in later patches. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 146 ++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 78c026b4ffa1..7c85435b8982 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -346,6 +346,79 @@ void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, hci_req_add_ev(req, opcode, plen, param, 0); } +/* This function controls the background scanning based on hdev->pend_le_conns + * list. If there are pending LE connection we start the background scanning, + * otherwise we stop it. + * + * This function requires the caller holds hdev->lock. 
+ */ +static void __hci_update_background_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + + if (!test_bit(HCI_UP, &hdev->flags) || + test_bit(HCI_INIT, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG) || + hci_dev_test_flag(hdev, HCI_AUTO_OFF) || + hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return; + + /* No point in doing scanning if LE support hasn't been enabled */ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return; + + /* If discovery is active don't interfere with it */ + if (hdev->discovery.state != DISCOVERY_STOPPED) + return; + + /* Reset RSSI and UUID filters when starting background scanning + * since these filters are meant for service discovery only. + * + * The Start Discovery and Start Service Discovery operations + * ensure to set proper values for RSSI threshold and UUID + * filter list. So it is safe to just reset them here. + */ + hci_discovery_filter_clear(hdev); + + if (list_empty(&hdev->pend_le_conns) && + list_empty(&hdev->pend_le_reports)) { + /* If there is no pending LE connections or devices + * to be scanned for, we should stop the background + * scanning. + */ + + /* If controller is not scanning we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return; + + hci_req_add_le_scan_disable(req); + + BT_DBG("%s stopping background scanning", hdev->name); + } else { + /* If there is at least one pending LE connection, we should + * keep the background scan running. + */ + + /* If controller is connecting, we should not start scanning + * since some controllers are not able to scan and connect at + * the same time. + */ + if (hci_lookup_le_connect(hdev)) + return; + + /* If controller is currently scanning, we stop it to ensure we + * don't miss any advertising (due to duplicates filter). 
+ */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(req); + + hci_req_add_le_passive_scan(req); + + BT_DBG("%s starting background scanning", hdev->name); + } +} + void hci_req_add_le_scan_disable(struct hci_request *req) { struct hci_cp_le_set_scan_enable cp; @@ -682,79 +755,6 @@ static void scan_update_work(struct work_struct *work) hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL); } -/* This function controls the background scanning based on hdev->pend_le_conns - * list. If there are pending LE connection we start the background scanning, - * otherwise we stop it. - * - * This function requires the caller holds hdev->lock. - */ -static void __hci_update_background_scan(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - - if (!test_bit(HCI_UP, &hdev->flags) || - test_bit(HCI_INIT, &hdev->flags) || - hci_dev_test_flag(hdev, HCI_SETUP) || - hci_dev_test_flag(hdev, HCI_CONFIG) || - hci_dev_test_flag(hdev, HCI_AUTO_OFF) || - hci_dev_test_flag(hdev, HCI_UNREGISTER)) - return; - - /* No point in doing scanning if LE support hasn't been enabled */ - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - /* If discovery is active don't interfere with it */ - if (hdev->discovery.state != DISCOVERY_STOPPED) - return; - - /* Reset RSSI and UUID filters when starting background scanning - * since these filters are meant for service discovery only. - * - * The Start Discovery and Start Service Discovery operations - * ensure to set proper values for RSSI threshold and UUID - * filter list. So it is safe to just reset them here. - */ - hci_discovery_filter_clear(hdev); - - if (list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)) { - /* If there is no pending LE connections or devices - * to be scanned for, we should stop the background - * scanning. - */ - - /* If controller is not scanning we are done. 
*/ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return; - - hci_req_add_le_scan_disable(req); - - BT_DBG("%s stopping background scanning", hdev->name); - } else { - /* If there is at least one pending LE connection, we should - * keep the background scan running. - */ - - /* If controller is connecting, we should not start scanning - * since some controllers are not able to scan and connect at - * the same time. - */ - if (hci_lookup_le_connect(hdev)) - return; - - /* If controller is currently scanning, we stop it to ensure we - * don't miss any advertising (due to duplicates filter). - */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req); - - hci_req_add_le_passive_scan(req); - - BT_DBG("%s starting background scanning", hdev->name); - } -} - void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { -- cgit v1.2.3 From f22525700b2ae34eb97a29a91e2eee902062b484 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 18 Nov 2015 12:49:20 +0200 Subject: Bluetooth: Move advertising instance management to hci_request.c This paves the way for eventually performing advertising changes through the hdev->req_workqueue. Some new APIs need to be exposed from mgmt.c to hci_request.c and vice-versa, but many of them will go away once hdev->req_workqueue gets used. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 9 +- net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_core.c | 19 +- net/bluetooth/hci_event.c | 4 +- net/bluetooth/hci_request.c | 533 +++++++++++++++++++++++++++++++++++- net/bluetooth/hci_request.h | 14 + net/bluetooth/mgmt.c | 574 +++------------------------------------ 7 files changed, 589 insertions(+), 566 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index eda809a5c3df..b56085b6ecce 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1434,9 +1434,7 @@ void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); void mgmt_set_powered_failed(struct hci_dev *hdev, int err); int mgmt_powered(struct hci_dev *hdev, u8 powered); -int mgmt_update_adv_data(struct hci_dev *hdev); void mgmt_discoverable_timeout(struct hci_dev *hdev); -void mgmt_adv_timeout_expired(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, @@ -1491,8 +1489,13 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 store_hint, u16 min_interval, u16 max_interval, u16 latency, u16 timeout); -void mgmt_reenable_advertising(struct hci_dev *hdev); void mgmt_smp_complete(struct hci_conn *conn, bool complete); +bool mgmt_get_connectable(struct hci_dev *hdev); +u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); +void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, + u8 instance); +void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, + u8 instance); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 
2d334e07fd77..e2600213cd50 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -683,7 +683,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) /* Re-enable advertising in case this was a failed connection * attempt as a peripheral. */ - mgmt_reenable_advertising(hdev); + hci_req_reenable_advertising(hdev); } static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 89af7e4fac02..bab8958bf46e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1549,11 +1549,6 @@ int hci_dev_do_close(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_MGMT)) cancel_delayed_work_sync(&hdev->rpa_expired); - if (hdev->adv_instance_timeout) { - cancel_delayed_work_sync(&hdev->adv_instance_expire); - hdev->adv_instance_timeout = 0; - } - /* Avoid potential lockdep warnings from the *_flush() calls by * ensuring the workqueue is empty up front. */ @@ -1774,7 +1769,7 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan) hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - mgmt_update_adv_data(hdev); + hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); mgmt_new_settings(hdev); } @@ -2112,17 +2107,6 @@ static void hci_discov_off(struct work_struct *work) mgmt_discoverable_timeout(hdev); } -static void hci_adv_timeout_expire(struct work_struct *work) -{ - struct hci_dev *hdev; - - hdev = container_of(work, struct hci_dev, adv_instance_expire.work); - - BT_DBG("%s", hdev->name); - - mgmt_adv_timeout_expired(hdev); -} - void hci_uuids_clear(struct hci_dev *hdev) { struct bt_uuid *uuid, *tmp; @@ -3003,7 +2987,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); - INIT_DELAYED_WORK(&hdev->adv_instance_expire, hci_adv_timeout_expire); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); diff --git 
a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 703e37f1a955..7554da5b7a8f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1183,7 +1183,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, hci_discovery_set_state(hdev, DISCOVERY_STOPPED); else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) && hdev->discovery.state == DISCOVERY_FINDING) - mgmt_reenable_advertising(hdev); + hci_req_reenable_advertising(hdev); break; @@ -2401,7 +2401,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) * is timed out due to Directed Advertising." */ if (type == LE_LINK) - mgmt_reenable_advertising(hdev); + hci_req_reenable_advertising(hdev); unlock: hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 7c85435b8982..e6622bd1926d 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -23,6 +23,7 @@ #include #include +#include #include "smp.h" #include "hci_request.h" @@ -580,6 +581,524 @@ void hci_req_add_le_passive_scan(struct hci_request *req) &enable_cp); } +static u8 get_current_adv_instance(struct hci_dev *hdev) +{ + /* The "Set Advertising" setting supersedes the "Add Advertising" + * setting. Here we set the advertising data based on which + * setting was set. When neither apply, default to the global settings, + * represented by instance "0". + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return hdev->cur_adv_instance; + + return 0x00; +} + +static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) +{ + u8 instance = get_current_adv_instance(hdev); + struct adv_info *adv_instance; + + /* Ignore instance 0 */ + if (instance == 0x00) + return 0; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + + /* TODO: Take into account the "appearance" and "local-name" flags here. 
+ * These are currently being ignored as they are not supported. + */ + return adv_instance->scan_rsp_len; +} + +void __hci_req_disable_advertising(struct hci_request *req) +{ + u8 enable = 0x00; + + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); +} + +static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance) +{ + u32 flags; + struct adv_info *adv_instance; + + if (instance == 0x00) { + /* Instance 0 always manages the "Tx Power" and "Flags" + * fields + */ + flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; + + /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting + * corresponds to the "connectable" instance flag. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) + flags |= MGMT_ADV_FLAG_CONNECTABLE; + + return flags; + } + + adv_instance = hci_find_adv_instance(hdev, instance); + + /* Return 0 when we got an invalid instance identifier. */ + if (!adv_instance) + return 0; + + return adv_instance->flags; +} + +void __hci_req_enable_advertising(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_adv_param cp; + u8 own_addr_type, enable = 0x01; + bool connectable; + u8 instance; + u32 flags; + + if (hci_conn_num(hdev, LE_LINK) > 0) + return; + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + __hci_req_disable_advertising(req); + + /* Clear the HCI_LE_ADV bit temporarily so that the + * hci_update_random_address knows that it's safe to go ahead + * and write a new random address. The flag will be set back on + * as soon as the SET_ADV_ENABLE HCI command completes. + */ + hci_dev_clear_flag(hdev, HCI_LE_ADV); + + instance = get_current_adv_instance(hdev); + flags = get_adv_instance_flags(hdev, instance); + + /* If the "connectable" instance flag was not set, then choose between + * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. 
+ */ + connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || + mgmt_get_connectable(hdev); + + /* Set require_privacy to true only when non-connectable + * advertising is used. In that case it is fine to use a + * non-resolvable private address. + */ + if (hci_update_random_address(req, !connectable, &own_addr_type) < 0) + return; + + memset(&cp, 0, sizeof(cp)); + cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval); + cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval); + + if (connectable) + cp.type = LE_ADV_IND; + else if (get_cur_adv_instance_scan_rsp_len(hdev)) + cp.type = LE_ADV_SCAN_IND; + else + cp.type = LE_ADV_NONCONN_IND; + + cp.own_address_type = own_addr_type; + cp.channel_map = hdev->le_adv_channel_map; + + hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); + + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); +} + +static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +{ + u8 ad_len = 0; + size_t name_len; + + name_len = strlen(hdev->dev_name); + if (name_len > 0) { + size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2; + + if (name_len > max_len) { + name_len = max_len; + ptr[1] = EIR_NAME_SHORT; + } else + ptr[1] = EIR_NAME_COMPLETE; + + ptr[0] = name_len + 1; + + memcpy(ptr + 2, hdev->dev_name, name_len); + + ad_len += (name_len + 2); + ptr += (name_len + 2); + } + + return ad_len; +} + +static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, + u8 *ptr) +{ + struct adv_info *adv_instance; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + + /* TODO: Set the appropriate entries based on advertising instance flags + * here once flags other than 0 are supported. 
+ */ + memcpy(ptr, adv_instance->scan_rsp_data, + adv_instance->scan_rsp_len); + + return adv_instance->scan_rsp_len; +} + +static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_rsp_data cp; + u8 len; + + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return; + + memset(&cp, 0, sizeof(cp)); + + if (instance) + len = create_instance_scan_rsp_data(hdev, instance, cp.data); + else + len = create_default_scan_rsp_data(hdev, cp.data); + + if (hdev->scan_rsp_data_len == len && + !memcmp(cp.data, hdev->scan_rsp_data, len)) + return; + + memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); + hdev->scan_rsp_data_len = len; + + cp.length = len; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); +} + +void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance) +{ + if (instance == HCI_ADV_CURRENT) + instance = get_current_adv_instance(req->hdev); + + update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev)); +} + +static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +{ + struct adv_info *adv_instance = NULL; + u8 ad_len = 0, flags = 0; + u32 instance_flags; + + /* Return 0 when the current instance identifier is invalid. */ + if (instance) { + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + } + + instance_flags = get_adv_instance_flags(hdev, instance); + + /* The Add Advertising command allows userspace to set both the general + * and limited discoverable flags. + */ + if (instance_flags & MGMT_ADV_FLAG_DISCOV) + flags |= LE_AD_GENERAL; + + if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) + flags |= LE_AD_LIMITED; + + if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { + /* If a discovery flag wasn't provided, simply use the global + * settings. 
+ */ + if (!flags) + flags |= mgmt_get_adv_discov_flags(hdev); + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; + + /* If flags would still be empty, then there is no need to + * include the "Flags" AD field". + */ + if (flags) { + ptr[0] = 0x02; + ptr[1] = EIR_FLAGS; + ptr[2] = flags; + + ad_len += 3; + ptr += 3; + } + } + + if (adv_instance) { + memcpy(ptr, adv_instance->adv_data, + adv_instance->adv_data_len); + ad_len += adv_instance->adv_data_len; + ptr += adv_instance->adv_data_len; + } + + /* Provide Tx Power only if we can provide a valid value for it */ + if (hdev->adv_tx_power != HCI_TX_POWER_INVALID && + (instance_flags & MGMT_ADV_FLAG_TX_POWER)) { + ptr[0] = 0x02; + ptr[1] = EIR_TX_POWER; + ptr[2] = (u8)hdev->adv_tx_power; + + ad_len += 3; + ptr += 3; + } + + return ad_len; +} + +static void update_inst_adv_data(struct hci_request *req, u8 instance) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_adv_data cp; + u8 len; + + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return; + + memset(&cp, 0, sizeof(cp)); + + len = create_instance_adv_data(hdev, instance, cp.data); + + /* There's nothing to do if the data hasn't changed */ + if (hdev->adv_data_len == len && + memcmp(cp.data, hdev->adv_data, len) == 0) + return; + + memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); + hdev->adv_data_len = len; + + cp.length = len; + + hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); +} + +void __hci_req_update_adv_data(struct hci_request *req, int instance) +{ + if (instance == HCI_ADV_CURRENT) + instance = get_current_adv_instance(req->hdev); + + update_inst_adv_data(req, instance); +} + +int hci_req_update_adv_data(struct hci_dev *hdev, int instance) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_req_update_adv_data(&req, instance); + + return hci_req_run(&req, NULL); +} + +static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + BT_DBG("%s status %u", hdev->name, 
status); +} + +void hci_req_reenable_advertising(struct hci_dev *hdev) +{ + struct hci_request req; + u8 instance; + + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + return; + + instance = get_current_adv_instance(hdev); + + hci_req_init(&req, hdev); + + if (instance) { + __hci_req_schedule_adv_instance(&req, instance, true); + } else { + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); + __hci_req_enable_advertising(&req); + } + + hci_req_run(&req, adv_enable_complete); +} + +static void adv_timeout_expire(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + adv_instance_expire.work); + + struct hci_request req; + u8 instance; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + hdev->adv_instance_timeout = 0; + + instance = get_current_adv_instance(hdev); + if (instance == 0x00) + goto unlock; + + hci_req_init(&req, hdev); + + hci_req_clear_adv_instance(hdev, &req, instance, false); + + if (list_empty(&hdev->adv_instances)) + __hci_req_disable_advertising(&req); + + if (!skb_queue_empty(&req.cmd_q)) + hci_req_run(&req, NULL); + +unlock: + hci_dev_unlock(hdev); +} + +int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, + bool force) +{ + struct hci_dev *hdev = req->hdev; + struct adv_info *adv_instance = NULL; + u16 timeout; + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + return -EPERM; + + if (hdev->adv_instance_timeout) + return -EBUSY; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return -ENOENT; + + /* A zero timeout means unlimited advertising. As long as there is + * only one instance, duration should be ignored. We still set a timeout + * in case further instances are being added later on. 
+ * + * If the remaining lifetime of the instance is more than the duration + * then the timeout corresponds to the duration, otherwise it will be + * reduced to the remaining instance lifetime. + */ + if (adv_instance->timeout == 0 || + adv_instance->duration <= adv_instance->remaining_time) + timeout = adv_instance->duration; + else + timeout = adv_instance->remaining_time; + + /* The remaining time is being reduced unless the instance is being + * advertised without time limit. + */ + if (adv_instance->timeout) + adv_instance->remaining_time = + adv_instance->remaining_time - timeout; + + hdev->adv_instance_timeout = timeout; + queue_delayed_work(hdev->req_workqueue, + &hdev->adv_instance_expire, + msecs_to_jiffies(timeout * 1000)); + + /* If we're just re-scheduling the same instance again then do not + * execute any HCI commands. This happens when a single instance is + * being advertised. + */ + if (!force && hdev->cur_adv_instance == instance && + hci_dev_test_flag(hdev, HCI_LE_ADV)) + return 0; + + hdev->cur_adv_instance = instance; + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); + __hci_req_enable_advertising(req); + + return 0; +} + +static void cancel_adv_timeout(struct hci_dev *hdev) +{ + if (hdev->adv_instance_timeout) { + hdev->adv_instance_timeout = 0; + cancel_delayed_work(&hdev->adv_instance_expire); + } +} + +/* For a single instance: + * - force == true: The instance will be removed even when its remaining + * lifetime is not zero. + * - force == false: the instance will be deactivated but kept stored unless + * the remaining lifetime is zero. + * + * For instance == 0x00: + * - force == true: All instances will be removed regardless of their timeout + * setting. + * - force == false: Only instances that have a timeout will be removed. 
+ */ +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, + u8 instance, bool force) +{ + struct adv_info *adv_instance, *n, *next_instance = NULL; + int err; + u8 rem_inst; + + /* Cancel any timeout concerning the removed instance(s). */ + if (!instance || hdev->cur_adv_instance == instance) + cancel_adv_timeout(hdev); + + /* Get the next instance to advertise BEFORE we remove + * the current one. This can be the same instance again + * if there is only one instance. + */ + if (instance && hdev->cur_adv_instance == instance) + next_instance = hci_get_next_instance(hdev, instance); + + if (instance == 0x00) { + list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, + list) { + if (!(force || adv_instance->timeout)) + continue; + + rem_inst = adv_instance->instance; + err = hci_remove_adv_instance(hdev, rem_inst); + if (!err) + mgmt_advertising_removed(NULL, hdev, rem_inst); + } + hdev->cur_adv_instance = 0x00; + } else { + adv_instance = hci_find_adv_instance(hdev, instance); + + if (force || (adv_instance && adv_instance->timeout && + !adv_instance->remaining_time)) { + /* Don't advertise a removed instance. 
*/ + if (next_instance && + next_instance->instance == instance) + next_instance = NULL; + + err = hci_remove_adv_instance(hdev, instance); + if (!err) + mgmt_advertising_removed(NULL, hdev, instance); + } + } + + if (list_empty(&hdev->adv_instances)) { + hdev->cur_adv_instance = 0x00; + hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); + } + + if (!req || !hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return; + + if (next_instance) + __hci_req_schedule_adv_instance(req, next_instance->instance, + false); +} + static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) { struct hci_dev *hdev = req->hdev; @@ -1031,14 +1550,6 @@ unlock: hci_dev_unlock(hdev); } -static void cancel_adv_timeout(struct hci_dev *hdev) -{ - if (hdev->adv_instance_timeout) { - hdev->adv_instance_timeout = 0; - cancel_delayed_work(&hdev->adv_instance_expire); - } -} - static void disable_advertising(struct hci_request *req) { u8 enable = 0x00; @@ -1280,6 +1791,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->scan_update, scan_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); + INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); } void hci_request_cancel_all(struct hci_dev *hdev) @@ -1291,4 +1803,9 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); + + if (hdev->adv_instance_timeout) { + cancel_delayed_work_sync(&hdev->adv_instance_expire); + hdev->adv_instance_timeout = 0; + } } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index cc8275520fb2..5358b1b12ca0 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -58,6 +58,20 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_req_add_le_scan_disable(struct 
hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); +#define HCI_ADV_CURRENT (-1) + +void hci_req_reenable_advertising(struct hci_dev *hdev); +void __hci_req_enable_advertising(struct hci_request *req); +void __hci_req_disable_advertising(struct hci_request *req); +void __hci_req_update_adv_data(struct hci_request *req, int instance); +int hci_req_update_adv_data(struct hci_dev *hdev, int instance); +void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance); + +int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, + bool force); +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, + u8 instance, bool force); + /* Returns true if HCI commands were queued */ bool hci_req_stop_discovery(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0d20e1328528..6d0f0025052f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -841,98 +841,7 @@ static struct mgmt_pending_cmd *pending_find_data(u16 opcode, return mgmt_pending_find_data(HCI_CHANNEL_CONTROL, opcode, hdev, data); } -static u8 get_current_adv_instance(struct hci_dev *hdev) -{ - /* The "Set Advertising" setting supersedes the "Add Advertising" - * setting. Here we set the advertising data based on which - * setting was set. When neither apply, default to the global settings, - * represented by instance "0". 
- */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING)) - return hdev->cur_adv_instance; - - return 0x00; -} - -static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) -{ - u8 ad_len = 0; - size_t name_len; - - name_len = strlen(hdev->dev_name); - if (name_len > 0) { - size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2; - - if (name_len > max_len) { - name_len = max_len; - ptr[1] = EIR_NAME_SHORT; - } else - ptr[1] = EIR_NAME_COMPLETE; - - ptr[0] = name_len + 1; - - memcpy(ptr + 2, hdev->dev_name, name_len); - - ad_len += (name_len + 2); - ptr += (name_len + 2); - } - - return ad_len; -} - -static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, - u8 *ptr) -{ - struct adv_info *adv_instance; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - - /* TODO: Set the appropriate entries based on advertising instance flags - * here once flags other than 0 are supported. - */ - memcpy(ptr, adv_instance->scan_rsp_data, - adv_instance->scan_rsp_len); - - return adv_instance->scan_rsp_len; -} - -static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_scan_rsp_data cp; - u8 len; - - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - memset(&cp, 0, sizeof(cp)); - - if (instance) - len = create_instance_scan_rsp_data(hdev, instance, cp.data); - else - len = create_default_scan_rsp_data(hdev, cp.data); - - if (hdev->scan_rsp_data_len == len && - !memcmp(cp.data, hdev->scan_rsp_data, len)) - return; - - memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); - hdev->scan_rsp_data_len = len; - - cp.length = len; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); -} - -static void update_scan_rsp_data(struct hci_request *req) -{ - update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev)); -} - -static u8 get_adv_discov_flags(struct hci_dev *hdev) 
+u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; @@ -956,7 +865,7 @@ static u8 get_adv_discov_flags(struct hci_dev *hdev) return 0; } -static bool get_connectable(struct hci_dev *hdev) +bool mgmt_get_connectable(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; @@ -973,163 +882,6 @@ static bool get_connectable(struct hci_dev *hdev) return hci_dev_test_flag(hdev, HCI_CONNECTABLE); } -static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance) -{ - u32 flags; - struct adv_info *adv_instance; - - if (instance == 0x00) { - /* Instance 0 always manages the "Tx Power" and "Flags" - * fields - */ - flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; - - /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting - * corresponds to the "connectable" instance flag. - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) - flags |= MGMT_ADV_FLAG_CONNECTABLE; - - return flags; - } - - adv_instance = hci_find_adv_instance(hdev, instance); - - /* Return 0 when we got an invalid instance identifier. */ - if (!adv_instance) - return 0; - - return adv_instance->flags; -} - -static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) -{ - u8 instance = get_current_adv_instance(hdev); - struct adv_info *adv_instance; - - /* Ignore instance 0 */ - if (instance == 0x00) - return 0; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - - /* TODO: Take into account the "appearance" and "local-name" flags here. - * These are currently being ignored as they are not supported. - */ - return adv_instance->scan_rsp_len; -} - -static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) -{ - struct adv_info *adv_instance = NULL; - u8 ad_len = 0, flags = 0; - u32 instance_flags; - - /* Return 0 when the current instance identifier is invalid. 
*/ - if (instance) { - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - } - - instance_flags = get_adv_instance_flags(hdev, instance); - - /* The Add Advertising command allows userspace to set both the general - * and limited discoverable flags. - */ - if (instance_flags & MGMT_ADV_FLAG_DISCOV) - flags |= LE_AD_GENERAL; - - if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) - flags |= LE_AD_LIMITED; - - if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { - /* If a discovery flag wasn't provided, simply use the global - * settings. - */ - if (!flags) - flags |= get_adv_discov_flags(hdev); - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - flags |= LE_AD_NO_BREDR; - - /* If flags would still be empty, then there is no need to - * include the "Flags" AD field". - */ - if (flags) { - ptr[0] = 0x02; - ptr[1] = EIR_FLAGS; - ptr[2] = flags; - - ad_len += 3; - ptr += 3; - } - } - - if (adv_instance) { - memcpy(ptr, adv_instance->adv_data, - adv_instance->adv_data_len); - ad_len += adv_instance->adv_data_len; - ptr += adv_instance->adv_data_len; - } - - /* Provide Tx Power only if we can provide a valid value for it */ - if (hdev->adv_tx_power != HCI_TX_POWER_INVALID && - (instance_flags & MGMT_ADV_FLAG_TX_POWER)) { - ptr[0] = 0x02; - ptr[1] = EIR_TX_POWER; - ptr[2] = (u8)hdev->adv_tx_power; - - ad_len += 3; - ptr += 3; - } - - return ad_len; -} - -static void update_inst_adv_data(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_adv_data cp; - u8 len; - - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - memset(&cp, 0, sizeof(cp)); - - len = create_instance_adv_data(hdev, instance, cp.data); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) - return; - - memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); - hdev->adv_data_len = len; - - cp.length = len; - - hci_req_add(req, 
HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); -} - -static void update_adv_data(struct hci_request *req) -{ - update_inst_adv_data(req, get_current_adv_instance(req->hdev)); -} - -int mgmt_update_adv_data(struct hci_dev *hdev) -{ - struct hci_request req; - - hci_req_init(&req, hdev); - update_adv_data(&req); - - return hci_req_run(&req, NULL); -} - static void create_eir(struct hci_dev *hdev, u8 *data) { u8 *ptr = data; @@ -1247,70 +999,6 @@ static void update_class(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } -static void disable_advertising(struct hci_request *req) -{ - u8 enable = 0x00; - - hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); -} - -static void enable_advertising(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_adv_param cp; - u8 own_addr_type, enable = 0x01; - bool connectable; - u8 instance; - u32 flags; - - if (hci_conn_num(hdev, LE_LINK) > 0) - return; - - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - disable_advertising(req); - - /* Clear the HCI_LE_ADV bit temporarily so that the - * hci_update_random_address knows that it's safe to go ahead - * and write a new random address. The flag will be set back on - * as soon as the SET_ADV_ENABLE HCI command completes. - */ - hci_dev_clear_flag(hdev, HCI_LE_ADV); - - instance = get_current_adv_instance(hdev); - flags = get_adv_instance_flags(hdev, instance); - - /* If the "connectable" instance flag was not set, then choose between - * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. - */ - connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || - get_connectable(hdev); - - /* Set require_privacy to true only when non-connectable - * advertising is used. In that case it is fine to use a - * non-resolvable private address. 
- */ - if (hci_update_random_address(req, !connectable, &own_addr_type) < 0) - return; - - memset(&cp, 0, sizeof(cp)); - cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval); - cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval); - - if (connectable) - cp.type = LE_ADV_IND; - else if (get_cur_adv_instance_scan_rsp_len(hdev)) - cp.type = LE_ADV_SCAN_IND; - else - cp.type = LE_ADV_NONCONN_IND; - - cp.own_address_type = own_addr_type; - cp.channel_map = hdev->le_adv_channel_map; - - hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); - - hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); -} - static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1346,10 +1034,11 @@ static void rpa_expired(struct work_struct *work) return; /* The generation of a new RPA and programming it into the - * controller happens in the enable_advertising() function. + * controller happens in the hci_req_enable_advertising() + * function. 
*/ hci_req_init(&req, hdev); - enable_advertising(&req); + __hci_req_enable_advertising(&req); hci_req_run(&req, NULL); } @@ -1417,8 +1106,7 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) } } -static void advertising_added(struct sock *sk, struct hci_dev *hdev, - u8 instance) +void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance) { struct mgmt_ev_advertising_added ev; @@ -1427,8 +1115,8 @@ static void advertising_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADVERTISING_ADDED, hdev, &ev, sizeof(ev), sk); } -static void advertising_removed(struct sock *sk, struct hci_dev *hdev, - u8 instance) +void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, + u8 instance) { struct mgmt_ev_advertising_removed ev; @@ -1437,65 +1125,6 @@ static void advertising_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk); } -static int schedule_adv_instance(struct hci_request *req, u8 instance, - bool force) { - struct hci_dev *hdev = req->hdev; - struct adv_info *adv_instance = NULL; - u16 timeout; - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - return -EPERM; - - if (hdev->adv_instance_timeout) - return -EBUSY; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return -ENOENT; - - /* A zero timeout means unlimited advertising. As long as there is - * only one instance, duration should be ignored. We still set a timeout - * in case further instances are being added later on. - * - * If the remaining lifetime of the instance is more than the duration - * then the timeout corresponds to the duration, otherwise it will be - * reduced to the remaining instance lifetime. 
- */ - if (adv_instance->timeout == 0 || - adv_instance->duration <= adv_instance->remaining_time) - timeout = adv_instance->duration; - else - timeout = adv_instance->remaining_time; - - /* The remaining time is being reduced unless the instance is being - * advertised without time limit. - */ - if (adv_instance->timeout) - adv_instance->remaining_time = - adv_instance->remaining_time - timeout; - - hdev->adv_instance_timeout = timeout; - queue_delayed_work(hdev->workqueue, - &hdev->adv_instance_expire, - msecs_to_jiffies(timeout * 1000)); - - /* If we're just re-scheduling the same instance again then do not - * execute any HCI commands. This happens when a single instance is - * being advertised. - */ - if (!force && hdev->cur_adv_instance == instance && - hci_dev_test_flag(hdev, HCI_LE_ADV)) - return 0; - - hdev->cur_adv_instance = instance; - update_adv_data(req); - update_scan_rsp_data(req); - enable_advertising(req); - - return 0; -} - static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { @@ -1504,76 +1133,6 @@ static void cancel_adv_timeout(struct hci_dev *hdev) } } -/* For a single instance: - * - force == true: The instance will be removed even when its remaining - * lifetime is not zero. - * - force == false: the instance will be deactivated but kept stored unless - * the remaining lifetime is zero. - * - * For instance == 0x00: - * - force == true: All instances will be removed regardless of their timeout - * setting. - * - force == false: Only instances that have a timeout will be removed. - */ -static void clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force) -{ - struct adv_info *adv_instance, *n, *next_instance = NULL; - int err; - u8 rem_inst; - - /* Cancel any timeout concerning the removed instance(s). */ - if (!instance || hdev->cur_adv_instance == instance) - cancel_adv_timeout(hdev); - - /* Get the next instance to advertise BEFORE we remove - * the current one. 
This can be the same instance again - * if there is only one instance. - */ - if (instance && hdev->cur_adv_instance == instance) - next_instance = hci_get_next_instance(hdev, instance); - - if (instance == 0x00) { - list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, - list) { - if (!(force || adv_instance->timeout)) - continue; - - rem_inst = adv_instance->instance; - err = hci_remove_adv_instance(hdev, rem_inst); - if (!err) - advertising_removed(NULL, hdev, rem_inst); - } - hdev->cur_adv_instance = 0x00; - } else { - adv_instance = hci_find_adv_instance(hdev, instance); - - if (force || (adv_instance && adv_instance->timeout && - !adv_instance->remaining_time)) { - /* Don't advertise a removed instance. */ - if (next_instance && - next_instance->instance == instance) - next_instance = NULL; - - err = hci_remove_adv_instance(hdev, instance); - if (!err) - advertising_removed(NULL, hdev, instance); - } - } - - if (list_empty(&hdev->adv_instances)) { - hdev->cur_adv_instance = 0x00; - hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); - } - - if (!req || !hdev_is_powered(hdev) || - hci_dev_test_flag(hdev, HCI_ADVERTISING)) - return; - - if (next_instance) - schedule_adv_instance(req, next_instance->instance, false); -} - static int clean_up_hci_state(struct hci_dev *hdev) { struct hci_request req; @@ -1589,10 +1148,10 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - clear_adv_instance(hdev, NULL, 0x00, false); + hci_req_clear_adv_instance(hdev, NULL, 0x00, false); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - disable_advertising(&req); + __hci_req_disable_advertising(&req); discov_stopped = hci_req_stop_discovery(&req); @@ -1975,7 +1534,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); update_ad: - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); err = 
hci_req_run(&req, set_discoverable_complete); if (err < 0) @@ -2060,7 +1619,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, new_settings(hdev, cmd->sk); hci_req_update_scan(hdev); if (discov_changed) - mgmt_update_adv_data(hdev); + hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); hci_update_background_scan(hdev); } @@ -2151,7 +1710,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); } - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); } else if (cp->val != test_bit(HCI_PSCAN, &hdev->flags)) { if (cp->val) { scan = SCAN_PAGE; @@ -2181,7 +1740,7 @@ no_scan_update: /* Update the advertising parameters if necessary */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - enable_advertising(&req); + __hci_req_enable_advertising(&req); err = hci_req_run(&req, set_connectable_complete); if (err < 0) { @@ -2466,8 +2025,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) struct hci_request req; hci_req_init(&req, hdev); - update_adv_data(&req); - update_scan_rsp_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); hci_req_run(&req, NULL); hci_update_background_scan(hdev); } @@ -2518,7 +2077,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) enabled = lmp_host_le_capable(hdev); if (!val) - clear_adv_instance(hdev, NULL, 0x00, true); + hci_req_clear_adv_instance(hdev, NULL, 0x00, true); if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; @@ -2565,7 +2124,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_cp.simul = 0x00; } else { if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - disable_advertising(&req); + __hci_req_disable_advertising(&req); } hci_req_add(&req, 
HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(hci_cp), @@ -3856,7 +3415,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, * no need to udpate the advertising data here. */ if (lmp_le_capable(hdev)) - update_scan_rsp_data(&req); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); err = hci_req_run(&req, set_name_complete); if (err < 0) @@ -4600,7 +4159,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, hci_req_init(&req, hdev); - err = schedule_adv_instance(&req, instance, true); + err = __hci_req_schedule_adv_instance(&req, instance, true); if (!err) err = hci_req_run(&req, enable_advertising_instance); @@ -4697,11 +4256,11 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, * We cannot use update_[adv|scan_rsp]_data() here as the * HCI_ADVERTISING flag is not yet set. */ - update_inst_adv_data(&req, 0x00); - update_inst_scan_rsp_data(&req, 0x00); - enable_advertising(&req); + __hci_req_update_adv_data(&req, 0x00); + __hci_req_update_scan_rsp_data(&req, 0x00); + __hci_req_enable_advertising(&req); } else { - disable_advertising(&req); + __hci_req_disable_advertising(&req); } err = hci_req_run(&req, set_advertising_complete); @@ -5033,8 +4592,8 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto unlock; } - /* We need to flip the bit already here so that update_adv_data - * generates the correct flags. + /* We need to flip the bit already here so that + * hci_req_update_adv_data generates the correct flags. */ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); @@ -5046,7 +4605,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) /* Since only the advertising data flags will change, there * is no need to update the scan response data. 
*/ - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); err = hci_req_run(&req, set_bredr_complete); if (err < 0) @@ -6583,7 +6142,7 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, rand, sizeof(rand)); } - flags = get_adv_discov_flags(hdev); + flags = mgmt_get_adv_discov_flags(hdev); if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) flags |= LE_AD_NO_BREDR; @@ -6772,7 +6331,7 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status, cancel_adv_timeout(hdev); hci_remove_adv_instance(hdev, instance); - advertising_removed(cmd ? cmd->sk : NULL, hdev, instance); + mgmt_advertising_removed(cmd ? cmd->sk : NULL, hdev, instance); } if (!cmd) @@ -6794,31 +6353,6 @@ unlock: hci_dev_unlock(hdev); } -void mgmt_adv_timeout_expired(struct hci_dev *hdev) -{ - u8 instance; - struct hci_request req; - - hdev->adv_instance_timeout = 0; - - instance = get_current_adv_instance(hdev); - if (instance == 0x00) - return; - - hci_dev_lock(hdev); - hci_req_init(&req, hdev); - - clear_adv_instance(hdev, &req, instance, false); - - if (list_empty(&hdev->adv_instances)) - disable_advertising(&req); - - if (!skb_queue_empty(&req.cmd_q)) - hci_req_run(&req, NULL); - - hci_dev_unlock(hdev); -} - static int add_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -6897,7 +6431,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, * actually added. 
*/ if (hdev->adv_instance_cnt > prev_instance_cnt) - advertising_added(sk, hdev, cp->instance); + mgmt_advertising_added(sk, hdev, cp->instance); hci_dev_set_flag(hdev, HCI_ADVERTISING_INSTANCE); @@ -6944,7 +6478,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - err = schedule_adv_instance(&req, schedule_instance, true); + err = __hci_req_schedule_adv_instance(&req, schedule_instance, true); if (!err) err = hci_req_run(&req, add_advertising_complete); @@ -7024,10 +6558,10 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - clear_adv_instance(hdev, &req, cp->instance, true); + hci_req_clear_adv_instance(hdev, &req, cp->instance, true); if (list_empty(&hdev->adv_instances)) - disable_advertising(&req); + __hci_req_disable_advertising(&req); /* If no HCI commands have been collected so far or the HCI_ADVERTISING * flag is set or the device isn't powered then we have no HCI @@ -7367,8 +6901,8 @@ static int powered_update_hci(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && (hci_dev_test_flag(hdev, HCI_ADVERTISING) || !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { - update_adv_data(&req); - update_scan_rsp_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); } if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && @@ -7380,11 +6914,12 @@ static int powered_update_hci(struct hci_dev *hdev) } if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - enable_advertising(&req); + __hci_req_enable_advertising(&req); else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && hdev->cur_adv_instance) - schedule_adv_instance(&req, hdev->cur_adv_instance, - true); + __hci_req_schedule_adv_instance(&req, + hdev->cur_adv_instance, + true); } link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); @@ -7505,7 +7040,7 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev) * only update AD if 
advertising was enabled using Set Advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); hci_req_run(&req, NULL); @@ -8352,35 +7887,6 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering) mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL); } -static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - BT_DBG("%s status %u", hdev->name, status); -} - -void mgmt_reenable_advertising(struct hci_dev *hdev) -{ - struct hci_request req; - u8 instance; - - if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - return; - - instance = get_current_adv_instance(hdev); - - hci_req_init(&req, hdev); - - if (instance) { - schedule_adv_instance(&req, instance, true); - } else { - update_adv_data(&req); - update_scan_rsp_data(&req); - enable_advertising(&req); - } - - hci_req_run(&req, adv_enable_complete); -} - static struct hci_mgmt_chan chan = { .channel = HCI_CHANNEL_CONTROL, .handler_count = ARRAY_SIZE(mgmt_handlers), -- cgit v1.2.3 From 53c0ba74510c1182786dcd1e3710215467777601 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 22 Nov 2015 16:43:43 +0300 Subject: Bluetooth: Move connectable changes to hdev->req_workqueue This way the connectable changes are synchronized against each other, which helps avoid potential races. The connectable mode is also linked together with LE advertising which makes it more convenient to have it behind the same workqueue.
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 + net/bluetooth/hci_request.c | 39 ++++++++++++++++++ net/bluetooth/mgmt.c | 86 ++++++---------------------------------- 3 files changed, 53 insertions(+), 74 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b56085b6ecce..a855e41df68c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -330,6 +330,7 @@ struct hci_dev { struct work_struct discov_update; struct work_struct bg_scan_update; struct work_struct scan_update; + struct work_struct connectable_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -1491,6 +1492,7 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u16 max_interval, u16 latency, u16 timeout); void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); +void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e6622bd1926d..167c90644b4b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1274,6 +1274,43 @@ static void scan_update_work(struct work_struct *work) hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL); } +static int connectable_update(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + hci_dev_lock(hdev); + + __hci_req_update_scan(req); + + /* If BR/EDR is not enabled and we disable advertising as a + * by-product of disabling connectable, we need to update the + * advertising flags. 
+ */ + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + + /* Update the advertising parameters if necessary */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + __hci_req_enable_advertising(req); + + __hci_update_background_scan(req); + + hci_dev_unlock(hdev); + + return 0; +} + +static void connectable_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + connectable_update); + u8 status; + + hci_req_sync(hdev, connectable_update, 0, HCI_CMD_TIMEOUT, &status); + mgmt_set_connectable_complete(hdev, status); +} + void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -1789,6 +1826,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->discov_update, discov_update); INIT_WORK(&hdev->bg_scan_update, bg_scan_update); INIT_WORK(&hdev->scan_update, scan_update_work); + INIT_WORK(&hdev->connectable_update, connectable_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -1801,6 +1839,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); cancel_work_sync(&hdev->scan_update); + cancel_work_sync(&hdev->connectable_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6d0f0025052f..d8b76ca5c820 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1580,12 +1580,9 @@ static void write_fast_connectable(struct hci_request *req, bool enable) hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); } -static void set_connectable_complete(struct hci_dev *hdev, u8 status, - u16 opcode) +void 
mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - struct mgmt_mode *cp; - bool conn_changed, discov_changed; BT_DBG("status 0x%02x", status); @@ -1601,27 +1598,8 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, goto remove_cmd; } - cp = cmd->param; - if (cp->val) { - conn_changed = !hci_dev_test_and_set_flag(hdev, - HCI_CONNECTABLE); - discov_changed = false; - } else { - conn_changed = hci_dev_test_and_clear_flag(hdev, - HCI_CONNECTABLE); - discov_changed = hci_dev_test_and_clear_flag(hdev, - HCI_DISCOVERABLE); - } - send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); - - if (conn_changed || discov_changed) { - new_settings(hdev, cmd->sk); - hci_req_update_scan(hdev); - if (discov_changed) - hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); - hci_update_background_scan(hdev); - } + new_settings(hdev, cmd->sk); remove_cmd: mgmt_pending_remove(cmd); @@ -1664,8 +1642,6 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; - u8 scan; int err; BT_DBG("request for %s", hdev->name); @@ -1699,57 +1675,19 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - hci_req_init(&req, hdev); - - /* If BR/EDR is not enabled and we disable advertising as a - * by-product of disabling connectable, we need to update the - * advertising flags. - */ - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { - if (!cp->val) { - hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); - } - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - } else if (cp->val != test_bit(HCI_PSCAN, &hdev->flags)) { - if (cp->val) { - scan = SCAN_PAGE; - } else { - /* If we don't have any whitelist entries just - * disable all scanning. 
If there are entries - * and we had both page and inquiry scanning - * enabled then fall back to only page scanning. - * Otherwise no changes are needed. - */ - if (list_empty(&hdev->whitelist)) - scan = SCAN_DISABLED; - else if (test_bit(HCI_ISCAN, &hdev->flags)) - scan = SCAN_PAGE; - else - goto no_scan_update; - - if (test_bit(HCI_ISCAN, &hdev->flags) && - hdev->discov_timeout > 0) - cancel_delayed_work(&hdev->discov_off); - } + if (cp->val) { + hci_dev_set_flag(hdev, HCI_CONNECTABLE); + } else { + if (hdev->discov_timeout > 0) + cancel_delayed_work(&hdev->discov_off); - hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_CONNECTABLE); } -no_scan_update: - /* Update the advertising parameters if necessary */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - __hci_req_enable_advertising(&req); - - err = hci_req_run(&req, set_connectable_complete); - if (err < 0) { - mgmt_pending_remove(cmd); - if (err == -ENODATA) - err = set_connectable_update_settings(hdev, sk, - cp->val); - goto failed; - } + queue_work(hdev->req_workqueue, &hdev->connectable_update); + err = 0; failed: hci_dev_unlock(hdev); -- cgit v1.2.3 From 14bf5eac7a4f4bf0729ff8eb358de4fab967cee1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 22 Nov 2015 19:00:22 +0200 Subject: Bluetooth: Perform Class of Device changes through hdev->req_workqueue The Class of Device needs to be changed e.g. for limited discoverable mode. In preparation of moving the discoverable mode to hci_request.c and hdev->req_workqueue, move the Class of Device helpers there first. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 40 +++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 2 ++ net/bluetooth/mgmt.c | 54 ++++++--------------------------------------- 3 files changed, 49 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 167c90644b4b..e5e827b762b9 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1311,6 +1311,46 @@ static void connectable_update_work(struct work_struct *work) mgmt_set_connectable_complete(hdev, status); } +static u8 get_service_classes(struct hci_dev *hdev) +{ + struct bt_uuid *uuid; + u8 val = 0; + + list_for_each_entry(uuid, &hdev->uuids, list) + val |= uuid->svc_hint; + + return val; +} + +void __hci_req_update_class(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 cod[3]; + + BT_DBG("%s", hdev->name); + + if (!hdev_is_powered(hdev)) + return; + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + return; + + if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) + return; + + cod[0] = hdev->minor_class; + cod[1] = hdev->major_class; + cod[2] = get_service_classes(hdev); + + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) + cod[1] |= 0x20; + + if (memcmp(cod, hdev->dev_class, 3) == 0) + return; + + hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); +} + void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 5358b1b12ca0..41920348d68b 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -72,6 +72,8 @@ int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, u8 instance, bool force); +void __hci_req_update_class(struct hci_request *req); + /* Returns true if HCI commands were queued */ bool 
hci_req_stop_discovery(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d8b76ca5c820..f5a4ee92f2bf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -959,46 +959,6 @@ static void update_eir(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); } -static u8 get_service_classes(struct hci_dev *hdev) -{ - struct bt_uuid *uuid; - u8 val = 0; - - list_for_each_entry(uuid, &hdev->uuids, list) - val |= uuid->svc_hint; - - return val; -} - -static void update_class(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - u8 cod[3]; - - BT_DBG("%s", hdev->name); - - if (!hdev_is_powered(hdev)) - return; - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - return; - - if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) - return; - - cod[0] = hdev->minor_class; - cod[1] = hdev->major_class; - cod[2] = get_service_classes(hdev); - - if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) - cod[1] |= 0x20; - - if (memcmp(cod, hdev->dev_class, 3) == 0) - return; - - hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); -} - static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1013,7 +973,7 @@ static void service_cache_off(struct work_struct *work) hci_dev_lock(hdev); update_eir(&req); - update_class(&req); + __hci_req_update_class(&req); hci_dev_unlock(hdev); @@ -1370,7 +1330,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, */ hci_req_init(&req, hdev); __hci_req_update_scan(&req); - update_class(&req); + __hci_req_update_class(&req); hci_req_run(&req, NULL); remove_cmd: @@ -2177,7 +2137,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); - update_class(&req); + __hci_req_update_class(&req); update_eir(&req); err = hci_req_run(&req, add_uuid_complete); @@ -2277,7 +2237,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, 
void *data, update_class: hci_req_init(&req, hdev); - update_class(&req); + __hci_req_update_class(&req); update_eir(&req); err = hci_req_run(&req, remove_uuid_complete); @@ -2356,7 +2316,7 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, update_eir(&req); } - update_class(&req); + __hci_req_update_class(&req); err = hci_req_run(&req, set_class_complete); if (err < 0) { @@ -6871,7 +6831,7 @@ static int powered_update_hci(struct hci_dev *hdev) else write_fast_connectable(&req, false); __hci_req_update_scan(&req); - update_class(&req); + __hci_req_update_class(&req); update_name(&req); update_eir(&req); } @@ -6972,7 +6932,7 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); } - update_class(&req); + __hci_req_update_class(&req); /* Advertising instances don't use the global discoverable setting, so * only update AD if advertising was enabled using Set Advertising. -- cgit v1.2.3 From aed1a8851db022c3bd22af41a343068b8c6e40c1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 22 Nov 2015 17:24:44 +0300 Subject: Bluetooth: Move discoverable changes to hdev->req_workqueue The discoverable mode is intrinsically linked with the connectable mode e.g. through sharing the same HCI command (Write Scan Enable) for BR/EDR. It makes therefore sense to move it to hci_request.c and run the changes through the same hdev->req_workqueue. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 + net/bluetooth/hci_request.c | 64 ++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 90 ++++++---------------------------------- 3 files changed, 79 insertions(+), 77 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a855e41df68c..0a6966ed7ee1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -331,6 +331,7 @@ struct hci_dev { struct work_struct bg_scan_update; struct work_struct scan_update; struct work_struct connectable_update; + struct work_struct discoverable_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -1493,6 +1494,7 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); +void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e5e827b762b9..8f72218ed805 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1351,6 +1351,68 @@ void __hci_req_update_class(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } +static void write_iac(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_current_iac_lap cp; + + if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) + return; + + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { + /* Limited discoverable mode */ + cp.num_iac = min_t(u8, hdev->num_iac, 2); + cp.iac_lap[0] = 0x00; /* LIAC */ + cp.iac_lap[1] = 0x8b; + cp.iac_lap[2] = 0x9e; + cp.iac_lap[3] = 0x33; /* GIAC */ 
+ cp.iac_lap[4] = 0x8b; + cp.iac_lap[5] = 0x9e; + } else { + /* General discoverable mode */ + cp.num_iac = 1; + cp.iac_lap[0] = 0x33; /* GIAC */ + cp.iac_lap[1] = 0x8b; + cp.iac_lap[2] = 0x9e; + } + + hci_req_add(req, HCI_OP_WRITE_CURRENT_IAC_LAP, + (cp.num_iac * 3) + 1, &cp); +} + +static int discoverable_update(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + hci_dev_lock(hdev); + + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { + write_iac(req); + __hci_req_update_scan(req); + __hci_req_update_class(req); + } + + /* Advertising instances don't use the global discoverable setting, so + * only update AD if advertising was enabled using Set Advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + + hci_dev_unlock(hdev); + + return 0; +} + +static void discoverable_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discoverable_update); + u8 status; + + hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, &status); + mgmt_set_discoverable_complete(hdev, status); +} + void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -1867,6 +1929,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->bg_scan_update, bg_scan_update); INIT_WORK(&hdev->scan_update, scan_update_work); INIT_WORK(&hdev->connectable_update, connectable_update_work); + INIT_WORK(&hdev->discoverable_update, discoverable_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -1880,6 +1943,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->bg_scan_update); cancel_work_sync(&hdev->scan_update); cancel_work_sync(&hdev->connectable_update); + cancel_work_sync(&hdev->discoverable_update); 
cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f5a4ee92f2bf..8846cb3b0aaa 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1282,13 +1282,9 @@ static u8 mgmt_le_support(struct hci_dev *hdev) return MGMT_STATUS_SUCCESS; } -static void set_discoverable_complete(struct hci_dev *hdev, u8 status, - u16 opcode) +void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - struct mgmt_mode *cp; - struct hci_request req; - bool changed; BT_DBG("status 0x%02x", status); @@ -1305,33 +1301,14 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, goto remove_cmd; } - cp = cmd->param; - if (cp->val) { - changed = !hci_dev_test_and_set_flag(hdev, HCI_DISCOVERABLE); - - if (hdev->discov_timeout > 0) { - int to = msecs_to_jiffies(hdev->discov_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->discov_off, - to); - } - } else { - changed = hci_dev_test_and_clear_flag(hdev, HCI_DISCOVERABLE); + if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && + hdev->discov_timeout > 0) { + int to = msecs_to_jiffies(hdev->discov_timeout * 1000); + queue_delayed_work(hdev->req_workqueue, &hdev->discov_off, to); } send_settings_rsp(cmd->sk, MGMT_OP_SET_DISCOVERABLE, hdev); - - if (changed) - new_settings(hdev, cmd->sk); - - /* When the discoverable mode gets changed, make sure - * that class of device has the limited discoverable - * bit correctly set. Also update page scan based on whitelist - * entries. 
- */ - hci_req_init(&req, hdev); - __hci_req_update_scan(&req); - __hci_req_update_class(&req); - hci_req_run(&req, NULL); + new_settings(hdev, cmd->sk); remove_cmd: mgmt_pending_remove(cmd); @@ -1345,9 +1322,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_set_discoverable *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; u16 timeout; - u8 scan; int err; BT_DBG("request for %s", hdev->name); @@ -1447,58 +1422,19 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = timeout; + if (cp->val) + hci_dev_set_flag(hdev, HCI_DISCOVERABLE); + else + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + /* Limited discoverable mode */ if (cp->val == 0x02) hci_dev_set_flag(hdev, HCI_LIMITED_DISCOVERABLE); else hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - hci_req_init(&req, hdev); - - /* The procedure for LE-only controllers is much simpler - just - * update the advertising data. 
- */ - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - goto update_ad; - - scan = SCAN_PAGE; - - if (cp->val) { - struct hci_cp_write_current_iac_lap hci_cp; - - if (cp->val == 0x02) { - /* Limited discoverable mode */ - hci_cp.num_iac = min_t(u8, hdev->num_iac, 2); - hci_cp.iac_lap[0] = 0x00; /* LIAC */ - hci_cp.iac_lap[1] = 0x8b; - hci_cp.iac_lap[2] = 0x9e; - hci_cp.iac_lap[3] = 0x33; /* GIAC */ - hci_cp.iac_lap[4] = 0x8b; - hci_cp.iac_lap[5] = 0x9e; - } else { - /* General discoverable mode */ - hci_cp.num_iac = 1; - hci_cp.iac_lap[0] = 0x33; /* GIAC */ - hci_cp.iac_lap[1] = 0x8b; - hci_cp.iac_lap[2] = 0x9e; - } - - hci_req_add(&req, HCI_OP_WRITE_CURRENT_IAC_LAP, - (hci_cp.num_iac * 3) + 1, &hci_cp); - - scan |= SCAN_INQUIRY; - } else { - hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - } - - hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); - -update_ad: - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - - err = hci_req_run(&req, set_discoverable_complete); - if (err < 0) - mgmt_pending_remove(cmd); + queue_work(hdev->req_workqueue, &hdev->discoverable_update); + err = 0; failed: hci_dev_unlock(hdev); -- cgit v1.2.3 From c366f555b8df67633b849a5088bb897d6c63aaa5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 23 Nov 2015 15:43:06 +0200 Subject: Bluetooth: Move discoverable timeout behind hdev->req_workqueue Since the other discoverable changes are behind req_workqueue now it only makes sense to move the discoverable timeout there as well. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_core.c | 13 ------------- net/bluetooth/hci_request.c | 26 +++++++++++++++++++++++++ net/bluetooth/mgmt.c | 41 ++-------------------------------------- 4 files changed, 28 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0a6966ed7ee1..319bf020cea6 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1436,7 +1436,6 @@ void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); void mgmt_set_powered_failed(struct hci_dev *hdev, int err); int mgmt_powered(struct hci_dev *hdev, u8 powered); -void mgmt_discoverable_timeout(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bab8958bf46e..484c75f3332c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1537,7 +1537,6 @@ int hci_dev_do_close(struct hci_dev *hdev) flush_work(&hdev->rx_work); if (hdev->discov_timeout > 0) { - cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = 0; hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); @@ -2096,17 +2095,6 @@ static void hci_error_reset(struct work_struct *work) hci_dev_do_open(hdev); } -static void hci_discov_off(struct work_struct *work) -{ - struct hci_dev *hdev; - - hdev = container_of(work, struct hci_dev, discov_off.work); - - BT_DBG("%s", hdev->name); - - mgmt_discoverable_timeout(hdev); -} - void hci_uuids_clear(struct hci_dev *hdev) { struct bt_uuid *uuid, *tmp; @@ -2986,7 +2974,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->error_reset, hci_error_reset); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); - 
INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 8f72218ed805..fe14fd121d36 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1923,6 +1923,30 @@ static void discov_update(struct work_struct *work) } } +static void discov_off(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discov_off.work); + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + /* When discoverable timeout triggers, then just make sure + * the limited discoverable flag is cleared. Even in the case + * of a timeout triggered from general discoverable, it is + * safe to unconditionally clear the flag. + */ + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hdev->discov_timeout = 0; + + hci_dev_unlock(hdev); + + hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, NULL); + mgmt_new_settings(hdev); +} + void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->discov_update, discov_update); @@ -1930,6 +1954,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->scan_update, scan_update_work); INIT_WORK(&hdev->connectable_update, connectable_update_work); INIT_WORK(&hdev->discoverable_update, discoverable_update_work); + INIT_DELAYED_WORK(&hdev->discov_off, discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -1944,6 +1969,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->scan_update); cancel_work_sync(&hdev->connectable_update); cancel_work_sync(&hdev->discoverable_update); + cancel_delayed_work_sync(&hdev->discov_off); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8846cb3b0aaa..29b3bb70ae9f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1401,8 +1401,8 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, if (cp->val && hdev->discov_timeout > 0) { int to = msecs_to_jiffies(hdev->discov_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->discov_off, - to); + queue_delayed_work(hdev->req_workqueue, + &hdev->discov_off, to); } err = send_settings_rsp(sk, MGMT_OP_SET_DISCOVERABLE, hdev); @@ -6848,43 +6848,6 @@ void mgmt_set_powered_failed(struct hci_dev *hdev, int err) mgmt_pending_remove(cmd); } -void mgmt_discoverable_timeout(struct hci_dev *hdev) -{ - struct hci_request req; - - hci_dev_lock(hdev); - - /* When discoverable timeout triggers, then just make sure - * the limited discoverable flag is cleared. Even in the case - * of a timeout triggered from general discoverable, it is - * safe to unconditionally clear the flag. - */ - hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); - - hci_req_init(&req, hdev); - if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { - u8 scan = SCAN_PAGE; - hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, - sizeof(scan), &scan); - } - __hci_req_update_class(&req); - - /* Advertising instances don't use the global discoverable setting, so - * only update AD if advertising was enabled using Set Advertising. 
- */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - - hci_req_run(&req, NULL); - - hdev->discov_timeout = 0; - - new_settings(hdev, NULL); - - hci_dev_unlock(hdev); -} - void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent) { -- cgit v1.2.3 From 00cf5040b39638588cd10ae4ffcc76a1be6ecf2c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 25 Nov 2015 16:15:41 +0200 Subject: Bluetooth: HCI name update to hci_request.c We'll soon need this both from hci_request.c and mgmt.c so move it as a request helper function to hci_request.c. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 10 ++++++++++ net/bluetooth/hci_request.h | 2 ++ net/bluetooth/mgmt.c | 14 ++------------ 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index fe14fd121d36..3150461c52a4 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -420,6 +420,16 @@ static void __hci_update_background_scan(struct hci_request *req) } } +void __hci_req_update_name(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_local_name cp; + + memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); + + hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); +} + void hci_req_add_le_scan_disable(struct hci_request *req) { struct hci_cp_le_set_scan_enable cp; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 41920348d68b..4e65a9c7906a 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -55,6 +55,8 @@ void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); +void __hci_req_update_name(struct hci_request *req); + void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct 
hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 29b3bb70ae9f..001a29a320e6 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3153,16 +3153,6 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, HCI_OP_USER_PASSKEY_NEG_REPLY, 0); } -static void update_name(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_write_local_name cp; - - memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); - - hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); -} - static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; @@ -3241,7 +3231,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, hci_req_init(&req, hdev); if (lmp_bredr_capable(hdev)) { - update_name(&req); + __hci_req_update_name(&req); update_eir(&req); } @@ -6768,7 +6758,7 @@ static int powered_update_hci(struct hci_dev *hdev) write_fast_connectable(&req, false); __hci_req_update_scan(&req); __hci_req_update_class(&req); - update_name(&req); + __hci_req_update_name(&req); update_eir(&req); } -- cgit v1.2.3 From b1a8917c9bcbf42113dfacb6492228e094c96862 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 25 Nov 2015 16:15:42 +0200 Subject: Bluetooth: Move EIR update to hci_request.c We'll soon need to update the EIR both from hci_request.c and mgmt.c so move update_eir() as a more generic request helper to hci_request.c. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 189 +++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 1 + net/bluetooth/mgmt.c | 203 ++------------------------------------------ 3 files changed, 198 insertions(+), 195 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 3150461c52a4..030a1bb66ef5 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -21,6 +21,8 @@ SOFTWARE IS DISCLAIMED. */ +#include + #include #include #include @@ -430,6 +432,193 @@ void __hci_req_update_name(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); } +#define PNP_INFO_SVCLASS_ID 0x1200 + +static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 4) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + u16 uuid16; + + if (uuid->size != 16) + continue; + + uuid16 = get_unaligned_le16(&uuid->uuid[12]); + if (uuid16 < 0x1100) + continue; + + if (uuid16 == PNP_INFO_SVCLASS_ID) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID16_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u16) > len) { + uuids_start[1] = EIR_UUID16_SOME; + break; + } + + *ptr++ = (uuid16 & 0x00ff); + *ptr++ = (uuid16 & 0xff00) >> 8; + uuids_start[0] += sizeof(uuid16); + } + + return ptr; +} + +static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 6) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 32) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID32_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if 
((ptr - data) + sizeof(u32) > len) { + uuids_start[1] = EIR_UUID32_SOME; + break; + } + + memcpy(ptr, &uuid->uuid[12], sizeof(u32)); + ptr += sizeof(u32); + uuids_start[0] += sizeof(u32); + } + + return ptr; +} + +static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 18) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 128) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID128_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + 16 > len) { + uuids_start[1] = EIR_UUID128_SOME; + break; + } + + memcpy(ptr, uuid->uuid, 16); + ptr += 16; + uuids_start[0] += 16; + } + + return ptr; +} + +static void create_eir(struct hci_dev *hdev, u8 *data) +{ + u8 *ptr = data; + size_t name_len; + + name_len = strlen(hdev->dev_name); + + if (name_len > 0) { + /* EIR Data type */ + if (name_len > 48) { + name_len = 48; + ptr[1] = EIR_NAME_SHORT; + } else + ptr[1] = EIR_NAME_COMPLETE; + + /* EIR Data length */ + ptr[0] = name_len + 1; + + memcpy(ptr + 2, hdev->dev_name, name_len); + + ptr += (name_len + 2); + } + + if (hdev->inq_tx_power != HCI_TX_POWER_INVALID) { + ptr[0] = 2; + ptr[1] = EIR_TX_POWER; + ptr[2] = (u8) hdev->inq_tx_power; + + ptr += 3; + } + + if (hdev->devid_source > 0) { + ptr[0] = 9; + ptr[1] = EIR_DEVICE_ID; + + put_unaligned_le16(hdev->devid_source, ptr + 2); + put_unaligned_le16(hdev->devid_vendor, ptr + 4); + put_unaligned_le16(hdev->devid_product, ptr + 6); + put_unaligned_le16(hdev->devid_version, ptr + 8); + + ptr += 10; + } + + ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); +} + +void __hci_req_update_eir(struct hci_request *req) +{ + struct hci_dev *hdev 
= req->hdev; + struct hci_cp_write_eir cp; + + if (!hdev_is_powered(hdev)) + return; + + if (!lmp_ext_inq_capable(hdev)) + return; + + if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) + return; + + if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) + return; + + memset(&cp, 0, sizeof(cp)); + + create_eir(hdev, cp.data); + + if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) + return; + + memcpy(hdev->eir, cp.data, sizeof(cp.data)); + + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); +} + void hci_req_add_le_scan_disable(struct hci_request *req) { struct hci_cp_le_set_scan_enable cp; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 4e65a9c7906a..5af40395afa8 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -56,6 +56,7 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); void __hci_req_update_name(struct hci_request *req); +void __hci_req_update_eir(struct hci_request *req); void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 001a29a320e6..fa5dc67a800a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -719,116 +719,6 @@ static u32 get_current_settings(struct hci_dev *hdev) return settings; } -#define PNP_INFO_SVCLASS_ID 0x1200 - -static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) -{ - u8 *ptr = data, *uuids_start = NULL; - struct bt_uuid *uuid; - - if (len < 4) - return ptr; - - list_for_each_entry(uuid, &hdev->uuids, list) { - u16 uuid16; - - if (uuid->size != 16) - continue; - - uuid16 = get_unaligned_le16(&uuid->uuid[12]); - if (uuid16 < 0x1100) - continue; - - if (uuid16 == PNP_INFO_SVCLASS_ID) - continue; - - if (!uuids_start) { - uuids_start = ptr; - uuids_start[0] = 1; - uuids_start[1] = EIR_UUID16_ALL; - ptr += 2; - } - - /* Stop if not enough space to put next UUID */ - if ((ptr - data) + 
sizeof(u16) > len) { - uuids_start[1] = EIR_UUID16_SOME; - break; - } - - *ptr++ = (uuid16 & 0x00ff); - *ptr++ = (uuid16 & 0xff00) >> 8; - uuids_start[0] += sizeof(uuid16); - } - - return ptr; -} - -static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) -{ - u8 *ptr = data, *uuids_start = NULL; - struct bt_uuid *uuid; - - if (len < 6) - return ptr; - - list_for_each_entry(uuid, &hdev->uuids, list) { - if (uuid->size != 32) - continue; - - if (!uuids_start) { - uuids_start = ptr; - uuids_start[0] = 1; - uuids_start[1] = EIR_UUID32_ALL; - ptr += 2; - } - - /* Stop if not enough space to put next UUID */ - if ((ptr - data) + sizeof(u32) > len) { - uuids_start[1] = EIR_UUID32_SOME; - break; - } - - memcpy(ptr, &uuid->uuid[12], sizeof(u32)); - ptr += sizeof(u32); - uuids_start[0] += sizeof(u32); - } - - return ptr; -} - -static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) -{ - u8 *ptr = data, *uuids_start = NULL; - struct bt_uuid *uuid; - - if (len < 18) - return ptr; - - list_for_each_entry(uuid, &hdev->uuids, list) { - if (uuid->size != 128) - continue; - - if (!uuids_start) { - uuids_start = ptr; - uuids_start[0] = 1; - uuids_start[1] = EIR_UUID128_ALL; - ptr += 2; - } - - /* Stop if not enough space to put next UUID */ - if ((ptr - data) + 16 > len) { - uuids_start[1] = EIR_UUID128_SOME; - break; - } - - memcpy(ptr, uuid->uuid, 16); - ptr += 16; - uuids_start[0] += 16; - } - - return ptr; -} - static struct mgmt_pending_cmd *pending_find(u16 opcode, struct hci_dev *hdev) { return mgmt_pending_find(HCI_CHANNEL_CONTROL, opcode, hdev); @@ -882,83 +772,6 @@ bool mgmt_get_connectable(struct hci_dev *hdev) return hci_dev_test_flag(hdev, HCI_CONNECTABLE); } -static void create_eir(struct hci_dev *hdev, u8 *data) -{ - u8 *ptr = data; - size_t name_len; - - name_len = strlen(hdev->dev_name); - - if (name_len > 0) { - /* EIR Data type */ - if (name_len > 48) { - name_len = 48; - ptr[1] = EIR_NAME_SHORT; - } else - ptr[1] = 
EIR_NAME_COMPLETE; - - /* EIR Data length */ - ptr[0] = name_len + 1; - - memcpy(ptr + 2, hdev->dev_name, name_len); - - ptr += (name_len + 2); - } - - if (hdev->inq_tx_power != HCI_TX_POWER_INVALID) { - ptr[0] = 2; - ptr[1] = EIR_TX_POWER; - ptr[2] = (u8) hdev->inq_tx_power; - - ptr += 3; - } - - if (hdev->devid_source > 0) { - ptr[0] = 9; - ptr[1] = EIR_DEVICE_ID; - - put_unaligned_le16(hdev->devid_source, ptr + 2); - put_unaligned_le16(hdev->devid_vendor, ptr + 4); - put_unaligned_le16(hdev->devid_product, ptr + 6); - put_unaligned_le16(hdev->devid_version, ptr + 8); - - ptr += 10; - } - - ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); - ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); - ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); -} - -static void update_eir(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_write_eir cp; - - if (!hdev_is_powered(hdev)) - return; - - if (!lmp_ext_inq_capable(hdev)) - return; - - if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) - return; - - if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) - return; - - memset(&cp, 0, sizeof(cp)); - - create_eir(hdev, cp.data); - - if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) - return; - - memcpy(hdev->eir, cp.data, sizeof(cp.data)); - - hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); -} - static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -972,7 +785,7 @@ static void service_cache_off(struct work_struct *work) hci_dev_lock(hdev); - update_eir(&req); + __hci_req_update_eir(&req); __hci_req_update_class(&req); hci_dev_unlock(hdev); @@ -2074,7 +1887,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); __hci_req_update_class(&req); - update_eir(&req); + __hci_req_update_eir(&req); err = hci_req_run(&req, add_uuid_complete); if (err < 0) { @@ -2174,7 +1987,7 @@ 
update_class: hci_req_init(&req, hdev); __hci_req_update_class(&req); - update_eir(&req); + __hci_req_update_eir(&req); err = hci_req_run(&req, remove_uuid_complete); if (err < 0) { @@ -2249,7 +2062,7 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); cancel_delayed_work_sync(&hdev->service_cache); hci_dev_lock(hdev); - update_eir(&req); + __hci_req_update_eir(&req); } __hci_req_update_class(&req); @@ -3232,7 +3045,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (lmp_bredr_capable(hdev)) { __hci_req_update_name(&req); - update_eir(&req); + __hci_req_update_eir(&req); } /* The name is stored in the scan response data and so @@ -3917,7 +3730,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, NULL, 0); hci_req_init(&req, hdev); - update_eir(&req); + __hci_req_update_eir(&req); hci_req_run(&req, NULL); hci_dev_unlock(hdev); @@ -6759,7 +6572,7 @@ static int powered_update_hci(struct hci_dev *hdev) __hci_req_update_scan(&req); __hci_req_update_class(&req); __hci_req_update_name(&req); - update_eir(&req); + __hci_req_update_eir(&req); } return hci_req_run(&req, powered_complete); @@ -7380,7 +7193,7 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) hci_req_add(&req, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(enable), &enable); - update_eir(&req); + __hci_req_update_eir(&req); } else { clear_eir(&req); } -- cgit v1.2.3 From bf943cbf76ecd3b9838a80d5e08777b0f4ccc665 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 25 Nov 2015 16:15:43 +0200 Subject: Bluetooth: Move fast connectable code to hci_request.c We'll soon need this both in hci_request.c and mgmt.c so move it to hci_request.c as a generic helper. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 35 +++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 1 + net/bluetooth/mgmt.c | 43 ++++--------------------------------------- 3 files changed, 40 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 030a1bb66ef5..0abd83ddd4fb 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -349,6 +349,41 @@ void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, hci_req_add_ev(req, opcode, plen, param, 0); } +void __hci_req_write_fast_connectable(struct hci_request *req, bool enable) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_page_scan_activity acp; + u8 type; + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + return; + + if (hdev->hci_ver < BLUETOOTH_VER_1_2) + return; + + if (enable) { + type = PAGE_SCAN_TYPE_INTERLACED; + + /* 160 msec page scan interval */ + acp.interval = cpu_to_le16(0x0100); + } else { + type = PAGE_SCAN_TYPE_STANDARD; /* default */ + + /* default 1.28 sec page scan */ + acp.interval = cpu_to_le16(0x0800); + } + + acp.window = cpu_to_le16(0x0012); + + if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval || + __cpu_to_le16(hdev->page_scan_window) != acp.window) + hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, + sizeof(acp), &acp); + + if (hdev->page_scan_type != type) + hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); +} + /* This function controls the background scanning based on hdev->pend_le_conns * list. If there are pending LE connection we start the background scanning, * otherwise we stop it. 
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 5af40395afa8..d3dd24deca74 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -55,6 +55,7 @@ void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); +void __hci_req_write_fast_connectable(struct hci_request *req, bool enable); void __hci_req_update_name(struct hci_request *req); void __hci_req_update_eir(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index fa5dc67a800a..0a7e6f4de383 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1254,41 +1254,6 @@ failed: return err; } -static void write_fast_connectable(struct hci_request *req, bool enable) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_write_page_scan_activity acp; - u8 type; - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - return; - - if (hdev->hci_ver < BLUETOOTH_VER_1_2) - return; - - if (enable) { - type = PAGE_SCAN_TYPE_INTERLACED; - - /* 160 msec page scan interval */ - acp.interval = cpu_to_le16(0x0100); - } else { - type = PAGE_SCAN_TYPE_STANDARD; /* default */ - - /* default 1.28 sec page scan */ - acp.interval = cpu_to_le16(0x0800); - } - - acp.window = cpu_to_le16(0x0012); - - if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval || - __cpu_to_le16(hdev->page_scan_window) != acp.window) - hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, - sizeof(acp), &acp); - - if (hdev->page_scan_type != type) - hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); -} - void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; @@ -4094,7 +4059,7 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - write_fast_connectable(&req, cp->val); + __hci_req_write_fast_connectable(&req, cp->val); err = hci_req_run(&req, fast_connectable_complete); if (err < 0) { @@ 
-4236,7 +4201,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); - write_fast_connectable(&req, false); + __hci_req_write_fast_connectable(&req, false); __hci_req_update_scan(&req); /* Since only the advertising data flags will change, there @@ -6566,9 +6531,9 @@ static int powered_update_hci(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) - write_fast_connectable(&req, true); + __hci_req_write_fast_connectable(&req, true); else - write_fast_connectable(&req, false); + __hci_req_write_fast_connectable(&req, false); __hci_req_update_scan(&req); __hci_req_update_class(&req); __hci_req_update_name(&req); -- cgit v1.2.3 From 2ff13894cfb877cb3d02d96a8402202f0a6f3efd Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 25 Nov 2015 16:15:44 +0200 Subject: Bluetooth: Perform HCI update for power on synchronously The request to update HCI during power on is always coming either from hdev->req_workqueue or through an ioctl, so it's safe to use hci_req_sync for it. This way we also eliminate potential races with incoming mgmt commands or other actions while powering on. Part of this refactoring is the splitting of mgmt_powered() into mgmt_power_on() and __mgmt_power_off() functions. The main reason is the different requirements as far as hdev locking is concerned, as highlighted with the __ prefix of the power off API. Since the power on in the case of clearing the AUTO_OFF flag cannot be done synchronously in the set_powered mgmt handler, the hci_power_on work callback is extended to cover this (which also simplifies the set_powered helper a lot). 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 +- net/bluetooth/hci_core.c | 21 ++++-- net/bluetooth/hci_request.c | 100 ++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 2 + net/bluetooth/mgmt.c | 136 +++------------------------------------ 5 files changed, 128 insertions(+), 134 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 319bf020cea6..c95e0326c41a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1435,7 +1435,8 @@ int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); void mgmt_set_powered_failed(struct hci_dev *hdev, int err); -int mgmt_powered(struct hci_dev *hdev, u8 powered); +void mgmt_power_on(struct hci_dev *hdev, int err); +void __mgmt_power_off(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 484c75f3332c..eac3f6fa1272 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1399,10 +1399,10 @@ static int hci_dev_do_open(struct hci_dev *hdev) !hci_dev_test_flag(hdev, HCI_CONFIG) && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hci_dev_test_flag(hdev, HCI_MGMT) && hdev->dev_type == HCI_BREDR) { - hci_dev_lock(hdev); - mgmt_powered(hdev, 1); - hci_dev_unlock(hdev); + ret = __hci_req_hci_power_on(hdev); + mgmt_power_on(hdev, ret); } } else { /* Init failed, cleanup */ @@ -1559,8 +1559,9 @@ int hci_dev_do_close(struct hci_dev *hdev) auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); - if (!auto_off && hdev->dev_type == HCI_BREDR) - mgmt_powered(hdev, 0); + if (!auto_off && hdev->dev_type == HCI_BREDR && + hci_dev_test_flag(hdev, 
HCI_MGMT)) + __mgmt_power_off(hdev); hci_inquiry_cache_flush(hdev); hci_pend_le_actions_clear(hdev); @@ -2013,6 +2014,16 @@ static void hci_power_on(struct work_struct *work) BT_DBG("%s", hdev->name); + if (test_bit(HCI_UP, &hdev->flags) && + hci_dev_test_flag(hdev, HCI_MGMT) && + hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { + hci_req_sync_lock(hdev); + err = __hci_req_hci_power_on(hdev); + hci_req_sync_unlock(hdev); + mgmt_power_on(hdev, err); + return; + } + err = hci_dev_do_open(hdev); if (err < 0) { hci_dev_lock(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 0abd83ddd4fb..7cc24f1448bd 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2181,6 +2181,106 @@ static void discov_off(struct work_struct *work) mgmt_new_settings(hdev); } +static int powered_update_hci(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + struct adv_info *adv_instance; + u8 link_sec; + + hci_dev_lock(hdev); + + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && + !lmp_host_ssp_capable(hdev)) { + u8 mode = 0x01; + + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); + + if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { + u8 support = 0x01; + + hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, + sizeof(support), &support); + } + } + + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && + lmp_bredr_capable(hdev)) { + struct hci_cp_write_le_host_supported cp; + + cp.le = 0x01; + cp.simul = 0x00; + + /* Check first if we already have the right + * host state (host features set) + */ + if (cp.le != lmp_host_le_capable(hdev) || + cp.simul != lmp_host_le_br_capable(hdev)) + hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, + sizeof(cp), &cp); + } + + if (lmp_le_capable(hdev)) { + /* Make sure the controller has a good default for + * advertising data. This also applies to the case + * where BR/EDR was toggled during the AUTO_OFF phase. 
+ */ + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && + (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); + } + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + hdev->cur_adv_instance == 0x00 && + !list_empty(&hdev->adv_instances)) { + adv_instance = list_first_entry(&hdev->adv_instances, + struct adv_info, list); + hdev->cur_adv_instance = adv_instance->instance; + } + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + __hci_req_enable_advertising(req); + else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + hdev->cur_adv_instance) + __hci_req_schedule_adv_instance(req, + hdev->cur_adv_instance, + true); + } + + link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); + if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, + sizeof(link_sec), &link_sec); + + if (lmp_bredr_capable(hdev)) { + if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) + __hci_req_write_fast_connectable(req, true); + else + __hci_req_write_fast_connectable(req, false); + __hci_req_update_scan(req); + __hci_req_update_class(req); + __hci_req_update_name(req); + __hci_req_update_eir(req); + } + + hci_dev_unlock(hdev); + return 0; +} + +int __hci_req_hci_power_on(struct hci_dev *hdev) +{ + /* Register the available SMP channels (BR/EDR and LE) only when + * successfully powering on the controller. This late + * registration is required so that LE SMP can clearly decide if + * the public address or static address is used. 
+ */ + smp_register(hdev); + + return __hci_req_sync(hdev, powered_update_hci, 0, HCI_CMD_TIMEOUT, + NULL); +} + void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->discov_update, discov_update); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index d3dd24deca74..a24d3b55094c 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -55,6 +55,8 @@ void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); +int __hci_req_hci_power_on(struct hci_dev *hdev); + void __hci_req_write_fast_connectable(struct hci_request *req, bool enable); void __hci_req_update_name(struct hci_request *req); void __hci_req_update_eir(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0a7e6f4de383..468402ad933c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -961,17 +961,6 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { - cancel_delayed_work(&hdev->power_off); - - if (cp->val) { - mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, - data, len); - err = mgmt_powered(hdev, 1); - goto failed; - } - } - if (!!cp->val == hdev_is_powered(hdev)) { err = send_settings_rsp(sk, MGMT_OP_SET_POWERED, hdev); goto failed; @@ -6434,139 +6423,33 @@ static void restart_le_actions(struct hci_dev *hdev) } } -static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) +void mgmt_power_on(struct hci_dev *hdev, int err) { struct cmd_lookup match = { NULL, hdev }; - BT_DBG("status 0x%02x", status); + BT_DBG("err %d", err); - if (!status) { + hci_dev_lock(hdev); + + if (!err) { restart_le_actions(hdev); hci_update_background_scan(hdev); } - hci_dev_lock(hdev); - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); new_settings(hdev, match.sk); - hci_dev_unlock(hdev); - if (match.sk) 
sock_put(match.sk); -} -static int powered_update_hci(struct hci_dev *hdev) -{ - struct hci_request req; - struct adv_info *adv_instance; - u8 link_sec; - - hci_req_init(&req, hdev); - - if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && - !lmp_host_ssp_capable(hdev)) { - u8 mode = 0x01; - - hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); - - if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { - u8 support = 0x01; - - hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, - sizeof(support), &support); - } - } - - if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && - lmp_bredr_capable(hdev)) { - struct hci_cp_write_le_host_supported cp; - - cp.le = 0x01; - cp.simul = 0x00; - - /* Check first if we already have the right - * host state (host features set) - */ - if (cp.le != lmp_host_le_capable(hdev) || - cp.simul != lmp_host_le_br_capable(hdev)) - hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, - sizeof(cp), &cp); - } - - if (lmp_le_capable(hdev)) { - /* Make sure the controller has a good default for - * advertising data. This also applies to the case - * where BR/EDR was toggled during the AUTO_OFF phase. 
- */ - if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && - (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); - } - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - hdev->cur_adv_instance == 0x00 && - !list_empty(&hdev->adv_instances)) { - adv_instance = list_first_entry(&hdev->adv_instances, - struct adv_info, list); - hdev->cur_adv_instance = adv_instance->instance; - } - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - __hci_req_enable_advertising(&req); - else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - hdev->cur_adv_instance) - __hci_req_schedule_adv_instance(&req, - hdev->cur_adv_instance, - true); - } - - link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); - if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) - hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE, - sizeof(link_sec), &link_sec); - - if (lmp_bredr_capable(hdev)) { - if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) - __hci_req_write_fast_connectable(&req, true); - else - __hci_req_write_fast_connectable(&req, false); - __hci_req_update_scan(&req); - __hci_req_update_class(&req); - __hci_req_update_name(&req); - __hci_req_update_eir(&req); - } - - return hci_req_run(&req, powered_complete); + hci_dev_unlock(hdev); } -int mgmt_powered(struct hci_dev *hdev, u8 powered) +void __mgmt_power_off(struct hci_dev *hdev) { struct cmd_lookup match = { NULL, hdev }; u8 status, zero_cod[] = { 0, 0, 0 }; - int err; - - if (!hci_dev_test_flag(hdev, HCI_MGMT)) - return 0; - - if (powered) { - /* Register the available SMP channels (BR/EDR and LE) only - * when successfully powering on the controller. This late - * registration is required so that LE SMP can clearly - * decide if the public address or static address is used. 
- */ - smp_register(hdev); - - if (powered_update_hci(hdev) == 0) - return 0; - - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, - &match); - goto new_settings; - } mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); @@ -6588,13 +6471,10 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, zero_cod, sizeof(zero_cod), NULL); -new_settings: - err = new_settings(hdev, match.sk); + new_settings(hdev, match.sk); if (match.sk) sock_put(match.sk); - - return err; } void mgmt_set_powered_failed(struct hci_dev *hdev, int err) -- cgit v1.2.3 From 02c04afea93fbba7925984df455bc63e7d92da97 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 26 Nov 2015 12:15:58 +0200 Subject: Bluetooth: Simplify read_adv_features code The code in the Read Advertising Features mgmt command handler is unnecessarily complicated. Clean it up and remove unnecessary variables & branches. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 468402ad933c..9ce2bb2fc977 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5788,10 +5788,10 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, { struct mgmt_rp_read_adv_features *rp; size_t rp_len; - int err, i; - bool instance; + int err; struct adv_info *adv_instance; u32 supported_flags; + u8 *instance; BT_DBG("%s", hdev->name); @@ -5801,12 +5801,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - rp_len = sizeof(*rp); - - instance = hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE); - if (instance) - rp_len += hdev->adv_instance_cnt; - + rp_len = sizeof(*rp) + hdev->adv_instance_cnt; rp = kmalloc(rp_len, GFP_ATOMIC); if (!rp) { hci_dev_unlock(hdev); @@ -5819,19 +5814,12 @@ static int 
read_adv_features(struct sock *sk, struct hci_dev *hdev, rp->max_adv_data_len = HCI_MAX_AD_LENGTH; rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH; rp->max_instances = HCI_MAX_ADV_INSTANCES; + rp->num_instances = hdev->adv_instance_cnt; - if (instance) { - i = 0; - list_for_each_entry(adv_instance, &hdev->adv_instances, list) { - if (i >= hdev->adv_instance_cnt) - break; - - rp->instance[i] = adv_instance->instance; - i++; - } - rp->num_instances = hdev->adv_instance_cnt; - } else { - rp->num_instances = 0; + instance = rp->instance; + list_for_each_entry(adv_instance, &hdev->adv_instances, list) { + *instance = adv_instance->instance; + instance++; } hci_dev_unlock(hdev); -- cgit v1.2.3 From 17fd08ffb5981cff2c921eb479f46b872b02b2b9 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 26 Nov 2015 12:15:59 +0200 Subject: Bluetooth: Remove unnecessary HCI_ADVERTISING_INSTANCE flag This flag just tells us whether hdev->adv_instances is empty or not. We can equally well use the list_empty() function to get this information. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 - net/bluetooth/hci_request.c | 19 ++++++++----------- net/bluetooth/mgmt.c | 8 +------- 3 files changed, 9 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index cc2216727655..339ea57be423 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -239,7 +239,6 @@ enum { HCI_LE_ENABLED, HCI_ADVERTISING, HCI_ADVERTISING_CONNECTABLE, - HCI_ADVERTISING_INSTANCE, HCI_CONNECTABLE, HCI_DISCOVERABLE, HCI_LIMITED_DISCOVERABLE, diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 7cc24f1448bd..adfcd6f1d0de 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -822,7 +822,7 @@ static u8 get_current_adv_instance(struct hci_dev *hdev) * setting was set. 
When neither apply, default to the global settings, * represented by instance "0". */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + if (!list_empty(&hdev->adv_instances) && !hci_dev_test_flag(hdev, HCI_ADVERTISING)) return hdev->cur_adv_instance; @@ -1144,7 +1144,7 @@ void hci_req_reenable_advertising(struct hci_dev *hdev) u8 instance; if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + list_empty(&hdev->adv_instances)) return; instance = get_current_adv_instance(hdev); @@ -1202,7 +1202,7 @@ int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, u16 timeout; if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + list_empty(&hdev->adv_instances)) return -EPERM; if (hdev->adv_instance_timeout) @@ -1319,10 +1319,8 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, } } - if (list_empty(&hdev->adv_instances)) { + if (list_empty(&hdev->adv_instances)) hdev->cur_adv_instance = 0x00; - hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); - } if (!req || !hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) @@ -1525,7 +1523,7 @@ static int connectable_update(struct hci_request *req, unsigned long opt) /* Update the advertising parameters if necessary */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + !list_empty(&hdev->adv_instances)) __hci_req_enable_advertising(req); __hci_update_background_scan(req); @@ -2226,13 +2224,12 @@ static int powered_update_hci(struct hci_request *req, unsigned long opt) */ if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { + list_empty(&hdev->adv_instances))) { __hci_req_update_adv_data(req, HCI_ADV_CURRENT); __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); } - if (hci_dev_test_flag(hdev, 
HCI_ADVERTISING_INSTANCE) && - hdev->cur_adv_instance == 0x00 && + if (hdev->cur_adv_instance == 0x00 && !list_empty(&hdev->adv_instances)) { adv_instance = list_first_entry(&hdev->adv_instances, struct adv_info, list); @@ -2241,7 +2238,7 @@ static int powered_update_hci(struct hci_request *req, unsigned long opt) if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) __hci_req_enable_advertising(req); - else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + else if (!list_empty(&hdev->adv_instances) && hdev->cur_adv_instance) __hci_req_schedule_adv_instance(req, hdev->cur_adv_instance, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9ce2bb2fc977..03a65e89a7d7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3734,7 +3734,6 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, * set up earlier, then re-enable multi-instance advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) || list_empty(&hdev->adv_instances)) goto unlock; @@ -5892,9 +5891,6 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status, cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev); - if (status) - hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); - list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { if (!adv_instance->pending) continue; @@ -6012,8 +6008,6 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, if (hdev->adv_instance_cnt > prev_instance_cnt) mgmt_advertising_added(sk, hdev, cp->instance); - hci_dev_set_flag(hdev, HCI_ADVERTISING_INSTANCE); - if (hdev->cur_adv_instance == cp->instance) { /* If the currently advertised instance is being changed then * cancel the current advertising and schedule the next @@ -6129,7 +6123,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) { + if (list_empty(&hdev->adv_instances)) { err = 
mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); goto unlock; -- cgit v1.2.3 From d6dac32e84e407ba15f257b5df2f4cb263005ab4 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 27 Nov 2015 10:52:39 +0200 Subject: Bluetooth: Fix updating wrong instance's scan_rsp data The __hci_req_update_scan_rsp_data gets the instance to be updated which should get passed to update_inst_scan_rsp_data() instead of always enabling the current instance. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index adfcd6f1d0de..edf2199de4ff 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1018,7 +1018,7 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance) if (instance == HCI_ADV_CURRENT) instance = get_current_adv_instance(req->hdev); - update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev)); + update_inst_scan_rsp_data(req, instance); } static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) -- cgit v1.2.3 From 550a8ca765a154ca38dcd888b4f12a173e761bdc Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 27 Nov 2015 11:11:52 +0200 Subject: Bluetooth: Remove redundant check for req.cmd_q The hci_req_run() function already checks for empty cmd_q and bails out if necessary. Also, req.cmd_q should really be treated as private data of the request and not accessed directly. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index edf2199de4ff..f1529d7740f6 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1187,8 +1187,7 @@ static void adv_timeout_expire(struct work_struct *work) if (list_empty(&hdev->adv_instances)) __hci_req_disable_advertising(&req); - if (!skb_queue_empty(&req.cmd_q)) - hci_req_run(&req, NULL); + hci_req_run(&req, NULL); unlock: hci_dev_unlock(hdev); -- cgit v1.2.3 From d6b7e2cddb72a87c2597af43ba9f5f2b03a2208b Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 30 Nov 2015 11:21:44 +0200 Subject: Bluetooth: Clean up advertising initialization in powered_update_hci() The logic in powered_update_hci() to initialize the advertising data & state is a bit more complicated than it needs to be. It was previously not doing anything if HCI_LE_ENABLED wasn't set, but this was not obvious by quickly looking at the code. Now the conditions for the various actions are more explicit. Another simplification is due to the fact that __hci_req_schedule_adv_instance() takes care of setting hdev->cur_adv_instance so there's no need to set it before calling the function. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index f1529d7740f6..14db777a6bb1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2181,7 +2181,6 @@ static void discov_off(struct work_struct *work) static int powered_update_hci(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; - struct adv_info *adv_instance; u8 link_sec; hci_dev_lock(hdev); @@ -2216,32 +2215,27 @@ static int powered_update_hci(struct hci_request *req, unsigned long opt) sizeof(cp), &cp); } - if (lmp_le_capable(hdev)) { + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { /* Make sure the controller has a good default for * advertising data. This also applies to the case * where BR/EDR was toggled during the AUTO_OFF phase. */ - if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && - (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - list_empty(&hdev->adv_instances))) { - __hci_req_update_adv_data(req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); - } + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + list_empty(&hdev->adv_instances)) { + __hci_req_update_adv_data(req, 0x00); + __hci_req_update_scan_rsp_data(req, 0x00); + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + __hci_req_enable_advertising(req); + } else if (!list_empty(&hdev->adv_instances)) { + struct adv_info *adv_instance; - if (hdev->cur_adv_instance == 0x00 && - !list_empty(&hdev->adv_instances)) { adv_instance = list_first_entry(&hdev->adv_instances, struct adv_info, list); - hdev->cur_adv_instance = adv_instance->instance; - } - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - __hci_req_enable_advertising(req); - else if (!list_empty(&hdev->adv_instances) && - hdev->cur_adv_instance) __hci_req_schedule_adv_instance(req, - 
hdev->cur_adv_instance, + adv_instance->instance, true); + } } link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); -- cgit v1.2.3 From cab054ab47fa3fdf1c597a9874363680bfdab33e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 30 Nov 2015 11:21:45 +0200 Subject: Bluetooth: Clean up current advertising instance tracking We can simplify a lot of code by making sure hdev->cur_adv_instance is always up-to-date. This allows e.g. the removal of the get_current_adv_instance() helper function and the special HCI_ADV_CURRENT value. This patch also makes selecting instance 0x00 explicit in the various calls where advertising instances aren't enabled, e.g. when HCI_ADVERTISING is set or we've just finished enabling LE. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 12 +++++--- net/bluetooth/hci_request.c | 68 ++++++++++----------------------------------- net/bluetooth/hci_request.h | 8 ++---- net/bluetooth/mgmt.c | 10 ++++--- 4 files changed, 32 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index eac3f6fa1272..9fb443a5473a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1769,7 +1769,7 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan) hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); + hci_req_update_adv_data(hdev, hdev->cur_adv_instance); mgmt_new_settings(hdev); } @@ -2610,9 +2610,12 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance) BT_DBG("%s removing %dMR", hdev->name, instance); - if (hdev->cur_adv_instance == instance && hdev->adv_instance_timeout) { - cancel_delayed_work(&hdev->adv_instance_expire); - hdev->adv_instance_timeout = 0; + if (hdev->cur_adv_instance == instance) { + if (hdev->adv_instance_timeout) { + cancel_delayed_work(&hdev->adv_instance_expire); + hdev->adv_instance_timeout = 0; + } + 
hdev->cur_adv_instance = 0x00; } list_del(&adv_instance->list); @@ -2639,6 +2642,7 @@ void hci_adv_instances_clear(struct hci_dev *hdev) } hdev->adv_instance_cnt = 0; + hdev->cur_adv_instance = 0x00; } /* This function requires the caller holds hdev->lock */ diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 14db777a6bb1..9997c31ef987 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -815,23 +815,9 @@ void hci_req_add_le_passive_scan(struct hci_request *req) &enable_cp); } -static u8 get_current_adv_instance(struct hci_dev *hdev) -{ - /* The "Set Advertising" setting supersedes the "Add Advertising" - * setting. Here we set the advertising data based on which - * setting was set. When neither apply, default to the global settings, - * represented by instance "0". - */ - if (!list_empty(&hdev->adv_instances) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING)) - return hdev->cur_adv_instance; - - return 0x00; -} - static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) { - u8 instance = get_current_adv_instance(hdev); + u8 instance = hdev->cur_adv_instance; struct adv_info *adv_instance; /* Ignore instance 0 */ @@ -890,7 +876,6 @@ void __hci_req_enable_advertising(struct hci_request *req) struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; - u8 instance; u32 flags; if (hci_conn_num(hdev, LE_LINK) > 0) @@ -906,8 +891,7 @@ void __hci_req_enable_advertising(struct hci_request *req) */ hci_dev_clear_flag(hdev, HCI_LE_ADV); - instance = get_current_adv_instance(hdev); - flags = get_adv_instance_flags(hdev, instance); + flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. 
@@ -985,7 +969,7 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, return adv_instance->scan_rsp_len; } -static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance) +void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_scan_rsp_data cp; @@ -1013,14 +997,6 @@ static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance) hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); } -void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance) -{ - if (instance == HCI_ADV_CURRENT) - instance = get_current_adv_instance(req->hdev); - - update_inst_scan_rsp_data(req, instance); -} - static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) { struct adv_info *adv_instance = NULL; @@ -1089,7 +1065,7 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) return ad_len; } -static void update_inst_adv_data(struct hci_request *req, u8 instance) +void __hci_req_update_adv_data(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_adv_data cp; @@ -1115,15 +1091,7 @@ static void update_inst_adv_data(struct hci_request *req, u8 instance) hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); } -void __hci_req_update_adv_data(struct hci_request *req, int instance) -{ - if (instance == HCI_ADV_CURRENT) - instance = get_current_adv_instance(req->hdev); - - update_inst_adv_data(req, instance); -} - -int hci_req_update_adv_data(struct hci_dev *hdev, int instance) +int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance) { struct hci_request req; @@ -1141,21 +1109,19 @@ static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) void hci_req_reenable_advertising(struct hci_dev *hdev) { struct hci_request req; - u8 instance; if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && list_empty(&hdev->adv_instances)) return; - 
instance = get_current_adv_instance(hdev); - hci_req_init(&req, hdev); - if (instance) { - __hci_req_schedule_adv_instance(&req, instance, true); + if (hdev->cur_adv_instance) { + __hci_req_schedule_adv_instance(&req, hdev->cur_adv_instance, + true); } else { - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(&req, 0x00); + __hci_req_update_scan_rsp_data(&req, 0x00); __hci_req_enable_advertising(&req); } @@ -1176,7 +1142,7 @@ static void adv_timeout_expire(struct work_struct *work) hdev->adv_instance_timeout = 0; - instance = get_current_adv_instance(hdev); + instance = hdev->cur_adv_instance; if (instance == 0x00) goto unlock; @@ -1246,8 +1212,8 @@ int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, return 0; hdev->cur_adv_instance = instance; - __hci_req_update_adv_data(req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(req, instance); + __hci_req_update_scan_rsp_data(req, instance); __hci_req_enable_advertising(req); return 0; @@ -1301,7 +1267,6 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, if (!err) mgmt_advertising_removed(NULL, hdev, rem_inst); } - hdev->cur_adv_instance = 0x00; } else { adv_instance = hci_find_adv_instance(hdev, instance); @@ -1318,9 +1283,6 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, } } - if (list_empty(&hdev->adv_instances)) - hdev->cur_adv_instance = 0x00; - if (!req || !hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) return; @@ -1518,7 +1480,7 @@ static int connectable_update(struct hci_request *req, unsigned long opt) * advertising flags. 
*/ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(req, hdev->cur_adv_instance); /* Update the advertising parameters if necessary */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || @@ -1627,7 +1589,7 @@ static int discoverable_update(struct hci_request *req, unsigned long opt) * only update AD if advertising was enabled using Set Advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(req, 0x00); hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index a24d3b55094c..64ff8c040d50 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -64,14 +64,12 @@ void __hci_req_update_eir(struct hci_request *req); void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); -#define HCI_ADV_CURRENT (-1) - void hci_req_reenable_advertising(struct hci_dev *hdev); void __hci_req_enable_advertising(struct hci_request *req); void __hci_req_disable_advertising(struct hci_request *req); -void __hci_req_update_adv_data(struct hci_request *req, int instance); -int hci_req_update_adv_data(struct hci_dev *hdev, int instance); -void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance); +void __hci_req_update_adv_data(struct hci_request *req, u8 instance); +int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance); +void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance); int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, bool force); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 03a65e89a7d7..621f6fdd0dd1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1626,8 +1626,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) struct hci_request req; hci_req_init(&req, hdev); - 
__hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(&req, 0x00); + __hci_req_update_scan_rsp_data(&req, 0x00); hci_req_run(&req, NULL); hci_update_background_scan(hdev); } @@ -3006,7 +3006,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, * no need to udpate the advertising data here. */ if (lmp_le_capable(hdev)) - __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, hdev->cur_adv_instance); err = hci_req_run(&req, set_name_complete); if (err < 0) @@ -3799,6 +3799,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, bool changed; if (cp->val) { + hdev->cur_adv_instance = 0x00; changed = !hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING); if (cp->val == 0x02) hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE); @@ -3846,6 +3847,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, * We cannot use update_[adv|scan_rsp]_data() here as the * HCI_ADVERTISING flag is not yet set. */ + hdev->cur_adv_instance = 0x00; __hci_req_update_adv_data(&req, 0x00); __hci_req_update_scan_rsp_data(&req, 0x00); __hci_req_enable_advertising(&req); @@ -4195,7 +4197,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) /* Since only the advertising data flags will change, there * is no need to update the scan response data. */ - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(&req, hdev->cur_adv_instance); err = hci_req_run(&req, set_bredr_complete); if (err < 0) -- cgit v1.2.3 From 2f99536a5b34d5b0f54723067d68f6cef3f0fdc6 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 3 Dec 2015 12:45:19 +0200 Subject: Bluetooth: Use continuous scanning when creating LE connections All LE connections are now triggered through a preceding passive scan and waiting for a connectable advertising report. 
This means we've got the best possible guarantee that the device is within range and should be able to request the controller to perform continuous scanning. This way we minimize the risk that we miss out on any advertising packets. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org # 4.3+ --- net/bluetooth/hci_conn.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index e2600213cd50..48a7eac6ef71 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -726,8 +726,12 @@ static void hci_req_add_le_create_conn(struct hci_request *req, if (hci_update_random_address(req, false, &own_addr_type)) return; + /* Set window to be the same value as the interval to enable + * continuous scanning. + */ cp.scan_interval = cpu_to_le16(hdev->le_scan_interval); - cp.scan_window = cpu_to_le16(hdev->le_scan_window); + cp.scan_window = cp.scan_interval; + bacpy(&cp.peer_addr, &conn->dst); cp.peer_addr_type = conn->dst_type; cp.own_address_type = own_addr_type; -- cgit v1.2.3 From acb9f911ea1f828822001d72b21f7cc06e6718c7 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 3 Dec 2015 12:45:20 +0200 Subject: Bluetooth: Don't treat connection timeout as a failure When we're doing background scanning and connection attempts it's possible we timeout trying to connect and go back to scanning again. The timeout triggers a HCI_LE_Create_Connection_Cancel which will trigger a Connection Complete with "Unknown Connection Identifier" error status. Since we go back to scanning this isn't really a failure and shouldn't be presented as such to user space through mgmt. The exception to this is if the connection attempt was due to an explicit request on an L2CAP socket (indicated by params->explicit_connect being true). Since the socket will get an error it's consistent to also notify the failure on mgmt in this case. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 48a7eac6ef71..32575b49f4a0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -668,8 +668,16 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) conn->state = BT_CLOSED; - mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, - status); + /* If the status indicates successful cancellation of + * the attempt (i.e. Unkown Connection Id) there's no point of + * notifying failure since we'll go back to keep trying to + * connect. The only exception is explicit connect requests + * where a timeout + cancel does indicate an actual failure. + */ + if (status != HCI_ERROR_UNKNOWN_CONN_ID || + (params && params->explicit_connect)) + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, status); hci_connect_cfm(conn, status); -- cgit v1.2.3 From 1a11ec89dba7bcfb1cb502fc5945533f317ddd03 Mon Sep 17 00:00:00 2001 From: Yichen Zhao Date: Tue, 1 Dec 2015 11:11:01 -0800 Subject: Bluetooth: Fix locking in bt_accept_dequeue after disconnection Fix a crash that may happen when bt_accept_dequeue is run after a Bluetooth connection has been disconnected. bt_accept_unlink was called after release_sock, permitting bt_accept_unlink to run twice on the same socket and cause a NULL pointer dereference. 
[50510.241632] BUG: unable to handle kernel NULL pointer dereference at 00000000000001a8 [50510.241694] IP: [] bt_accept_unlink+0x47/0xa0 [bluetooth] [50510.241759] PGD 0 [50510.241776] Oops: 0002 [#1] SMP [50510.241802] Modules linked in: rtl8192cu rtl_usb rtlwifi rtl8192c_common 8021q garp stp mrp llc rfcomm bnep nls_iso8859_1 intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp arc4 ath9k ath9k_common ath9k_hw ath kvm eeepc_wmi asus_wmi mac80211 snd_hda_codec_hdmi snd_hda_codec_realtek sparse_keymap crct10dif_pclmul snd_hda_codec_generic crc32_pclmul snd_hda_intel snd_hda_controller cfg80211 snd_hda_codec i915 snd_hwdep snd_pcm ghash_clmulni_intel snd_timer snd soundcore serio_raw cryptd drm_kms_helper drm i2c_algo_bit shpchp ath3k mei_me lpc_ich btusb bluetooth 6lowpan_iphc mei lp parport wmi video mac_hid psmouse ahci libahci r8169 mii [50510.242279] CPU: 0 PID: 934 Comm: krfcommd Not tainted 3.16.0-49-generic #65~14.04.1-Ubuntu [50510.242327] Hardware name: ASUSTeK Computer INC. 
VM40B/VM40B, BIOS 1501 12/09/2014 [50510.242370] task: ffff8800d9068a30 ti: ffff8800d7a54000 task.ti: ffff8800d7a54000 [50510.242413] RIP: 0010:[] [] bt_accept_unlink+0x47/0xa0 [bluetooth] [50510.242480] RSP: 0018:ffff8800d7a57d58 EFLAGS: 00010246 [50510.242511] RAX: 0000000000000000 RBX: ffff880119bb8c00 RCX: ffff880119bb8eb0 [50510.242552] RDX: ffff880119bb8eb0 RSI: 00000000fffffe01 RDI: ffff880119bb8c00 [50510.242592] RBP: ffff8800d7a57d60 R08: 0000000000000283 R09: 0000000000000001 [50510.242633] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800d8da9eb0 [50510.242673] R13: ffff8800d74fdb80 R14: ffff880119bb8c00 R15: ffff8800d8da9c00 [50510.242715] FS: 0000000000000000(0000) GS:ffff88011fa00000(0000) knlGS:0000000000000000 [50510.242761] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [50510.242794] CR2: 00000000000001a8 CR3: 0000000001c13000 CR4: 00000000001407f0 [50510.242835] Stack: [50510.242849] ffff880119bb8eb0 ffff8800d7a57da0 ffffffffc0124506 ffff8800d8da9eb0 [50510.242899] ffff8800d8da9c00 ffff8800d9068a30 0000000000000000 ffff8800d74fdb80 [50510.242949] ffff8800d6f85208 ffff8800d7a57e08 ffffffffc0159985 000000000000001f [50510.242999] Call Trace: [50510.243027] [] bt_accept_dequeue+0xb6/0x180 [bluetooth] [50510.243085] [] l2cap_sock_accept+0x125/0x220 [bluetooth] [50510.243128] [] ? wake_up_state+0x20/0x20 [50510.243163] [] kernel_accept+0x4e/0xa0 [50510.243200] [] rfcomm_run+0x1ad/0x890 [rfcomm] [50510.243238] [] ? rfcomm_process_rx+0x8a0/0x8a0 [rfcomm] [50510.243281] [] kthread+0xd2/0xf0 [50510.243312] [] ? kthread_create_on_node+0x1c0/0x1c0 [50510.243353] [] ret_from_fork+0x58/0x90 [50510.243387] [] ? 
kthread_create_on_node+0x1c0/0x1c0 [50510.243424] Code: 00 48 8b 93 b8 02 00 00 48 8d 83 b0 02 00 00 48 89 51 08 48 89 0a 48 89 83 b0 02 00 00 48 89 83 b8 02 00 00 48 8b 83 c0 02 00 00 <66> 83 a8 a8 01 00 00 01 48 c7 83 c0 02 00 00 00 00 00 00 f0 ff [50510.243685] RIP [] bt_accept_unlink+0x47/0xa0 [bluetooth] [50510.243737] RSP [50510.243758] CR2: 00000000000001a8 [50510.249457] ---[ end trace bb984f932c4e3ab3 ]--- Signed-off-by: Yichen Zhao Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 5785e8e6400e..cb4e8d4f7c25 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -186,8 +186,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) /* FIXME: Is this check still needed */ if (sk->sk_state == BT_CLOSED) { - release_sock(sk); bt_accept_unlink(sk); + release_sock(sk); continue; } -- cgit v1.2.3 From 5e5c08cbee7d75d026ff50a5051f2ed19b4ba301 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:22 +0100 Subject: 6lowpan: clarify Kconfig entries for upcoming GHC support Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 7fa0f382e7d1..6af7a4686060 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -6,11 +6,12 @@ menuconfig 6LOWPAN "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks. menuconfig 6LOWPAN_NHC - tristate "Next Header Compression Support" + tristate "Next Header and Generic Header Compression Support" depends on 6LOWPAN default y ---help--- - Support for next header compression. + Support for next header and generic header compression defined in + RFC6282 and RFC7400. 
if 6LOWPAN_NHC -- cgit v1.2.3 From 7e568f50c19c731938fee24a0f048f35120080f3 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:23 +0100 Subject: 6lowpan: add nhc module for GHC hop-by-hopextension header detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 6 ++++++ net/6lowpan/Makefile | 3 +++ net/6lowpan/nhc_ghc_ext_hop.c | 27 +++++++++++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_ext_hop.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 6af7a4686060..1bd49ebb1789 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -59,4 +59,10 @@ config 6LOWPAN_NHC_UDP ---help--- 6LoWPAN IPv6 UDP Header compression according to RFC6282. +config 6LOWPAN_GHC_EXT_HDR_HOP + tristate "GHC Hop-by-Hop Options Header Support" + ---help--- + 6LoWPAN IPv6 Hop-by-Hop option generic header compression according + to RFC7400. + endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index c6ffc55ee0d7..ba20e01b3e42 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -10,3 +10,6 @@ obj-$(CONFIG_6LOWPAN_NHC_IPV6) += nhc_ipv6.o obj-$(CONFIG_6LOWPAN_NHC_MOBILITY) += nhc_mobility.o obj-$(CONFIG_6LOWPAN_NHC_ROUTING) += nhc_routing.o obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o + +#rfc7400 ghcs +obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o diff --git a/net/6lowpan/nhc_ghc_ext_hop.c b/net/6lowpan/nhc_ghc_ext_hop.c new file mode 100644 index 000000000000..baec86fd1974 --- /dev/null +++ b/net/6lowpan/nhc_ghc_ext_hop.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN Extension Header compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include "nhc.h" + +#define LOWPAN_GHC_EXT_HOP_IDLEN 1 +#define LOWPAN_GHC_EXT_HOP_ID_0 0xb0 +#define LOWPAN_GHC_EXT_HOP_MASK_0 0xfe + +static void hop_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_EXT_HOP_ID_0; + nhc->idmask[0] = LOWPAN_GHC_EXT_HOP_MASK_0; +} + +LOWPAN_NHC(ghc_ext_hop, "RFC7400 Hop-by-Hop Extension Header", NEXTHDR_HOP, 0, + hop_ghid_setup, LOWPAN_GHC_EXT_HOP_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_ext_hop); +MODULE_DESCRIPTION("6LoWPAN generic header hop-by-hop extension compression"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 70cc86752e59ec26fcd31679b1eef23e8cb4b516 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:24 +0100 Subject: 6lowpan: add nhc module for GHC UDP detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 5 +++++ net/6lowpan/Makefile | 1 + net/6lowpan/nhc_ghc_udp.c | 27 +++++++++++++++++++++++++++ 3 files changed, 33 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_udp.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 1bd49ebb1789..94d517893432 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -65,4 +65,9 @@ config 6LOWPAN_GHC_EXT_HDR_HOP 6LoWPAN IPv6 Hop-by-Hop option generic header compression according to RFC7400. +config 6LOWPAN_GHC_UDP + tristate "GHC UDP Support" + ---help--- + 6LoWPAN IPv6 UDP generic header compression according to RFC7400. 
+ endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index ba20e01b3e42..5e4f2f3a2f57 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o #rfc7400 ghcs obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o +obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o diff --git a/net/6lowpan/nhc_ghc_udp.c b/net/6lowpan/nhc_ghc_udp.c new file mode 100644 index 000000000000..17beefa52ca8 --- /dev/null +++ b/net/6lowpan/nhc_ghc_udp.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN UDP compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_GHC_UDP_IDLEN 1 +#define LOWPAN_GHC_UDP_ID_0 0xd0 +#define LOWPAN_GHC_UDP_MASK_0 0xf8 + +static void udp_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_UDP_ID_0; + nhc->idmask[0] = LOWPAN_GHC_UDP_MASK_0; +} + +LOWPAN_NHC(ghc_udp, "RFC7400 UDP", NEXTHDR_UDP, 0, + udp_ghid_setup, LOWPAN_GHC_UDP_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_udp); +MODULE_DESCRIPTION("6LoWPAN generic header UDP compression"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From c39da3bb5b978ca03f1702c99965f3db1204516a Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:25 +0100 Subject: 6lowpan: add nhc module for GHC ICMPv6 detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 5 +++++ net/6lowpan/Makefile | 1 + net/6lowpan/nhc_ghc_icmpv6.c | 27 +++++++++++++++++++++++++++ 3 files changed, 33 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_icmpv6.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 94d517893432..0a3f5a8b0f6d 100644 --- a/net/6lowpan/Kconfig +++ 
b/net/6lowpan/Kconfig @@ -70,4 +70,9 @@ config 6LOWPAN_GHC_UDP ---help--- 6LoWPAN IPv6 UDP generic header compression according to RFC7400. +config 6LOWPAN_GHC_ICMPV6 + tristate "GHC ICMPv6 Support" + ---help--- + 6LoWPAN IPv6 ICMPv6 generic header compression according to RFC7400. + endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index 5e4f2f3a2f57..86af3fd141a1 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -14,3 +14,4 @@ obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o #rfc7400 ghcs obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o +obj-$(CONFIG_6LOWPAN_GHC_ICMPV6) += nhc_ghc_icmpv6.o diff --git a/net/6lowpan/nhc_ghc_icmpv6.c b/net/6lowpan/nhc_ghc_icmpv6.c new file mode 100644 index 000000000000..32e7c2c66bbc --- /dev/null +++ b/net/6lowpan/nhc_ghc_icmpv6.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN ICMPv6 compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include "nhc.h" + +#define LOWPAN_GHC_ICMPV6_IDLEN 1 +#define LOWPAN_GHC_ICMPV6_ID_0 0xdf +#define LOWPAN_GHC_ICMPV6_MASK_0 0xff + +static void icmpv6_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_ICMPV6_ID_0; + nhc->idmask[0] = LOWPAN_GHC_ICMPV6_MASK_0; +} + +LOWPAN_NHC(ghc_icmpv6, "RFC7400 ICMPv6", NEXTHDR_ICMP, 0, + icmpv6_ghid_setup, LOWPAN_GHC_ICMPV6_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_icmpv6); +MODULE_DESCRIPTION("6LoWPAN generic header ICMPv6 compression"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 20616a5a1e3bb47c385c6d5f27520e7a3cc82864 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:26 +0100 Subject: 6lowpan: add nhc module for GHC destination extension header detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 6 ++++++ net/6lowpan/Makefile | 1 + net/6lowpan/nhc_ghc_ext_dest.c | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_ext_dest.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 0a3f5a8b0f6d..e5184e65d4d3 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -75,4 +75,10 @@ config 6LOWPAN_GHC_ICMPV6 ---help--- 6LoWPAN IPv6 ICMPv6 generic header compression according to RFC7400. +config 6LOWPAN_GHC_EXT_HDR_DEST + tristate "GHC Destination Options Header Support" + ---help--- + 6LoWPAN IPv6 destination option generic header compression according + to RFC7400. 
+ endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index 86af3fd141a1..fc4bac00bd20 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o obj-$(CONFIG_6LOWPAN_GHC_ICMPV6) += nhc_ghc_icmpv6.o +obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_DEST) += nhc_ghc_ext_dest.o diff --git a/net/6lowpan/nhc_ghc_ext_dest.c b/net/6lowpan/nhc_ghc_ext_dest.c new file mode 100644 index 000000000000..9887b3a15348 --- /dev/null +++ b/net/6lowpan/nhc_ghc_ext_dest.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN Extension Header compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_GHC_EXT_DEST_IDLEN 1 +#define LOWPAN_GHC_EXT_DEST_ID_0 0xb6 +#define LOWPAN_GHC_EXT_DEST_MASK_0 0xfe + +static void dest_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_EXT_DEST_ID_0; + nhc->idmask[0] = LOWPAN_GHC_EXT_DEST_MASK_0; +} + +LOWPAN_NHC(ghc_ext_dest, "RFC7400 Destination Extension Header", NEXTHDR_DEST, + 0, dest_ghid_setup, LOWPAN_GHC_EXT_DEST_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_ext_dest); +MODULE_DESCRIPTION("6LoWPAN generic header destination extension compression"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 2f4799478c94928802c79edd12711a0e9e8b6f1b Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:27 +0100 Subject: 6lowpan: add nhc module for GHC fragmentation extension header detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 6 ++++++ net/6lowpan/Makefile | 1 + net/6lowpan/nhc_ghc_ext_frag.c | 28 
++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_ext_frag.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index e5184e65d4d3..13abcc56eae6 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -81,4 +81,10 @@ config 6LOWPAN_GHC_EXT_HDR_DEST 6LoWPAN IPv6 destination option generic header compression according to RFC7400. +config 6LOWPAN_GHC_EXT_HDR_FRAG + tristate "GHC Fragmentation Options Header Support" + ---help--- + 6LoWPAN IPv6 fragmentation option generic header compression + according to RFC7400. + endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index fc4bac00bd20..fb3f48d78604 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o obj-$(CONFIG_6LOWPAN_GHC_ICMPV6) += nhc_ghc_icmpv6.o obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_DEST) += nhc_ghc_ext_dest.o +obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG) += nhc_ghc_ext_frag.o diff --git a/net/6lowpan/nhc_ghc_ext_frag.c b/net/6lowpan/nhc_ghc_ext_frag.c new file mode 100644 index 000000000000..1308b79e939d --- /dev/null +++ b/net/6lowpan/nhc_ghc_ext_frag.c @@ -0,0 +1,28 @@ +/* + * 6LoWPAN Extension Header compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include "nhc.h" + +#define LOWPAN_GHC_EXT_FRAG_IDLEN 1 +#define LOWPAN_GHC_EXT_FRAG_ID_0 0xb4 +#define LOWPAN_GHC_EXT_FRAG_MASK_0 0xfe + +static void frag_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_EXT_FRAG_ID_0; + nhc->idmask[0] = LOWPAN_GHC_EXT_FRAG_MASK_0; +} + +LOWPAN_NHC(ghc_ext_frag, "RFC7400 Fragmentation Extension Header", + NEXTHDR_FRAGMENT, 0, frag_ghid_setup, + LOWPAN_GHC_EXT_FRAG_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_ext_frag); +MODULE_DESCRIPTION("6LoWPAN generic header fragmentation extension compression"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 43f26e17d02f5c772cedc3ee16b192ed79764474 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:28 +0100 Subject: 6lowpan: add nhc module for GHC routing extension header detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 6 ++++++ net/6lowpan/Makefile | 1 + net/6lowpan/nhc_ghc_ext_route.c | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_ext_route.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 13abcc56eae6..bcb9d8a21fc0 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -87,4 +87,10 @@ config 6LOWPAN_GHC_EXT_HDR_FRAG 6LoWPAN IPv6 fragmentation option generic header compression according to RFC7400. +config 6LOWPAN_GHC_EXT_HDR_ROUTE + tristate "GHC Routing Options Header Support" + ---help--- + 6LoWPAN IPv6 routing option generic header compression according + to RFC7400. 
+ endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index fb3f48d78604..9e35a5d7b92d 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o obj-$(CONFIG_6LOWPAN_GHC_ICMPV6) += nhc_ghc_icmpv6.o obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_DEST) += nhc_ghc_ext_dest.o obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG) += nhc_ghc_ext_frag.o +obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE) += nhc_ghc_ext_route.o diff --git a/net/6lowpan/nhc_ghc_ext_route.c b/net/6lowpan/nhc_ghc_ext_route.c new file mode 100644 index 000000000000..d7e5bd791c62 --- /dev/null +++ b/net/6lowpan/nhc_ghc_ext_route.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN Extension Header compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_GHC_EXT_ROUTE_IDLEN 1 +#define LOWPAN_GHC_EXT_ROUTE_ID_0 0xb2 +#define LOWPAN_GHC_EXT_ROUTE_MASK_0 0xfe + +static void route_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_EXT_ROUTE_ID_0; + nhc->idmask[0] = LOWPAN_GHC_EXT_ROUTE_MASK_0; +} + +LOWPAN_NHC(ghc_ext_route, "RFC7400 Routing Extension Header", NEXTHDR_ROUTING, + 0, route_ghid_setup, LOWPAN_GHC_EXT_ROUTE_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_ext_route); +MODULE_DESCRIPTION("6LoWPAN generic header routing extension compression"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 00f59314111a6b18ee65b238b38c470dbdbf3be5 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 9 Dec 2015 22:46:29 +0100 Subject: 6lowpan: add lowpan dev register helpers This patch introduces register and unregister functionality for lowpan interfaces. While registering a lowpan interface there are several things which need to be initialized by the 6lowpan subsystem.
Upcoming functionality needs to register/unregister per-interface components, e.g. a debugfs entry. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/6lowpan.h | 7 ++++++- net/6lowpan/core.c | 33 +++++++++++++++++++++++++++++++-- net/bluetooth/6lowpan.c | 8 +++----- net/ieee802154/6lowpan/core.c | 6 ++---- 4 files changed, 42 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index cf3bc564ac03..730211fd8ed7 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -185,7 +185,12 @@ static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data, *hc_ptr += len; } -void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype); +int lowpan_register_netdevice(struct net_device *dev, + enum lowpan_lltypes lltype); +int lowpan_register_netdev(struct net_device *dev, + enum lowpan_lltypes lltype); +void lowpan_unregister_netdevice(struct net_device *dev); +void lowpan_unregister_netdev(struct net_device *dev); /** * lowpan_header_decompress - replace 6LoWPAN header with IPv6 header diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index 83b19e072224..80fc50987cf3 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -15,7 +15,8 @@ #include -void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype) +int lowpan_register_netdevice(struct net_device *dev, + enum lowpan_lltypes lltype) { dev->addr_len = EUI64_ADDR_LEN; dev->type = ARPHRD_6LOWPAN; @@ -23,8 +24,36 @@ void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype) dev->priv_flags |= IFF_NO_QUEUE; lowpan_priv(dev)->lltype = lltype; + + return register_netdevice(dev); +} +EXPORT_SYMBOL(lowpan_register_netdevice); + +int lowpan_register_netdev(struct net_device *dev, + enum lowpan_lltypes lltype) +{ + int ret; + + rtnl_lock(); + ret = lowpan_register_netdevice(dev, lltype); + rtnl_unlock(); + return ret; +}
+EXPORT_SYMBOL(lowpan_register_netdev); + +void lowpan_unregister_netdevice(struct net_device *dev) +{ + unregister_netdevice(dev); +} +EXPORT_SYMBOL(lowpan_unregister_netdevice); + +void lowpan_unregister_netdev(struct net_device *dev) +{ + rtnl_lock(); + lowpan_unregister_netdevice(dev); + rtnl_unlock(); } -EXPORT_SYMBOL(lowpan_netdev_setup); +EXPORT_SYMBOL(lowpan_unregister_netdev); static int __init lowpan_module_init(void) { diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 9e9cca3689a0..d040365ba98e 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -825,9 +825,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) list_add_rcu(&(*dev)->list, &bt_6lowpan_devices); spin_unlock(&devices_lock); - lowpan_netdev_setup(netdev, LOWPAN_LLTYPE_BTLE); - - err = register_netdev(netdev); + err = lowpan_register_netdev(netdev, LOWPAN_LLTYPE_BTLE); if (err < 0) { BT_INFO("register_netdev failed %d", err); spin_lock(&devices_lock); @@ -890,7 +888,7 @@ static void delete_netdev(struct work_struct *work) struct lowpan_dev *entry = container_of(work, struct lowpan_dev, delete_netdev); - unregister_netdev(entry->netdev); + lowpan_unregister_netdev(entry->netdev); /* The entry pointer is deleted by the netdev destructor. 
*/ } @@ -1348,7 +1346,7 @@ static void disconnect_devices(void) ifdown(entry->netdev); BT_DBG("Unregistering netdev %s %p", entry->netdev->name, entry->netdev); - unregister_netdev(entry->netdev); + lowpan_unregister_netdev(entry->netdev); kfree(entry); } } diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 20c49c724ba0..737c87a2a41e 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -161,9 +161,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, wdev->needed_headroom; ldev->needed_tailroom = wdev->needed_tailroom; - lowpan_netdev_setup(ldev, LOWPAN_LLTYPE_IEEE802154); - - ret = register_netdevice(ldev); + ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154); if (ret < 0) { dev_put(wdev); return ret; @@ -180,7 +178,7 @@ static void lowpan_dellink(struct net_device *ldev, struct list_head *head) ASSERT_RTNL(); wdev->ieee802154_ptr->lowpan_dev = NULL; - unregister_netdevice(ldev); + lowpan_unregister_netdevice(ldev); dev_put(wdev); } -- cgit v1.2.3 From b1815fd949e5bd06d118019acf68f87c9414f705 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 9 Dec 2015 22:46:30 +0100 Subject: 6lowpan: add debugfs support This patch will introduce a 6lowpan entry into the debugfs if enabled. Inside this 6lowpan directory we create a subdirectories of all 6lowpan interfaces to offer a per interface debugfs support. 
Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/6lowpan.h | 3 +++ net/6lowpan/6lowpan_i.h | 28 ++++++++++++++++++++++++++ net/6lowpan/Kconfig | 8 ++++++++ net/6lowpan/Makefile | 1 + net/6lowpan/core.c | 28 +++++++++++++++++++++++++- net/6lowpan/debugfs.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 net/6lowpan/6lowpan_i.h create mode 100644 net/6lowpan/debugfs.c (limited to 'net') diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index 730211fd8ed7..2f6a3f2233ed 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -53,6 +53,8 @@ #ifndef __6LOWPAN_H__ #define __6LOWPAN_H__ +#include + #include #include @@ -98,6 +100,7 @@ enum lowpan_lltypes { struct lowpan_priv { enum lowpan_lltypes lltype; + struct dentry *iface_debugfs; /* must be last */ u8 priv[0] __aligned(sizeof(void *)); diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h new file mode 100644 index 000000000000..d16bb4b14aa1 --- /dev/null +++ b/net/6lowpan/6lowpan_i.h @@ -0,0 +1,28 @@ +#ifndef __6LOWPAN_I_H +#define __6LOWPAN_I_H + +#include + +#ifdef CONFIG_6LOWPAN_DEBUGFS +int lowpan_dev_debugfs_init(struct net_device *dev); +void lowpan_dev_debugfs_exit(struct net_device *dev); + +int __init lowpan_debugfs_init(void); +void lowpan_debugfs_exit(void); +#else +static inline int lowpan_dev_debugfs_init(struct net_device *dev) +{ + return 0; +} + +static inline void lowpan_dev_debugfs_exit(struct net_device *dev) { } + +static inline int __init lowpan_debugfs_init(void) +{ + return 0; +} + +static inline void lowpan_debugfs_exit(void) { } +#endif /* CONFIG_6LOWPAN_DEBUGFS */ + +#endif /* __6LOWPAN_I_H */ diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index bcb9d8a21fc0..9c051512d14f 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -5,6 +5,14 @@ menuconfig 6LOWPAN This enables IPv6 over Low power Wireless Personal Area 
Network - "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks. +config 6LOWPAN_DEBUGFS + bool "6LoWPAN debugfs support" + depends on 6LOWPAN + depends on DEBUG_FS + ---help--- + This enables 6LoWPAN debugfs support. For example to manipulate + IPHC context information at runtime. + menuconfig 6LOWPAN_NHC tristate "Next Header and Generic Header Compression Support" depends on 6LOWPAN diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index 9e35a5d7b92d..e44f3bf2dd42 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_6LOWPAN) += 6lowpan.o 6lowpan-y := core.o iphc.o nhc.o +6lowpan-$(CONFIG_6LOWPAN_DEBUGFS) += debugfs.o #rfc6282 nhcs obj-$(CONFIG_6LOWPAN_NHC_DEST) += nhc_dest.o diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index 80fc50987cf3..c7f06f5c0121 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -15,9 +15,13 @@ #include +#include "6lowpan_i.h" + int lowpan_register_netdevice(struct net_device *dev, enum lowpan_lltypes lltype) { + int ret; + dev->addr_len = EUI64_ADDR_LEN; dev->type = ARPHRD_6LOWPAN; dev->mtu = IPV6_MIN_MTU; @@ -25,7 +29,15 @@ int lowpan_register_netdevice(struct net_device *dev, lowpan_priv(dev)->lltype = lltype; - return register_netdevice(dev); + ret = lowpan_dev_debugfs_init(dev); + if (ret < 0) + return ret; + + ret = register_netdevice(dev); + if (ret < 0) + lowpan_dev_debugfs_exit(dev); + + return ret; } EXPORT_SYMBOL(lowpan_register_netdevice); @@ -44,6 +56,7 @@ EXPORT_SYMBOL(lowpan_register_netdev); void lowpan_unregister_netdevice(struct net_device *dev) { unregister_netdevice(dev); + lowpan_dev_debugfs_exit(dev); } EXPORT_SYMBOL(lowpan_unregister_netdevice); @@ -57,6 +70,12 @@ EXPORT_SYMBOL(lowpan_unregister_netdev); static int __init lowpan_module_init(void) { + int ret; + + ret = lowpan_debugfs_init(); + if (ret < 0) + return ret; + request_module_nowait("ipv6"); request_module_nowait("nhc_dest"); @@ -69,6 +88,13 @@ static int __init 
lowpan_module_init(void) return 0; } + +static void __exit lowpan_module_exit(void) +{ + lowpan_debugfs_exit(); +} + module_init(lowpan_module_init); +module_exit(lowpan_module_exit); MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c new file mode 100644 index 000000000000..88eef84df0fc --- /dev/null +++ b/net/6lowpan/debugfs.c @@ -0,0 +1,53 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: + * (C) 2015 Pengutronix, Alexander Aring + * Copyright (c) 2015 Nordic Semiconductor. All Rights Reserved. + */ + +#include + +#include "6lowpan_i.h" + +static struct dentry *lowpan_debugfs; + +int lowpan_dev_debugfs_init(struct net_device *dev) +{ + struct lowpan_priv *lpriv = lowpan_priv(dev); + + /* creating the root */ + lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs); + if (!lpriv->iface_debugfs) + goto fail; + + return 0; + +fail: + return -EINVAL; +} + +void lowpan_dev_debugfs_exit(struct net_device *dev) +{ + debugfs_remove_recursive(lowpan_priv(dev)->iface_debugfs); +} + +int __init lowpan_debugfs_init(void) +{ + lowpan_debugfs = debugfs_create_dir("6lowpan", NULL); + if (!lowpan_debugfs) + return -EINVAL; + + return 0; +} + +void lowpan_debugfs_exit(void) +{ + debugfs_remove_recursive(lowpan_debugfs); +} -- cgit v1.2.3 From c38383530fb5e160b739aff4bf08c1cc2dfcc659 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 9 Dec 2015 23:23:56 +0100 Subject: mac802154: tx: fix synced xmit deadlock This patch reverts 6001d52 ("mac802154: tx: don't allow if down while sync tx"). 
This has side effects with the stop callback, which flushes the transmit workqueue. The stop callback waits until the workqueue is flushed while holding the rtnl lock. That means the stop callback can wait forever, because the transmit worker tries to lock the rtnl mutex, which is already held by the stop callback. Cc: Michael Hennerich Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/driver-ops.h | 3 --- net/mac802154/tx.c | 9 --------- 2 files changed, 12 deletions(-) (limited to 'net') diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index 0550f3365e33..fd9daf2ecec9 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -18,9 +18,6 @@ drv_xmit_async(struct ieee802154_local *local, struct sk_buff *skb) static inline int drv_xmit_sync(struct ieee802154_local *local, struct sk_buff *skb) { - /* don't allow other operations while sync xmit */ - ASSERT_RTNL(); - might_sleep(); return local->ops->xmit_sync(&local->hw, skb); diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 3827f359b336..7e253455f9dd 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -38,12 +38,6 @@ void ieee802154_xmit_worker(struct work_struct *work) struct net_device *dev = skb->dev; int res; - rtnl_lock(); - - /* check if ifdown occurred while schedule */ - if (!netif_running(dev)) - goto err_tx; - res = drv_xmit_sync(local, skb); if (res) goto err_tx; @@ -53,14 +47,11 @@ void ieee802154_xmit_worker(struct work_struct *work) dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; - rtnl_unlock(); - return; err_tx: /* Restart the netif queue on each sub_if_data object.
*/ ieee802154_wake_queue(&local->hw); - rtnl_unlock(); kfree_skb(skb); netdev_dbg(dev, "transmission failed\n"); } -- cgit v1.2.3 From 4ada1282d86865671abdfcf9410b895af8491213 Mon Sep 17 00:00:00 2001 From: Danny Schweizer Date: Fri, 11 Dec 2015 10:04:54 +0100 Subject: Bluetooth: Do not filter multicast addresses by default A Linux PC is connected with another device over Bluetooth PAN using a BNEP interface. Whenever a packet is tried to be sent over the BNEP interface, the function "bnep_net_xmit()" in "net/bluetooth/bnep/netdev.c" is called. This function calls "bnep_net_mc_filter()", which checks (if the destination address is multicast) if the address is set in a certain multicast filter (&s->mc_filter). If it is not, then it is not sent out. This filter is only changed in two other functions, found in net/bluetooth/bnep/core.c": in "bnep_ctrl_set_mc_filter()", which is only called if a message of type "BNEP_FILTER_MULTI_ADDR_SET" is received. Otherwise, it is set in "bnep_add_connection()", where it is set to a default value which only adds the broadcast address to the filter: set_bit(bnep_mc_hash(dev->broadcast), (ulong *) &s->mc_filter); To sum up, if the BNEP interface does not receive any message of type "BNEP_FILTER_MULTI_ADDR_SET", it will not send out any messages with multicast destination addresses except for broadcast. However, in the BNEP specification (page 27 in http://grouper.ieee.org/groups/802/15/Bluetooth/BNEP.pdf), it is said that per default, all multicast addresses should not be filtered, i.e. the BNEP interface should be able to send packets with any multicast destination address. It seems that the default case is wrong: the multicast filter should not block almost all multicast addresses, but should not filter out any. This leads to the problem that e.g. Neighbor Solicitation messages sent with Bluetooth PAN over the BNEP interface to a multicast destination address other than broadcast are blocked and not sent out. 
Therefore, in the default case, we set the mc_filter to ~0LL to not filter out any multicast addresses. Signed-off-by: Danny Schweizer Signed-off-by: Marcel Holtmann --- net/bluetooth/bnep/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 1641367e54ca..fbf251fef70f 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -608,8 +608,11 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) s->msg.msg_flags = MSG_NOSIGNAL; #ifdef CONFIG_BT_BNEP_MC_FILTER - /* Set default mc filter */ - set_bit(bnep_mc_hash(dev->broadcast), (ulong *) &s->mc_filter); + /* Set default mc filter to not filter out any mc addresses + * as defined in the BNEP specification (revision 0.95a) + * http://grouper.ieee.org/groups/802/15/Bluetooth/BNEP.pdf + */ + s->mc_filter = ~0LL; #endif #ifdef CONFIG_BT_BNEP_PROTO_FILTER -- cgit v1.2.3 From 6e71b29908e9e9bffc03b8e991c9e58a0fa92d9c Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 9 Dec 2015 06:56:41 -0800 Subject: mpls_iptunnel: add static qualifier to mpls_output This gets rid of the following compile warn: net/mpls/mpls_iptunnel.c:40:5: warning: no previous prototype for mpls_output [-Wmissing-prototypes] Signed-off-by: Roopa Prabhu Acked-by: Robert Shearman Signed-off-by: David S. 
Miller --- net/mpls/mpls_iptunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 67591aef9cae..cdd01e6416db 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) return en->labels * sizeof(struct mpls_shim_hdr); } -int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct mpls_iptunnel_encap *tun_encap_info; struct mpls_shim_hdr *hdr; -- cgit v1.2.3 From 19576c9478682a398276c994ea0d2696474df32b Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 9 Dec 2015 14:07:40 +0100 Subject: netfilter: cttimeout: add netns support Add a per-netns list of timeout objects and adjust code to use it. Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 3 + include/net/netfilter/nf_conntrack_timeout.h | 2 +- net/netfilter/nf_conntrack_timeout.c | 2 +- net/netfilter/nfnetlink_cttimeout.c | 82 +++++++++++++++++----------- net/netfilter/xt_CT.c | 2 +- 5 files changed, 57 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2dcea635ecce..4089abc6e9c0 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -121,6 +121,9 @@ struct net { #if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT) struct list_head nfnl_acct_list; #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + struct list_head nfct_timeout_list; +#endif #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index f72be38860a7..5cc5e9e6171a 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -104,7 +104,7 @@ static inline void 
nf_conntrack_timeout_fini(void) #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(const char *name); +extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name); extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout); #endif diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 93da609d9d29..26e742006c48 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -25,7 +25,7 @@ #include struct ctnl_timeout * -(*nf_ct_timeout_find_get_hook)(const char *name) __read_mostly; +(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook); void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index c7a2d0e1c462..3921d544f5ba 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -38,8 +38,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); -static LIST_HEAD(cttimeout_list); - static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING, .len = CTNL_TIMEOUT_NAME_MAX - 1}, @@ -90,7 +88,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - list_for_each_entry(timeout, &cttimeout_list, head) { + list_for_each_entry(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -145,7 +143,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, timeout->l3num = l3num; timeout->l4proto = l4proto; 
atomic_set(&timeout->refcnt, 1); - list_add_tail_rcu(&timeout->head, &cttimeout_list); + list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); return 0; err: @@ -209,6 +207,7 @@ nla_put_failure: static int ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct ctnl_timeout *cur, *last; if (cb->args[2]) @@ -219,7 +218,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &cttimeout_list, head) { + list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) { if (last) { if (cur != last) continue; @@ -245,6 +244,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; @@ -260,7 +260,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) @@ -301,17 +301,17 @@ static void untimeout(struct nf_conntrack_tuple_hash *i, RCU_INIT_POINTER(timeout_ext->timeout, NULL); } -static void ctnl_untimeout(struct ctnl_timeout *timeout) +static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) { struct nf_conntrack_tuple_hash *h; const struct hlist_nulls_node *nn; int i; local_bh_disable(); - for (i = 0; i < init_net.ct.htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < init_net.ct.htable_size) { - hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode) + if (i < net->ct.htable_size) { + hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) untimeout(h, timeout); } spin_unlock(&nf_conntrack_locks[i % 
CONNTRACK_LOCKS]); @@ -320,7 +320,7 @@ static void ctnl_untimeout(struct ctnl_timeout *timeout) } /* try to delete object, fail if it is still in use. */ -static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) +static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { int ret = 0; @@ -329,7 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); - ctnl_untimeout(timeout); + ctnl_untimeout(net, timeout); kfree_rcu(timeout, rcu_head); } else { /* still in use, restore reference counter. */ @@ -344,23 +344,24 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); char *name; struct ctnl_timeout *cur; int ret = -ENOENT; if (!cda[CTA_TIMEOUT_NAME]) { - list_for_each_entry(cur, &cttimeout_list, head) - ctnl_timeout_try_del(cur); + list_for_each_entry(cur, &net->nfct_timeout_list, head) + ctnl_timeout_try_del(net, cur); return 0; } name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; - ret = ctnl_timeout_try_del(cur); + ret = ctnl_timeout_try_del(net, cur); if (ret < 0) return ret; @@ -511,12 +512,13 @@ err: } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -static struct ctnl_timeout *ctnl_timeout_find_get(const char *name) +static struct ctnl_timeout * +ctnl_timeout_find_get(struct net *net, const char *name) { struct ctnl_timeout *timeout, *matching = NULL; rcu_read_lock(); - list_for_each_entry_rcu(timeout, &cttimeout_list, head) { + list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -569,10 +571,39 @@ static const struct nfnetlink_subsystem cttimeout_subsys = { 
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); +static int __net_init cttimeout_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->nfct_timeout_list); + + return 0; +} + +static void __net_exit cttimeout_net_exit(struct net *net) +{ + struct ctnl_timeout *cur, *tmp; + + ctnl_untimeout(net, NULL); + + list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { + list_del_rcu(&cur->head); + nf_ct_l4proto_put(cur->l4proto); + kfree_rcu(cur, rcu_head); + } +} + +static struct pernet_operations cttimeout_ops = { + .init = cttimeout_net_init, + .exit = cttimeout_net_exit, +}; + static int __init cttimeout_init(void) { int ret; + ret = register_pernet_subsys(&cttimeout_ops); + if (ret < 0) + return ret; + ret = nfnetlink_subsys_register(&cttimeout_subsys); if (ret < 0) { pr_err("cttimeout_init: cannot register cttimeout with " @@ -586,28 +617,17 @@ static int __init cttimeout_init(void) return 0; err_out: + unregister_pernet_subsys(&cttimeout_ops); return ret; } static void __exit cttimeout_exit(void) { - struct ctnl_timeout *cur, *tmp; - pr_info("cttimeout: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&cttimeout_subsys); - /* Make sure no conntrack objects refer to custom timeouts anymore. */ - ctnl_untimeout(NULL); - - list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) { - list_del_rcu(&cur->head); - /* We are sure that our objects have no clients at this point, - * it's safe to release them all without checking refcnt. 
- */ - nf_ct_l4proto_put(cur->l4proto); - kfree_rcu(cur, rcu_head); - } + unregister_pernet_subsys(&cttimeout_ops); #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index e7ac07e53b59..6669e68d589e 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -143,7 +143,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, goto out; } - timeout = timeout_find_get(timeout_name); + timeout = timeout_find_get(par->net, timeout_name); if (timeout == NULL) { ret = -ENOENT; pr_info("No such timeout policy \"%s\"\n", timeout_name); -- cgit v1.2.3 From 4ec8ff0edccffe7a77f18e2a1e2ce86f03e08b5c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:54 -0500 Subject: netfilter: prepare xt_cgroup for multi revisions xt_cgroup will grow cgroup2 path based match. Postfix existing symbols with _v0 and prepare for multi revision registration. 
Signed-off-by: Tejun Heo Cc: Daniel Borkmann Cc: Daniel Wagner CC: Neil Horman Cc: Jan Engelhardt Cc: Pablo Neira Ayuso Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_cgroup.h | 2 +- net/netfilter/xt_cgroup.c | 36 +++++++++++++++++--------------- 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h index 43acb7e175f6..577c9e0b9406 100644 --- a/include/uapi/linux/netfilter/xt_cgroup.h +++ b/include/uapi/linux/netfilter/xt_cgroup.h @@ -3,7 +3,7 @@ #include -struct xt_cgroup_info { +struct xt_cgroup_info_v0 { __u32 id; __u32 invert; }; diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 54eaeb45ce99..17300256772a 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: process control group matching"); MODULE_ALIAS("ipt_cgroup"); MODULE_ALIAS("ip6t_cgroup"); -static int cgroup_mt_check(const struct xt_mtchk_param *par) +static int cgroup_mt_check_v0(const struct xt_mtchk_param *par) { - struct xt_cgroup_info *info = par->matchinfo; + struct xt_cgroup_info_v0 *info = par->matchinfo; if (info->invert & ~1) return -EINVAL; @@ -35,9 +35,9 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) } static bool -cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - const struct xt_cgroup_info *info = par->matchinfo; + const struct xt_cgroup_info_v0 *info = par->matchinfo; if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; @@ -46,27 +46,29 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) info->invert; } -static struct xt_match cgroup_mt_reg __read_mostly = { - .name = "cgroup", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = cgroup_mt_check, - .match = cgroup_mt, - .matchsize = sizeof(struct xt_cgroup_info), - .me = THIS_MODULE, 
- .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_IN), +static struct xt_match cgroup_mt_reg[] __read_mostly = { + { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check_v0, + .match = cgroup_mt_v0, + .matchsize = sizeof(struct xt_cgroup_info_v0), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, }; static int __init cgroup_mt_init(void) { - return xt_register_match(&cgroup_mt_reg); + return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } static void __exit cgroup_mt_exit(void) { - xt_unregister_match(&cgroup_mt_reg); + xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } module_init(cgroup_mt_init); -- cgit v1.2.3 From c38c4597e4bf3e99860eac98211748e1ecb0e139 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:55 -0500 Subject: netfilter: implement xt_cgroup cgroup2 path match This patch implements xt_cgroup path match which matches cgroup2 membership of the associated socket. The match is recursive and invertible. For rationales on introducing another cgroup based match, please refer to a preceding commit "sock, cgroup: add sock->sk_cgroup". v3: Folded into xt_cgroup as a new revision interface as suggested by Pablo. v2: Included linux/limits.h from xt_cgroup2.h for PATH_MAX. Added explicit alignment to the priv field. Both suggested by Jan. 
Signed-off-by: Tejun Heo Cc: Daniel Borkmann Cc: Daniel Wagner CC: Neil Horman Cc: Jan Engelhardt Cc: Pablo Neira Ayuso Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_cgroup.h | 13 ++++++ net/netfilter/xt_cgroup.c | 69 ++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h index 577c9e0b9406..1e4b37b93bef 100644 --- a/include/uapi/linux/netfilter/xt_cgroup.h +++ b/include/uapi/linux/netfilter/xt_cgroup.h @@ -2,10 +2,23 @@ #define _UAPI_XT_CGROUP_H #include +#include struct xt_cgroup_info_v0 { __u32 id; __u32 invert; }; +struct xt_cgroup_info_v1 { + __u8 has_path; + __u8 has_classid; + __u8 invert_path; + __u8 invert_classid; + char path[PATH_MAX]; + __u32 classid; + + /* kernel internal data */ + void *priv __attribute__((aligned(8))); +}; + #endif /* _UAPI_XT_CGROUP_H */ diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 17300256772a..a086a914865f 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -34,6 +34,37 @@ static int cgroup_mt_check_v0(const struct xt_mtchk_param *par) return 0; } +static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_cgroup_info_v1 *info = par->matchinfo; + struct cgroup *cgrp; + + if ((info->invert_path & ~1) || (info->invert_classid & ~1)) + return -EINVAL; + + if (!info->has_path && !info->has_classid) { + pr_info("xt_cgroup: no path or classid specified\n"); + return -EINVAL; + } + + if (info->has_path && info->has_classid) { + pr_info("xt_cgroup: both path and classid specified\n"); + return -EINVAL; + } + + if (info->has_path) { + cgrp = cgroup_get_from_path(info->path); + if (IS_ERR(cgrp)) { + pr_info("xt_cgroup: invalid path, errno=%ld\n", + PTR_ERR(cgrp)); + return -EINVAL; + } + info->priv = cgrp; + } + + return 0; +} + static bool cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { @@ -46,6 
+77,31 @@ cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) info->invert; } +static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cgroup_info_v1 *info = par->matchinfo; + struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data; + struct cgroup *ancestor = info->priv; + + if (!skb->sk || !sk_fullsock(skb->sk)) + return false; + + if (ancestor) + return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^ + info->invert_path; + else + return (info->classid == sock_cgroup_classid(skcd)) ^ + info->invert_classid; +} + +static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + struct xt_cgroup_info_v1 *info = par->matchinfo; + + if (info->priv) + cgroup_put(info->priv); +} + static struct xt_match cgroup_mt_reg[] __read_mostly = { { .name = "cgroup", @@ -59,6 +115,19 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = { (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), }, + { + .name = "cgroup", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check_v1, + .match = cgroup_mt_v1, + .matchsize = sizeof(struct xt_cgroup_info_v1), + .destroy = cgroup_mt_destroy_v1, + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, }; static int __init cgroup_mt_init(void) -- cgit v1.2.3 From 5241c2d7c52757e6df79877ba282762df0caea9f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 14 Dec 2015 20:55:22 +0100 Subject: ipv6: addrconf: drop ieee802154 specific things This patch removes ARPHRD_IEEE802154 from addrconf handling. In the earlier days of 802.15.4 6LoWPAN, the interface type was ARPHRD_IEEE802154 which introduced several issues, because 802.15.4 interfaces used the same type. Since commit 965e613d299c ("ieee802154: 6lowpan: fix ARPHRD to ARPHRD_6LOWPAN") we use ARPHRD_6LOWPAN for 6LoWPAN interfaces. 
This patch will remove ARPHRD_IEEE802154 which is currently dead code, because ARPHRD_IEEE802154 doesn't reach the minimum 1280 MTU of IPv6. Also, we use 6LoWPAN EUI64-specific defines instead of using link-layer constants from the 802.15.4 link-layer header. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5e9111da449d..7082fb79d876 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -70,7 +70,7 @@ #include #include -#include +#include #include #include #include @@ -1947,9 +1947,9 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) { - if (dev->addr_len != IEEE802154_ADDR_LEN) + if (dev->addr_len != EUI64_ADDR_LEN) return -1; - memcpy(eui, dev->dev_addr, 8); + memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN); eui[0] ^= 2; return 0; } @@ -2041,7 +2041,6 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) case ARPHRD_IPGRE: return addrconf_ifid_gre(eui, dev); case ARPHRD_6LOWPAN: - case ARPHRD_IEEE802154: return addrconf_ifid_eui64(eui, dev); case ARPHRD_IEEE1394: return addrconf_ifid_ieee1394(eui, dev); @@ -3066,7 +3065,6 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_FDDI) && (dev->type != ARPHRD_ARCNET) && (dev->type != ARPHRD_INFINIBAND) && - (dev->type != ARPHRD_IEEE802154) && (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && (dev->type != ARPHRD_6LOWPAN)) { -- cgit v1.2.3 From 0506eb01f70bd4d7e999c11488a6a892e01c42e2 Mon Sep 17 00:00:00 2001 From: Eugene Crosser Date: Fri, 11 Dec 2015 12:27:50 +0100 Subject: iucv: prevent information leak in iucv_message Initialize storage for the future IUCV header that will be included in the transmitted packet.
Some of the header fields are unused with HiperSockets transport, and will contain data left from some other functions. Signed-off-by: Eugene Crosser Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 3ea4c98d94dc..5bc473b01a00 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1031,7 +1031,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); struct sk_buff *skb; - struct iucv_message txmsg; + struct iucv_message txmsg = {0}; struct cmsghdr *cmsg; int cmsg_done; long timeo; -- cgit v1.2.3 From 979f66b32dbbf928635dbf44fd9843d27c4ed8f9 Mon Sep 17 00:00:00 2001 From: Eugene Crosser Date: Fri, 11 Dec 2015 12:27:51 +0100 Subject: iucv: call skb_linearize() when needed When the linear buffer of the received sk_buff is shorter than the header, use skb_linearize(). sk_buffs with short linear buffer happen on the sending side under high traffic, and some kernel configurations, when allocated buffer starts just before page boundary, and IUCV transport has to send it as two separate QDIO buffer elements, with first element shorter than the header. Signed-off-by: Eugene Crosser Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/iucv/af_iucv.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 5bc473b01a00..ef50a94d3eb7 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2084,11 +2084,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) return NET_RX_SUCCESS; } - /* write stuff from iucv_msg to skb cb */ - if (skb->len < sizeof(struct af_iucv_trans_hdr)) { - kfree_skb(skb); - return NET_RX_SUCCESS; - } + /* write stuff from iucv_msg to skb cb */ skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); skb_reset_transport_header(skb); skb_reset_network_header(skb); @@ -2119,6 +2115,20 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, char nullstring[8]; int err = 0; + if (skb->len < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr))) { + WARN_ONCE(1, "AF_IUCV too short skb, len=%d, min=%d", + (int)skb->len, + (int)(ETH_HLEN + sizeof(struct af_iucv_trans_hdr))); + kfree_skb(skb); + return NET_RX_SUCCESS; + } + if (skb_headlen(skb) < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr))) + if (skb_linearize(skb)) { + WARN_ONCE(1, "AF_IUCV skb_linearize failed, len=%d", + (int)skb->len); + kfree_skb(skb); + return NET_RX_SUCCESS; + } skb_pull(skb, ETH_HLEN); trans_hdr = (struct af_iucv_trans_hdr *)skb->data; EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName)); -- cgit v1.2.3 From 9c55d3b5459bffe8ac098175e672a90260c0cfa4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Dec 2015 10:49:42 +0100 Subject: nfnetlink: add nfnl_dereference_protected helper to avoid overly long line in followup patch. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 28591fa94ba5..aebf5cd45602 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -33,6 +33,10 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); +#define nfnl_dereference_protected(id) \ + rcu_dereference_protected(table[(id)].subsys, \ + lockdep_nfnl_is_held((id))) + static char __initdata nfversion[] = "0.30"; static struct { @@ -208,8 +212,7 @@ replay: } else { rcu_read_unlock(); nfnl_lock(subsys_id); - if (rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)) != ss || + if (nfnl_dereference_protected(subsys_id) != ss || nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) @@ -299,15 +302,13 @@ replay: skb->sk = oskb->sk; nfnl_lock(subsys_id); - ss = rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)); + ss = nfnl_dereference_protected(subsys_id); if (!ss) { #ifdef CONFIG_MODULES nfnl_unlock(subsys_id); request_module("nfnetlink-subsys-%d", subsys_id); nfnl_lock(subsys_id); - ss = rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)); + ss = nfnl_dereference_protected(subsys_id); if (!ss) #endif { -- cgit v1.2.3 From cef63419f7dbd52e535d1932a88904b3facb1546 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 4 Aug 2015 21:09:55 +0800 Subject: batman-adv: add list of unique single hop neighbors per hard-interface Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/hard-interface.c | 4 + net/batman-adv/originator.c | 157 ++++++++++++++++++++++++++++++++++++++++ net/batman-adv/originator.h | 5 ++ net/batman-adv/types.h | 22 ++++++ 4 files changed, 188 
insertions(+) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index a58184fdf5fd..01acccc4d218 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -639,9 +640,12 @@ batadv_hardif_add_interface(struct net_device *net_dev) goto free_sysfs; INIT_LIST_HEAD(&hard_iface->list); + INIT_HLIST_HEAD(&hard_iface->neigh_list); INIT_WORK(&hard_iface->cleanup_work, batadv_hardif_remove_interface_finish); + spin_lock_init(&hard_iface->neigh_list_lock); + hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; if (batadv_is_wifi_netdev(net_dev)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 7486df9ed48d..a8671c656742 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -201,6 +201,47 @@ void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu); } +/** + * batadv_hardif_neigh_free_rcu - free the hardif neigh_node + * @rcu: rcu pointer of the neigh_node + */ +static void batadv_hardif_neigh_free_rcu(struct rcu_head *rcu) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + + hardif_neigh = container_of(rcu, struct batadv_hardif_neigh_node, rcu); + + spin_lock_bh(&hardif_neigh->if_incoming->neigh_list_lock); + hlist_del_init_rcu(&hardif_neigh->list); + spin_unlock_bh(&hardif_neigh->if_incoming->neigh_list_lock); + + batadv_hardif_free_ref_now(hardif_neigh->if_incoming); + kfree(hardif_neigh); +} + +/** + * batadv_hardif_neigh_free_now - decrement the hardif neighbors refcounter + * and possibly free it (without rcu callback) + * @hardif_neigh: hardif neigh neighbor to free + */ +static void +batadv_hardif_neigh_free_now(struct batadv_hardif_neigh_node *hardif_neigh) +{ + if (atomic_dec_and_test(&hardif_neigh->refcount)) + 
batadv_hardif_neigh_free_rcu(&hardif_neigh->rcu); +} + +/** + * batadv_hardif_neigh_free_ref - decrement the hardif neighbors refcounter + * and possibly free it + * @hardif_neigh: hardif neigh neighbor to free + */ +void batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh) +{ + if (atomic_dec_and_test(&hardif_neigh->refcount)) + call_rcu(&hardif_neigh->rcu, batadv_hardif_neigh_free_rcu); +} + /** * batadv_neigh_node_free_rcu - free the neigh_node * @rcu: rcu pointer of the neigh_node @@ -209,6 +250,7 @@ static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; + struct batadv_hardif_neigh_node *hardif_neigh; struct batadv_neigh_ifinfo *neigh_ifinfo; struct batadv_algo_ops *bao; @@ -220,6 +262,14 @@ static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo); } + hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming, + neigh_node->addr); + if (hardif_neigh) { + /* batadv_hardif_neigh_get() increases refcount too */ + batadv_hardif_neigh_free_now(hardif_neigh); + batadv_hardif_neigh_free_now(hardif_neigh); + } + if (bao->bat_neigh_free) bao->bat_neigh_free(neigh_node); @@ -478,6 +528,102 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, return res; } +/** + * batadv_hardif_neigh_create - create a hardif neighbour node + * @hard_iface: the interface this neighbour is connected to + * @neigh_addr: the interface address of the neighbour to retrieve + * + * Returns the hardif neighbour node if found or created or NULL otherwise. 
+ */ +static struct batadv_hardif_neigh_node * +batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) +{ + struct batadv_hardif_neigh_node *hardif_neigh = NULL; + + spin_lock_bh(&hard_iface->neigh_list_lock); + + /* check if neighbor hasn't been added in the meantime */ + hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr); + if (hardif_neigh) + goto out; + + if (!atomic_inc_not_zero(&hard_iface->refcount)) + goto out; + + hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC); + if (!hardif_neigh) { + batadv_hardif_free_ref(hard_iface); + goto out; + } + + INIT_HLIST_NODE(&hardif_neigh->list); + ether_addr_copy(hardif_neigh->addr, neigh_addr); + hardif_neigh->if_incoming = hard_iface; + hardif_neigh->last_seen = jiffies; + + atomic_set(&hardif_neigh->refcount, 1); + + hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list); + +out: + spin_unlock_bh(&hard_iface->neigh_list_lock); + return hardif_neigh; +} + +/** + * batadv_hardif_neigh_get_or_create - retrieve or create a hardif neighbour + * node + * @hard_iface: the interface this neighbour is connected to + * @neigh_addr: the interface address of the neighbour to retrieve + * + * Returns the hardif neighbour node if found or created or NULL otherwise. + */ +static struct batadv_hardif_neigh_node * +batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) +{ + struct batadv_hardif_neigh_node *hardif_neigh = NULL; + + /* first check without locking to avoid the overhead */ + hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr); + if (hardif_neigh) + return hardif_neigh; + + return batadv_hardif_neigh_create(hard_iface, neigh_addr); +} + +/** + * batadv_hardif_neigh_get - retrieve a hardif neighbour from the list + * @hard_iface: the interface where this neighbour is connected to + * @neigh_addr: the address of the neighbour + * + * Looks for and possibly returns a neighbour belonging to this hard interface. 
+ * Returns NULL if the neighbour is not found. + */ +struct batadv_hardif_neigh_node * +batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) +{ + struct batadv_hardif_neigh_node *tmp_hardif_neigh, *hardif_neigh = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_hardif_neigh, + &hard_iface->neigh_list, list) { + if (!batadv_compare_eth(tmp_hardif_neigh->addr, neigh_addr)) + continue; + + if (!atomic_inc_not_zero(&tmp_hardif_neigh->refcount)) + continue; + + hardif_neigh = tmp_hardif_neigh; + break; + } + rcu_read_unlock(); + + return hardif_neigh; +} + /** * batadv_neigh_node_new - create and init a new neigh_node object * @orig_node: originator object representing the neighbour @@ -493,11 +639,17 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, const u8 *neigh_addr) { struct batadv_neigh_node *neigh_node; + struct batadv_hardif_neigh_node *hardif_neigh = NULL; neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); if (neigh_node) goto out; + hardif_neigh = batadv_hardif_neigh_get_or_create(hard_iface, + neigh_addr); + if (!hardif_neigh) + goto out; + neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC); if (!neigh_node) goto out; @@ -523,11 +675,16 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); spin_unlock_bh(&orig_node->neigh_list_lock); + /* increment unique neighbor refcount */ + atomic_inc(&hardif_neigh->refcount); + batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", neigh_addr, orig_node->orig, hard_iface->net_dev->name); out: + if (hardif_neigh) + batadv_hardif_neigh_free_ref(hardif_neigh); return neigh_node; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index fa18f9bf266b..eae0557fa5fa 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -41,6 +41,11 @@ void batadv_orig_node_free_ref(struct 
batadv_orig_node *orig_node); void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const u8 *addr); +struct batadv_hardif_neigh_node * +batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr); +void +batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh); struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_orig_node *orig_node, struct batadv_hard_iface *hard_iface, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index d260efd70499..71c7d9f1f79f 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -100,6 +100,8 @@ struct batadv_hard_iface_bat_iv { * @bat_iv: BATMAN IV specific per hard interface data * @cleanup_work: work queue callback item for hard interface deinit * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs + * @neigh_list: list of unique single hop neighbors via this interface + * @neigh_list_lock: lock protecting neigh_list */ struct batadv_hard_iface { struct list_head list; @@ -115,6 +117,9 @@ struct batadv_hard_iface { struct batadv_hard_iface_bat_iv bat_iv; struct work_struct cleanup_work; struct dentry *debug_dir; + struct hlist_head neigh_list; + /* neigh_list_lock protects: neigh_list */ + spinlock_t neigh_list_lock; }; /** @@ -340,6 +345,23 @@ struct batadv_gw_node { struct rcu_head rcu; }; +/** + * batadv_hardif_neigh_node - unique neighbor per hard interface + * @list: list node for batadv_hard_iface::neigh_list + * @addr: the MAC address of the neighboring interface + * @if_incoming: pointer to incoming hard interface + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in a RCU-safe manner + */ +struct batadv_hardif_neigh_node { + struct hlist_node list; + u8 addr[ETH_ALEN]; + struct batadv_hard_iface *if_incoming; + unsigned long last_seen; + atomic_t refcount; + struct rcu_head rcu; +}; + /** * struct 
batadv_neigh_node - structure for single hops neighbors * @list: list node for batadv_orig_node::neigh_list -- cgit v1.2.3 From 8248a4c7c866a9c15b6b379ca98aa8c95363f502 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 4 Aug 2015 21:09:56 +0800 Subject: batman-adv: add bat_hardif_neigh_init algo ops call Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 4 ++++ net/batman-adv/types.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a8671c656742..27dd326f93ad 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -539,6 +539,7 @@ static struct batadv_hardif_neigh_node * batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, const u8 *neigh_addr) { + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hardif_neigh_node *hardif_neigh = NULL; spin_lock_bh(&hard_iface->neigh_list_lock); @@ -564,6 +565,9 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, atomic_set(&hardif_neigh->refcount, 1); + if (bat_priv->bat_algo_ops->bat_hardif_neigh_init) + bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh); + hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list); out: diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 71c7d9f1f79f..838d55ef546a 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1153,6 +1153,7 @@ struct batadv_forw_packet { * @bat_primary_iface_set: called when primary interface is selected / changed * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue * @bat_ogm_emit: send scheduled OGM + * @bat_hardif_neigh_init: called on creation of single hop entry * @bat_neigh_cmp: compare the metrics of two neighbors for their respective * outgoing interfaces * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better @@ -1178,6 +1179,7 @@ struct batadv_algo_ops { void 
(*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface); void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); /* neigh_node handling API */ + void (*bat_hardif_neigh_init)(struct batadv_hardif_neigh_node *neigh); int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, -- cgit v1.2.3 From 7587405ab93e5383e64ac311f460c30a02a8e9cb Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 4 Aug 2015 21:09:57 +0800 Subject: batman-adv: export single hop neighbor list via debugfs Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ net/batman-adv/debugfs.c | 9 ++++++++ net/batman-adv/originator.c | 34 +++++++++++++++++++++++++++++ net/batman-adv/originator.h | 2 ++ net/batman-adv/types.h | 2 ++ 5 files changed, 100 insertions(+) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 912d9c36fb1c..1efdb5ccfb23 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1861,6 +1861,58 @@ next: seq_puts(seq, "No batman nodes in range ...\n"); } +/** + * batadv_iv_hardif_neigh_print - print a single hop neighbour node + * @seq: neighbour table seq_file struct + * @hardif_neigh: hardif neighbour information + */ +static void +batadv_iv_hardif_neigh_print(struct seq_file *seq, + struct batadv_hardif_neigh_node *hardif_neigh) +{ + int last_secs, last_msecs; + + last_secs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) / 1000; + last_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) % 1000; + + seq_printf(seq, " %10s %pM %4i.%03is\n", + hardif_neigh->if_incoming->net_dev->name, + hardif_neigh->addr, last_secs, last_msecs); +} + +/** + * batadv_iv_ogm_neigh_print - print the single hop neighbour list + * @bat_priv: the bat priv with all the soft interface information + * @seq: neighbour table seq_file struct + */ +static void 
batadv_iv_neigh_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_hardif_neigh_node *hardif_neigh; + struct batadv_hard_iface *hard_iface; + int batman_count = 0; + + seq_printf(seq, " %10s %-13s %s\n", + "IF", "Neighbor", "last-seen"); + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface != net_dev) + continue; + + hlist_for_each_entry_rcu(hardif_neigh, + &hard_iface->neigh_list, list) { + batadv_iv_hardif_neigh_print(seq, hardif_neigh); + batman_count++; + } + } + rcu_read_unlock(); + + if (batman_count == 0) + seq_puts(seq, "No batman nodes in range ...\n"); +} + /** * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors * @neigh1: the first neighbor object of the comparison @@ -1954,6 +2006,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .bat_ogm_emit = batadv_iv_ogm_emit, .bat_neigh_cmp = batadv_iv_ogm_neigh_cmp, .bat_neigh_is_equiv_or_better = batadv_iv_ogm_neigh_is_eob, + .bat_neigh_print = batadv_iv_neigh_print, .bat_orig_print = batadv_iv_ogm_orig_print, .bat_orig_free = batadv_iv_ogm_orig_free, .bat_orig_add_if = batadv_iv_ogm_orig_add_if, diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index c4c1e8030ba0..037ad0a5f485 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -262,6 +262,13 @@ static int batadv_algorithms_open(struct inode *inode, struct file *file) return single_open(file, batadv_algo_seq_print_text, NULL); } +static int neighbors_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + + return single_open(file, batadv_hardif_neigh_seq_print_text, net_dev); +} + static int batadv_originators_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; @@ -375,6 +382,7 @@ static struct batadv_debuginfo 
*batadv_general_debuginfos[] = { }; /* The following attributes are per soft interface */ +static BATADV_DEBUGINFO(neighbors, S_IRUGO, neighbors_open); static BATADV_DEBUGINFO(originators, S_IRUGO, batadv_originators_open); static BATADV_DEBUGINFO(gateways, S_IRUGO, batadv_gateways_open); static BATADV_DEBUGINFO(transtable_global, S_IRUGO, @@ -394,6 +402,7 @@ static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open); #endif static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { + &batadv_debuginfo_neighbors, &batadv_debuginfo_originators, &batadv_debuginfo_gateways, &batadv_debuginfo_transtable_global, diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 27dd326f93ad..3c782a33bdac 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -692,6 +692,40 @@ out: return neigh_node; } +/** + * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list + * @seq: neighbour table seq_file struct + * @offset: not used + * + * Always returns 0. + */ +int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hard_iface *primary_if; + + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + return 0; + + seq_printf(seq, "[B.A.T.M.A.N. 
adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", + BATADV_SOURCE_VERSION, primary_if->net_dev->name, + primary_if->net_dev->dev_addr, net_dev->name, + bat_priv->bat_algo_ops->name); + + batadv_hardif_free_ref(primary_if); + + if (!bat_priv->bat_algo_ops->bat_neigh_print) { + seq_puts(seq, + "No printing function for this routing protocol\n"); + return 0; + } + + bat_priv->bat_algo_ops->bat_neigh_print(bat_priv, seq); + return 0; +} + /** * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object * @rcu: rcu pointer of the orig_ifinfo object diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index eae0557fa5fa..29557753d552 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -62,6 +62,8 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo); +int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset); + struct batadv_orig_ifinfo * batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 838d55ef546a..815c960d31b4 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1159,6 +1159,7 @@ struct batadv_forw_packet { * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better * than neigh2 for their respective outgoing interface from the metric * prospective + * @bat_neigh_print: print the single hop neighbor list (optional) * @bat_neigh_free: free the resources allocated by the routing algorithm for a * neigh_node object * @bat_orig_print: print the originator table (optional) @@ -1189,6 +1190,7 @@ struct batadv_algo_ops { struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); + void (*bat_neigh_print)(struct batadv_priv *priv, struct seq_file *seq); void (*bat_neigh_free)(struct batadv_neigh_node 
*neigh); /* orig_node handling API */ void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq, -- cgit v1.2.3 From 4ff1e2a738c2c954ea2c0d6a7c2b06056d8d3849 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 4 Aug 2015 21:09:58 +0800 Subject: batman-adv: update last seen field of single hop originators Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 1efdb5ccfb23..3738a2f0a03d 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1379,6 +1379,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_hardif_neigh_node *hardif_neigh = NULL; struct batadv_neigh_node *router = NULL; struct batadv_neigh_node *router_router = NULL; struct batadv_orig_node *orig_neigh_node; @@ -1423,6 +1424,13 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, goto out; } + if (is_single_hop_neigh) { + hardif_neigh = batadv_hardif_neigh_get(if_incoming, + ethhdr->h_source); + if (hardif_neigh) + hardif_neigh->last_seen = jiffies; + } + router = batadv_orig_router_get(orig_node, if_outgoing); if (router) { router_router = batadv_orig_router_get(router->orig_node, @@ -1557,6 +1565,8 @@ out: batadv_neigh_node_free_ref(router_router); if (orig_neigh_router) batadv_neigh_node_free_ref(orig_neigh_router); + if (hardif_neigh) + batadv_hardif_neigh_free_ref(hardif_neigh); kfree_skb(skb_priv); } -- cgit v1.2.3 From 18165f6f6570318ad0bb1e60c2ae597cdfd09a50 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sat, 8 Aug 2015 02:01:50 +0200 Subject: batman-adv: rename equiv/equal or better to similar or better Since the function applies a threshold and also slightly worse values are accepted, 
''equal or better'' does not represent the intention of the function. ''Similar or better'' represents that better. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 8 ++++---- net/batman-adv/main.c | 2 +- net/batman-adv/routing.c | 6 +++--- net/batman-adv/types.h | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 3738a2f0a03d..5677169c1b98 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1964,8 +1964,8 @@ out: } /** - * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than - * neigh2 from the metric prospective + * batadv_iv_ogm_neigh_is_sob - check if neigh1 is similarly good or better + * than neigh2 from the metric prospective * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison @@ -1975,7 +1975,7 @@ out: * the metric via neigh2, false otherwise. 
*/ static bool -batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1, +batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) @@ -2015,7 +2015,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .bat_ogm_schedule = batadv_iv_ogm_schedule, .bat_ogm_emit = batadv_iv_ogm_emit, .bat_neigh_cmp = batadv_iv_ogm_neigh_cmp, - .bat_neigh_is_equiv_or_better = batadv_iv_ogm_neigh_is_eob, + .bat_neigh_is_similar_or_better = batadv_iv_ogm_neigh_is_sob, .bat_neigh_print = batadv_iv_neigh_print, .bat_orig_print = batadv_iv_ogm_orig_print, .bat_orig_free = batadv_iv_ogm_orig_free, diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d7f17c1aa4a4..45952dcb0b68 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -552,7 +552,7 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) !bat_algo_ops->bat_ogm_schedule || !bat_algo_ops->bat_ogm_emit || !bat_algo_ops->bat_neigh_cmp || - !bat_algo_ops->bat_neigh_is_equiv_or_better) { + !bat_algo_ops->bat_neigh_is_similar_or_better) { pr_info("Routing algo '%s' does not implement required ops\n", bat_algo_ops->name); return -EINVAL; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 8d990b070a2e..a43f02e2d423 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -497,9 +497,9 @@ batadv_find_router(struct batadv_priv *bat_priv, /* alternative candidate should be good enough to be * considered */ - if (!bao->bat_neigh_is_equiv_or_better(cand_router, - cand->if_outgoing, - router, recv_if)) + if (!bao->bat_neigh_is_similar_or_better(cand_router, + cand->if_outgoing, + router, recv_if)) goto next; /* don't use the same router twice */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 815c960d31b4..9bdb21c2368a 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1156,8 +1156,8 @@ struct 
batadv_forw_packet { * @bat_hardif_neigh_init: called on creation of single hop entry * @bat_neigh_cmp: compare the metrics of two neighbors for their respective * outgoing interfaces - * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better - * than neigh2 for their respective outgoing interface from the metric + * @bat_neigh_is_similar_or_better: check if neigh1 is equally similar or + * better than neigh2 for their respective outgoing interface from the metric * prospective * @bat_neigh_print: print the single hop neighbor list (optional) * @bat_neigh_free: free the resources allocated by the routing algorithm for a @@ -1185,7 +1185,7 @@ struct batadv_algo_ops { struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); - bool (*bat_neigh_is_equiv_or_better) + bool (*bat_neigh_is_similar_or_better) (struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, -- cgit v1.2.3 From 9d547833f02fb8b52ab824adae8f850f3c22fd4f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Dec 2015 16:03:34 +0100 Subject: switchdev: vlan: Use switchdev_port* in vlan_netdev_ops We need to be able to propagate static FDB entries and certain bridge port attributes (e.g. learning, flooding) down to the port netdev driver when bridge port is a VLAN interface. Achieve that by setting ndo_bridge* and ndo_fdb* in vlan_netdev_ops to the corresponding switchdev_port* functions. This is consistent with team and bond devices. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/8021q/vlan_dev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index fded86508117..9f4bd137e045 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "vlan.h" #include "vlanproc.h" @@ -774,6 +775,12 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, .ndo_get_iflink = vlan_dev_get_iflink, }; -- cgit v1.2.3 From 6ff64f6f9242d7e50f3e99cb280f69d1927a5fa6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Dec 2015 16:03:35 +0100 Subject: switchdev: Pass original device to port netdev driver switchdev drivers need to know the netdev on which the switchdev op was invoked. For example, the STP state of a VLAN interface configured on top of a port can change while being member in a bridge. In this case, the underlying driver should only change the STP state of that particular VLAN and not of all the VLANs configured on the port. However, current switchdev infrastructure only passes the port netdev down to the driver. Solve that by passing the original device down to the driver as part of the required switchdev object / attribute. This doesn't entail any change in current switchdev drivers. It simply enables those supporting stacked devices to know the originating device and act accordingly. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 2 ++ net/bridge/br_fdb.c | 1 + net/bridge/br_stp.c | 2 ++ net/bridge/br_stp_if.c | 1 + net/bridge/br_vlan.c | 2 ++ net/core/net-sysfs.c | 1 + net/core/rtnetlink.c | 1 + net/switchdev/switchdev.c | 12 ++++++++++++ 8 files changed, 22 insertions(+) (limited to 'net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 1d22ce9f352e..6612946167fe 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -50,6 +50,7 @@ enum switchdev_attr_id { }; struct switchdev_attr { + struct net_device *orig_dev; enum switchdev_attr_id id; u32 flags; union { @@ -68,6 +69,7 @@ enum switchdev_obj_id { }; struct switchdev_obj { + struct net_device *orig_dev; enum switchdev_obj_id id; u32 flags; }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a642bb829d09..82e3e9705017 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -135,6 +135,7 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) { struct switchdev_obj_port_fdb fdb = { .obj = { + .orig_dev = f->dst->dev, .id = SWITCHDEV_OBJ_ID_PORT_FDB, .flags = SWITCHDEV_F_DEFER, }, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 5f3f64553179..b3cca126b103 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -40,6 +40,7 @@ void br_log_state(const struct net_bridge_port *p) void br_set_state(struct net_bridge_port *p, unsigned int state) { struct switchdev_attr attr = { + .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, .flags = SWITCHDEV_F_DEFER, .u.stp_state = state, @@ -570,6 +571,7 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) { struct switchdev_attr attr = { + .orig_dev = br->dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, .u.ageing_time = ageing_time, diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 5396ff08af32..775e00fbeb1e 100644 --- a/net/bridge/br_stp_if.c +++ 
b/net/bridge/br_stp_if.c @@ -37,6 +37,7 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) void br_init_port(struct net_bridge_port *p) { struct switchdev_attr attr = { + .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER, .u.ageing_time = p->br->ageing_time, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 1394da63614a..66c4549efbbb 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -73,6 +73,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, u16 vid, u16 flags) { struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .flags = flags, .vid_begin = vid, @@ -120,6 +121,7 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, u16 vid) { struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .vid_begin = vid, .vid_end = vid, diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f88a62ab019d..bca8c350e7f3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -471,6 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev, if (dev_isalive(netdev)) { struct switchdev_attr attr = { + .orig_dev = netdev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 34ba7a08876d..d8b0113d3eec 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1027,6 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index f34e535e93bd..df790d3385a2 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -723,6 +723,7 @@ static int switchdev_port_vlan_fill(struct sk_buff 
*skb, struct net_device *dev, u32 filter_mask) { struct switchdev_vlan_dump dump = { + .vlan.obj.orig_dev = dev, .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .skb = skb, .filter_mask = filter_mask, @@ -757,6 +758,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, int nlflags) { struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u16 mode = BRIDGE_MODE_UNDEF; @@ -778,6 +780,7 @@ static int switchdev_port_br_setflag(struct net_device *dev, unsigned long brport_flag) { struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u8 flag = nla_get_u8(nlattr); @@ -853,6 +856,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, struct nlattr *attr; struct bridge_vlan_info *vinfo; struct switchdev_obj_port_vlan vlan = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, }; int rem; @@ -975,6 +979,7 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], u16 vid, u16 nlm_flags) { struct switchdev_obj_port_fdb fdb = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .vid = vid, }; @@ -1000,6 +1005,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], u16 vid) { struct switchdev_obj_port_fdb fdb = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .vid = vid, }; @@ -1077,6 +1083,7 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *filter_dev, int idx) { struct switchdev_fdb_dump dump = { + .fdb.obj.orig_dev = dev, .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .dev = dev, .skb = skb, @@ -1135,6 +1142,7 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) if (!dev) return NULL; + attr.orig_dev = dev; if (switchdev_port_attr_get(dev, &attr)) return NULL; @@ -1194,6 +1202,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; + ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_add(dev, 
&ipv4_fib.obj); if (!err) fi->fib_flags |= RTNH_F_OFFLOAD; @@ -1238,6 +1247,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; + ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_del(dev, &ipv4_fib.obj); if (!err) fi->fib_flags &= ~RTNH_F_OFFLOAD; @@ -1270,10 +1280,12 @@ static bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { struct switchdev_attr a_attr = { + .orig_dev = a, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; struct switchdev_attr b_attr = { + .orig_dev = b, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; -- cgit v1.2.3 From 53692b1de419c1b59106909c7f6b4dd3dbc768ac Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:41 -0800 Subject: sctp: Rename NETIF_F_SCTP_CSUM to NETIF_F_SCTP_CRC The SCTP checksum is really a CRC and is very different from the standard 1's complement checksum that serves as the checksum for IP protocols. This offload interface is also very different. Rename NETIF_F_SCTP_CSUM to NETIF_F_SCTP_CRC to highlight these differences. The term CSUM should be reserved in the stack to refer to the standard 1's complement IP checksum. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- drivers/net/loopback.c | 2 +- include/linux/netdev_features.h | 4 ++-- net/8021q/vlan_dev.c | 2 +- net/core/ethtool.c | 4 ++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- net/sctp/output.c | 2 +- 10 files changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b118deb08ce6..a63d980f478e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8842,7 +8842,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_SCTP_CSUM | + NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 455394cf7f80..4d05ff6f0423 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -2321,7 +2321,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter) netdev->features |= NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_SCTP_CSUM | + NETIF_F_SCTP_CRC | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 7afde455326d..31e5f3942839 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2379,8 +2379,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } if (hw->mac.type >= e1000_82576) { - netdev->hw_features |= NETIF_F_SCTP_CSUM; - netdev->features |= NETIF_F_SCTP_CSUM; + netdev->hw_features |= NETIF_F_SCTP_CRC; + netdev->features |= NETIF_F_SCTP_CRC; 
} netdev->priv_flags |= IFF_UNICAST_FLT; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 66c64a376719..9f27001cac1f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8995,8 +8995,8 @@ skip_sriov: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: - netdev->features |= NETIF_F_SCTP_CSUM; - netdev->hw_features |= NETIF_F_SCTP_CSUM | + netdev->features |= NETIF_F_SCTP_CRC; + netdev->hw_features |= NETIF_F_SCTP_CRC | NETIF_F_NTUPLE; break; default: diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index dc7d970bd1c0..a400288cb37b 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -175,7 +175,7 @@ static void loopback_setup(struct net_device *dev) | NETIF_F_UFO | NETIF_F_HW_CSUM | NETIF_F_RXCSUM - | NETIF_F_SCTP_CSUM + | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index f0d87347df19..6395f8309393 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -52,7 +52,7 @@ enum { NETIF_F_GSO_TUNNEL_REMCSUM_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ - NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */ + NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ NETIF_F_FCOE_MTU_BIT, /* Supports max FCoE MTU, 2158 bytes*/ NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ NETIF_F_RXHASH_BIT, /* Receive hashing offload */ @@ -103,7 +103,7 @@ enum { #define NETIF_F_NTUPLE __NETIF_F(NTUPLE) #define NETIF_F_RXCSUM __NETIF_F(RXCSUM) #define NETIF_F_RXHASH __NETIF_F(RXHASH) -#define NETIF_F_SCTP_CSUM __NETIF_F(SCTP_CSUM) +#define NETIF_F_SCTP_CRC __NETIF_F(SCTP_CRC) #define NETIF_F_SG __NETIF_F(SG) #define NETIF_F_TSO6 __NETIF_F(TSO6) #define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9f4bd137e045..45b74e875381 100644 --- 
a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -545,7 +545,7 @@ static int vlan_dev_init(struct net_device *dev) dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | - NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM | + NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC | NETIF_F_ALL_FCOE; dev->features |= real_dev->vlan_features | NETIF_F_LLTX | diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 29edf74846fc..4a0cab85d67d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -87,7 +87,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", - [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", + [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", [NETIF_F_RXHASH_BIT] = "rx-hashing", @@ -235,7 +235,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) switch (eth_cmd) { case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: - return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CRC; case ETHTOOL_GRXCSUM: case ETHTOOL_SRXCSUM: return NETIF_F_RXCSUM; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 010ddeec135f..d952d67f904d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -169,7 +169,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, /* Only update csum if we really have to */ if (sctph->dest != cp->dport || payload_csum || (skb->ip_summed == CHECKSUM_PARTIAL && - !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { + !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { diff --git a/net/sctp/output.c b/net/sctp/output.c index abe7c2db2412..9d610eddd19e 100644 --- 
a/net/sctp/output.c +++ b/net/sctp/output.c @@ -534,7 +534,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * by CRC32-C as described in . */ if (!sctp_checksum_disable) { - if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || + if (!(dst->dev->features & NETIF_F_SCTP_CRC) || (dst_xfrm(dst) != NULL) || packet->ipfragok) { sh->checksum = sctp_compute_cksum(nskb, 0); } else { -- cgit v1.2.3 From a188222b6ed29404ac2d4232d35d1fe0e77af370 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:43 -0800 Subject: net: Rename NETIF_F_ALL_CSUM to NETIF_F_CSUM_MASK The name NETIF_F_ALL_CSUM is a misnomer. This does not correspond to the set of features for offloading all checksums. This is a mask of the checksum offload related features bits. It is incorrect to set both NETIF_F_HW_CSUM and NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM at the same time for features of a device. This patch: - Changes instances of NETIF_F_ALL_CSUM to NETIF_F_CSUM_MASK (where NETIF_F_ALL_CSUM is being used as a mask). - Changes bonding, sfc/efx, ipvlan, macvlan, vlan, and team drivers to use NETIF_F_HW_CSUM in features list instead of NETIF_F_ALL_CSUM. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 7 +++---- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- drivers/net/ethernet/ibm/ibmveth.c | 5 +++-- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/jme.c | 2 +- drivers/net/ethernet/marvell/sky2.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++-- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c | 2 +- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- drivers/net/ipvlan/ipvlan_main.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/macvtap.c | 2 +- drivers/net/team/team.c | 3 +-- drivers/net/usb/r8152.c | 2 +- drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c | 2 +- include/linux/netdev_features.h | 7 ++++++- include/linux/netdevice.h | 6 +++--- include/net/vxlan.h | 2 +- net/8021q/vlan_dev.c | 2 +- net/core/dev.c | 10 +++++----- net/core/ethtool.c | 2 +- net/ipv4/tcp.c | 4 ++-- 25 files changed, 43 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fe0e7a6f4d72..cab99fd44c8e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1067,12 +1067,12 @@ static netdev_features_t bond_fix_features(struct net_device *dev, return features; } -#define BOND_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ +#define BOND_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | NETIF_F_LRO) -#define BOND_ENC_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\ - NETIF_F_ALL_TSO) +#define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_RXCSUM | NETIF_F_ALL_TSO) static void bond_compute_features(struct bonding *bond) { @@ -4182,7 +4182,6 @@ void bond_setup(struct net_device *bond_dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - 
bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; bond_dev->features |= bond_dev->hw_features; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 4cab8879f5ae..34e324f20d80 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5289,7 +5289,7 @@ static netdev_features_t be_features_check(struct sk_buff *skb, skb->inner_protocol != htons(ETH_P_TEB) || skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 7af870a3c549..6691b5a45b9d 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -763,7 +763,7 @@ static netdev_features_t ibmveth_fix_features(struct net_device *dev, */ if (!(features & NETIF_F_RXCSUM)) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; return features; } @@ -928,7 +928,8 @@ static int ibmveth_set_features(struct net_device *dev, rc1 = ibmveth_set_csum_offload(dev, rx_csum); if (rc1 && !adapter->rx_csum) dev->features = - features & ~(NETIF_F_ALL_CSUM | NETIF_F_RXCSUM); + features & ~(NETIF_F_CSUM_MASK | + NETIF_F_RXCSUM); } if (large_send != adapter->large_send) { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index d9854d39576d..83ddf362ea77 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1357,7 +1357,7 @@ static netdev_features_t fm10k_features_check(struct sk_buff *skb, if (!skb->encapsulation || fm10k_tx_encap_offload(skb)) return features; - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + 
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } static const struct net_device_ops fm10k_netdev_ops = { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a63d980f478e..c284e4341c7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8766,7 +8766,7 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb, if (skb->encapsulation && (skb_inner_mac_header(skb) - skb_transport_header(skb) > I40E_MAX_TUNNEL_HDR_LEN)) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 9f27001cac1f..fca35aa90d0f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8598,7 +8598,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, if (unlikely(skb_inner_mac_header(skb) - skb_transport_header(skb) > IXGBE_MAX_TUNNEL_HDR_LEN)) - return features & ~NETIF_F_ALL_CSUM; + return features & ~NETIF_F_CSUM_MASK; return features; } diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 060dd3922974..b1de7afd4116 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2753,7 +2753,7 @@ static netdev_features_t jme_fix_features(struct net_device *netdev, netdev_features_t features) { if (netdev->mtu > 1900) - features &= ~(NETIF_F_ALL_TSO | NETIF_F_ALL_CSUM); + features &= ~(NETIF_F_ALL_TSO | NETIF_F_CSUM_MASK); return features; } diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 5606a043063e..ec0a22119e09 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4380,7 +4380,7 @@ static netdev_features_t sky2_fix_features(struct net_device *dev, */ if (dev->mtu > 
ETH_DATA_LEN && hw->chip_id == CHIP_ID_YUKON_EC_U) { netdev_info(dev, "checksum offload not possible with jumbo frames\n"); - features &= ~(NETIF_F_TSO|NETIF_F_SG|NETIF_F_ALL_CSUM); + features &= ~(NETIF_F_TSO | NETIF_F_SG | NETIF_F_CSUM_MASK); } /* Some hardware requires receive checksum for RSS to work. */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 038ac6b14a60..7060539d276a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2071,7 +2071,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, l4_hdr = ipv6_hdr(skb)->nexthdr; break; default: - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || @@ -2080,7 +2080,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, (l4_hdr == IPPROTO_UDP && (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c index 08d4be616064..e097e6baaac4 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c @@ -500,7 +500,7 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter) val = XsumTX; pch_gbe_validate_option(&val, &opt, adapter); if (!val) - dev->features &= ~NETIF_F_ALL_CSUM; + dev->features &= ~NETIF_F_CSUM_MASK; } { /* Flow Control */ static const struct pch_gbe_option opt = { diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b405349a570c..1fe13c733c1e 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ 
b/drivers/net/ethernet/sfc/efx.c @@ -3131,7 +3131,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (efx->type->offload_features & NETIF_F_V6_CSUM) net_dev->features |= NETIF_F_TSO6; /* Mask for features that also apply to VLAN devices */ - net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG | + net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | NETIF_F_RXCSUM); /* All offloads can be toggled */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3c6549aee11d..0b0fea73a7a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2402,7 +2402,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev, features &= ~NETIF_F_RXCSUM; if (!priv->plat->tx_coe) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; /* Some GMAC devices have a bugged Jumbo frame support that * needs to have the Tx COE disabled for oversized frames @@ -2410,7 +2410,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev, * the TX csum insertionin the TDES and not use SF. 
*/ if (priv->plat->bugged_jumbo && (dev->mtu > ETH_DATA_LEN)) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; return features; } diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index a9268db4e349..f94392d07126 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -88,7 +88,7 @@ static struct lock_class_key ipvlan_netdev_xmit_lock_key; static struct lock_class_key ipvlan_netdev_addr_lock_key; #define IPVLAN_FEATURES \ - (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 06c8bfeaccd6..ae3b486fb663 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -762,7 +762,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; NETIF_F_GSO_ROBUST) #define MACVLAN_FEATURES \ - (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0fc521941c71..d636d051fac8 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -388,7 +388,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) * check, we either support them all or none. 
*/ if (skb->ip_summed == CHECKSUM_PARTIAL && - !(features & NETIF_F_ALL_CSUM) && + !(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto drop; skb_queue_tail(&q->sk.sk_receive_queue, skb); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 059c0f60a2b2..915f60fce186 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -981,7 +981,7 @@ static void team_port_disable(struct team *team, team_lower_state_changed(port); } -#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ +#define TEAM_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | NETIF_F_LRO) @@ -2091,7 +2091,6 @@ static void team_setup(struct net_device *dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); dev->features |= dev->hw_features; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d9427ca3dba7..34642a9583e0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1986,7 +1986,7 @@ rtl8152_features_check(struct sk_buff *skb, struct net_device *dev, int offset = skb_transport_offset(skb); if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset) - features &= ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz) features &= ~NETIF_F_GSO_MASK; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index 679785b0209c..9de4f23910d8 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -69,7 +69,7 @@ ksocknal_lib_zc_capable(ksock_conn_t *conn) /* ZC if the socket supports scatter/gather and doesn't need software * checksums */ - return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0); + return ((caps & NETIF_F_SG) != 0 && (caps 
& NETIF_F_CSUM_MASK) != 0); } int diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 6395f8309393..2c4e94ab88da 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -149,7 +149,12 @@ enum { #define NETIF_F_GEN_CSUM NETIF_F_HW_CSUM #define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) #define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) -#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) + +/* List of IP checksum features. Note that NETIF_HW_CSUM should not be + * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- + * this would be contradictory + */ +#define NETIF_F_CSUM_MASK (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1bb21ff0fa64..a54223a113b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3763,12 +3763,12 @@ static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { if (f1 & NETIF_F_GEN_CSUM) - f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f1 |= (NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); if (f2 & NETIF_F_GEN_CSUM) - f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f2 |= (NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); f1 &= f2; if (f1 & NETIF_F_GEN_CSUM) - f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f1 &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); return f1; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index c1c899c3a51b..b5a1aec1a167 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -232,7 +232,7 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, skb->inner_protocol != htons(ETH_P_TEB) || (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | 
NETIF_F_GSO_MASK); return features; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 45b74e875381..ad5e2fd1012c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -543,7 +543,7 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); - dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | + dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC | NETIF_F_ALL_FCOE; diff --git a/net/core/dev.c b/net/core/dev.c index 8f705fcedb94..5a3b5a404642 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2645,7 +2645,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } @@ -2792,7 +2792,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device else skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!(features & NETIF_F_ALL_CSUM) && + if (!(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto out_kfree_skb; } @@ -7572,15 +7572,15 @@ netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask) { if (mask & NETIF_F_GEN_CSUM) - mask |= NETIF_F_ALL_CSUM; + mask |= NETIF_F_CSUM_MASK; mask |= NETIF_F_VLAN_CHALLENGED; - all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; + all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask; all &= one | ~NETIF_F_ALL_FOR_ALL; /* If one device supports hw checksumming, set for all. 
*/ if (all & NETIF_F_GEN_CSUM) - all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); return all; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4a0cab85d67d..09948a726347 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -235,7 +235,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) switch (eth_cmd) { case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: - return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CRC; + return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC; case ETHTOOL_GRXCSUM: case ETHTOOL_SRXCSUM: return NETIF_F_RXCSUM; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82cca18c90f..cf7ef7be79f0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1018,7 +1018,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, ssize_t res; if (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) + !(sk->sk_route_caps & NETIF_F_CSUM_MASK)) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1175,7 +1175,7 @@ new_segment: /* * Check whether we can use HW checksum. */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + if (sk->sk_route_caps & NETIF_F_CSUM_MASK) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); -- cgit v1.2.3 From c8cd0989bd151fda87bbf10887b3df18021284bc Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:44 -0800 Subject: net: Eliminate NETIF_F_GEN_CSUM and NETIF_F_V[46]_CSUM These netif flags are unnecessary convolutions. It is more straightforward to just use NETIF_F_HW_CSUM, NETIF_F_IP_CSUM, and NETIF_F_IPV6_CSUM directly. This patch also: - Cleans up can_checksum_protocol - Simplifies netdev_intersect_features Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/net/macvlan.c | 2 +- include/linux/if_vlan.h | 2 +- include/linux/netdev_features.h | 9 +++---- include/linux/netdevice.h | 40 ++++++++++++++++++++------------ net/core/dev.c | 12 +++++----- net/core/pktgen.c | 4 ++-- net/ipv4/ip_output.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 3 ++- net/ipv4/udp.c | 3 ++- net/ipv4/udp_offload.c | 5 ++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 3 ++- 13 files changed, 50 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 1fe13c733c1e..6f697438545d 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3128,7 +3128,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev, net_dev->features |= (efx->type->offload_features | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_RXCSUM); - if (efx->type->offload_features & NETIF_F_V6_CSUM) + if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) net_dev->features |= NETIF_F_TSO6; /* Mask for features that also apply to VLAN devices */ net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index ae3b486fb663..6a57a005e0ca 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -758,7 +758,7 @@ static struct lock_class_key macvlan_netdev_xmit_lock_key; static struct lock_class_key macvlan_netdev_addr_lock_key; #define ALWAYS_ON_FEATURES \ - (NETIF_F_SG | NETIF_F_GEN_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX | \ NETIF_F_GSO_ROBUST) #define MACVLAN_FEATURES \ diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 05f5879821b8..a5f6ce6b578c 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -621,7 +621,7 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, 
NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 2c4e94ab88da..d9654f0eecb3 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -146,15 +146,12 @@ enum { #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ NETIF_F_TSO6 | NETIF_F_UFO) -#define NETIF_F_GEN_CSUM NETIF_F_HW_CSUM -#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) -#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) - -/* List of IP checksum features. Note that NETIF_HW_CSUM should not be +/* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- * this would be contradictory */ -#define NETIF_F_CSUM_MASK (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) +#define NETIF_F_CSUM_MASK (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ + NETIF_F_HW_CSUM) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a54223a113b1..283984b67cd9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3691,13 +3691,24 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); + if (protocol == htons(ETH_P_FCOE)) + return !!(features & NETIF_F_FCOE_CRC); + + /* Assume this is an IP checksum (not SCTP CRC) */ + + if (features & NETIF_F_HW_CSUM) { + /* Can checksum everything */ + return true; + } + + switch (protocol) { + case htons(ETH_P_IP): + return !!(features & 
NETIF_F_IP_CSUM); + case htons(ETH_P_IPV6): + return !!(features & NETIF_F_IPV6_CSUM); + default: + return false; + } } #ifdef CONFIG_BUG @@ -3762,15 +3773,14 @@ void linkwatch_run_queue(void); static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { - if (f1 & NETIF_F_GEN_CSUM) - f1 |= (NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); - if (f2 & NETIF_F_GEN_CSUM) - f2 |= (NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); - f1 &= f2; - if (f1 & NETIF_F_GEN_CSUM) - f1 &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); + if ((f1 ^ f2) & NETIF_F_HW_CSUM) { + if (f1 & NETIF_F_HW_CSUM) + f1 |= (NETIF_F_IP_CSUM|NETIF_F_IP_CSUM); + else + f2 |= (NETIF_F_IP_CSUM|NETIF_F_IP_CSUM); + } - return f1; + return f1 & f2; } static inline netdev_features_t netdev_get_wanted_features( diff --git a/net/core/dev.c b/net/core/dev.c index 5a3b5a404642..45b013f27625 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6467,9 +6467,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ - if (!((features & NETIF_F_GEN_CSUM) || - (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) - == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + if (!(features & NETIF_F_HW_CSUM) && + ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) != + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) { netdev_dbg(dev, "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; @@ -7571,7 +7571,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask) { - if (mask & NETIF_F_GEN_CSUM) + if (mask & NETIF_F_HW_CSUM) mask |= NETIF_F_CSUM_MASK; mask |= NETIF_F_VLAN_CHALLENGED; @@ -7579,8 +7579,8 @@ netdev_features_t netdev_increment_features(netdev_features_t all, all &= one | ~NETIF_F_ALL_FOR_ALL; /* If one device supports hw checksumming, set for all. 
*/ - if (all & NETIF_F_GEN_CSUM) - all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); + if (all & NETIF_F_HW_CSUM) + all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM); return all; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index de8d5cc5eb24..2be144498bcf 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2898,7 +2898,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; - } else if (odev->features & NETIF_F_V4_CSUM) { + } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; udp4_hwcsum(skb, iph->saddr, iph->daddr); @@ -3032,7 +3032,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; - } else if (odev->features & NETIF_F_V6_CSUM) { + } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e0b94cd843d7..568e2bc0d93d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -911,7 +911,7 @@ static int __ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->dst.dev->features & NETIF_F_V4_CSUM && + rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && !(flags & MSG_MORE) && !exthdrlen) csummode = CHECKSUM_PARTIAL; diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 5075b7ecd26d..61c7cc22ea68 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -132,7 +132,8 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { + 
(!skb->dev || skb->dev->features & + (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0c7b0e61b917..8841e984f8bf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -772,7 +772,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, else if (skb_is_gso(skb)) uh->check = ~udp_v4_check(len, saddr, daddr, 0); else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + (skb_dst(skb)->dev->features & + (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f9386160cbee..130042660181 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -60,8 +60,9 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, /* Try to offload checksum if possible */ offload_csum = !!(need_csum && - (skb->dev->features & - (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM))); + ((skb->dev->features & NETIF_F_HW_CSUM) || + (skb->dev->features & (is_ipv6 ? + NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM)))); /* segment inner packet. 
*/ enc_features = skb->dev->hw_enc_features & features; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e6a7bd15b9b7..2f748452b4aa 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1322,7 +1322,7 @@ emsgsize: headersize == sizeof(struct ipv6hdr) && length < mtu - headersize && !(flags & MSG_MORE) && - rt->dst.dev->features & NETIF_F_V6_CSUM) + rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 238e70c3f7b7..6ce309928841 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -136,7 +136,8 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt6i_flags & RTF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) { + (!skb->dev || skb->dev->features & + (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + -- cgit v1.2.3 From 9a49850d0af7b9fd14d091dfe61ef6cb369f86b9 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:45 -0800 Subject: tcp: Fix conditions to determine checksum offload In tcp_send_sendpage and tcp_sendmsg we check the route capabilities to determine if checksum offload can be performed. This check currently does not take the IP protocol into account for devices that advertise only one of NETIF_F_IPV6_CSUM or NETIF_F_IP_CSUM. This patch adds a function to check capabilities for checksum offload with a socket called sk_check_csum_caps. This function checks for specific IPv4 or IPv6 offload support based on the family of the socket. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/sock.h | 9 +++++++++ net/ipv4/tcp.c | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 0ca22b014de1..ab0269f4b2cc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1791,6 +1791,15 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) sk->sk_route_caps &= ~flags; } +static inline bool sk_check_csum_caps(struct sock *sk) +{ + return (sk->sk_route_caps & NETIF_F_HW_CSUM) || + (sk->sk_family == PF_INET && + (sk->sk_route_caps & NETIF_F_IP_CSUM)) || + (sk->sk_family == PF_INET6 && + (sk->sk_route_caps & NETIF_F_IPV6_CSUM)); +} + static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, struct iov_iter *from, char *to, int copy, int offset) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf7ef7be79f0..92b3e61b847d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1018,7 +1018,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, ssize_t res; if (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_CSUM_MASK)) + !sk_check_csum_caps(sk)) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1175,7 +1175,7 @@ new_segment: /* * Check whether we can use HW checksum. */ - if (sk->sk_route_caps & NETIF_F_CSUM_MASK) + if (sk_check_csum_caps(sk)) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); -- cgit v1.2.3 From 6ae23ad36253a8033c5714c52b691b84456487c5 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:46 -0800 Subject: net: Add driver helper functions to determine checksum offloadability Add skb_csum_offload_chk driver helper function to determine if a device with limited checksum offload capabilities is able to offload the checksum for a given packet. This patch includes: - The skb_csum_offload_chk function. Returns true if checksum is offloadable, else false. 
Optionally, in the case that the checksum is not offloadable, the function can call skb_checksum_help to resolve the checksum. skb_csum_offload_chk also returns whether the checksum refers to an encapsulated checksum. - Definition of skb_csum_offl_spec structure that caller uses to indicate rules about what it can offload (e.g. IPv4/v6, TCP/UDP only, whether encapsulated checksums can be offloaded, whether checksum with IPv6 extension headers can be offloaded). - Ancillary functions called skb_csum_offload_chk_help, skb_csum_off_chk_help_cmn, skb_csum_off_chk_help_cmn_v4_only. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 78 ++++++++++++++++++++++++++ net/core/dev.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 214 insertions(+) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 283984b67cd9..9fb6395967de 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2522,6 +2522,71 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } +struct skb_csum_offl_spec { + __u16 ipv4_okay:1, + ipv6_okay:1, + encap_okay:1, + ip_options_okay:1, + ext_hdrs_okay:1, + tcp_okay:1, + udp_okay:1, + sctp_okay:1, + vlan_okay:1, + no_encapped_ipv6:1, + no_not_encapped:1; +}; + +bool __skb_csum_offload_chk(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec, + bool *csum_encapped, + bool csum_help); + +static inline bool skb_csum_offload_chk(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec, + bool *csum_encapped, + bool csum_help) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return false; + + return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help); +} + +static inline bool skb_csum_offload_chk_help(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec) +{ + bool csum_encapped; + + return skb_csum_offload_chk(skb, spec, &csum_encapped, true); +} + +static inline bool
skb_csum_off_chk_help_cmn(struct sk_buff *skb) +{ + static const struct skb_csum_offl_spec csum_offl_spec = { + .ipv4_okay = 1, + .ip_options_okay = 1, + .ipv6_okay = 1, + .vlan_okay = 1, + .tcp_okay = 1, + .udp_okay = 1, + }; + + return skb_csum_offload_chk_help(skb, &csum_offl_spec); +} + +static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb) +{ + static const struct skb_csum_offl_spec csum_offl_spec = { + .ipv4_okay = 1, + .ip_options_okay = 1, + .tcp_okay = 1, + .udp_okay = 1, + .vlan_okay = 1, + }; + + return skb_csum_offload_chk_help(skb, &csum_offl_spec); +} + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -3711,6 +3776,19 @@ static inline bool can_checksum_protocol(netdev_features_t features, } } +/* Map an ethertype into IP protocol if possible */ +static inline int eproto_to_ipproto(int eproto) +{ + switch (eproto) { + case htons(ETH_P_IP): + return IPPROTO_IP; + case htons(ETH_P_IPV6): + return IPPROTO_IPV6; + default: + return -1; + } +} + #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev); #else diff --git a/net/core/dev.c b/net/core/dev.c index 45b013f27625..914b4a24c654 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -138,6 +138,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2471,6 +2472,141 @@ out: } EXPORT_SYMBOL(skb_checksum_help); +/* skb_csum_offload_check - Driver helper function to determine if a device + * with limited checksum offload capabilities is able to offload the checksum + * for a given packet. + * + * Arguments: + * skb - sk_buff for the packet in question + * spec - contains the description of what device can offload + * csum_encapped - returns true if the checksum being offloaded is + * encpasulated. That is it is checksum for the transport header + * in the inner headers. 
+ * checksum_help - when set indicates that helper function should + * call skb_checksum_help if offload checks fail + * + * Returns: + * true: Packet has passed the checksum checks and should be offloadable to + * the device (a driver may still need to check for additional + * restrictions of its device) + * false: Checksum is not offloadable. If checksum_help was set then + * skb_checksum_help was called to resolve checksum for non-GSO + * packets and when IP protocol is not SCTP + */ +bool __skb_csum_offload_chk(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec, + bool *csum_encapped, + bool csum_help) +{ + struct iphdr *iph; + struct ipv6hdr *ipv6; + void *nhdr; + int protocol; + u8 ip_proto; + + if (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) { + if (!spec->vlan_okay) + goto need_help; + } + + /* We check whether the checksum refers to a transport layer checksum in + * the outermost header or an encapsulated transport layer checksum that + * corresponds to the inner headers of the skb. If the checksum is for + * something else in the packet we need help. 
+ */ + if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) { + /* Non-encapsulated checksum */ + protocol = eproto_to_ipproto(vlan_get_protocol(skb)); + nhdr = skb_network_header(skb); + *csum_encapped = false; + if (spec->no_not_encapped) + goto need_help; + } else if (skb->encapsulation && spec->encap_okay && + skb_checksum_start_offset(skb) == + skb_inner_transport_offset(skb)) { + /* Encapsulated checksum */ + *csum_encapped = true; + switch (skb->inner_protocol_type) { + case ENCAP_TYPE_ETHER: + protocol = eproto_to_ipproto(skb->inner_protocol); + break; + case ENCAP_TYPE_IPPROTO: + protocol = skb->inner_protocol; + break; + } + nhdr = skb_inner_network_header(skb); + } else { + goto need_help; + } + + switch (protocol) { + case IPPROTO_IP: + if (!spec->ipv4_okay) + goto need_help; + iph = nhdr; + ip_proto = iph->protocol; + if (iph->ihl != 5 && !spec->ip_options_okay) + goto need_help; + break; + case IPPROTO_IPV6: + if (!spec->ipv6_okay) + goto need_help; + if (spec->no_encapped_ipv6 && *csum_encapped) + goto need_help; + ipv6 = nhdr; + nhdr += sizeof(*ipv6); + ip_proto = ipv6->nexthdr; + break; + default: + goto need_help; + } + +ip_proto_again: + switch (ip_proto) { + case IPPROTO_TCP: + if (!spec->tcp_okay || + skb->csum_offset != offsetof(struct tcphdr, check)) + goto need_help; + break; + case IPPROTO_UDP: + if (!spec->udp_okay || + skb->csum_offset != offsetof(struct udphdr, check)) + goto need_help; + break; + case IPPROTO_SCTP: + if (!spec->sctp_okay || + skb->csum_offset != offsetof(struct sctphdr, checksum)) + goto cant_help; + break; + case NEXTHDR_HOP: + case NEXTHDR_ROUTING: + case NEXTHDR_DEST: { + u8 *opthdr = nhdr; + + if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay) + goto need_help; + + ip_proto = opthdr[0]; + nhdr += (opthdr[1] + 1) << 3; + + goto ip_proto_again; + } + default: + goto need_help; + } + + /* Passed the tests for offloading checksum */ + return true; + +need_help: + if (csum_help && 
!skb_shinfo(skb)->gso_size) + skb_checksum_help(skb); +cant_help: + return false; +} +EXPORT_SYMBOL(__skb_csum_offload_chk); + __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; -- cgit v1.2.3 From 33f11d16142b06588eedfc1dd8cf93790979a712 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 15 Dec 2015 15:41:35 -0800 Subject: ila: Create net/ipv6/ila directory Create ila directory in preparation for supporting other hooks in the kernel than LWT for doing ILA. This includes: - Moving ila.c to ila/ila_lwt.c - Splitting out some common functions into ila_common.c Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/Makefile | 2 +- net/ipv6/ila.c | 229 ---------------------------------------------- net/ipv6/ila/Makefile | 7 ++ net/ipv6/ila/ila.h | 46 ++++++++++ net/ipv6/ila/ila_common.c | 95 +++++++++++++++++++ net/ipv6/ila/ila_lwt.c | 152 ++++++++++++++++++++++++++++++ 6 files changed, 301 insertions(+), 230 deletions(-) delete mode 100644 net/ipv6/ila.c create mode 100644 net/ipv6/ila/Makefile create mode 100644 net/ipv6/ila/ila.h create mode 100644 net/ipv6/ila/ila_common.c create mode 100644 net/ipv6/ila/ila_lwt.c (limited to 'net') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2c900c7b7eb1..2fbd90bf8d33 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -34,7 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_IPV6_MIP6) += mip6.o -obj-$(CONFIG_IPV6_ILA) += ila.o +obj-$(CONFIG_IPV6_ILA) += ila/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c deleted file mode 100644 index 1a6852e1ac69..000000000000 --- a/net/ipv6/ila.c +++ /dev/null @@ -1,229 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include 
-#include - -struct ila_params { - __be64 locator; - __be64 locator_match; - __wsum csum_diff; -}; - -static inline struct ila_params *ila_params_lwtunnel( - struct lwtunnel_state *lwstate) -{ - return (struct ila_params *)lwstate->data; -} - -static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) -{ - __be32 diff[] = { - ~from[0], ~from[1], to[0], to[1], - }; - - return csum_partial(diff, sizeof(diff), 0); -} - -static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) -{ - if (*(__be64 *)&ip6h->daddr == p->locator_match) - return p->csum_diff; - else - return compute_csum_diff8((__be32 *)&ip6h->daddr, - (__be32 *)&p->locator); -} - -static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) -{ - __wsum diff; - struct ipv6hdr *ip6h = ipv6_hdr(skb); - size_t nhoff = sizeof(struct ipv6hdr); - - /* First update checksum */ - switch (ip6h->nexthdr) { - case NEXTHDR_TCP: - if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { - struct tcphdr *th = (struct tcphdr *) - (skb_network_header(skb) + nhoff); - - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(&th->check, skb, - diff, true); - } - break; - case NEXTHDR_UDP: - if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) { - struct udphdr *uh = (struct udphdr *) - (skb_network_header(skb) + nhoff); - - if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(&uh->check, skb, - diff, true); - if (!uh->check) - uh->check = CSUM_MANGLED_0; - } - } - break; - case NEXTHDR_ICMP: - if (likely(pskb_may_pull(skb, - nhoff + sizeof(struct icmp6hdr)))) { - struct icmp6hdr *ih = (struct icmp6hdr *) - (skb_network_header(skb) + nhoff); - - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, - diff, true); - } - break; - } - - /* Now change destination address */ - *(__be64 *)&ip6h->daddr = p->locator; -} - -static int ila_output(struct 
net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - - if (skb->protocol != htons(ETH_P_IPV6)) - goto drop; - - update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); - - return dst->lwtstate->orig_output(net, sk, skb); - -drop: - kfree_skb(skb); - return -EINVAL; -} - -static int ila_input(struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - - if (skb->protocol != htons(ETH_P_IPV6)) - goto drop; - - update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); - - return dst->lwtstate->orig_input(skb); - -drop: - kfree_skb(skb); - return -EINVAL; -} - -static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { - [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, -}; - -static int ila_build_state(struct net_device *dev, struct nlattr *nla, - unsigned int family, const void *cfg, - struct lwtunnel_state **ts) -{ - struct ila_params *p; - struct nlattr *tb[ILA_ATTR_MAX + 1]; - size_t encap_len = sizeof(*p); - struct lwtunnel_state *newts; - const struct fib6_config *cfg6 = cfg; - int ret; - - if (family != AF_INET6) - return -EINVAL; - - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, - ila_nl_policy); - if (ret < 0) - return ret; - - if (!tb[ILA_ATTR_LOCATOR]) - return -EINVAL; - - newts = lwtunnel_state_alloc(encap_len); - if (!newts) - return -ENOMEM; - - newts->len = encap_len; - p = ila_params_lwtunnel(newts); - - p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); - - if (cfg6->fc_dst_len > sizeof(__be64)) { - /* Precompute checksum difference for translation since we - * know both the old locator and the new one. 
- */ - p->locator_match = *(__be64 *)&cfg6->fc_dst; - p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator_match, (__be32 *)&p->locator); - } - - newts->type = LWTUNNEL_ENCAP_ILA; - newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | - LWTUNNEL_STATE_INPUT_REDIRECT; - - *ts = newts; - - return 0; -} - -static int ila_fill_encap_info(struct sk_buff *skb, - struct lwtunnel_state *lwtstate) -{ - struct ila_params *p = ila_params_lwtunnel(lwtstate); - - if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator)) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) -{ - /* No encapsulation overhead */ - return 0; -} - -static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) -{ - struct ila_params *a_p = ila_params_lwtunnel(a); - struct ila_params *b_p = ila_params_lwtunnel(b); - - return (a_p->locator != b_p->locator); -} - -static const struct lwtunnel_encap_ops ila_encap_ops = { - .build_state = ila_build_state, - .output = ila_output, - .input = ila_input, - .fill_encap = ila_fill_encap_info, - .get_encap_size = ila_encap_nlsize, - .cmp_encap = ila_encap_cmp, -}; - -static int __init ila_init(void) -{ - return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); -} - -static void __exit ila_fini(void) -{ - lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); -} - -module_init(ila_init); -module_exit(ila_fini); -MODULE_AUTHOR("Tom Herbert "); -MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile new file mode 100644 index 000000000000..31d136be2f21 --- /dev/null +++ b/net/ipv6/ila/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for ILA module +# + +obj-$(CONFIG_IPV6_ILA) += ila.o + +ila-objs := ila_common.o ila_lwt.o diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h new file mode 100644 index 000000000000..b94081ff2f8a --- /dev/null +++ b/net/ipv6/ila/ila.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2015 Tom Herbert 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + */ + +#ifndef __ILA_H +#define __ILA_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct ila_params { + __be64 locator; + __be64 locator_match; + __wsum csum_diff; +}; + +static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], to[0], to[1], + }; + + return csum_partial(diff, sizeof(diff), 0); +} + +void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); + +int ila_lwt_init(void); +void ila_lwt_fini(void); + +#endif /* __ILA_H */ diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c new file mode 100644 index 000000000000..64e1904991ac --- /dev/null +++ b/net/ipv6/ila/ila_common.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ila.h" + +static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +{ + if (*(__be64 *)&ip6h->daddr == p->locator_match) + return p->csum_diff; + else + return compute_csum_diff8((__be32 *)&ip6h->daddr, + (__be32 *)&p->locator); +} + +void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) +{ + __wsum diff; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + size_t nhoff = sizeof(struct ipv6hdr); + + /* First update checksum */ + switch (ip6h->nexthdr) { + case NEXTHDR_TCP: + if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { + struct tcphdr *th = (struct tcphdr *) + (skb_network_header(skb) + nhoff); + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&th->check, skb, + diff, true); + } + break; + case NEXTHDR_UDP: + if (likely(pskb_may_pull(skb, nhoff + 
sizeof(struct udphdr)))) { + struct udphdr *uh = (struct udphdr *) + (skb_network_header(skb) + nhoff); + + if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&uh->check, skb, + diff, true); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + } + break; + case NEXTHDR_ICMP: + if (likely(pskb_may_pull(skb, + nhoff + sizeof(struct icmp6hdr)))) { + struct icmp6hdr *ih = (struct icmp6hdr *) + (skb_network_header(skb) + nhoff); + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, + diff, true); + } + break; + } + + /* Now change destination address */ + *(__be64 *)&ip6h->daddr = p->locator; +} + +static int __init ila_init(void) +{ + int ret; + + ret = ila_lwt_init(); + + if (ret) + goto fail_lwt; + +fail_lwt: + return ret; +} + +static void __exit ila_fini(void) +{ + ila_lwt_fini(); +} + +module_init(ila_init); +module_exit(ila_fini); +MODULE_AUTHOR("Tom Herbert "); +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c new file mode 100644 index 000000000000..2ae3c4fd8aab --- /dev/null +++ b/net/ipv6/ila/ila_lwt.c @@ -0,0 +1,152 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ila.h" + +static inline struct ila_params *ila_params_lwtunnel( + struct lwtunnel_state *lwstate) +{ + return (struct ila_params *)lwstate->data; +} + +static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + + return dst->lwtstate->orig_output(net, sk, skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int ila_input(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + update_ipv6_locator(skb, 
ila_params_lwtunnel(dst->lwtstate)); + + return dst->lwtstate->orig_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { + [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, +}; + +static int ila_build_state(struct net_device *dev, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct ila_params *p; + struct nlattr *tb[ILA_ATTR_MAX + 1]; + size_t encap_len = sizeof(*p); + struct lwtunnel_state *newts; + const struct fib6_config *cfg6 = cfg; + int ret; + + if (family != AF_INET6) + return -EINVAL; + + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, + ila_nl_policy); + if (ret < 0) + return ret; + + if (!tb[ILA_ATTR_LOCATOR]) + return -EINVAL; + + newts = lwtunnel_state_alloc(encap_len); + if (!newts) + return -ENOMEM; + + newts->len = encap_len; + p = ila_params_lwtunnel(newts); + + p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); + + if (cfg6->fc_dst_len > sizeof(__be64)) { + /* Precompute checksum difference for translation since we + * know both the old locator and the new one. 
+ */ + p->locator_match = *(__be64 *)&cfg6->fc_dst; + p->csum_diff = compute_csum_diff8( + (__be32 *)&p->locator_match, (__be32 *)&p->locator); + } + + newts->type = LWTUNNEL_ENCAP_ILA; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | + LWTUNNEL_STATE_INPUT_REDIRECT; + + *ts = newts; + + return 0; +} + +static int ila_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct ila_params *p = ila_params_lwtunnel(lwtstate); + + if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + /* No encapsulation overhead */ + return 0; +} + +static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct ila_params *a_p = ila_params_lwtunnel(a); + struct ila_params *b_p = ila_params_lwtunnel(b); + + return (a_p->locator != b_p->locator); +} + +static const struct lwtunnel_encap_ops ila_encap_ops = { + .build_state = ila_build_state, + .output = ila_output, + .input = ila_input, + .fill_encap = ila_fill_encap_info, + .get_encap_size = ila_encap_nlsize, + .cmp_encap = ila_encap_cmp, +}; + +int ila_lwt_init(void) +{ + return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); +} + +void ila_lwt_fini(void) +{ + lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); +} -- cgit v1.2.3 From fc9e50f5a5a4e1fa9ba2756f745a13e693cf6a06 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 15 Dec 2015 15:41:37 -0800 Subject: netlink: add a start callback for starting a netlink dump The start callback allows the caller to set up a context for the dump callbacks. Presumably, the context can then be destroyed in the done callback. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 2 ++ include/net/genetlink.h | 2 ++ net/netlink/af_netlink.c | 4 ++++ net/netlink/genetlink.c | 16 ++++++++++++++++ 4 files changed, 24 insertions(+) (limited to 'net') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 639e9b8b0e4d..0b41959aab9f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); @@ -153,6 +154,7 @@ struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *skb, struct netlink_callback *); int (*done)(struct netlink_callback *); void *data; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b6b6dcb018d..43c0e771f417 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) * @flags: flags * @policy: attribute validation policy * @doit: standard command callback + * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps * @ops_list: operations list @@ -122,6 +123,7 @@ struct genl_ops { const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*start)(struct netlink_callback *cb); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 59651af8cc27..81dc1bb6e016 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2915,6 +2915,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 
cb = &nlk->cb; memset(cb, 0, sizeof(*cb)); + cb->start = control->start; cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; @@ -2927,6 +2928,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, mutex_unlock(nlk->cb_mutex); + if (cb->start) + cb->start(cb); + ret = netlink_dump(sk); sock_put(sk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index bc0e504f33a6..8e63662c6fb0 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_start(struct netlink_callback *cb) +{ + /* our ops are always const - netlink API doesn't propagate that */ + const struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->start) { + genl_lock(); + rc = ops->start(cb); + genl_unlock(); + } + return rc; +} + static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { /* our ops are always const - netlink API doesn't propagate that */ @@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family, .module = family->module, /* we have const, but the netlink API doesn't */ .data = (void *)ops, + .start = genl_lock_start, .dump = genl_lock_dumpit, .done = genl_lock_done, }; @@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family, } else { struct netlink_dump_control c = { .module = family->module, + .start = ops->start, .dump = ops->dumpit, .done = ops->done, }; -- cgit v1.2.3 From 7f00feaf107645d95a6d87e99b4d141ac0a08efd Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 15 Dec 2015 15:41:38 -0800 Subject: ila: Add generic ILA translation facility This patch implements an ILA tanslation table. This table can be configured with identifier to locator mappings, and can be be queried to resolve a mapping. Queries can be parameterized based on interface, direction (incoming or outoing), and matching locator. 
The table is implemented using rhashtable and is configured via netlink (through "ip ila .." in iproute). The table may be used as an alternative means to do ILA translations other than the lw tunnels. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ila.h | 18 ++ include/uapi/linux/ila.h | 22 ++ net/ipv6/ila/Makefile | 2 +- net/ipv6/ila/ila.h | 2 + net/ipv6/ila/ila_common.c | 8 + net/ipv6/ila/ila_xlat.c | 680 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 731 insertions(+), 1 deletion(-) create mode 100644 include/net/ila.h create mode 100644 net/ipv6/ila/ila_xlat.c (limited to 'net') diff --git a/include/net/ila.h b/include/net/ila.h new file mode 100644 index 000000000000..9f4f43e94ae4 --- /dev/null +++ b/include/net/ila.h @@ -0,0 +1,18 @@ +/* + * ILA kernel interface + * + * Copyright (c) 2015 Tom Herbert + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ */ + +#ifndef _NET_ILA_H +#define _NET_ILA_H + +int ila_xlat_outgoing(struct sk_buff *skb); +int ila_xlat_incoming(struct sk_buff *skb); + +#endif /* _NET_ILA_H */ diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h index 7ed9e670814e..abde7bbd6f3b 100644 --- a/include/uapi/linux/ila.h +++ b/include/uapi/linux/ila.h @@ -3,13 +3,35 @@ #ifndef _UAPI_LINUX_ILA_H #define _UAPI_LINUX_ILA_H +/* NETLINK_GENERIC related info */ +#define ILA_GENL_NAME "ila" +#define ILA_GENL_VERSION 0x1 + enum { ILA_ATTR_UNSPEC, ILA_ATTR_LOCATOR, /* u64 */ + ILA_ATTR_IDENTIFIER, /* u64 */ + ILA_ATTR_LOCATOR_MATCH, /* u64 */ + ILA_ATTR_IFINDEX, /* s32 */ + ILA_ATTR_DIR, /* u32 */ __ILA_ATTR_MAX, }; #define ILA_ATTR_MAX (__ILA_ATTR_MAX - 1) +enum { + ILA_CMD_UNSPEC, + ILA_CMD_ADD, + ILA_CMD_DEL, + ILA_CMD_GET, + + __ILA_CMD_MAX, +}; + +#define ILA_CMD_MAX (__ILA_CMD_MAX - 1) + +#define ILA_DIR_IN (1 << 0) +#define ILA_DIR_OUT (1 << 1) + #endif /* _UAPI_LINUX_ILA_H */ diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile index 31d136be2f21..4b32e5921e5c 100644 --- a/net/ipv6/ila/Makefile +++ b/net/ipv6/ila/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_IPV6_ILA) += ila.o -ila-objs := ila_common.o ila_lwt.o +ila-objs := ila_common.o ila_lwt.o ila_xlat.o diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index b94081ff2f8a..28542cb2b387 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -42,5 +42,7 @@ void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); int ila_lwt_init(void); void ila_lwt_fini(void); +int ila_xlat_init(void); +void ila_xlat_fini(void); #endif /* __ILA_H */ diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 64e1904991ac..32dc9aab7297 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -80,12 +80,20 @@ static int __init ila_init(void) if (ret) goto fail_lwt; + ret = ila_xlat_init(); + if (ret) + goto fail_xlat; + + return 0; +fail_xlat: + ila_lwt_fini(); fail_lwt: return ret; } static void __exit 
ila_fini(void) { + ila_xlat_fini(); ila_lwt_fini(); } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c new file mode 100644 index 000000000000..295ca29a23c3 --- /dev/null +++ b/net/ipv6/ila/ila_xlat.c @@ -0,0 +1,680 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ila.h" + +struct ila_xlat_params { + struct ila_params ip; + __be64 identifier; + int ifindex; + unsigned int dir; +}; + +struct ila_map { + struct ila_xlat_params p; + struct rhash_head node; + struct ila_map __rcu *next; + struct rcu_head rcu; +}; + +static unsigned int ila_net_id; + +struct ila_net { + struct rhashtable rhash_table; + spinlock_t *locks; /* Bucket locks for entry manipulation */ + unsigned int locks_mask; + bool hooks_registered; +}; + +#define LOCKS_PER_CPU 10 + +static int alloc_ila_locks(struct ila_net *ilan) +{ + unsigned int i, size; + unsigned int nr_pcpus = num_possible_cpus(); + + nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU); + + if (sizeof(spinlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(spinlock_t) > PAGE_SIZE) + ilan->locks = vmalloc(size * sizeof(spinlock_t)); + else +#endif + ilan->locks = kmalloc_array(size, sizeof(spinlock_t), + GFP_KERNEL); + if (!ilan->locks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&ilan->locks[i]); + } + ilan->locks_mask = size - 1; + + return 0; +} + +static u32 hashrnd __read_mostly; +static __always_inline void __ila_hash_secret_init(void) +{ + net_get_random_once(&hashrnd, sizeof(hashrnd)); +} + +static inline u32 ila_identifier_hash(__be64 identifier) +{ + u32 *v = (u32 *)&identifier; + + return jhash_2words(v[0], v[1], hashrnd); +} + +static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier) +{ + return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask]; +} + +static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc, + int ifindex, unsigned 
int dir) +{ + return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) || + (ila->p.ifindex && ila->p.ifindex != ifindex) || + !(ila->p.dir & dir); +} + +static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p) +{ + return (ila->p.ip.locator_match != p->ip.locator_match) || + (ila->p.ifindex != p->ifindex) || + (ila->p.dir != p->dir); +} + +static int ila_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct ila_map *ila = obj; + + return (ila->p.identifier != *(__be64 *)arg->key); +} + +static inline int ila_order(struct ila_map *ila) +{ + int score = 0; + + if (ila->p.ip.locator_match) + score += 1 << 0; + + if (ila->p.ifindex) + score += 1 << 1; + + return score; +} + +static const struct rhashtable_params rht_params = { + .nelem_hint = 1024, + .head_offset = offsetof(struct ila_map, node), + .key_offset = offsetof(struct ila_map, p.identifier), + .key_len = sizeof(u64), /* identifier */ + .max_size = 1048576, + .min_size = 256, + .automatic_shrinking = true, + .obj_cmpfn = ila_cmpfn, +}; + +static struct genl_family ila_nl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = ILA_GENL_NAME, + .version = ILA_GENL_VERSION, + .maxattr = ILA_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, +}; + +static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { + [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, }, + [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, + [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, + [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, + [ILA_ATTR_DIR] = { .type = NLA_U32, }, +}; + +static int parse_nl_config(struct genl_info *info, + struct ila_xlat_params *p) +{ + memset(p, 0, sizeof(*p)); + + if (info->attrs[ILA_ATTR_IDENTIFIER]) + p->identifier = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_IDENTIFIER]); + + if (info->attrs[ILA_ATTR_LOCATOR]) + p->ip.locator = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_LOCATOR]); + + if (info->attrs[ILA_ATTR_LOCATOR_MATCH]) + 
p->ip.locator_match = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_LOCATOR_MATCH]); + + if (info->attrs[ILA_ATTR_IFINDEX]) + p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); + + if (info->attrs[ILA_ATTR_DIR]) + p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]); + + return 0; +} + +/* Must be called with rcu readlock */ +static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc, + int ifindex, + unsigned int dir, + struct ila_net *ilan) +{ + struct ila_map *ila; + + ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params); + while (ila) { + if (!ila_cmp_wildcards(ila, loc, ifindex, dir)) + return ila; + ila = rcu_access_pointer(ila->next); + } + + return NULL; +} + +/* Must be called with rcu readlock */ +static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p, + struct ila_net *ilan) +{ + struct ila_map *ila; + + ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + rht_params); + while (ila) { + if (!ila_cmp_params(ila, p)) + return ila; + ila = rcu_access_pointer(ila->next); + } + + return NULL; +} + +static inline void ila_release(struct ila_map *ila) +{ + kfree_rcu(ila, rcu); +} + +static void ila_free_cb(void *ptr, void *arg) +{ + struct ila_map *ila = (struct ila_map *)ptr, *next; + + /* Assume rcu_readlock held */ + while (ila) { + next = rcu_access_pointer(ila->next); + ila_release(ila); + ila = next; + } +} + +static int ila_xlat_addr(struct sk_buff *skb, int dir); + +static unsigned int +ila_nf_input(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + ila_xlat_addr(skb, ILA_DIR_IN); + return NF_ACCEPT; +} + +static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = { + { + .hook = ila_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = -1, + }, +}; + +static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_map *ila, *head; + spinlock_t *lock = 
ila_get_lock(ilan, p->identifier); + int err = 0, order; + + if (!ilan->hooks_registered) { + /* We defer registering net hooks in the namespace until the + * first mapping is added. + */ + err = nf_register_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); + if (err) + return err; + + ilan->hooks_registered = true; + } + + ila = kzalloc(sizeof(*ila), GFP_KERNEL); + if (!ila) + return -ENOMEM; + + ila->p = *p; + + if (p->ip.locator_match) { + /* Precompute checksum difference for translation since we + * know both the old identifier and the new one. + */ + ila->p.ip.csum_diff = compute_csum_diff8( + (__be32 *)&p->ip.locator_match, + (__be32 *)&p->ip.locator); + } + + order = ila_order(ila); + + spin_lock(lock); + + head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + rht_params); + if (!head) { + /* New entry for the rhash_table */ + err = rhashtable_lookup_insert_fast(&ilan->rhash_table, + &ila->node, rht_params); + } else { + struct ila_map *tila = head, *prev = NULL; + + do { + if (!ila_cmp_params(tila, p)) { + err = -EEXIST; + goto out; + } + + if (order > ila_order(tila)) + break; + + prev = tila; + tila = rcu_dereference_protected(tila->next, + lockdep_is_held(lock)); + } while (tila); + + if (prev) { + /* Insert in sub list of head */ + RCU_INIT_POINTER(ila->next, tila); + rcu_assign_pointer(prev->next, ila); + } else { + /* Make this ila new head */ + RCU_INIT_POINTER(ila->next, head); + err = rhashtable_replace_fast(&ilan->rhash_table, + &head->node, + &ila->node, rht_params); + if (err) + goto out; + } + } + +out: + spin_unlock(lock); + + if (err) + kfree(ila); + + return err; +} + +static int ila_del_mapping(struct net *net, struct ila_xlat_params *p) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_map *ila, *head, *prev; + spinlock_t *lock = ila_get_lock(ilan, p->identifier); + int err = -ENOENT; + + spin_lock(lock); + + head = rhashtable_lookup_fast(&ilan->rhash_table, + &p->identifier, rht_params); + 
ila = head; + + prev = NULL; + + while (ila) { + if (ila_cmp_params(ila, p)) { + prev = ila; + ila = rcu_dereference_protected(ila->next, + lockdep_is_held(lock)); + continue; + } + + err = 0; + + if (prev) { + /* Not head, just delete from list */ + rcu_assign_pointer(prev->next, ila->next); + } else { + /* It is the head. If there is something in the + * sublist we need to make a new head. + */ + head = rcu_dereference_protected(ila->next, + lockdep_is_held(lock)); + if (head) { + /* Put first entry in the sublist into the + * table + */ + err = rhashtable_replace_fast( + &ilan->rhash_table, &ila->node, + &head->node, rht_params); + if (err) + goto out; + } else { + /* Entry no longer used */ + err = rhashtable_remove_fast(&ilan->rhash_table, + &ila->node, + rht_params); + } + } + + ila_release(ila); + + break; + } + +out: + spin_unlock(lock); + + return err; +} + +static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_xlat_params p; + int err; + + err = parse_nl_config(info, &p); + if (err) + return err; + + return ila_add_mapping(net, &p); +} + +static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_xlat_params p; + int err; + + err = parse_nl_config(info, &p); + if (err) + return err; + + ila_del_mapping(net, &p); + + return 0; +} + +static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) +{ + if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER, + (__force u64)ila->p.identifier) || + nla_put_u64(msg, ILA_ATTR_LOCATOR, + (__force u64)ila->p.ip.locator) || + nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH, + (__force u64)ila->p.ip.locator_match) || + nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) || + nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir)) + return -1; + + return 0; +} + +static int ila_dump_info(struct ila_map *ila, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + 
hdr = genlmsg_put(skb, portid, seq, &ila_nl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (ila_fill_info(ila, skb) < 0) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_net *ilan = net_generic(net, ila_net_id); + struct sk_buff *msg; + struct ila_xlat_params p; + struct ila_map *ila; + int ret; + + ret = parse_nl_config(info, &p); + if (ret) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rcu_read_lock(); + + ila = ila_lookup_by_params(&p, ilan); + if (ila) { + ret = ila_dump_info(ila, + info->snd_portid, + info->snd_seq, 0, msg, + info->genlhdr->cmd); + } + + rcu_read_unlock(); + + if (ret < 0) + goto out_free; + + return genlmsg_reply(msg, info); + +out_free: + nlmsg_free(msg); + return ret; +} + +struct ila_dump_iter { + struct rhashtable_iter rhiter; +}; + +static int ila_nl_dump_start(struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + + return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter); +} + +static int ila_nl_dump_done(struct netlink_callback *cb) +{ + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + + rhashtable_walk_exit(&iter->rhiter); + + return 0; +} + +static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct rhashtable_iter *rhiter = &iter->rhiter; + struct ila_map *ila; + int ret; + + ret = rhashtable_walk_start(rhiter); + if (ret && ret != -EAGAIN) + goto done; + + for (;;) { + ila = rhashtable_walk_next(rhiter); + + if (IS_ERR(ila)) { + if (PTR_ERR(ila) == -EAGAIN) + continue; + ret = PTR_ERR(ila); + 
goto done; + } else if (!ila) { + break; + } + + while (ila) { + ret = ila_dump_info(ila, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, ILA_CMD_GET); + if (ret) + goto done; + + ila = rcu_access_pointer(ila->next); + } + } + + ret = skb->len; + +done: + rhashtable_walk_stop(rhiter); + return ret; +} + +static const struct genl_ops ila_nl_ops[] = { + { + .cmd = ILA_CMD_ADD, + .doit = ila_nl_cmd_add_mapping, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_DEL, + .doit = ila_nl_cmd_del_mapping, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_GET, + .doit = ila_nl_cmd_get_mapping, + .start = ila_nl_dump_start, + .dumpit = ila_nl_dump, + .done = ila_nl_dump_done, + .policy = ila_nl_policy, + }, +}; + +#define ILA_HASH_TABLE_SIZE 1024 + +static __net_init int ila_init_net(struct net *net) +{ + int err; + struct ila_net *ilan = net_generic(net, ila_net_id); + + err = alloc_ila_locks(ilan); + if (err) + return err; + + rhashtable_init(&ilan->rhash_table, &rht_params); + + return 0; +} + +static __net_exit void ila_exit_net(struct net *net) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + + rhashtable_free_and_destroy(&ilan->rhash_table, ila_free_cb, NULL); + + kvfree(ilan->locks); + + if (ilan->hooks_registered) + nf_unregister_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); +} + +static struct pernet_operations ila_net_ops = { + .init = ila_init_net, + .exit = ila_exit_net, + .id = &ila_net_id, + .size = sizeof(struct ila_net), +}; + +static int ila_xlat_addr(struct sk_buff *skb, int dir) +{ + struct ila_map *ila; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ila_net *ilan = net_generic(net, ila_net_id); + __be64 identifier, locator_match; + size_t nhoff; + + /* Assumes skb contains a valid IPv6 header that is pulled */ + + identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8]; + locator_match = *(__be64 
*)&ip6h->daddr.in6_u.u6_addr8[0]; + nhoff = sizeof(struct ipv6hdr); + + rcu_read_lock(); + + ila = ila_lookup_wildcards(identifier, locator_match, + skb->dev->ifindex, dir, ilan); + if (ila) + update_ipv6_locator(skb, &ila->p.ip); + + rcu_read_unlock(); + + return 0; +} + +int ila_xlat_incoming(struct sk_buff *skb) +{ + return ila_xlat_addr(skb, ILA_DIR_IN); +} +EXPORT_SYMBOL(ila_xlat_incoming); + +int ila_xlat_outgoing(struct sk_buff *skb) +{ + return ila_xlat_addr(skb, ILA_DIR_OUT); +} +EXPORT_SYMBOL(ila_xlat_outgoing); + +int ila_xlat_init(void) +{ + int ret; + + ret = register_pernet_device(&ila_net_ops); + if (ret) + goto exit; + + ret = genl_register_family_with_ops(&ila_nl_family, + ila_nl_ops); + if (ret < 0) + goto unregister; + + return 0; + +unregister: + unregister_pernet_device(&ila_net_ops); +exit: + return ret; +} + +void ila_xlat_fini(void) +{ + genl_unregister_family(&ila_nl_family); + unregister_pernet_device(&ila_net_ops); +} -- cgit v1.2.3 From b613f56ec9baf30edf5d9d607b822532a273dad7 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:02 +0900 Subject: net: diag: split inet_diag_dump_one_icsk into two Currently, inet_diag_dump_one_icsk finds a socket and then dumps its information to userspace. Split it into a part that finds the socket and a part that dumps the information. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/inet_diag.h | 5 +++++ net/ipv4/inet_diag.c | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 0e707f0c1a3e..e7032f041982 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -3,6 +3,7 @@ #include +struct net; struct sock; struct inet_hashinfo; struct nlattr; @@ -41,6 +42,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req); +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + const struct inet_diag_req_v2 *req); + int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); extern int inet_diag_register(const struct inet_diag_handler *handler); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ab9f8a66615d..cfabb8f8f0a0 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, nlmsg_flags, unlh); } -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, - const struct nlmsghdr *nlh, - const struct inet_diag_req_v2 *req) +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + const struct inet_diag_req_v2 *req) { - struct net *net = sock_net(in_skb->sk); - struct sk_buff *rep; struct sock *sk; - int err; - err = -EINVAL; if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], @@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, req->id.idiag_if); #endif else - goto out_nosk; + return ERR_PTR(-EINVAL); - err = -ENOENT; if (!sk) - goto out_nosk; + return ERR_PTR(-ENOENT); - err = sock_diag_check_cookie(sk, req->id.idiag_cookie); - if (err) - goto out; + if 
(sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_gen_put(sk); + return ERR_PTR(-ENOENT); + } + + return sk; +} +EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); + +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = inet_diag_find_one_icsk(net, hashinfo, req); + if (IS_ERR(sk)) + return PTR_ERR(sk); rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); if (!rep) { @@ -409,7 +422,6 @@ out: if (sk) sock_gen_put(sk); -out_nosk: return err; } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -- cgit v1.2.3 From 64be0aed59ad519d6f2160868734f7e278290ac1 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:03 +0900 Subject: net: diag: Add the ability to destroy a socket. This patch adds a SOCK_DESTROY operation, a destroy function pointer to sock_diag_handler, and a diag_destroy function pointer. It does not include any implementation code. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/sock_diag.h | 2 ++ include/net/sock.h | 1 + include/uapi/linux/sock_diag.h | 1 + net/core/sock_diag.c | 23 ++++++++++++++++++++--- 4 files changed, 24 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index fddebc617469..4018b48f2b3b 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -15,6 +15,7 @@ struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); + int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk) } void sock_diag_broadcast_destroy(struct sock *sk); +int sock_diag_destroy(struct sock *sk, int err); #endif diff --git a/include/net/sock.h b/include/net/sock.h index ab0269f4b2cc..6e6e8a25d997 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1060,6 +1060,7 @@ struct proto { void (*destroy_cgroup)(struct mem_cgroup *memcg); struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); #endif + int (*diag_destroy)(struct sock *sk, int err); }; int proto_register(struct proto *prot, int alloc_slab); diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index 49230d36f9ce..bae2d80034d4 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -4,6 +4,7 @@ #include #define SOCK_DIAG_BY_FAMILY 20 +#define SOCK_DESTROY 21 struct sock_diag_req { __u8 sdiag_family; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 0c1d58d43f67..a996ce8c8fb2 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int __sock_diag_cmd(struct sk_buff *skb, 
struct nlmsghdr *nlh) { int err; struct sock_diag_req *req = nlmsg_data(nlh); @@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) hndl = sock_diag_handlers[req->sdiag_family]; if (hndl == NULL) err = -ENOENT; - else + else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY) err = hndl->dump(skb, nlh); + else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy) + err = hndl->destroy(skb, nlh); + else + err = -EOPNOTSUPP; mutex_unlock(&sock_diag_table_mutex); return err; @@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ret; case SOCK_DIAG_BY_FAMILY: - return __sock_diag_rcv_msg(skb, nlh); + case SOCK_DESTROY: + return __sock_diag_cmd(skb, nlh); default: return -EINVAL; } @@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group) return 0; } +int sock_diag_destroy(struct sock *sk, int err) +{ + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!sk->sk_prot->diag_destroy) + return -EOPNOTSUPP; + + return sk->sk_prot->diag_destroy(sk, err); +} +EXPORT_SYMBOL_GPL(sock_diag_destroy); + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { -- cgit v1.2.3 From 6eb5d2e08f071c05ecbe135369c9ad418826cab2 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:04 +0900 Subject: net: diag: Support SOCK_DESTROY for inet sockets. This passes the SOCK_DESTROY operation to the underlying protocol diag handler, or returns -EOPNOTSUPP if that handler does not define a destroy operation. Most of this patch is just renaming functions. This is not strictly necessary, but it would be fairly counterintuitive to have the code to destroy inet sockets be in a function whose name starts with inet_diag_get. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/inet_diag.h | 4 ++++ net/ipv4/inet_diag.c | 23 +++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index e7032f041982..7c27fa1030e8 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,6 +24,10 @@ struct inet_diag_handler { void (*idiag_get_info)(struct sock *sk, struct inet_diag_msg *r, void *info); + + int (*destroy)(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req); + __u16 idiag_type; __u16 idiag_info_size; }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index cfabb8f8f0a0..8bb8e7ad8548 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -426,7 +426,7 @@ out: } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -static int inet_diag_get_exact(struct sk_buff *in_skb, +static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req) { @@ -436,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, handler = inet_diag_lock_handler(req->sdiag_protocol); if (IS_ERR(handler)) err = PTR_ERR(handler); - else + else if (cmd == SOCK_DIAG_BY_FAMILY) err = handler->dump_one(in_skb, nlh, req); + else if (cmd == SOCK_DESTROY && handler->destroy) + err = handler->destroy(in_skb, req); + else + err = -EOPNOTSUPP; inet_diag_unlock_handler(handler); return err; @@ -950,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - return inet_diag_get_exact(in_skb, nlh, &req); + return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -984,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact_compat(skb, nlh); } -static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +static int 
inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); struct net *net = sock_net(skb->sk); @@ -992,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) { + if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && + h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; @@ -1011,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } } - return inet_diag_get_exact(skb, h, nlmsg_data(h)); + return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); } static @@ -1062,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) static const struct sock_diag_handler inet_diag_handler = { .family = AF_INET, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; static const struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; int inet_diag_register(const struct inet_diag_handler *h) -- cgit v1.2.3 From c1e64e298b8cad309091b95d8436a0255c84f54a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:05 +0900 Subject: net: diag: Support destroying TCP sockets. This implements SOCK_DESTROY for TCP sockets. It causes all blocking calls on the socket to fail fast with ECONNABORTED and causes a protocol close of the socket. It informs the other end of the connection by sending a RST, i.e., initiating a TCP ABORT as per RFC 793. ECONNABORTED was chosen for consistency with FreeBSD. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 ++ net/ipv4/Kconfig | 13 +++++++++++++ net/ipv4/tcp.c | 32 ++++++++++++++++++++++++++++++++ net/ipv4/tcp_diag.c | 19 +++++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 6 files changed, 68 insertions(+) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index f80e74c5ad18..3077735b348d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1170,6 +1170,8 @@ void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); +int tcp_abort(struct sock *sk, int err); + static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 416dfa004cfb..c22920525e5d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -436,6 +436,19 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_DIAG_DESTROY + bool "INET: allow privileged process to administratively close sockets" + depends on INET_DIAG + default n + ---help--- + Provides a SOCK_DESTROY operation that allows privileged processes + (e.g., a connection manager or a network administration tool such as + ss) to close sockets opened by other processes. Closing a socket in + this way interrupts any blocking read/write/connect operations on + the socket and causes future socket calls to behave as if the socket + had been disconnected. + If unsure, say N. + menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 92b3e61b847d..2c0e340518d2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3080,6 +3080,38 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); +int tcp_abort(struct sock *sk, int err) +{ + if (!sk_fullsock(sk)) { + sock_gen_put(sk); + return -EOPNOTSUPP; + } + + /* Don't race with userspace socket closes such as tcp_close. 
*/ + lock_sock(sk); + + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ + local_bh_disable(); + bh_lock_sock(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_err = err; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + } + + bh_unlock_sock(sk); + local_bh_enable(); + release_sock(sk); + sock_put(sk); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_abort); + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b31604086edd..4d610934fb39 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -10,6 +10,8 @@ */ #include +#include +#include #include #include @@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int tcp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return sock_diag_destroy(sk, ECONNABORTED); +} +#endif + static const struct inet_diag_handler tcp_diag_handler = { .dump = tcp_diag_dump, .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, .idiag_info_size = sizeof(struct tcp_info), +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = tcp_diag_destroy, +#endif }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index db003438aaf5..7aa13bd3de29 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2342,6 +2342,7 @@ struct proto tcp_prot = { .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, #endif + .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c16e3fbf6854..5382c2662fa2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1890,6 +1890,7 @@ struct proto tcpv6_prot = { .proto_cgroup = tcp_proto_cgroup, #endif .clear_sk = tcp_v6_clear_sk, + .diag_destroy = tcp_abort, }; static const struct inet6_protocol tcpv6_protocol = { -- cgit v1.2.3 From 6857a02af5386e9f5d11734363741dbe6b0a6959 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Dec 2015 15:33:39 -0800 Subject: sctp: use GFP_KERNEL in sctp_init() modules init functions being called from process context, we better use GFP_KERNEL allocations to increase our chances to get these high-order pages we want for SCTP hash tables. This mostly matters if SCTP module is loaded once memory got fragmented. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3d9ea9a48289..6c2c0accc6a0 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1419,7 +1419,7 @@ static __init int sctp_init(void) if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0) continue; sctp_assoc_hashtable = (struct sctp_hashbucket *) - __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); + __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order); } while (!sctp_assoc_hashtable && --order > 0); if (!sctp_assoc_hashtable) { pr_err("Failed association hash alloc\n"); @@ -1452,7 +1452,7 @@ static __init int sctp_init(void) if ((sctp_port_hashsize > (64 * 1024)) && order > 0) continue; sctp_port_hashtable = (struct sctp_bind_hashbucket *) - __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); + __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order); } while (!sctp_port_hashtable && --order > 0); if (!sctp_port_hashtable) { pr_err("Failed bind hash alloc\n"); -- cgit v1.2.3 From c169c59dd5177de2befcd5aa2cee9a1c8abeff61 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: 
Wed, 2 Sep 2015 20:09:56 +0200 Subject: batman-adv: detect local excess vlans in TT request If the local representation of the global TT table of one originator has more VLAN entries than the respective TT update, there is some inconsistency present. By detecting and reporting this inconsistency, the global table gets updated and the excess VLAN will get removed in the process. Reported-by: Alessandro Bolletta Signed-off-by: Simon Wunderlich Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 4228b10c47ea..17822de78ba3 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -2411,8 +2411,8 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, { struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp; struct batadv_orig_node_vlan *vlan; + int i, orig_num_vlan; u32 crc; - int i; /* check if each received CRC matches the locally stored one */ for (i = 0; i < num_vlan; i++) { @@ -2438,6 +2438,18 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, return false; } + /* check if any excess VLANs exist locally for the originator + * which are not mentioned in the TVLV from the originator. 
+ */ + rcu_read_lock(); + orig_num_vlan = 0; + hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) + orig_num_vlan++; + rcu_read_unlock(); + + if (orig_num_vlan > num_vlan) + return false; + return true; } -- cgit v1.2.3 From ad7e2c466d8b0a7056cd248e1df6bb7296e014f7 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 26 Aug 2015 16:33:34 +0200 Subject: batman-adv: unify flags access style in tt global add This should slightly improve readability Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 17822de78ba3..5cf431177f34 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1435,7 +1435,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, * TT_CLIENT_WIFI, therefore they have to be copied in the * client entry */ - tt_global_entry->common.flags |= flags; + common->flags |= flags; /* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only * one originator left in the list and we previously received a -- cgit v1.2.3 From 01f6b5c76a68e294e90a040c651adb90126e802d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 26 Aug 2015 10:31:50 +0200 Subject: batman-adv: Use chain pointer when purging fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chain pointer was already created in batadv_frag_purge_orig to make the checks more readable. Just use the chain pointer everywhere instead of having the same dereference + array access in the most lines of this function. 
Signed-off-by: Sven Eckelmann Acked-by: Martin Hundebøll Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/fragmentation.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 700c96c82a15..20d9282f895b 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -71,14 +71,14 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { chain = &orig_node->fragments[i]; - spin_lock_bh(&orig_node->fragments[i].lock); + spin_lock_bh(&chain->lock); if (!check_cb || check_cb(chain)) { - batadv_frag_clear_chain(&orig_node->fragments[i].head); - orig_node->fragments[i].size = 0; + batadv_frag_clear_chain(&chain->head); + chain->size = 0; } - spin_unlock_bh(&orig_node->fragments[i].lock); + spin_unlock_bh(&chain->lock); } } -- cgit v1.2.3 From c05a57f6fb6f398cde873c5ebe13ae26b3843b7e Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 26 Aug 2015 10:31:51 +0200 Subject: batman-adv: Fix typo 'wether' -> 'whether' Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 45952dcb0b68..5dbcb2e2b497 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -908,7 +908,7 @@ end: * appropriate handlers * @bat_priv: the bat priv with all the soft interface information * @tvlv_handler: tvlv callback function handling the tvlv content - * @ogm_source: flag indicating wether the tvlv is an ogm or a unicast packet + * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet * @orig_node: orig node emitting the ogm packet * @src: source mac address of the unicast packet * @dst: destination mac address of the unicast packet @@ -961,7 +961,7 
@@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, * batadv_tvlv_containers_process - parse the given tvlv buffer to call the * appropriate handlers * @bat_priv: the bat priv with all the soft interface information - * @ogm_source: flag indicating wether the tvlv is an ogm or a unicast packet + * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet * @orig_node: orig node emitting the ogm packet * @src: source mac address of the unicast packet * @dst: destination mac address of the unicast packet -- cgit v1.2.3 From 5a1dd8a4773d4c24e925cc6154826d555a85c370 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 11 Sep 2015 18:04:13 +0200 Subject: batman-adv: lock crc access in bridge loop avoidance We have found some networks in which nodes were constantly requesting other nodes BLA claim tables to synchronize, just to ask for that again once completed. The reason was that the crc checksum of the asked nodes were out of sync due to missing locking and multiple writes to the same crc checksum when adding/removing entries. Therefore the asked nodes constantly reported the wrong crc, which caused repeating requests. To avoid multiple functions changing a backbone gateways crc entry at the same time, lock it using a spinlock. 
Signed-off-by: Simon Wunderlich Tested-by: Alfons Name Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 35 +++++++++++++++++++++++++++++----- net/batman-adv/types.h | 2 ++ 2 files changed, 32 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 191a70290dca..99dcae316ec8 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -260,7 +260,9 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) } /* all claims gone, initialize CRC */ + spin_lock_bh(&backbone_gw->crc_lock); backbone_gw->crc = BATADV_BLA_CRC_INIT; + spin_unlock_bh(&backbone_gw->crc_lock); } /** @@ -408,6 +410,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, entry->lasttime = jiffies; entry->crc = BATADV_BLA_CRC_INIT; entry->bat_priv = bat_priv; + spin_lock_init(&entry->crc_lock); atomic_set(&entry->request_sent, 0); atomic_set(&entry->wait_periods, 0); ether_addr_copy(entry->orig, orig); @@ -557,7 +560,9 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, __be16 crc; memcpy(mac, batadv_announce_mac, 4); + spin_lock_bh(&backbone_gw->crc_lock); crc = htons(backbone_gw->crc); + spin_unlock_bh(&backbone_gw->crc_lock); memcpy(&mac[4], &crc, 2); batadv_bla_send_claim(bat_priv, mac, backbone_gw->vid, @@ -618,14 +623,18 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, "bla_add_claim(): changing ownership for %pM, vid %d\n", mac, BATADV_PRINT_VID(vid)); + spin_lock_bh(&claim->backbone_gw->crc_lock); claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); + spin_unlock_bh(&claim->backbone_gw->crc_lock); batadv_backbone_gw_free_ref(claim->backbone_gw); } /* set (new) backbone gw */ atomic_inc(&backbone_gw->refcount); claim->backbone_gw = backbone_gw; + spin_lock_bh(&backbone_gw->crc_lock); backbone_gw->crc ^= crc16(0, claim->addr, 
ETH_ALEN); + spin_unlock_bh(&backbone_gw->crc_lock); backbone_gw->lasttime = jiffies; claim_free_ref: @@ -653,7 +662,9 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_choose_claim, claim); batadv_claim_free_ref(claim); /* reference from the hash is gone */ + spin_lock_bh(&claim->backbone_gw->crc_lock); claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); + spin_unlock_bh(&claim->backbone_gw->crc_lock); /* don't need the reference from hash_find() anymore */ batadv_claim_free_ref(claim); @@ -664,7 +675,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, u8 *backbone_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; - u16 crc; + u16 backbone_crc, crc; if (memcmp(an_addr, batadv_announce_mac, 4) != 0) return 0; @@ -683,12 +694,16 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", BATADV_PRINT_VID(vid), backbone_gw->orig, crc); - if (backbone_gw->crc != crc) { + spin_lock_bh(&backbone_gw->crc_lock); + backbone_crc = backbone_gw->crc; + spin_unlock_bh(&backbone_gw->crc_lock); + + if (backbone_crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", backbone_gw->orig, BATADV_PRINT_VID(backbone_gw->vid), - backbone_gw->crc, crc); + backbone_crc, crc); batadv_bla_send_request(backbone_gw); } else { @@ -1647,6 +1662,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) struct batadv_bla_claim *claim; struct batadv_hard_iface *primary_if; struct hlist_head *head; + u16 backbone_crc; u32 i; bool is_own; u8 *primary_addr; @@ -1669,11 +1685,15 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); + + spin_lock_bh(&claim->backbone_gw->crc_lock); + 
backbone_crc = claim->backbone_gw->crc; + spin_unlock_bh(&claim->backbone_gw->crc_lock); seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", claim->addr, BATADV_PRINT_VID(claim->vid), claim->backbone_gw->orig, (is_own ? 'x' : ' '), - claim->backbone_gw->crc); + backbone_crc); } rcu_read_unlock(); } @@ -1692,6 +1712,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hard_iface *primary_if; struct hlist_head *head; int secs, msecs; + u16 backbone_crc; u32 i; bool is_own; u8 *primary_addr; @@ -1722,10 +1743,14 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) if (is_own) continue; + spin_lock_bh(&backbone_gw->crc_lock); + backbone_crc = backbone_gw->crc; + spin_unlock_bh(&backbone_gw->crc_lock); + seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", backbone_gw->orig, BATADV_PRINT_VID(backbone_gw->vid), secs, - msecs, backbone_gw->crc); + msecs, backbone_crc); } rcu_read_unlock(); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 9bdb21c2368a..7c386dbb75f0 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -906,6 +906,7 @@ struct batadv_socket_packet { * backbone gateway - no bcast traffic is formwared until the situation was * resolved * @crc: crc16 checksum over all claims + * @crc_lock: lock protecting crc * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ @@ -919,6 +920,7 @@ struct batadv_bla_backbone_gw { atomic_t wait_periods; atomic_t request_sent; u16 crc; + spinlock_t crc_lock; /* protects crc */ atomic_t refcount; struct rcu_head rcu; }; -- cgit v1.2.3 From 566178f853c1aa57be9c16007c7cca07df5d51b6 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Wed, 16 Dec 2015 13:55:04 +0800 Subject: net: sctp: dynamically enable or disable pf state As we all know, the value of pf_retrans >= max_retrans_path can disable pf state. 
The variables of pf_retrans and max_retrans_path can be changed by the userspace application. Sometimes the user expects to disable pf state while the 2 variables are changed to enable pf state. So it is necessary to introduce a new variable to disable pf state. According to the suggestions from Vlad Yasevich, extra1 and extra2 are removed. The initialization of pf_enable is added. Acked-by: Vlad Yasevich Signed-off-by: Zhu Yanjun Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 23 ++++++++++++++++++++++- include/net/netns/sctp.h | 7 +++++++ net/sctp/protocol.c | 3 +++ net/sctp/sm_sideeffect.c | 5 ++++- net/sctp/sysctl.c | 7 +++++++ 5 files changed, 43 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 2ea4c45cf1c8..5de632ed0ec0 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1723,6 +1723,25 @@ addip_enable - BOOLEAN Default: 0 +pf_enable - INTEGER + Enable or disable pf (pf is short for potentially failed) state. A value + of pf_retrans > path_max_retrans also disables pf state. That is, one of + both pf_enable and pf_retrans > path_max_retrans can disable pf state. + Since pf_retrans and path_max_retrans can be changed by userspace + application, sometimes user expects to disable pf state by the value of + pf_retrans > path_max_retrans, but occasionally the value of pf_retrans + or path_max_retrans is changed by the user application, this pf state is + enabled. As such, it is necessary to add this to dynamically enable + and disable pf state. See: + https://datatracker.ietf.org/doc/draft-ietf-tsvwg-sctp-failover for + details. + + 1: Enable pf. + + 0: Disable pf. 
+ + Default: 1 + addip_noauth_enable - BOOLEAN Dynamic Address Reconfiguration (ADD-IP) requires the use of authentication to protect the operations of adding or removing new @@ -1799,7 +1818,9 @@ pf_retrans - INTEGER having to reduce path_max_retrans to a very low value. See: http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt for details. Note also that a value of pf_retrans > path_max_retrans - disables this feature + disables this feature. Since both pf_retrans and path_max_retrans can + be changed by userspace application, a variable pf_enable is used to + disable pf state. Default: 0 diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 8ba379f9e467..c501d67172b1 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -88,6 +88,13 @@ struct netns_sctp { */ int pf_retrans; + /* + * Disable Potentially-Failed feature, the feature is enabled by default + * pf_enable - 0 : disable pf + * - >0 : enable pf + */ + int pf_enable; + /* * Policy for preforming sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_sndbuf diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6c2c0accc6a0..010aced44b6b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1223,6 +1223,9 @@ static int __net_init sctp_defaults_init(struct net *net) /* Max.Burst - 4 */ net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; + /* Enable pf state by default */ + net->sctp.pf_enable = 1; + /* Association.Max.Retrans - 10 attempts * Path.Max.Retrans - 5 attempts (per destination address) * Max.Init.Retransmits - 8 attempts diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 6098d4c42fa9..05cd16400e0b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -477,6 +477,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, struct sctp_transport *transport, int is_hb) { + struct net *net = sock_net(asoc->base.sk); + /* The check for association's overall error counter 
exceeding the * threshold is done in the state function. */ @@ -503,7 +505,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, * is SCTP_ACTIVE, then mark this transport as Partially Failed, * see SCTP Quick Failover Draft, section 5.1 */ - if ((transport->state == SCTP_ACTIVE) && + if (net->sctp.pf_enable && + (transport->state == SCTP_ACTIVE) && (asoc->pf_retrans < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 26d50c565f54..ccbfc93fb8fe 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -308,6 +308,13 @@ static struct ctl_table sctp_net_table[] = { .extra1 = &max_autoclose_min, .extra2 = &max_autoclose_max, }, + { + .procname = "pf_enable", + .data = &init_net.sctp.pf_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } }; -- cgit v1.2.3 From b3379041ddf68ba46c31fce741ddb71675b39d23 Mon Sep 17 00:00:00 2001 From: Hubert Sokolowski Date: Tue, 15 Dec 2015 13:20:30 +0000 Subject: net: Pass ndm_state to route netlink FDB notifications. Before this change applications monitoring FDB notifications were not able to determine whether a new FDB entry is permanent or not: bridge fdb add f1:f2:f3:f4:f5:f8 dev sw0p1 temp self bridge fdb add f1:f2:f3:f4:f5:f9 dev sw0p1 self bridge monitor fdb f1:f2:f3:f4:f5:f8 dev sw0p1 self permanent f1:f2:f3:f4:f5:f9 dev sw0p1 self permanent With this change ndm_state from the original netlink message is passed to the new netlink message sent as notification. bridge fdb add f1:f2:f3:f4:f5:f6 dev sw0p1 self bridge fdb add f1:f2:f3:f4:f5:f7 dev sw0p1 temp self bridge monitor fdb f1:f2:f3:f4:f5:f6 dev sw0p1 self permanent f1:f2:f3:f4:f5:f7 dev sw0p1 self static Signed-off-by: Hubert Sokolowski Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d8b0113d3eec..baf49cb2f23d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2564,7 +2564,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u16 vid, u32 pid, u32 seq, int type, unsigned int flags, - int nlflags) + int nlflags, u16 ndm_state) { struct nlmsghdr *nlh; struct ndmsg *ndm; @@ -2580,7 +2580,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, ndm->ndm_flags = flags; ndm->ndm_type = 0; ndm->ndm_ifindex = dev->ifindex; - ndm->ndm_state = NUD_PERMANENT; + ndm->ndm_state = ndm_state; if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) goto nla_put_failure; @@ -2601,7 +2601,8 @@ static inline size_t rtnl_fdb_nlmsg_size(void) return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); } -static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type) +static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, + u16 ndm_state) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2612,7 +2613,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type) goto errout; err = nlmsg_populate_fdb_fill(skb, dev, addr, vid, - 0, 0, type, NTF_SELF, 0); + 0, 0, type, NTF_SELF, 0, ndm_state); if (err < 0) { kfree_skb(skb); goto errout; @@ -2747,7 +2748,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) nlh->nlmsg_flags); if (!err) { - rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH); + rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH, + ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } @@ -2848,7 +2850,8 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); if (!err) { - rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH); + rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, + 
ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } @@ -2876,7 +2879,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, portid, seq, RTM_NEWNEIGH, NTF_SELF, - NLM_F_MULTI); + NLM_F_MULTI, NUD_PERMANENT); if (err < 0) return err; skip: -- cgit v1.2.3 From 32bc201e1974976b7d3fea9a9b17bb7392ca6394 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Dec 2015 17:50:11 +0800 Subject: ipv6: allow routes to be configured with expire values Add the support for adding expire value to routes, requested by Tom Gundersen for systemd-networkd, and NetworkManager wants it too. Implement it by adding the new RTNETLINK attribute RTA_EXPIRES. Signed-off-by: Xin Long Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + net/ipv6/route.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 123a5af4e8bb..ca764b5da86d 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -311,6 +311,7 @@ enum rtattr_type_t { RTA_PREF, RTA_ENCAP_TYPE, RTA_ENCAP, + RTA_EXPIRES, __RTA_MAX }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c83b6a5b3604..3c8834bc822d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2709,6 +2709,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_PREF] = { .type = NLA_U8 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, + [RTA_EXPIRES] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2809,6 +2810,15 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_ENCAP_TYPE]) cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); + if (tb[RTA_EXPIRES]) { + unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); + + if (addrconf_finite_timeout(timeout)) { + cfg->fc_expires = 
jiffies_to_clock_t(timeout * HZ); + cfg->fc_flags |= RTF_EXPIRES; + } + } + err = 0; errout: return err; -- cgit v1.2.3 From 715f504b118998c41a2079a17e16bf5a8a114885 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 16 Dec 2015 17:22:47 +0100 Subject: ipv6: add IPV6_HDRINCL option for raw sockets Same as in Windows, we miss IPV6_HDRINCL for SOL_IPV6 and SOL_RAW. The SOL_IP/IP_HDRINCL is not available for IPv6 sockets. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/uapi/linux/in6.h | 1 + net/ipv6/raw.c | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 79b12b004ade..318a4828bf98 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -196,6 +196,7 @@ struct in6_flowlabel_req { #define IPV6_IPSEC_POLICY 34 #define IPV6_XFRM_POLICY 35 +#define IPV6_HDRINCL 36 #endif /* diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 99140986e887..fa59dd7a427e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -972,6 +972,11 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, return -EFAULT; switch (optname) { + case IPV6_HDRINCL: + if (sk->sk_type != SOCK_RAW) + return -EINVAL; + inet_sk(sk)->hdrincl = !!val; + return 0; case IPV6_CHECKSUM: if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && level == IPPROTO_IPV6) { @@ -1016,7 +1021,8 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: - if (optname == IPV6_CHECKSUM) + if (optname == IPV6_CHECKSUM || + optname == IPV6_HDRINCL) break; default: return ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -1037,7 +1043,8 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: - if (optname == 
IPV6_CHECKSUM) + if (optname == IPV6_CHECKSUM || + optname == IPV6_HDRINCL) break; default: return compat_ipv6_setsockopt(sk, level, optname, @@ -1057,6 +1064,9 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; switch (optname) { + case IPV6_HDRINCL: + val = inet_sk(sk)->hdrincl; + break; case IPV6_CHECKSUM: /* * We allow getsockopt() for IPPROTO_IPV6-level @@ -1094,7 +1104,8 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: - if (optname == IPV6_CHECKSUM) + if (optname == IPV6_CHECKSUM || + optname == IPV6_HDRINCL) break; default: return ipv6_getsockopt(sk, level, optname, optval, optlen); @@ -1115,7 +1126,8 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: - if (optname == IPV6_CHECKSUM) + if (optname == IPV6_CHECKSUM || + optname == IPV6_HDRINCL) break; default: return compat_ipv6_getsockopt(sk, level, optname, -- cgit v1.2.3 From b4aae759c22e71a3c32144f0b3bc4f2fa4aaae98 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 10 Dec 2015 18:04:07 +0100 Subject: netfilter: meta: add support for setting skb->pkttype This allows redirecting bridged packets to the local machine: ether type ip ether daddr set aa:53:08:12:34:56 meta pkttype set unicast Without 'set unicast', ip stack discards PACKET_OTHERHOST skbs. It is also useful to add support for a '-m cluster like' nft rule (where switch floods packets to several nodes, and each cluster node processes a subset of packets for load distribution). Mangling is restricted to HOST/OTHER/BROAD/MULTICAST, i.e. you cannot set skb->pkt_type to PACKET_KERNEL or change PACKET_LOOPBACK to PACKET_HOST. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 5bcd1b0cc2ec..fe885bf271c5 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -26,6 +26,8 @@ #include #include +#include /* NF_BR_PRE_ROUTING */ + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -190,6 +192,13 @@ err: } EXPORT_SYMBOL_GPL(nft_meta_get_eval); +/* don't change or set _LOOPBACK, _USER, etc. */ +static bool pkt_type_ok(u32 p) +{ + return p == PACKET_HOST || p == PACKET_BROADCAST || + p == PACKET_MULTICAST || p == PACKET_OTHERHOST; +} + void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -205,6 +214,11 @@ void nft_meta_set_eval(const struct nft_expr *expr, case NFT_META_PRIORITY: skb->priority = value; break; + case NFT_META_PKTTYPE: + if (skb->pkt_type != value && + pkt_type_ok(value) && pkt_type_ok(skb->pkt_type)) + skb->pkt_type = value; + break; case NFT_META_NFTRACE: skb->nf_trace = 1; break; @@ -273,6 +287,24 @@ int nft_meta_get_init(const struct nft_ctx *ctx, } EXPORT_SYMBOL_GPL(nft_meta_get_init); +static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx) +{ + unsigned int hooks; + + switch (ctx->afi->family) { + case NFPROTO_BRIDGE: + hooks = 1 << NF_BR_PRE_ROUTING; + break; + case NFPROTO_NETDEV: + hooks = 1 << NF_NETDEV_INGRESS; + break; + default: + return -EOPNOTSUPP; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + int nft_meta_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -290,6 +322,12 @@ int nft_meta_set_init(const struct nft_ctx *ctx, case NFT_META_NFTRACE: len = sizeof(u8); break; + case NFT_META_PKTTYPE: + err = nft_meta_set_init_pkttype(ctx); + if (err) + return 
err; + len = sizeof(u8); + break; default: return -EOPNOTSUPP; } -- cgit v1.2.3 From 8cb964daeba8b626f8fbf779a50635684e42abdb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 18 Dec 2015 15:37:37 +0100 Subject: ila: add NETFILTER dependency The recently added generic ILA translation facility fails to build when CONFIG_NETFILTER is disabled: net/ipv6/ila/ila_xlat.c:229:20: warning: 'struct nf_hook_state' declared inside parameter list net/ipv6/ila/ila_xlat.c:235:27: error: array type has incomplete element type 'struct nf_hook_ops' static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = { This adds an explicit Kconfig dependency to avoid that case. Signed-off-by: Arnd Bergmann Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") Signed-off-by: David S. Miller --- net/ipv6/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 983bb999738c..bb7dabe2ebbf 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -94,6 +94,7 @@ config IPV6_MIP6 config IPV6_ILA tristate "IPv6: Identifier Locator Addressing (ILA)" + depends on NETFILTER select LWTUNNEL ---help--- Support for IPv6 Identifier Locator Addressing (ILA). -- cgit v1.2.3 From cc9da6cc4f56e05cc9e591459fe0192727ff58b3 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Wed, 16 Dec 2015 16:44:38 +0100 Subject: ipv6: addrconf: use stable address generator for ARPHRD_NONE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new address generator mode, using the stable address generator with an automatically generated secret. This is intended as a default address generator mode for device types with no EUI64 implementation. The new generator is used for ARPHRD_NONE interfaces initially, adding default IPv6 autoconf support to e.g. tun interfaces. 
If the addrgenmode is set to 'random', either by default or manually, and no stable secret is available, then a random secret is used as input for the stable-privacy address generator. The secret can be read and modified like manually configured secrets, using the proc interface. Modifying the secret will change the addrgen mode to 'stable-privacy' to indicate that it operates on a known secret. Existing behaviour of the 'stable-privacy' mode is kept unchanged. If a known secret is available when the device is created, then the mode will default to 'stable-privacy' as before. The mode can be manually set to 'random' but it will behave exactly like 'stable-privacy' in this case. The secret will not change. Cc: Hannes Frederic Sowa Cc: 吉藤英明 Signed-off-by: Bjørn Mork Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/ipv6/addrconf.c | 45 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2be1dd5a103f..a30b78090594 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -218,6 +218,7 @@ enum in6_addr_gen_mode { IN6_ADDR_GEN_MODE_EUI64, IN6_ADDR_GEN_MODE_NONE, IN6_ADDR_GEN_MODE_STABLE_PRIVACY, + IN6_ADDR_GEN_MODE_RANDOM, }; /* Bridge section */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 233efa67dc3d..819b7777f3cb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2319,6 +2319,12 @@ static void manage_tempaddrs(struct inet6_dev *idev, } } +static bool is_addr_mode_generate_stable(struct inet6_dev *idev) +{ + return idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY || + idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM; +} + void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) { struct prefix_info *pinfo; @@ -2432,8 +2438,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool 
sllao) in6_dev->token.s6_addr + 8, 8); read_unlock_bh(&in6_dev->lock); tokenized = true; - } else if (in6_dev->addr_gen_mode == - IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + } else if (is_addr_mode_generate_stable(in6_dev) && !ipv6_generate_stable_address(&addr, 0, in6_dev)) { addr_flags |= IFA_F_STABLE_PRIVACY; @@ -3033,6 +3038,17 @@ retry: return 0; } +static void ipv6_gen_mode_random_init(struct inet6_dev *idev) +{ + struct ipv6_stable_secret *s = &idev->cnf.stable_secret; + + if (s->initialized) + return; + s = &idev->cnf.stable_secret; + get_random_bytes(&s->secret, sizeof(s->secret)); + s->initialized = true; +} + static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { struct in6_addr addr; @@ -3043,13 +3059,18 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { + switch (idev->addr_gen_mode) { + case IN6_ADDR_GEN_MODE_RANDOM: + ipv6_gen_mode_random_init(idev); + /* fallthrough */ + case IN6_ADDR_GEN_MODE_STABLE_PRIVACY: if (!ipv6_generate_stable_address(&addr, 0, idev)) addrconf_add_linklocal(idev, &addr, IFA_F_STABLE_PRIVACY); else if (prefix_route) addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); - } else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { + break; + case IN6_ADDR_GEN_MODE_EUI64: /* addrconf_add_linklocal also adds a prefix_route and we * only need to care about prefix routes if ipv6_generate_eui64 * couldn't generate one. 
@@ -3058,6 +3079,11 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) addrconf_add_linklocal(idev, &addr, 0); else if (prefix_route) addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + break; + case IN6_ADDR_GEN_MODE_NONE: + default: + /* will not add any link local address */ + break; } } @@ -3073,7 +3099,8 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_INFINIBAND) && (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && - (dev->type != ARPHRD_6LOWPAN)) { + (dev->type != ARPHRD_6LOWPAN) && + (dev->type != ARPHRD_NONE)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } @@ -3082,6 +3109,11 @@ static void addrconf_dev_config(struct net_device *dev) if (IS_ERR(idev)) return; + /* this device type has no EUI support */ + if (dev->type == ARPHRD_NONE && + idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) + idev->addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM; + addrconf_addr_gen(idev, false); } @@ -4926,7 +4958,8 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (mode != IN6_ADDR_GEN_MODE_EUI64 && mode != IN6_ADDR_GEN_MODE_NONE && - mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY) + mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + mode != IN6_ADDR_GEN_MODE_RANDOM) return -EINVAL; if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && -- cgit v1.2.3 From 6dd9a14e92e54895e143f10fef4d0b9abe109aa9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 16 Dec 2015 13:20:44 -0800 Subject: net: Allow accepted sockets to be bound to l3mdev domain Allow accepted sockets to derive their sk_bound_dev_if setting from the l3mdev domain in which the packets originated. A sysctl setting is added to control the behavior which is similar to sk_mark and sysctl_tcp_fwmark_accept. This effectively allow a process to have a "VRF-global" listen socket, with child sockets bound to the VRF device in which the packet originated. 
A similar behavior can be achieved using sk_mark, but a solution using marks is incomplete as it does not handle duplicate addresses in different L3 domains/VRFs. Allowing sockets to inherit the sk_bound_dev_if from l3mdev domain provides a complete solution. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++++++ include/net/inet_sock.h | 14 ++++++++++++++ include/net/netns/ipv4.h | 3 +++ net/ipv4/syncookies.c | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 11 +++++++++++ net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 1 + net/ipv6/syncookies.c | 4 ++-- 8 files changed, 42 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5de632ed0ec0..ceb44a095a27 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -335,6 +335,14 @@ tcp_keepalive_intvl - INTEGER after probes started. Default value: 75sec i.e. connection will be aborted after ~11 minutes of retries. +tcp_l3mdev_accept - BOOLEAN + Enables child sockets to inherit the L3 master device index. + Enabling this option allows a "global" listen socket to work + across L3 master domains (e.g., VRFs) with connected sockets + derived from the listen socket to be bound to the L3 domain in + which the packets originated. Only valid when the kernel was + compiled with CONFIG_NET_L3_MASTER_DEV. + tcp_low_latency - BOOLEAN If set, the TCP stack makes decisions that prefer lower latency as opposed to higher throughput. 
By default, this diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 625bdf95d673..012b1f91f3ec 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -28,6 +28,7 @@ #include #include #include +#include /** struct ip_options - IP Options * @@ -113,6 +114,19 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) return sk->sk_mark; } +static inline int inet_request_bound_dev_if(const struct sock *sk, + struct sk_buff *skb) +{ +#ifdef CONFIG_NET_L3_MASTER_DEV + struct net *net = sock_net(sk); + + if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) + return l3mdev_master_ifindex_by_index(net, skb->skb_iif); +#endif + + return sk->sk_bound_dev_if; +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index c68926b4899c..d75be32650ba 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -86,6 +86,9 @@ struct netns_ipv4 { int sysctl_fwmark_reflect; int sysctl_tcp_fwmark_accept; +#ifdef CONFIG_NET_L3_MASTER_DEV + int sysctl_tcp_l3mdev_accept; +#endif int sysctl_tcp_mtu_probing; int sysctl_tcp_base_mss; int sysctl_tcp_probe_threshold; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4cbe9f0a4281..643a86c49020 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -351,7 +351,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack.v64 = 0; treq->tfo_listener = false; - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -371,7 +371,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) * hasn't changed since we received the original syn, but I see * no easy way to do this. 
*/ - flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, + flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a0bd7a55193e..41ff1f87dfd7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -915,6 +915,17 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_NET_L3_MASTER_DEV + { + .procname = "tcp_l3mdev_accept", + .data = &init_net.ipv4.sysctl_tcp_l3mdev_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif { .procname = "tcp_mtu_probing", .data = &init_net.ipv4.sysctl_tcp_mtu_probing, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2d656eef7f8e..7b1fddc47019 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6204,7 +6204,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_openreq_init(req, &tmp_opt, skb, sk); /* Note: tcp_v6_init_req() might override ir_iif for link locals */ - inet_rsk(req)->ir_iif = sk->sk_bound_dev_if; + inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); af_ops->init_req(req, sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 205e6745393f..46e92fbd26a8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1276,6 +1276,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, ireq = inet_rsk(req); sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newsk->sk_bound_dev_if = ireq->ir_iif; newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index eaf7ac496d50..2906ef20795e 100644 --- a/net/ipv6/syncookies.c +++ 
b/net/ipv6/syncookies.c @@ -193,7 +193,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->pktopts = skb; } - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) @@ -224,7 +224,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq->ir_v6_loc_addr; - fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_oif = ireq->ir_iif; fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; -- cgit v1.2.3 From 05c74e5e53f6cb07502c3e6a820f33e2777b6605 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 17 Dec 2015 23:51:53 +0100 Subject: bpf: add bpf_skb_load_bytes helper When hacking tc programs with eBPF, one of the issues that come up from time to time is to load addresses from headers. In eBPF as in classic BPF, we have BPF_LD | BPF_ABS | BPF_{B,H,W} instructions that extract a byte, half-word or word out of the skb data though helpers such as bpf_load_pointer() (interpreter case). F.e. extracting a whole IPv6 address could possibly look like ... union v6addr { struct { __u32 p1; __u32 p2; __u32 p3; __u32 p4; }; __u8 addr[16]; }; [...] a.p1 = htonl(load_word(skb, off)); a.p2 = htonl(load_word(skb, off + 4)); a.p3 = htonl(load_word(skb, off + 8)); a.p4 = htonl(load_word(skb, off + 12)); [...] /* access to a.addr[...] 
*/ This work adds a complementary helper bpf_skb_load_bytes() (we also have bpf_skb_store_bytes()) as an alternative where the same call would look like from an eBPF program: ret = bpf_skb_load_bytes(skb, off, addr, sizeof(addr)); Same verifier restrictions apply as in ffeedafbf023 ("bpf: introduce current->pid, tgid, uid, gid, comm accessors") case, where stack memory access needs to be statically verified and thus guaranteed to be initialized in first use (otherwise verifier cannot tell whether a subsequent access to it is valid or not as it's runtime dependent). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9ea2d22fa2cb..8bed7f1176b8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -269,6 +269,7 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_perf_event_output, + BPF_FUNC_skb_load_bytes, __BPF_FUNC_MAX_ID, }; diff --git a/net/core/filter.c b/net/core/filter.c index 672eefbfbe99..34bf6fc77c1d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1245,6 +1245,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) } #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) +#define BPF_LDST_LEN 16U static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { @@ -1252,7 +1253,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) int offset = (int) r2; void *from = (void *) (long) r3; unsigned int len = (unsigned int) r4; - char buf[16]; + char buf[BPF_LDST_LEN]; void *ptr; /* bpf verifier guarantees that: @@ -1299,6 +1300,36 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = { .arg5_type = ARG_ANYTHING, }; +static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + const struct sk_buff *skb = (const struct 
sk_buff *)(unsigned long) r1; + int offset = (int) r2; + void *to = (void *)(unsigned long) r3; + unsigned int len = (unsigned int) r4; + void *ptr; + + if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN)) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, len, to); + if (unlikely(!ptr)) + return -EFAULT; + if (ptr != to) + memcpy(to, ptr, len); + + return 0; +} + +const struct bpf_func_proto bpf_skb_load_bytes_proto = { + .func = bpf_skb_load_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_STACK, + .arg4_type = ARG_CONST_STACK_SIZE, +}; + #define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) #define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) @@ -1654,6 +1685,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) switch (func_id) { case BPF_FUNC_skb_store_bytes: return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: -- cgit v1.2.3 From 8b614aebecdf2b1f72d51b1527f5a75d218b78e2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 17 Dec 2015 23:51:54 +0100 Subject: bpf: move clearing of A/X into classic to eBPF migration prologue Back in the days where eBPF (or back then "internal BPF" ;->) was not exposed to user space, and only the classic BPF programs internally translated into eBPF programs, we missed the fact that for classic BPF A and X needed to be cleared. It was fixed back then via 83d5b7ef99c9 ("net: filter: initialize A and X registers"), and thus classic BPF specifics were added to the eBPF interpreter core to work around it. This added some confusion for JIT developers later on that take the eBPF interpreter code as an example for deriving their JIT. F.e. in f75298f5c3fe ("s390/bpf: clear correct BPF accumulator register"), at least X could leak stack memory. 
Furthermore, since this is only needed for classic BPF translations and not for eBPF (verifier takes care that read access to regs cannot be done uninitialized), more complexity is added to JITs as they need to determine whether they deal with migrations or native eBPF where they can just omit clearing A/X in their prologue and thus reduce image size a bit, see f.e. cde66c2d88da ("s390/bpf: Only clear A and X for converted BPF programs"). In other cases (x86, arm64), A and X are being cleared in the prologue also for eBPF case, which is unnecessary. Let's move this into the BPF migration in bpf_convert_filter() where it actually belongs as long as the number of eBPF JITs is still small. It can thus be done generically; allowing us to remove the quirk from __bpf_prog_run() and to slightly reduce JIT image size in case of eBPF, while reducing code duplication on this matter in current(/future) eBPF JITs. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Reviewed-by: Michael Holzheu Tested-by: Michael Holzheu Cc: Zi Shen Lim Cc: Yang Shi Acked-by: Yang Shi Acked-by: Zi Shen Lim Signed-off-by: David S. 
Miller --- arch/arm64/net/bpf_jit_comp.c | 6 ------ arch/s390/net/bpf_jit_comp.c | 13 ++----------- arch/x86/net/bpf_jit_comp.c | 14 +++++++++----- kernel/bpf/core.c | 4 ---- net/core/filter.c | 19 ++++++++++++++++--- 5 files changed, 27 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b162ad70effc..7658612d915c 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -152,8 +152,6 @@ static void build_prologue(struct jit_ctx *ctx) const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; - const u8 ra = bpf2a64[BPF_REG_A]; - const u8 rx = bpf2a64[BPF_REG_X]; const u8 tmp1 = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; @@ -200,10 +198,6 @@ static void build_prologue(struct jit_ctx *ctx) /* Set up function call stack */ emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); - - /* Clear registers A and X */ - emit_a64_mov_i64(ra, 0, ctx); - emit_a64_mov_i64(rx, 0, ctx); } static void build_epilogue(struct jit_ctx *ctx) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9a0c4c22e536..3c0bfc1f2694 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -408,7 +408,7 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit) * Save registers and create stack frame if necessary. * See stack frame layout desription in "bpf_jit.h"! 
*/ -static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) +static void bpf_jit_prologue(struct bpf_jit *jit) { if (jit->seen & SEEN_TAIL_CALL) { /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ @@ -448,15 +448,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_SKBP); - /* Clear A (%b0) and X (%b7) registers for converted BPF programs */ - if (is_classic) { - if (REG_SEEN(BPF_REG_A)) - /* lghi %ba,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_A, 0); - if (REG_SEEN(BPF_REG_X)) - /* lghi %bx,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_X, 0); - } } /* @@ -1245,7 +1236,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) jit->lit = jit->lit_start; jit->prg = 0; - bpf_jit_prologue(jit, bpf_prog_was_classic(fp)); + bpf_jit_prologue(jit); for (i = 0; i < fp->len; i += insn_count) { insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 75991979f667..c080e812ce85 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -193,7 +193,7 @@ struct jit_context { 32 /* space for rbx, r13, r14, r15 */ + \ 8 /* space for skb_copy_bits() buffer */) -#define PROLOGUE_SIZE 51 +#define PROLOGUE_SIZE 48 /* emit x64 prologue code for BPF program and check it's size. * bpf_tail_call helper will skip it while jumping into another program @@ -229,11 +229,15 @@ static void emit_prologue(u8 **pprog) /* mov qword ptr [rbp-X],r15 */ EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24); - /* clear A and X registers */ - EMIT2(0x31, 0xc0); /* xor eax, eax */ - EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */ + /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls + * we need to reset the counter to 0. It's done in two instructions, + * resetting rax register to 0 (xor on eax gets 0 extended), and + * moving it to the counter location. 
+ */ - /* clear tail_cnt: mov qword ptr [rbp-X], rax */ + /* xor eax, eax */ + EMIT2(0x31, 0xc0); + /* mov qword ptr [rbp-X], rax */ EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32); BUILD_BUG_ON(cnt != PROLOGUE_SIZE); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 334b1bdd572c..972d9a8e4ac4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -306,10 +306,6 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; ARG1 = (u64) (unsigned long) ctx; - /* Registers used in classic BPF programs need to be reset first. */ - regs[BPF_REG_A] = 0; - regs[BPF_REG_X] = 0; - select_insn: goto *jumptable[insn->code]; diff --git a/net/core/filter.c b/net/core/filter.c index 34bf6fc77c1d..b513eb871839 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -381,9 +381,22 @@ do_pass: new_insn = new_prog; fp = prog; - if (new_insn) - *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); - new_insn++; + /* Classic BPF related prologue emission. */ + if (new_insn) { + /* Classic BPF expects A and X to be reset first. These need + * to be guaranteed to be the first two instructions. + */ + *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); + *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); + + /* All programs must keep CTX in callee saved BPF_REG_CTX. + * In eBPF case it's done by the compiler, here we need to + * do this ourself. Initial CTX is present in BPF_REG_ARG1. + */ + *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); + } else { + new_insn += 3; + } for (i = 0; i < len; fp++, i++) { struct bpf_insn tmp_insns[6] = { }; -- cgit v1.2.3 From 23bf88078afdb8f9b8071dd9f32754ebab7ba3dc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 17 Dec 2015 23:51:55 +0100 Subject: bpf: fix misleading comment in bpf_convert_filter Comment says "User BPF's register A is mapped to our BPF register 6", which is actually wrong as the mapping is on register 0. 
This can already be inferred from the code itself. So just remove it before someone makes assumptions based on that. Only code tells truth. ;) Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index b513eb871839..c770196ae8d5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -348,12 +348,6 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * jump offsets, 2nd pass remapping: * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); - * - * User BPF's register A is mapped to our BPF register 6, user BPF - * register X is mapped to BPF register 7; frame pointer is always - * register 10; Context 'void *ctx' is stored in register 1, that is, - * for socket filters: ctx == 'struct sk_buff *', for seccomp: - * ctx == 'struct seccomp_data *'. */ static int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len) -- cgit v1.2.3 From 07f6f4a31e5a8dee67960fc07bb0b37c5f879d4d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Dec 2015 16:14:11 -0800 Subject: tcp: diag: add support for request sockets to tcp_abort() Adding support for SYN_RECV request sockets to tcp_abort() is quite easy after our tcp listener rewrite. Note that we also need to better handle listeners, or we might leak not yet accepted children, because of a missing inet_csk_listen_stop() call. Signed-off-by: Eric Dumazet Cc: Lorenzo Colitti Tested-by: Lorenzo Colitti Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2c0e340518d2..cc7aaa507abf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3083,6 +3083,15 @@ EXPORT_SYMBOL_GPL(tcp_done); int tcp_abort(struct sock *sk, int err) { if (!sk_fullsock(sk)) { + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + + local_bh_disable(); + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, + req); + local_bh_enable(); + return 0; + } sock_gen_put(sk); return -EOPNOTSUPP; } -- cgit v1.2.3 From 7eb7404f7ee4bf59cb034897ab678aba2755c5e0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 18 Dec 2015 23:33:25 +0800 Subject: Bluetooth: use list_for_each_entry* Use list_for_each_entry*() instead of list_for_each*() to simplify the code. Signed-off-by: Geliang Tang Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 12 ++++++------ net/bluetooth/cmtp/capi.c | 8 ++------ net/bluetooth/hci_core.c | 8 +++----- net/bluetooth/rfcomm/core.c | 46 ++++++++++++++------------------------------ 4 files changed, 25 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index cb4e8d4f7c25..955eda93e66f 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -174,13 +174,13 @@ EXPORT_SYMBOL(bt_accept_unlink); struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) { - struct list_head *p, *n; + struct bt_sock *s, *n; struct sock *sk; BT_DBG("parent %p", parent); - list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { - sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); + list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { + sk = (struct sock *)s; lock_sock(sk); @@ -388,11 +388,11 @@ EXPORT_SYMBOL(bt_sock_stream_recvmsg); static inline unsigned int bt_accept_poll(struct sock *parent) { - struct list_head *p, *n; + struct 
bt_sock *s, *n; struct sock *sk; - list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { - sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); + list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { + sk = (struct sock *)s; if (sk->sk_state == BT_CONNECTED || (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) && sk->sk_state == BT_CONNECT2)) diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 9a50338772f3..46ac686c8911 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -100,10 +100,8 @@ static void cmtp_application_del(struct cmtp_session *session, struct cmtp_appli static struct cmtp_application *cmtp_application_get(struct cmtp_session *session, int pattern, __u16 value) { struct cmtp_application *app; - struct list_head *p; - list_for_each(p, &session->applications) { - app = list_entry(p, struct cmtp_application, list); + list_for_each_entry(app, &session->applications, list) { switch (pattern) { case CMTP_MSGNUM: if (app->msgnum == value) @@ -511,14 +509,12 @@ static int cmtp_proc_show(struct seq_file *m, void *v) struct capi_ctr *ctrl = m->private; struct cmtp_session *session = ctrl->driverdata; struct cmtp_application *app; - struct list_head *p; seq_printf(m, "%s\n\n", cmtp_procinfo(ctrl)); seq_printf(m, "addr %s\n", session->name); seq_printf(m, "ctrl %d\n", session->num); - list_for_each(p, &session->applications) { - app = list_entry(p, struct cmtp_application, list); + list_for_each_entry(app, &session->applications, list) { seq_printf(m, "appl %d -> %d\n", app->appl, app->mapping); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9fb443a5473a..47bcef754796 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2713,12 +2713,10 @@ struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, void hci_bdaddr_list_clear(struct list_head *bdaddr_list) { - struct list_head *p, *n; + struct bdaddr_list *b, *n; - list_for_each_safe(p, n, 
bdaddr_list) { - struct bdaddr_list *b = list_entry(p, struct bdaddr_list, list); - - list_del(p); + list_for_each_entry_safe(b, n, bdaddr_list, list) { + list_del(&b->list); kfree(b); } } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 29709fbfd1f5..f7eb02f09b54 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -692,11 +692,9 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s) static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) { - struct rfcomm_session *s; - struct list_head *p, *n; + struct rfcomm_session *s, *n; struct l2cap_chan *chan; - list_for_each_safe(p, n, &session_list) { - s = list_entry(p, struct rfcomm_session, list); + list_for_each_entry_safe(s, n, &session_list, list) { chan = l2cap_pi(s->sock->sk)->chan; if ((!bacmp(src, BDADDR_ANY) || !bacmp(&chan->src, src)) && @@ -709,16 +707,14 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, int err) { - struct rfcomm_dlc *d; - struct list_head *p, *n; + struct rfcomm_dlc *d, *n; s->state = BT_CLOSED; BT_DBG("session %p state %ld err %d", s, s->state, err); /* Close all dlcs */ - list_for_each_safe(p, n, &s->dlcs) { - d = list_entry(p, struct rfcomm_dlc, list); + list_for_each_entry_safe(d, n, &s->dlcs, list) { d->state = BT_CLOSED; __rfcomm_dlc_close(d, err); } @@ -1771,13 +1767,11 @@ static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s, static void rfcomm_process_connect(struct rfcomm_session *s) { - struct rfcomm_dlc *d; - struct list_head *p, *n; + struct rfcomm_dlc *d, *n; BT_DBG("session %p state %ld", s, s->state); - list_for_each_safe(p, n, &s->dlcs) { - d = list_entry(p, struct rfcomm_dlc, list); + list_for_each_entry_safe(d, n, &s->dlcs, list) { if (d->state == BT_CONFIG) { d->mtu = s->mtu; if (rfcomm_check_security(d)) { @@ -1843,14 +1837,11 @@ static int 
rfcomm_process_tx(struct rfcomm_dlc *d) static void rfcomm_process_dlcs(struct rfcomm_session *s) { - struct rfcomm_dlc *d; - struct list_head *p, *n; + struct rfcomm_dlc *d, *n; BT_DBG("session %p state %ld", s, s->state); - list_for_each_safe(p, n, &s->dlcs) { - d = list_entry(p, struct rfcomm_dlc, list); - + list_for_each_entry_safe(d, n, &s->dlcs, list) { if (test_bit(RFCOMM_TIMED_OUT, &d->flags)) { __rfcomm_dlc_close(d, ETIMEDOUT); continue; @@ -1985,14 +1976,11 @@ static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s) static void rfcomm_process_sessions(void) { - struct list_head *p, *n; + struct rfcomm_session *s, *n; rfcomm_lock(); - list_for_each_safe(p, n, &session_list) { - struct rfcomm_session *s; - s = list_entry(p, struct rfcomm_session, list); - + list_for_each_entry_safe(s, n, &session_list, list) { if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); @@ -2075,15 +2063,12 @@ failed: static void rfcomm_kill_listener(void) { - struct rfcomm_session *s; - struct list_head *p, *n; + struct rfcomm_session *s, *n; BT_DBG(""); - list_for_each_safe(p, n, &session_list) { - s = list_entry(p, struct rfcomm_session, list); + list_for_each_entry_safe(s, n, &session_list, list) rfcomm_session_del(s); - } } static int rfcomm_run(void *unused) @@ -2113,8 +2098,7 @@ static int rfcomm_run(void *unused) static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) { struct rfcomm_session *s; - struct rfcomm_dlc *d; - struct list_head *p, *n; + struct rfcomm_dlc *d, *n; BT_DBG("conn %p status 0x%02x encrypt 0x%02x", conn, status, encrypt); @@ -2122,9 +2106,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) if (!s) return; - list_for_each_safe(p, n, &s->dlcs) { - d = list_entry(p, struct rfcomm_dlc, list); - + list_for_each_entry_safe(d, n, &s->dlcs, list) { if (test_and_clear_bit(RFCOMM_SEC_PENDING, &d->flags)) { rfcomm_dlc_clear_timer(d); if 
(status || encrypt == 0x00) { -- cgit v1.2.3 From 92e17ee72a60b126263cbcd749e5da688e0198a3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Tue, 15 Dec 2015 12:25:35 +0100 Subject: 6lowpan: fix debugfs interface entry name This patch moves the debugfs interface registration after the netdevice registration. The function lowpan_dev_debugfs_init will use "dev->name", which can be a format string before register_netdevice is called. The function register_netdevice will evaluate the format string if necessary and replace "dev->name" with the real interface name. Reported-by: Lukasz Duda Signed-off-by: Alexander Aring Acked-by: Lukasz Duda Signed-off-by: Marcel Holtmann --- net/6lowpan/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index c7f06f5c0121..faf65baed617 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -29,13 +29,13 @@ int lowpan_register_netdevice(struct net_device *dev, lowpan_priv(dev)->lltype = lltype; - ret = lowpan_dev_debugfs_init(dev); + ret = register_netdevice(dev); if (ret < 0) return ret; - ret = register_netdevice(dev); + ret = lowpan_dev_debugfs_init(dev); if (ret < 0) - lowpan_dev_debugfs_exit(dev); + unregister_netdevice(dev); return ret; } -- cgit v1.2.3 From 5c29482dd17835def5cb97918f8f83a881c9918a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 22 Dec 2015 23:11:49 +0800 Subject: net-sysfs: use to_net_dev in net_namespace() Use to_net_dev() instead of open-coding it. Signed-off-by: Geliang Tang Signed-off-by: David S.
Miller --- net/core/net-sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index bca8c350e7f3..b6c8a6629b39 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1453,8 +1453,8 @@ static void netdev_release(struct device *d) static const void *net_namespace(struct device *d) { - struct net_device *dev; - dev = container_of(d, struct net_device, dev); + struct net_device *dev = to_net_dev(d); + return dev_net(dev); } -- cgit v1.2.3 From f2830d09895a64ba7a1a2c6ef41106c72e3654b2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 19 Dec 2015 12:55:43 -0800 Subject: RDS: don't pretend to use cpu notifiers It looks like an attempt to use CPU notifier here which was never completed. Nobody tried to wire it up completely since 2k9. So I unwind this code and get rid of everything not required. Oh look! 19 lines were removed while code still does the same thing. Acked-by: Santosh Shilimkar Tested-by: Santosh Shilimkar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. 
Miller --- net/rds/page.c | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/rds/page.c b/net/rds/page.c index 9005a2c920ee..5a14e6d6a926 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -179,37 +179,18 @@ out: } EXPORT_SYMBOL_GPL(rds_page_remainder_alloc); -static int rds_page_remainder_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +void rds_page_exit(void) { - struct rds_page_remainder *rem; - long cpu = (long)hcpu; + unsigned int cpu; - rem = &per_cpu(rds_page_remainders, cpu); + for_each_possible_cpu(cpu) { + struct rds_page_remainder *rem; - rdsdebug("cpu %ld action 0x%lx\n", cpu, action); + rem = &per_cpu(rds_page_remainders, cpu); + rdsdebug("cpu %u\n", cpu); - switch (action) { - case CPU_DEAD: if (rem->r_page) __free_page(rem->r_page); rem->r_page = NULL; - break; } - - return 0; -} - -static struct notifier_block rds_page_remainder_nb = { - .notifier_call = rds_page_remainder_cpu_notify, -}; - -void rds_page_exit(void) -{ - int i; - - for_each_possible_cpu(i) - rds_page_remainder_cpu_notify(&rds_page_remainder_nb, - (unsigned long)CPU_DEAD, - (void *)(long)i); } -- cgit v1.2.3 From 2010b93e9317cc12acd20c4aed385af7f9d1681e Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 22 Dec 2015 00:03:44 +0900 Subject: net: tcp: deal with listen sockets properly in tcp_abort. When closing a listen socket, tcp_abort currently calls tcp_done without clearing the request queue. If the socket has a child socket that is established but not yet accepted, the child socket is then left without a parent, causing a leak. Fix this by setting the socket state to TCP_CLOSE and calling inet_csk_listen_stop with the socket lock held, like tcp_close does. Tested using net_test. 
With this patch, calling SOCK_DESTROY on a listen socket that has an established but not yet accepted child socket results in the parent and the child being closed, such that they no longer appear in sock_diag dumps. Reported-by: Eric Dumazet Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cc7aaa507abf..7bb1b091efd1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3099,6 +3099,11 @@ int tcp_abort(struct sock *sk, int err) /* Don't race with userspace socket closes such as tcp_close. */ lock_sock(sk); + if (sk->sk_state == TCP_LISTEN) { + tcp_set_state(sk, TCP_CLOSE); + inet_csk_listen_stop(sk); + } + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ local_bh_disable(); bh_lock_sock(sk); -- cgit v1.2.3 From e46787f0dd9385449fd77246d4fddb8634350af8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Dec 2015 21:29:25 +0100 Subject: tcp: send_reset: test for non-NULL sk first tcp_md5_do_lookup requires a full socket, so once we extend _send_reset() to also accept timewait socket we would have to change if (!sk && hash_location) to something like if ((!sk || !sk_fullsock(sk)) && hash_location) { ... } else { (sk && sk_fullsock(sk)) tcp_md5_do_lookup() } Switch the two branches: check if we have a socket first, then fall back to a listener lookup if we saw a md5 option (hash_location). Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv4/tcp_ipv4.c | 11 +++++------ net/ipv6/tcp_ipv6.c | 6 +++--- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 46e92fbd26a8..eb29c2f5bcea 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -587,7 +587,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) } rep; struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; + struct tcp_md5sig_key *key = NULL; const __u8 *hash_location = NULL; unsigned char newhash[16]; int genhash; @@ -627,7 +627,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (!sk && hash_location) { + if (sk) { + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) + &ip_hdr(skb)->saddr, AF_INET); + } else if (hash_location) { /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. @@ -651,10 +654,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; - } else { - key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) - &ip_hdr(skb)->saddr, - AF_INET) : NULL; } if (key) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f03d2b0445fd..32fa0de9982a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -854,7 +854,9 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (!sk && hash_location) { + if (sk) { + key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr); + } else if (hash_location) { /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. 
@@ -877,8 +879,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; - } else { - key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL; } #endif -- cgit v1.2.3 From 271c3b9b7bdae09c7da467ac1ae96e3298754977 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Dec 2015 21:29:26 +0100 Subject: tcp: honour SO_BINDTODEVICE for TW_RST case too Hannes points out that when we generate tcp reset for timewait sockets we pretend we found no socket and pass NULL sk to tcp_vX_send_reset(). Make it cope with inet tw sockets and then provide tw sk. This makes RSTs appear on correct interface when SO_BINDTODEVICE is used. Packetdrill test case: // want default route to be used, we rely on BINDTODEVICE `ip route del 192.0.2.0/24 via 192.168.0.2 dev tun0` 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 // test case still works due to BINDTODEVICE 0.001 setsockopt(3, SOL_SOCKET, SO_BINDTODEVICE, "tun0", 4) = 0 0.100...0.200 connect(3, ..., ...) = 0 0.100 > S 0:0(0) 0.200 < S. 0:0(0) ack 1 win 32792 0.200 > . 1:1(0) ack 1 0.210 close(3) = 0 0.210 > F. 1:1(0) ack 1 win 29200 0.300 < . 1:1(0) ack 2 win 46 // more data while in FIN_WAIT2, expect RST 1.300 < P. 1:1001(1000) ack 1 win 46 // fails without this change -- default route is used 1.301 > R 1:1(0) win 0 Reported-by: Hannes Frederic Sowa Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv4/tcp_ipv4.c | 12 +++++++++--- net/ipv4/tcp_minisocks.c | 7 ++----- net/ipv6/tcp_ipv6.c | 6 ++++-- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index eb29c2f5bcea..fc4f72686705 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -627,7 +627,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (sk) { + if (sk && sk_fullsock(sk)) { key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) &ip_hdr(skb)->saddr, AF_INET); } else if (hash_location) { @@ -674,7 +674,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; - arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. * routing might fail in this case. No choice here, if we choose to force * input interface, we will misroute in case of asymmetric route. 
@@ -682,6 +683,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) arg.bound_dev_if = sk->sk_bound_dev_if; + BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != + offsetof(struct inet_timewait_sock, tw_bound_dev_if)); + arg.tos = ip_hdr(skb)->tos; ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -1705,7 +1709,9 @@ do_time_wait: tcp_v4_timewait_ack(sk, skb); break; case TCP_TW_RST: - goto no_tcp_socket; + tcp_v4_send_reset(sk, skb); + inet_twsk_deschedule_put(inet_twsk(sk)); + goto discard_it; case TCP_TW_SUCCESS:; } goto discard_it; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ac6b1961ffeb..75632a925824 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -131,7 +131,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, goto kill; if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) - goto kill_with_rst; + return TCP_TW_RST; /* Dup ACK? */ if (!th->ack || @@ -145,11 +145,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * reset. */ if (!th->fin || - TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { -kill_with_rst: - inet_twsk_deschedule_put(tw); + TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) return TCP_TW_RST; - } /* FIN arrived, enter true time-wait state. 
*/ tw->tw_substate = TCP_TIME_WAIT; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 32fa0de9982a..9ecb012291cf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -854,7 +854,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (sk) { + if (sk && sk_fullsock(sk)) { key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr); } else if (hash_location) { /* @@ -1516,7 +1516,9 @@ do_time_wait: break; case TCP_TW_RST: tcp_v6_restore_cb(skb); - goto no_tcp_socket; + tcp_v6_send_reset(sk, skb); + inet_twsk_deschedule_put(inet_twsk(sk)); + goto discard_it; case TCP_TW_SUCCESS: ; } -- cgit v1.2.3 From aeb7ed14fe5df3a4ce019c8d4ce0b2922a091196 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 23 Dec 2015 20:42:21 +0800 Subject: bridge: use kobj_to_dev instead of to_dev kobj_to_dev has been defined in linux/device.h, so I replace to_dev with it. Signed-off-by: Geliang Tang Signed-off-by: David S. 
Miller --- net/bridge/br_sysfs_br.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 8365bd53c421..6b8091407ca3 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -22,7 +22,6 @@ #include "br_private.h" -#define to_dev(obj) container_of(obj, struct device, kobj) #define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd))) /* @@ -814,7 +813,7 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { - struct device *dev = to_dev(kobj); + struct device *dev = kobj_to_dev(kobj); struct net_bridge *br = to_bridge(dev); int n; -- cgit v1.2.3 From 039f50629b7f860f36644ed1f34b27da9aa62f43 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 24 Dec 2015 14:34:54 -0800 Subject: ip_tunnel: Move stats update to iptunnel_xmit() By moving stats update into iptunnel_xmit(), we can simplify iptunnel_xmit() usage. With this change there is no need to call another function (iptunnel_xmit_stats()) to update stats in tunnel xmit code path. Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- drivers/net/geneve.c | 17 ++++++++--------- drivers/net/vxlan.c | 9 ++++----- include/net/ip6_tunnel.h | 17 ++++------------- include/net/ip_tunnels.h | 28 +++++++++++++++------------- include/net/udp_tunnel.h | 8 ++++---- net/ipv4/ip_gre.c | 7 +++---- net/ipv4/ip_tunnel.c | 7 ++----- net/ipv4/ip_tunnel_core.c | 9 +++++---- net/ipv4/ip_vti.c | 2 +- net/ipv4/udp_tunnel.c | 11 +++++------ net/ipv6/sit.c | 7 ++----- net/tipc/udp_media.c | 12 +++--------- 12 files changed, 56 insertions(+), 78 deletions(-) (limited to 'net') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index e6e00924f8ef..20dd66423ec8 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -918,12 +918,11 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); df = 0; } - err = udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, - tos, ttl, df, sport, geneve->dst_port, - !net_eq(geneve->net, dev_net(geneve->dev)), - !(flags & GENEVE_F_UDP_CSUM)); + udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, + tos, ttl, df, sport, geneve->dst_port, + !net_eq(geneve->net, dev_net(geneve->dev)), + !(flags & GENEVE_F_UDP_CSUM)); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; tx_error: @@ -1005,10 +1004,10 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = 1; ttl = ttl ? 
: ip6_dst_hoplimit(dst); } - err = udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, - &fl6.saddr, &fl6.daddr, prio, ttl, - sport, geneve->dst_port, - !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); + udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, + &fl6.saddr, &fl6.daddr, prio, ttl, + sport, geneve->dst_port, + !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); return NETDEV_TX_OK; tx_error: diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ba363cedef80..fecf7b6c732e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1841,9 +1841,10 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, - ttl, df, src_port, dst_port, xnet, - !(vxflags & VXLAN_F_UDP_CSUM)); + udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df, + src_port, dst_port, xnet, + !(vxflags & VXLAN_F_UDP_CSUM)); + return 0; } #if IS_ENABLED(CONFIG_IPV6) @@ -2056,8 +2057,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, skb = NULL; goto rt_tx_error; } - - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index ff788b665277..ae07e94778d8 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -5,6 +5,7 @@ #include #include #include +#include #define IP6TUNNEL_ERR_TIMEO (30*HZ) @@ -83,22 +84,12 @@ int ip6_tnl_get_iflink(const struct net_device *dev); static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, struct net_device *dev) { - struct net_device_stats *stats = &dev->stats; int pkt_len, err; pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - 
tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - put_cpu_ptr(tstats); - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } + if (unlikely(net_xmit_eval(err))) + pkt_len = -1; + iptunnel_xmit_stats(dev, pkt_len); } #endif diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 62a750a6a8f8..6db96ea0144f 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -273,32 +273,34 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, } int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); -int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, - __be32 src, __be32 dst, u8 proto, - u8 tos, u8 ttl, __be16 df, bool xnet); +void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, u8 proto, + u8 tos, u8 ttl, __be16 df, bool xnet); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum, int gso_type_mask); -static inline void iptunnel_xmit_stats(int err, - struct net_device_stats *err_stats, - struct pcpu_sw_netstats __percpu *stats) +static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { - if (err > 0) { - struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats); + if (pkt_len > 0) { + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += err; + tstats->tx_bytes += pkt_len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); put_cpu_ptr(tstats); - } else if (err < 0) { - err_stats->tx_errors++; - err_stats->tx_aborted_errors++; } else { - err_stats->tx_dropped++; + struct net_device_stats *err_stats = &dev->stats; + + if (pkt_len < 0) { + err_stats->tx_errors++; + err_stats->tx_aborted_errors++; + } else { + err_stats->tx_dropped++; + } } } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 
cb2f89f20f5c..cca2ad3082c3 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -78,10 +78,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* Transmit the skb using UDP encapsulation. */ -int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 tos, __u8 ttl, - __be16 df, __be16 src_port, __be16 dst_port, - bool xnet, bool nocheck); +void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck); #if IS_ENABLED(CONFIG_IPV6) int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 04a48c0159cc..7c51c4e1661f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -561,10 +561,9 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) tunnel_id_to_key(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? 
htons(IP_DF) : 0; - err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr, - key->u.ipv4.dst, IPPROTO_GRE, - key->tos, key->ttl, df, false); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + + iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, + key->tos, key->ttl, df, false); return; err_free_rt: diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0f6e9ee031c4..c7bd72e9b544 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -656,7 +656,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct rtable *rt; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; - int err; bool connected; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -794,10 +793,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, return; } - err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, - tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); - + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); return; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 1db8418aa62e..eb52ce950c27 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -47,12 +47,13 @@ #include #include -int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 proto, - __u8 tos, __u8 ttl, __be16 df, bool xnet) +void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df, bool xnet) { int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); + struct net_device *dev = skb->dev; struct iphdr *iph; int err; @@ -81,7 +82,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, err = 
ip_local_out(net, sk, skb); if (unlikely(net_xmit_eval(err))) pkt_len = 0; - return pkt_len; + iptunnel_xmit_stats(dev, pkt_len); } EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 02d9c21e2953..5cf10b777b7e 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -199,7 +199,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, err = dst_output(tunnel->net, skb->sk, skb); if (net_xmit_eval(err) == 0) err = skb->len; - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + iptunnel_xmit_stats(dev, err); return NETDEV_TX_OK; tx_error_icmp: diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index aba428626b52..0ec08814f37d 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -74,10 +74,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 tos, __u8 ttl, - __be16 df, __be16 src_port, __be16 dst_port, - bool xnet, bool nocheck) +void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck) { struct udphdr *uh; @@ -91,8 +91,7 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, - tos, ttl, df, xnet); + iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index dcccae86190f..e794ef66a401 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -820,7 +820,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, const struct in6_addr *addr6; int addr_type; u8 ttl; - int err; u8 protocol = IPPROTO_IPV6; int t_hlen = tunnel->hlen + 
sizeof(struct iphdr); @@ -983,10 +982,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, IPPROTO_IPV6); - err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, - protocol, tos, ttl, df, - !net_eq(tunnel->net, dev_net(dev))); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); return NETDEV_TX_OK; tx_error_icmp: diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 6af78c6276b4..d63a911e7fe2 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -182,15 +182,9 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, goto tx_error; } ttl = ip4_dst_hoplimit(&rt->dst); - err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, - src->ipv4.s_addr, - dst->ipv4.s_addr, 0, ttl, 0, - src->udp_port, dst->udp_port, - false, true); - if (err < 0) { - ip_rt_put(rt); - goto tx_error; - } + udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, + dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, + dst->udp_port, false, true); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; -- cgit v1.2.3 From df05ef874b284d833c2d9795a6350c6a373ab6c9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:39:32 +0100 Subject: netfilter: nf_tables: release objects on netns destruction We have to release the existing objects on netns removal, otherwise we leak them. Chains are unregistered first to make sure no packets are walking on our rules and sets anymore. The object release happens when we unregister the family via __nft_release_afinfo(), which is called from nft_unregister_afinfo() from the corresponding __net_exit path in every family.
Reported-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/bridge/netfilter/nf_tables_bridge.c | 2 +- net/ipv4/netfilter/nf_tables_arp.c | 2 +- net/ipv4/netfilter/nf_tables_ipv4.c | 2 +- net/ipv6/netfilter/nf_tables_ipv6.c | 2 +- net/netfilter/nf_tables_api.c | 47 +++++++++++++++++++++++++++++++-- net/netfilter/nf_tables_inet.c | 2 +- net/netfilter/nf_tables_netdev.c | 2 +- 8 files changed, 52 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b313cda49194..a50f139ce087 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -880,7 +880,7 @@ struct nft_af_info { }; int nft_register_afinfo(struct net *, struct nft_af_info *); -void nft_unregister_afinfo(struct nft_af_info *); +void nft_unregister_afinfo(struct net *, struct nft_af_info *); int nft_register_chain_type(const struct nf_chain_type *); void nft_unregister_chain_type(const struct nf_chain_type *); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 62f6b1b19589..7fcdd7261d88 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -141,7 +141,7 @@ err: static void nf_tables_bridge_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.bridge); + nft_unregister_afinfo(net, net->nft.bridge); kfree(net->nft.bridge); } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 9d09d4f59545..cd84d4295a20 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -57,7 +57,7 @@ err: static void nf_tables_arp_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.arp); + nft_unregister_afinfo(net, net->nft.arp); kfree(net->nft.arp); } diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index ca9dc3c46c4f..e44ba3b12fbb 100644 --- 
a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -78,7 +78,7 @@ err: static void nf_tables_ipv4_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.ipv4); + nft_unregister_afinfo(net, net->nft.ipv4); kfree(net->nft.ipv4); } diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 120ea9131be0..30b22f4dff55 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -77,7 +77,7 @@ err: static void nf_tables_ipv6_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.ipv6); + nft_unregister_afinfo(net, net->nft.ipv6); kfree(net->nft.ipv6); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4a23f77c363a..852273110275 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -41,6 +41,8 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi) } EXPORT_SYMBOL_GPL(nft_register_afinfo); +static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi); + /** * nft_unregister_afinfo - unregister nf_tables address family info * @@ -48,9 +50,10 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo); * * Unregister the address family for use with nf_tables. */ -void nft_unregister_afinfo(struct nft_af_info *afi) +void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi) { nfnl_lock(NFNL_SUBSYS_NFTABLES); + __nft_release_afinfo(net, afi); list_del_rcu(&afi->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } @@ -4579,7 +4582,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, } EXPORT_SYMBOL_GPL(nft_data_dump); -static int nf_tables_init_net(struct net *net) +static int __net_init nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); INIT_LIST_HEAD(&net->nft.commit_list); @@ -4587,6 +4590,46 @@ static int nf_tables_init_net(struct net *net) return 0; } +/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. 
*/ +static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) +{ + struct nft_table *table, *nt; + struct nft_chain *chain, *nc; + struct nft_rule *rule, *nr; + struct nft_set *set, *ns; + struct nft_ctx ctx = { + .net = net, + .afi = afi, + }; + + list_for_each_entry_safe(table, nt, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hooks(table, chain, afi->nops); + /* No packets are walking on these chains anymore. */ + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); + chain->use--; + nf_tables_rule_destroy(&ctx, rule); + } + } + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); + table->use--; + nft_set_destroy(set); + } + list_for_each_entry_safe(chain, nc, &table->chains, list) { + list_del(&chain->list); + table->use--; + nf_tables_chain_destroy(chain); + } + list_del(&table->list); + nf_tables_table_destroy(&ctx); + } +} + static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, }; diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index 9dd2d216cfc1..6b5f76295d3d 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -57,7 +57,7 @@ err: static void __net_exit nf_tables_inet_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.inet); + nft_unregister_afinfo(net, net->nft.inet); kfree(net->nft.inet); } diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 7b9c053ba750..2bfd1fbccec8 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -139,7 +139,7 @@ err: static void nf_tables_netdev_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.netdev); + nft_unregister_afinfo(net, net->nft.netdev); kfree(net->nft.netdev); } -- cgit v1.2.3 From 5ebe0b0eec9d6f703b137f9b938c52f7b91dd9d6 Mon 
Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:40:49 +0100 Subject: netfilter: nf_tables: destroy basechain and rules on netdevice removal If the netdevice is destroyed, the resources that are attached should be released too as they belong to the device that is now gone. Suggested-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +---- net/netfilter/nf_tables_api.c | 31 +++++++++++++++++++++------ net/netfilter/nf_tables_netdev.c | 45 ++++++++++++++++----------------------- 3 files changed, 44 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a50f139ce087..0191fbb33a2f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -821,10 +821,7 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } -int nft_register_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops); -void nft_unregister_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops); +int __nft_release_basechain(struct nft_ctx *ctx); unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 852273110275..5729844e1d46 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -131,8 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -int nft_register_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) +static int nft_register_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) { struct net *net = read_pnet(&basechain->pnet); @@ -141,10 +141,9 @@ int nft_register_basechain(struct nft_base_chain *basechain, return nf_register_net_hooks(net, basechain->ops, hook_nops); } -EXPORT_SYMBOL_GPL(nft_register_basechain); -void 
nft_unregister_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) +static void nft_unregister_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) { struct net *net = read_pnet(&basechain->pnet); @@ -153,7 +152,6 @@ void nft_unregister_basechain(struct nft_base_chain *basechain, nf_unregister_net_hooks(net, basechain->ops, hook_nops); } -EXPORT_SYMBOL_GPL(nft_unregister_basechain); static int nf_tables_register_hooks(const struct nft_table *table, struct nft_chain *chain, @@ -4590,6 +4588,27 @@ static int __net_init nf_tables_init_net(struct net *net) return 0; } +int __nft_release_basechain(struct nft_ctx *ctx) +{ + struct nft_rule *rule, *nr; + + BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN)); + + nf_tables_unregister_hooks(ctx->chain->table, ctx->chain, + ctx->afi->nops); + list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { + list_del(&rule->list); + ctx->chain->use--; + nf_tables_rule_destroy(ctx, rule); + } + list_del(&ctx->chain->list); + ctx->table->use--; + nf_tables_chain_destroy(ctx->chain); + + return 0; +} +EXPORT_SYMBOL_GPL(__nft_release_basechain); + /* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. 
*/ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) { diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 2bfd1fbccec8..3e9c87b961ba 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -156,35 +156,17 @@ static const struct nf_chain_type nft_filter_chain_netdev = { .hook_mask = (1 << NF_NETDEV_INGRESS), }; -static void nft_netdev_event(unsigned long event, struct nft_af_info *afi, - struct net_device *dev, struct nft_table *table, - struct nft_base_chain *basechain) +static void nft_netdev_event(unsigned long event, struct net_device *dev, + struct nft_ctx *ctx) { - switch (event) { - case NETDEV_REGISTER: - if (strcmp(basechain->dev_name, dev->name) != 0) - return; + struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - BUG_ON(!(basechain->flags & NFT_BASECHAIN_DISABLED)); - - dev_hold(dev); - basechain->ops[0].dev = dev; - basechain->flags &= ~NFT_BASECHAIN_DISABLED; - if (!(table->flags & NFT_TABLE_F_DORMANT)) - nft_register_basechain(basechain, afi->nops); - break; + switch (event) { case NETDEV_UNREGISTER: if (strcmp(basechain->dev_name, dev->name) != 0) return; - BUG_ON(basechain->flags & NFT_BASECHAIN_DISABLED); - - if (!(table->flags & NFT_TABLE_F_DORMANT)) - nft_unregister_basechain(basechain, afi->nops); - - dev_put(basechain->ops[0].dev); - basechain->ops[0].dev = NULL; - basechain->flags |= NFT_BASECHAIN_DISABLED; + __nft_release_basechain(ctx); break; case NETDEV_CHANGENAME: if (dev->ifindex != basechain->ops[0].dev->ifindex) @@ -201,20 +183,29 @@ static int nf_tables_netdev_event(struct notifier_block *this, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nft_af_info *afi; struct nft_table *table; - struct nft_chain *chain; + struct nft_chain *chain, *nr; + struct nft_ctx ctx = { + .net = dev_net(dev), + }; + + if (event != NETDEV_UNREGISTER && + event != NETDEV_CHANGENAME) + return NOTIFY_DONE; nfnl_lock(NFNL_SUBSYS_NFTABLES); 
list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { + ctx.afi = afi; if (afi->family != NFPROTO_NETDEV) continue; list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(chain, &table->chains, list) { + ctx.table = table; + list_for_each_entry_safe(chain, nr, &table->chains, list) { if (!(chain->flags & NFT_BASE_CHAIN)) continue; - nft_netdev_event(event, afi, dev, table, - nft_base_chain(chain)); + ctx.chain = chain; + nft_netdev_event(event, dev, &ctx); } } } -- cgit v1.2.3 From f4c756b4ea7d2921391febcaed4ce2511872a0e1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:40:50 +0100 Subject: netfilter: nf_tables: remove check against removal of inactive objects The following sequence inside a batch, although not very useful, is valid: add table foo ... delete table foo This may be generated by some robot while applying some incremental upgrade, so remove the defensive checks against this. This patch keeps the check on the get/dump path for now; we still have to replace the inactive flag by introducing object generations.
Reported-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5729844e1d46..28cbc457f1f3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -832,8 +832,6 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; ctx.afi = afi; ctx.table = table; @@ -1493,14 +1491,10 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; if (chain->use > 0) return -EBUSY; @@ -2192,8 +2186,6 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; if (nla[NFTA_RULE_CHAIN]) { chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); @@ -2362,8 +2354,6 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; } nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); @@ -2898,8 +2888,6 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return 
-ENOENT; if (!list_empty(&set->bindings)) return -EBUSY; @@ -3022,8 +3010,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - bool trans) + const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; @@ -3036,8 +3023,6 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); - if (!trans && (table->flags & NFT_TABLE_INACTIVE)) - return -ENOENT; nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); return 0; @@ -3146,9 +3131,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) return err; err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh, - (void *)nla, false); + (void *)nla); if (err < 0) return err; + if (ctx.table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) @@ -3212,9 +3199,11 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int err; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, false); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); if (err < 0) return err; + if (ctx.table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) @@ -3536,7 +3525,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, true); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); if (err < 0) return err; @@ -3630,7 +3619,7 @@ static int nf_tables_delsetelem(struct net *net, 
struct sock *nlsk, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, false); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); if (err < 0) return err; -- cgit v1.2.3 From 7b8002a1511fcbcb0596cac90d67ad5c8182d0aa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 18:41:56 +0100 Subject: netfilter: nfnetlink: pass down netns pointer to call() and call_rcu() Adapt callsites to avoid recurrent lookup of the netns pointer. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 8 +-- net/netfilter/ipset/ip_set_core.c | 108 +++++++++++++++-------------------- net/netfilter/nf_conntrack_netlink.c | 96 ++++++++++++++----------------- net/netfilter/nf_tables_api.c | 30 ++++------ net/netfilter/nfnetlink.c | 6 +- net/netfilter/nfnetlink_acct.c | 21 +++---- net/netfilter/nfnetlink_cthelper.c | 18 +++--- net/netfilter/nfnetlink_cttimeout.c | 42 ++++++-------- net/netfilter/nfnetlink_log.c | 15 ++--- net/netfilter/nfnetlink_queue.c | 36 +++++------- net/netfilter/nft_compat.c | 6 +- net/netfilter/xt_osf.c | 7 ++- 12 files changed, 173 insertions(+), 220 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 5646b24bfc64..ceacbf5dcb73 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -8,12 +8,12 @@ #include struct nfnl_callback { - int (*call)(struct sock *nl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]); - int (*call_rcu)(struct sock *nl, struct sk_buff *skb, + int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]); + int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]); int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb, 
const struct nlmsghdr *nlh, const struct nlattr * const cda[]); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 54f3d7cb23e6..95db43fc0303 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -825,20 +825,17 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, return 0; } -static int -ip_set_none(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { return -EOPNOTSUPP; } -static int -ip_set_create(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_create(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct net *net = sock_net(ctnl); struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; @@ -976,12 +973,11 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } -static int -ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_destroy(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *s; ip_set_id_t i; int ret = 0; @@ -1052,12 +1048,11 @@ ip_set_flush_set(struct ip_set *set) spin_unlock_bh(&set->lock); } -static int -ip_set_flush(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct 
nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *s; ip_set_id_t i; @@ -1092,12 +1087,11 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { .len = IPSET_MAXNAMELEN - 1 }, }; -static int -ip_set_rename(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_rename(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *s; const char *name2; ip_set_id_t i; @@ -1142,12 +1136,11 @@ out: * so the ip_set_list always contains valid pointers to the sets. */ -static int -ip_set_swap(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; @@ -1413,10 +1406,9 @@ out: return ret < 0 ? 
ret : skb->len; } -static int -ip_set_dump(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1500,12 +1492,11 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, return ret; } -static int -ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; @@ -1555,12 +1546,11 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, return ret; } -static int -ip_set_udel(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; @@ -1610,12 +1600,11 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, return ret; } -static int -ip_set_utest(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = 
ip_set_pernet(net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; @@ -1646,12 +1635,11 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, /* Get headed data of a set */ -static int -ip_set_header(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_header(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1703,10 +1691,9 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, }; -static int -ip_set_type(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1762,10 +1749,9 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, }; -static int -ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_protocol(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { struct sk_buff *skb2; struct nlmsghdr *nlh2; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9f5272968abb..dbb1bb3edb45 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1113,12 +1113,11 @@ static int ctnetlink_flush_conntrack(struct net *net, return 0; } -static int -ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, - const 
struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; @@ -1168,12 +1167,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, return 0; } -static int -ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; @@ -1330,10 +1328,10 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) return ctnetlink_dump_list(skb, cb, true); } -static int -ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -1352,10 +1350,10 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) return ctnetlink_dump_list(skb, cb, false); } -static int -ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -1865,12 +1863,11 @@ err1: return ERR_PTR(err); } -static int 
-ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -2034,10 +2031,10 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int -ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -2080,10 +2077,9 @@ nlmsg_failure: return -1; } -static int -ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct sk_buff *skb2; int err; @@ -2729,12 +2725,12 @@ out: return skb->len; } -static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, +static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { int err; - struct net *net = sock_net(ctnl); struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; struct nf_conntrack_tuple tuple; @@ -2768,12 +2764,10 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, return err; } -static int -ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct 
nlattr * const cda[]) +static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct sk_buff *skb2; @@ -2784,7 +2778,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { if (cda[CTA_EXPECT_MASTER]) - return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda); + return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda); else { struct netlink_dump_control c = { .dump = ctnetlink_exp_dump_table, @@ -2850,12 +2844,10 @@ out: return err == -EAGAIN ? -ENOBUFS : err; } -static int -ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -3136,12 +3128,10 @@ err_ct: return err; } -static int -ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_new_expect(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -3242,10 +3232,10 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int -ctnetlink_stat_exp_cpu(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl, + struct sk_buff 
*skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 28cbc457f1f3..69cb5be9a174 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -543,15 +543,14 @@ done: return skb->len; } -static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_gettable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; const struct nft_table *table; struct sk_buff *skb2; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -1097,8 +1096,8 @@ done: return skb->len; } -static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getchain(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1106,7 +1105,6 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; const struct nft_chain *chain; struct sk_buff *skb2; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -1923,8 +1921,8 @@ done: return skb->len; } -static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getrule(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1933,7 +1931,6 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_chain *chain; const struct nft_rule *rule; struct sk_buff *skb2; - struct net *net 
= sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -2604,11 +2601,10 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb) return 0; } -static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getset(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; @@ -3190,11 +3186,10 @@ nla_put_failure: return -ENOSPC; } -static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); const struct nft_set *set; struct nft_ctx ctx; int err; @@ -3723,11 +3718,10 @@ err: return err; } -static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getgen(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); struct sk_buff *skb2; int err; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9ed453465167..7012154b28ca 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -206,7 +206,7 @@ replay: } if (nc->call_rcu) { - err = nc->call_rcu(net->nfnl, skb, nlh, + err = nc->call_rcu(net, net->nfnl, skb, nlh, (const struct nlattr **)cda); rcu_read_unlock(); } else { @@ -216,8 +216,8 @@ replay: nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) - err = nc->call(net->nfnl, skb, nlh, - (const struct nlattr **)cda); + err = nc->call(net, net->nfnl, skb, nlh, + (const struct nlattr **)cda); else err = -EINVAL; nfnl_unlock(subsys_id); diff --git 
a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index fefbf5f0b28d..5274b04c42a6 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -46,12 +46,11 @@ struct nfacct_filter { #define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) #define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */ -static int -nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_acct_new(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; - struct net *net = sock_net(nfnl); char *acct_name; unsigned int size = 0; u32 flags = 0; @@ -253,11 +252,10 @@ nfacct_filter_alloc(const struct nlattr * const attr) return filter; } -static int -nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_acct_get(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { - struct net *net = sock_net(nfnl); int ret = -ENOENT; struct nf_acct *cur; char *acct_name; @@ -333,11 +331,10 @@ static int nfnl_acct_try_del(struct nf_acct *cur) return ret; } -static int -nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_acct_del(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { - struct net *net = sock_net(nfnl); char *acct_name; struct nf_acct *cur; int ret = -ENOENT; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 54330fb5efaf..e924e95fcc7f 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -286,9 +286,9 @@ nfnl_cthelper_update(const struct nlattr * const tb[], return 0; } -static int 
-nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; @@ -498,9 +498,9 @@ out: return skb->len; } -static int -nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { int ret = -ENOENT, i; struct nf_conntrack_helper *cur; @@ -570,9 +570,9 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, return ret; } -static int -nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { char *helper_name = NULL; struct nf_conntrack_helper *cur; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 3921d544f5ba..5d010f27ac01 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -65,16 +65,15 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, return ret; } -static int -cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { __u16 l3num; __u8 l4num; struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout, *matching = NULL; - struct net *net = sock_net(skb->sk); char *name; int ret; @@ -239,12 
+238,11 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int -cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(skb->sk); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; @@ -339,15 +337,14 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) return ret; } -static int -cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(skb->sk); - char *name; struct ctnl_timeout *cur; int ret = -ENOENT; + char *name; if (!cda[CTA_TIMEOUT_NAME]) { list_for_each_entry(cur, &net->nfct_timeout_list, head) @@ -370,15 +367,14 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, return ret; } -static int -cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_default_set(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { __u16 l3num; __u8 l4num; struct nf_conntrack_l4proto *l4proto; - struct net *net = sock_net(skb->sk); unsigned int *timeouts; int ret; @@ -460,14 +456,14 @@ nla_put_failure: return -1; } -static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb, +static int cttimeout_default_get(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { __u16 l3num; __u8 l4num; struct nf_conntrack_l4proto *l4proto; 
- struct net *net = sock_net(skb->sk); struct sk_buff *skb2; int ret, err; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 70b6bd3b781e..6a57f10a4e0b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -785,10 +785,9 @@ static struct notifier_block nfulnl_rtnl_notifier = { .notifier_call = nfulnl_rcv_nl_event, }; -static int -nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -809,16 +808,14 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { [NFULA_CFG_FLAGS] = { .type = NLA_U16 }, }; -static int -nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfula[]) +static int nfulnl_recv_config(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfula[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t group_num = ntohs(nfmsg->res_id); struct nfulnl_instance *inst; struct nfulnl_msg_config_cmd *cmd = NULL; - struct net *net = sock_net(ctnl); struct nfnl_log_net *log = nfnl_log_pernet(net); int ret = 0; u16 flags = 0; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 861c6615253b..3d1f16cf5cd0 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -957,10 +957,10 @@ static int nfq_id_after(unsigned int id, unsigned int max) return (int)(id - max) > 0; } -static int -nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct 
nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nf_queue_entry *entry, *tmp; @@ -969,8 +969,6 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_instance *queue; LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - - struct net *net = sock_net(ctnl); struct nfnl_queue_net *q = nfnl_queue_pernet(net); queue = verdict_instance_lookup(q, queue_num, @@ -1029,14 +1027,13 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, return ct; } -static int -nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); - struct nfqnl_msg_verdict_hdr *vhdr; struct nfqnl_instance *queue; unsigned int verdict; @@ -1044,8 +1041,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, enum ip_conntrack_info uninitialized_var(ctinfo); struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; - - struct net *net = sock_net(ctnl); struct nfnl_queue_net *q = nfnl_queue_pernet(net); queue = instance_lookup(q, queue_num); @@ -1092,10 +1087,9 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, return 0; } -static int -nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -1110,16 +1104,14 @@ static const struct nf_queue_handler nfqh = { .nf_hook_drop = &nfqnl_nf_hook_drop, }; -static int -nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int 
nfqnl_recv_config(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); struct nfqnl_instance *queue; struct nfqnl_msg_config_cmd *cmd = NULL; - struct net *net = sock_net(ctnl); struct nfnl_queue_net *q = nfnl_queue_pernet(net); int ret = 0; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 9c8fab00164b..454841baa4d0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -519,9 +519,9 @@ nla_put_failure: return -1; } -static int -nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_compat_get(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { int ret = 0, target; struct nfgenmsg *nfmsg; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index df8801e02a32..4e3c3affd285 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -61,8 +61,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = { [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) }, }; -static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int xt_osf_add_callback(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const osf_attrs[]) { struct xt_osf_user_finger *f; @@ -104,7 +104,8 @@ static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, return err; } -static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, +static int xt_osf_remove_callback(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const osf_attrs[]) { -- cgit v1.2.3 From 5913beaf0d70f97135ed7191c028fd88b3848864 Mon Sep 17 00:00:00 2001 
From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:41:57 +0100 Subject: netfilter: nfnetlink: pass down netns pointer to commit() and abort() callbacks Adapt callsites to avoid recurrent lookup of the netns pointer. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 4 ++-- net/netfilter/nf_tables_api.c | 6 ++---- net/netfilter/nfnetlink.c | 6 +++--- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ceacbf5dcb73..ba0d9789eb6e 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -26,8 +26,8 @@ struct nfnetlink_subsystem { __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ const struct nfnl_callback *cb; /* callback for individual types */ - int (*commit)(struct sk_buff *skb); - int (*abort)(struct sk_buff *skb); + int (*commit)(struct net *net, struct sk_buff *skb); + int (*abort)(struct net *net, struct sk_buff *skb); }; int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 69cb5be9a174..f5c397158e29 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3865,9 +3865,8 @@ static void nf_tables_commit_release(struct nft_trans *trans) kfree(trans); } -static int nf_tables_commit(struct sk_buff *skb) +static int nf_tables_commit(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; @@ -4002,9 +4001,8 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } -static int nf_tables_abort(struct sk_buff *skb) +static int nf_tables_abort(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c 
index 7012154b28ca..a7ba23353dab 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -425,15 +425,15 @@ next: } done: if (status & NFNL_BATCH_REPLAY) { - ss->abort(oskb); + ss->abort(net, oskb); nfnl_err_reset(&err_list); nfnl_unlock(subsys_id); kfree_skb(skb); goto replay; } else if (status == NFNL_BATCH_DONE) { - ss->commit(oskb); + ss->commit(net, oskb); } else { - ss->abort(oskb); + ss->abort(net, oskb); } nfnl_err_deliver(&err_list, oskb); -- cgit v1.2.3 From ce2e56cdfbb010e22073d303161e74c144ebe731 Mon Sep 17 00:00:00 2001 From: Shikha Singh Date: Fri, 20 Nov 2015 06:40:19 -0500 Subject: NFC: digital: Add Type4A tags support The definition of DIGITAL_PROTO_NFCA_RF_TECH is modified to support ISO14443 Type4A tags. Without this change it is not possible to start polling for ISO14443 Type4A tags from the initiator side. Signed-off-by: Shikha Singh Signed-off-by: Samuel Ortiz --- net/nfc/digital_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 23c2a118ac9f..dd9003f38822 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -20,7 +20,8 @@ #include "digital.h" #define DIGITAL_PROTO_NFCA_RF_TECH \ - (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_NFC_DEP_MASK) + (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | \ + NFC_PROTO_NFC_DEP_MASK | NFC_PROTO_ISO14443_MASK) #define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK -- cgit v1.2.3 From 2a84193f14c4196ee94bf1d44c5f28bcabe7e840 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Wed, 23 Dec 2015 23:45:15 +0100 Subject: NFC: nci: Fix error check of nci_hci_create_pipe() result net/nfc/nci/hci.c: In function nci_hci_connect_gate : net/nfc/nci/hci.c:679: warning: comparison is always false due to limited range of data type In case of error, nci_hci_create_pipe() returns NCI_HCI_INVALID_PIPE, and not a negative error code. Correct the check to fix this. 
Acked-by: Geert Uytterhoeven Reported-by: Dan Carpenter Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- net/nfc/nci/hci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 2aedac15cb59..a0ab26d535dc 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -676,7 +676,7 @@ int nci_hci_connect_gate(struct nci_dev *ndev, break; default: pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r); - if (pipe < 0) + if (pipe == NCI_HCI_INVALID_PIPE) return r; pipe_created = true; break; -- cgit v1.2.3 From 9afec6d3866b8451abcf1a7a1a381a3be6c83386 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Wed, 23 Dec 2015 23:45:18 +0100 Subject: nfc: netlink: HCI event connectivity implementation Add support for missing HCI event EVT_CONNECTIVITY and forward it to userspace. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 1 + net/nfc/core.c | 13 +++++++++++++ net/nfc/netlink.c | 37 +++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 1 + 4 files changed, 52 insertions(+) (limited to 'net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index dcfcfc9c00bf..1a3de8b34ad2 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -299,6 +299,7 @@ void nfc_driver_failure(struct nfc_dev *dev, int err); int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction); +int nfc_se_connectivity(struct nfc_dev *dev, u8 se_idx); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_remove_se(struct nfc_dev *dev, u32 se_idx); struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx); diff --git a/net/nfc/core.c b/net/nfc/core.c index 1fe3d3b362c0..122bb81da918 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -953,6 +953,19 @@ out: } EXPORT_SYMBOL(nfc_se_transaction); +int nfc_se_connectivity(struct nfc_dev *dev, u8 se_idx) +{ + int rc; + + pr_debug("connectivity: %x\n", se_idx); + 
+ device_lock(&dev->dev); + rc = nfc_genl_se_connectivity(dev, se_idx); + device_unlock(&dev->dev); + return rc; +} +EXPORT_SYMBOL(nfc_se_connectivity); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f58c1fba1026..ea023b35f1c2 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -552,6 +552,43 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx) +{ + struct nfc_se *se; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_CONNECTIVITY); + if (!hdr) + goto free_msg; + + se = nfc_find_se(dev, se_idx); + if (!se) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index c20b784ad720..6c6f76b370b1 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -105,6 +105,7 @@ int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction); +int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx); struct nfc_dev *nfc_get_device(unsigned int idx); -- cgit v1.2.3 From c6dc65d885b98898bf287aaf44e020077b41769f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Dec 2015 23:45:27 +0100 Subject: NFC: nci: memory leak in 
nci_core_conn_create() I've moved the check for "number_destination_params" forward a few lines to avoid leaking "cmd". Fixes: caa575a86ec1 ('NFC: nci: fix possible crash in nci_core_conn_create') Acked-by: Christophe Ricard Signed-off-by: Dan Carpenter Signed-off-by: Samuel Ortiz --- net/nfc/nci/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 10c99a578421..fbb7a2b57b44 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -610,14 +610,14 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, struct nci_core_conn_create_cmd *cmd; struct core_conn_create_data data; + if (!number_destination_params) + return -EINVAL; + data.length = params_len + sizeof(struct nci_core_conn_create_cmd); cmd = kzalloc(data.length, GFP_KERNEL); if (!cmd) return -ENOMEM; - if (!number_destination_params) - return -EINVAL; - cmd->destination_type = destination_type; cmd->number_destination_params = number_destination_params; memcpy(cmd->params, params, params_len); -- cgit v1.2.3 From f3a4094558ddf8afa8bb58250d548e15e059c65a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 30 Dec 2015 16:28:25 +0100 Subject: ethtool: Add phy statistics Ethernet PHYs can maintain statistics, for example errors while idle and receive errors. Add an ethtool mechanism to retrieve these statistics, using the same model as MAC statistics. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/phy.h | 6 ++++ include/uapi/linux/ethtool.h | 3 ++ net/core/ethtool.c | 81 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 89 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/phy.h b/include/linux/phy.h index 05fde31b6dc6..a89cb0eef911 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -589,6 +589,12 @@ struct phy_driver { int (*module_eeprom)(struct phy_device *dev, struct ethtool_eeprom *ee, u8 *data); + /* Get statistics from the phy using ethtool */ + int (*get_sset_count)(struct phy_device *dev); + void (*get_strings)(struct phy_device *dev, u8 *data); + void (*get_stats)(struct phy_device *dev, + struct ethtool_stats *stats, u64 *data); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index cd1629170103..57fa39005e79 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -542,6 +542,7 @@ struct ethtool_pauseparam { * now deprecated * @ETH_SS_FEATURES: Device feature names * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names + * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -551,6 +552,7 @@ enum ethtool_stringset { ETH_SS_FEATURES, ETH_SS_RSS_HASH_FUNCS, ETH_SS_TUNABLES, + ETH_SS_PHY_STATS, }; /** @@ -1225,6 +1227,7 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ #define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */ #define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ +#define ETHTOOL_GPHYSTATS 0x0000004a /* get PHY-specific statistics */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 09948a726347..daf04709dd3c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -191,6 +191,23 @@ static int 
ethtool_set_features(struct net_device *dev, void __user *useraddr) return ret; } +static int phy_get_sset_count(struct phy_device *phydev) +{ + int ret; + + if (phydev->drv->get_sset_count && + phydev->drv->get_strings && + phydev->drv->get_stats) { + mutex_lock(&phydev->lock); + ret = phydev->drv->get_sset_count(phydev); + mutex_unlock(&phydev->lock); + + return ret; + } + + return -EOPNOTSUPP; +} + static int __ethtool_get_sset_count(struct net_device *dev, int sset) { const struct ethtool_ops *ops = dev->ethtool_ops; @@ -204,6 +221,13 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_TUNABLES) return ARRAY_SIZE(tunable_strings); + if (sset == ETH_SS_PHY_STATS) { + if (dev->phydev) + return phy_get_sset_count(dev->phydev); + else + return -EOPNOTSUPP; + } + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else @@ -223,7 +247,17 @@ static void __ethtool_get_strings(struct net_device *dev, sizeof(rss_hash_func_strings)); else if (stringset == ETH_SS_TUNABLES) memcpy(data, tunable_strings, sizeof(tunable_strings)); - else + else if (stringset == ETH_SS_PHY_STATS) { + struct phy_device *phydev = dev->phydev; + + if (phydev) { + mutex_lock(&phydev->lock); + phydev->drv->get_strings(phydev, data); + mutex_unlock(&phydev->lock); + } else { + return; + } + } else /* ops->get_strings is valid because checked earlier */ ops->get_strings(dev, stringset, data); } @@ -1401,6 +1435,47 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_stats stats; + struct phy_device *phydev = dev->phydev; + u64 *data; + int ret, n_stats; + + if (!phydev) + return -EOPNOTSUPP; + + n_stats = phy_get_sset_count(phydev); + + if (n_stats < 0) + return n_stats; + WARN_ON(n_stats == 0); + + if (copy_from_user(&stats, useraddr, sizeof(stats))) + return -EFAULT; + + stats.n_stats = 
n_stats; + data = kmalloc_array(n_stats, sizeof(u64), GFP_USER); + if (!data) + return -ENOMEM; + + mutex_lock(&phydev->lock); + phydev->drv->get_stats(phydev, &stats, data); + mutex_unlock(&phydev->lock); + + ret = -EFAULT; + if (copy_to_user(useraddr, &stats, sizeof(stats))) + goto out; + useraddr += sizeof(stats); + if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64))) + goto out; + ret = 0; + + out: + kfree(data); + return ret; +} + static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) { struct ethtool_perm_addr epaddr; @@ -1779,6 +1854,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GSSET_INFO: case ETHTOOL_GSTRINGS: case ETHTOOL_GSTATS: + case ETHTOOL_GPHYSTATS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: case ETHTOOL_GUFO: @@ -1991,6 +2067,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_STUNABLE: rc = ethtool_set_tunable(dev, useraddr); break; + case ETHTOOL_GPHYSTATS: + rc = ethtool_get_phy_stats(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From c7862a5f0de5f521c545f3436f0aa190964342dd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Dec 2015 18:21:44 +0100 Subject: netfilter: nft_limit: allow to invert matching criteria This patch allows you to invert the ratelimit matching criteria, so you can match packets over the ratelimit. This is required to support what hashlimit does. 
Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nft_limit.c | 16 +++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b48a3ab761f8..22043ce95ae6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -780,6 +780,10 @@ enum nft_limit_type { NFT_LIMIT_PKT_BYTES }; +enum nft_limit_flags { + NFT_LIMIT_F_INV = (1 << 0), +}; + /** * enum nft_limit_attributes - nf_tables limit expression netlink attributes * @@ -787,6 +791,7 @@ enum nft_limit_type { * @NFTA_LIMIT_UNIT: refill unit (NLA_U64) * @NFTA_LIMIT_BURST: burst (NLA_U32) * @NFTA_LIMIT_TYPE: type of limit (NLA_U32: enum nft_limit_type) + * @NFTA_LIMIT_FLAGS: flags (NLA_U32: enum nft_limit_flags) */ enum nft_limit_attributes { NFTA_LIMIT_UNSPEC, @@ -794,6 +799,7 @@ enum nft_limit_attributes { NFTA_LIMIT_UNIT, NFTA_LIMIT_BURST, NFTA_LIMIT_TYPE, + NFTA_LIMIT_FLAGS, __NFTA_LIMIT_MAX }; #define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 5d67938f8b2f..99d18578afc6 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -26,6 +26,7 @@ struct nft_limit { u64 rate; u64 nsecs; u32 burst; + bool invert; }; static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) @@ -44,11 +45,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) if (delta >= 0) { limit->tokens = delta; spin_unlock_bh(&limit_lock); - return false; + return limit->invert; } limit->tokens = tokens; spin_unlock_bh(&limit_lock); - return true; + return !limit->invert; } static int nft_limit_init(struct nft_limit *limit, @@ -78,6 +79,12 @@ static int nft_limit_init(struct nft_limit *limit, limit->rate = rate; } + if (tb[NFTA_LIMIT_FLAGS]) { + u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS])); + + if (flags & 
NFT_LIMIT_F_INV) + limit->invert = true; + } limit->last = ktime_get_ns(); return 0; @@ -86,13 +93,15 @@ static int nft_limit_init(struct nft_limit *limit, static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, enum nft_limit_type type) { + u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0; u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); u64 rate = limit->rate - limit->burst; if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) || nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) || nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) || - nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type))) + nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) || + nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags))) goto nla_put_failure; return 0; @@ -120,6 +129,7 @@ static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, [NFTA_LIMIT_BURST] = { .type = NLA_U32 }, [NFTA_LIMIT_TYPE] = { .type = NLA_U32 }, + [NFTA_LIMIT_FLAGS] = { .type = NLA_U32 }, }; static int nft_limit_pkts_init(const struct nft_ctx *ctx, -- cgit v1.2.3 From 502061f81d3eb4518d2e72178e494a8547788ad0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 3 Jan 2016 21:02:18 +0100 Subject: netfilter: nf_tables: add packet duplication to the netdev family You can use this to duplicate packets and inject them at the egress path of the specified interface. This duplication allows you to inspect traffic from the dummy or any other interface dedicated to this purpose. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_dup_netdev.h | 6 +++ net/netfilter/Kconfig | 16 ++++++ net/netfilter/Makefile | 6 +++ net/netfilter/nf_dup_netdev.c | 40 +++++++++++++++ net/netfilter/nft_dup_netdev.c | 97 +++++++++++++++++++++++++++++++++++ 5 files changed, 165 insertions(+) create mode 100644 include/net/netfilter/nf_dup_netdev.h create mode 100644 net/netfilter/nf_dup_netdev.c create mode 100644 net/netfilter/nft_dup_netdev.c (limited to 'net') diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h new file mode 100644 index 000000000000..397dcae349f9 --- /dev/null +++ b/include/net/netfilter/nf_dup_netdev.h @@ -0,0 +1,6 @@ +#ifndef _NF_DUP_NETDEV_H_ +#define _NF_DUP_NETDEV_H_ + +void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); + +#endif diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4692782b5280..8514cc4b22a8 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -563,6 +563,22 @@ config NFT_COMPAT x_tables match/target extensions over the nf_tables framework. +if NF_TABLES_NETDEV + +config NF_DUP_NETDEV + tristate "Netfilter packet duplication support" + help + This option enables the generic packet duplication infrastructure + for Netfilter. + +config NFT_DUP_NETDEV + tristate "Netfilter nf_tables netdev packet duplication support" + select NF_DUP_NETDEV + help + This option enables packet duplication for the "netdev" family. 
+ +endif # NF_TABLES_NETDEV + endif # NF_TABLES config NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 22934846b5d1..5c9913359537 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -66,6 +66,9 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # SYNPROXY obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o +# generic packet duplication from netdev family +obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o + # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o @@ -90,6 +93,9 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o +# nf_tables netdev +obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c new file mode 100644 index 000000000000..8414ee1a0319 --- /dev/null +++ b/net/netfilter/nf_dup_netdev.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) +{ + struct net_device *dev; + struct sk_buff *skb; + + dev = dev_get_by_index_rcu(pkt->net, oif); + if (dev == NULL) + return; + + skb = skb_clone(pkt->skb, GFP_ATOMIC); + if (skb == NULL) + return; + + if (skb_mac_header_was_set(skb)) + skb_push(skb, skb->mac_len); + + skb->dev = dev; + skb_sender_cpu_clear(skb); + dev_queue_xmit(skb); +} +EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c new file mode 100644 index 000000000000..2cc1e0ef56e8 --- /dev/null +++ b/net/netfilter/nft_dup_netdev.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_dup_netdev { + enum nft_registers sreg_dev:8; +}; + +static void nft_dup_netdev_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_dup_netdev *priv = nft_expr_priv(expr); + int oif = regs->data[priv->sreg_dev]; + + nf_dup_netdev_egress(pkt, oif); +} + +static const struct nla_policy nft_dup_netdev_policy[NFTA_DUP_MAX + 1] = { + [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 }, +}; + +static int nft_dup_netdev_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dup_netdev *priv = nft_expr_priv(expr); + + if (tb[NFTA_DUP_SREG_DEV] == NULL) + return -EINVAL; + + priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); + return nft_validate_register_load(priv->sreg_dev, sizeof(int)); +} + +static const struct nft_expr_ops nft_dup_netdev_ingress_ops; + +static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_dup_netdev *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dup_netdev_type; +static const struct nft_expr_ops nft_dup_netdev_ops = { + .type = &nft_dup_netdev_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_netdev)), + .eval = nft_dup_netdev_eval, + .init = nft_dup_netdev_init, + .dump = nft_dup_netdev_dump, +}; + +static struct nft_expr_type nft_dup_netdev_type __read_mostly = { + .family = NFPROTO_NETDEV, + .name = "dup", + .ops = &nft_dup_netdev_ops, + .policy = nft_dup_netdev_policy, + .maxattr = NFTA_DUP_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_dup_netdev_module_init(void) +{ + return nft_register_expr(&nft_dup_netdev_type); +} + +static void __exit nft_dup_netdev_module_exit(void) +{ + nft_unregister_expr(&nft_dup_netdev_type); +} + 
+module_init(nft_dup_netdev_module_init); +module_exit(nft_dup_netdev_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_AF_EXPR(5, "dup"); -- cgit v1.2.3 From 39e6dea28adc874f7021e5580c13cab0b58407ea Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 Nov 2015 13:39:38 +0100 Subject: netfilter: nf_tables: add forward expression to the netdev family You can use this to forward packets from ingress to the egress path of the specified interface. This provides a fast path to bounce packets from one interface to another specific destination interface. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 12 ++++ net/netfilter/Kconfig | 6 ++ net/netfilter/Makefile | 1 + net/netfilter/nft_fwd_netdev.c | 98 ++++++++++++++++++++++++++++++++ 4 files changed, 117 insertions(+) create mode 100644 net/netfilter/nft_fwd_netdev.c (limited to 'net') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 22043ce95ae6..731288a039f6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -983,6 +983,18 @@ enum nft_dup_attributes { }; #define NFTA_DUP_MAX (__NFTA_DUP_MAX - 1) +/** + * enum nft_fwd_attributes - nf_tables fwd expression netlink attributes + * + * @NFTA_FWD_SREG_DEV: source register of output interface (NLA_U32: nft_register) + */ +enum nft_fwd_attributes { + NFTA_FWD_UNSPEC, + NFTA_FWD_SREG_DEV, + __NFTA_FWD_MAX +}; +#define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1) + /** * enum nft_gen_attributes - nf_tables ruleset generation attributes * diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8514cc4b22a8..8c067e6663a1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -577,6 +577,12 @@ config NFT_DUP_NETDEV help This option enables packet duplication for the "netdev" family. 
+config NFT_FWD_NETDEV + tristate "Netfilter nf_tables netdev packet forwarding support" + select NF_DUP_NETDEV + help + This option enables packet forwarding for the "netdev" family. + endif # NF_TABLES_NETDEV endif # NF_TABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5c9913359537..69134541d65b 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o +obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c new file mode 100644 index 000000000000..763ebc3e0b2b --- /dev/null +++ b/net/netfilter/nft_fwd_netdev.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_fwd_netdev { + enum nft_registers sreg_dev:8; +}; + +static void nft_fwd_netdev_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_fwd_netdev *priv = nft_expr_priv(expr); + int oif = regs->data[priv->sreg_dev]; + + nf_dup_netdev_egress(pkt, oif); + regs->verdict.code = NF_DROP; +} + +static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = { + [NFTA_FWD_SREG_DEV] = { .type = NLA_U32 }, +}; + +static int nft_fwd_netdev_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_fwd_netdev *priv = nft_expr_priv(expr); + + if (tb[NFTA_FWD_SREG_DEV] == NULL) + return -EINVAL; + + priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); + return nft_validate_register_load(priv->sreg_dev, sizeof(int)); +} + +static const struct nft_expr_ops nft_fwd_netdev_ingress_ops; + +static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_fwd_netdev *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_fwd_netdev_type; +static const struct nft_expr_ops nft_fwd_netdev_ops = { + .type = &nft_fwd_netdev_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_netdev)), + .eval = nft_fwd_netdev_eval, + .init = nft_fwd_netdev_init, + .dump = nft_fwd_netdev_dump, +}; + +static struct nft_expr_type nft_fwd_netdev_type __read_mostly = { + .family = NFPROTO_NETDEV, + .name = "fwd", + .ops = &nft_fwd_netdev_ops, + .policy = nft_fwd_netdev_policy, + .maxattr = NFTA_FWD_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_fwd_netdev_module_init(void) +{ + return nft_register_expr(&nft_fwd_netdev_type); +} + +static void __exit nft_fwd_netdev_module_exit(void) +{ + 
nft_unregister_expr(&nft_fwd_netdev_type); +} + +module_init(nft_fwd_netdev_module_init); +module_exit(nft_fwd_netdev_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_AF_EXPR(5, "fwd"); -- cgit v1.2.3 From ad6d950393138830edae2efcc500aa69b467b89c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 3 Jan 2016 22:41:24 +0100 Subject: netfilter: nf_ct_helper: define pr_fmt() Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 17 ++++++++--------- net/netfilter/nf_conntrack_irc.c | 7 ++++--- net/netfilter/nf_conntrack_sane.c | 19 +++++++++---------- net/netfilter/nf_conntrack_sip.c | 5 +++-- net/netfilter/nf_conntrack_tftp.c | 7 ++++--- 5 files changed, 28 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index b666959f17c0..883c691ec8d0 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -505,11 +507,11 @@ skip_nl_seq: different IP address. Simply don't record it for NAT. 
*/ if (cmd.l3num == PF_INET) { - pr_debug("conntrack_ftp: NOT RECORDING: %pI4 != %pI4\n", + pr_debug("NOT RECORDING: %pI4 != %pI4\n", &cmd.u3.ip, &ct->tuplehash[dir].tuple.src.u3.ip); } else { - pr_debug("conntrack_ftp: NOT RECORDING: %pI6 != %pI6\n", + pr_debug("NOT RECORDING: %pI6 != %pI6\n", cmd.u3.ip6, ct->tuplehash[dir].tuple.src.u3.ip6); } @@ -586,8 +588,7 @@ static void nf_conntrack_ftp_fini(void) if (ftp[i][j].me == NULL) continue; - pr_debug("nf_ct_ftp: unregistering helper for pf: %d " - "port: %d\n", + pr_debug("unregistering helper for pf: %d port: %d\n", ftp[i][j].tuple.src.l3num, ports[i]); nf_conntrack_helper_unregister(&ftp[i][j]); } @@ -625,14 +626,12 @@ static int __init nf_conntrack_ftp_init(void) else sprintf(ftp[i][j].name, "ftp-%d", ports[i]); - pr_debug("nf_ct_ftp: registering helper for pf: %d " - "port: %d\n", + pr_debug("registering helper for pf: %d port: %d\n", ftp[i][j].tuple.src.l3num, ports[i]); ret = nf_conntrack_helper_register(&ftp[i][j]); if (ret) { - printk(KERN_ERR "nf_ct_ftp: failed to register" - " helper for pf: %d port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); + pr_err("failed to register helper for pf: %d port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); nf_conntrack_ftp_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 0fd2976db7ee..8b6da2719600 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -9,6 +9,8 @@ * 2 of the License, or (at your option) any later version. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -237,7 +239,7 @@ static int __init nf_conntrack_irc_init(void) int i, ret; if (max_dcc_channels < 1) { - printk(KERN_ERR "nf_ct_irc: max_dcc_channels must not be zero\n"); + pr_err("max_dcc_channels must not be zero\n"); return -EINVAL; } @@ -267,8 +269,7 @@ static int __init nf_conntrack_irc_init(void) ret = nf_conntrack_helper_register(&irc[i]); if (ret) { - printk(KERN_ERR "nf_ct_irc: failed to register helper " - "for pf: %u port: %u\n", + pr_err("failed to register helper for pf: %u port: %u\n", irc[i].tuple.src.l3num, ports[i]); nf_conntrack_irc_fini(); return ret; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 4a2134fd3fcb..7523a575f6d1 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -17,6 +17,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -120,14 +122,14 @@ static int help(struct sk_buff *skb, ct_sane_info->state = SANE_STATE_NORMAL; if (datalen < sizeof(struct sane_reply_net_start)) { - pr_debug("nf_ct_sane: NET_START reply too short\n"); + pr_debug("NET_START reply too short\n"); goto out; } reply = sb_ptr; if (reply->status != htonl(SANE_STATUS_SUCCESS)) { /* saned refused the command */ - pr_debug("nf_ct_sane: unsuccessful SANE_STATUS = %u\n", + pr_debug("unsuccessful SANE_STATUS = %u\n", ntohl(reply->status)); goto out; } @@ -148,7 +150,7 @@ static int help(struct sk_buff *skb, &tuple->src.u3, &tuple->dst.u3, IPPROTO_TCP, NULL, &reply->port); - pr_debug("nf_ct_sane: expect: "); + pr_debug("expect: "); nf_ct_dump_tuple(&exp->tuple); /* Can't expect this? Best to drop packet now. 
*/ @@ -178,8 +180,7 @@ static void nf_conntrack_sane_fini(void) for (i = 0; i < ports_c; i++) { for (j = 0; j < 2; j++) { - pr_debug("nf_ct_sane: unregistering helper for pf: %d " - "port: %d\n", + pr_debug("unregistering helper for pf: %d port: %d\n", sane[i][j].tuple.src.l3num, ports[i]); nf_conntrack_helper_unregister(&sane[i][j]); } @@ -216,14 +217,12 @@ static int __init nf_conntrack_sane_init(void) else sprintf(sane[i][j].name, "sane-%d", ports[i]); - pr_debug("nf_ct_sane: registering helper for pf: %d " - "port: %d\n", + pr_debug("registering helper for pf: %d port: %d\n", sane[i][j].tuple.src.l3num, ports[i]); ret = nf_conntrack_helper_register(&sane[i][j]); if (ret) { - printk(KERN_ERR "nf_ct_sane: failed to " - "register helper for pf: %d port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); + pr_err("failed to register helper for pf: %d port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); nf_conntrack_sane_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 885b4aba3695..3e06402739e0 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1665,8 +1667,7 @@ static int __init nf_conntrack_sip_init(void) ret = nf_conntrack_helper_register(&sip[i][j]); if (ret) { - printk(KERN_ERR "nf_ct_sip: failed to register" - " helper for pf: %u port: %u\n", + pr_err("failed to register helper for pf: %u port: %u\n", sip[i][j].tuple.src.l3num, ports[i]); nf_conntrack_sip_fini(); return ret; diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index e68ab4fbd71f..36f964066461 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -5,6 +5,8 @@ * published by the Free Software Foundation. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -138,9 +140,8 @@ static int __init nf_conntrack_tftp_init(void) ret = nf_conntrack_helper_register(&tftp[i][j]); if (ret) { - printk(KERN_ERR "nf_ct_tftp: failed to register" - " helper for pf: %u port: %u\n", - tftp[i][j].tuple.src.l3num, ports[i]); + pr_err("failed to register helper for pf: %u port: %u\n", + tftp[i][j].tuple.src.l3num, ports[i]); nf_conntrack_tftp_fini(); return ret; } -- cgit v1.2.3 From d3d20725407955d0bb107939f23535d2e7dadbee Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 4 Jan 2016 14:23:21 +0200 Subject: Bluetooth: hci_bcm: move all Broadcom ACPI IDs to BCM HCI driver The IDs should all be for Broadcom BCM43241 module, and hci_bcm is now the proper driver for them. This removes one of two different ways of handling PM with the module. Cc: Johannes Berg Signed-off-by: Heikki Krogerus Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 4 ++++ net/rfkill/rfkill-gpio.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 5c7c696c5838..abf13a754689 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -814,7 +814,11 @@ static const struct hci_uart_proto bcm_proto = { #ifdef CONFIG_ACPI static const struct acpi_device_id bcm_acpi_match[] = { + { "BCM2E1A", 0 }, { "BCM2E39", 0 }, + { "BCM2E3D", 0 }, + { "BCM2E40", 0 }, + { "BCM2E64", 0 }, { "BCM2E65", 0 }, { "BCM2E67", 0 }, { }, diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 93127220cb54..4b1e3f35f06c 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -163,10 +163,6 @@ static int rfkill_gpio_remove(struct platform_device *pdev) #ifdef CONFIG_ACPI static const struct acpi_device_id rfkill_acpi_match[] = { - { "BCM2E1A", RFKILL_TYPE_BLUETOOTH }, - { "BCM2E3D", RFKILL_TYPE_BLUETOOTH }, - { "BCM2E40", RFKILL_TYPE_BLUETOOTH }, - { 
"BCM2E64", RFKILL_TYPE_BLUETOOTH }, { "BCM4752", RFKILL_TYPE_GPS }, { "LNV4752", RFKILL_TYPE_GPS }, { }, -- cgit v1.2.3 From 29663b0cc1d5b9b6e2f6caf41e86c599a0310def Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 23 Dec 2015 22:36:32 +0100 Subject: mac802154: constify ieee802154_llsec_ops structure The ieee802154_llsec_ops structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/ieee802154_netdev.h | 2 +- net/mac802154/mac_cmd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index a62a051a3a2f..c4b31601cd53 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -337,7 +337,7 @@ struct ieee802154_mlme_ops { void (*get_mac_params)(struct net_device *dev, struct ieee802154_mac_params *params); - struct ieee802154_llsec_ops *llsec; + const struct ieee802154_llsec_ops *llsec; }; static inline struct ieee802154_mlme_ops * diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 8606da459ff3..3db16346cab3 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -126,7 +126,7 @@ static void mac802154_get_mac_params(struct net_device *dev, params->lbt = wpan_dev->lbt; } -static struct ieee802154_llsec_ops mac802154_llsec_ops = { +static const struct ieee802154_llsec_ops mac802154_llsec_ops = { .get_params = mac802154_get_params, .set_params = mac802154_set_params, .add_key = mac802154_add_key, -- cgit v1.2.3 From 98f40b3e22aed519bc545ba3cc7d884ede9428c9 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 29 Dec 2015 13:06:59 +0100 Subject: l2tp: rely on ppp layer for skb scrubbing Since 79c441ae505c ("ppp: implement x-netns support"), the PPP layer calls skb_scrub_packet() whenever the skb is received on the PPP device. 
Manually resetting packet meta-data in the L2TP layer is thus redundant. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index d93f113cb522..652c250b9a3b 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -230,26 +230,11 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int if (sk->sk_state & PPPOX_BOUND) { struct pppox_sock *po; + l2tp_dbg(session, PPPOL2TP_MSG_DATA, "%s: recv %d byte data frame, passing to ppp\n", session->name, data_len); - /* We need to forget all info related to the L2TP packet - * gathered in the skb as we are going to reuse the same - * skb for the inner packet. - * Namely we need to: - * - reset xfrm (IPSec) information as it applies to - * the outer L2TP packet and not to the inner one - * - release the dst to force a route lookup on the inner - * IP packet since skb->dst currently points to the dst - * of the UDP tunnel - * - reset netfilter information as it doesn't apply - * to the inner packet either - */ - secpath_reset(skb); - skb_dst_drop(skb); - nf_reset(skb); - po = pppox_sk(sk); ppp_input(&po->chan, skb); } else { -- cgit v1.2.3 From 197c949e7798fbf28cfadc69d9ca0c2abbf93191 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Dec 2015 08:51:12 -0500 Subject: udp: properly support MSG_PEEK with truncated buffers Backport of this upstream commit into stable kernels : 89c22d8c3b27 ("net: Fix skb csum races when peeking") exposed a bug in udp stack vs MSG_PEEK support, when user provides a buffer smaller than skb payload. In this case, skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); returns -EFAULT. 
This bug does not happen in upstream kernels since Al Viro did a great job to replace this into : skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg); This variant is safe vs short buffers. For the time being, instead of reverting Herbert Xu's patch and adding back the invalid skb->ip_summed changes, simply store the result of udp_lib_checksum_complete() so that we avoid computing the checksum a second time, and avoid the problematic skb_copy_and_csum_datagram_iovec() call. This patch can be applied on recent kernels as it avoids a double checksumming, then backported to stable kernels as a bug fix. Signed-off-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 ++++-- net/ipv6/udp.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8841e984f8bf..ac14ae44390d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1271,6 +1271,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); + bool checksum_valid = false; bool slow; if (flags & MSG_ERRQUEUE) @@ -1296,11 +1297,12 @@ try_again: */ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) + checksum_valid = !udp_lib_checksum_complete(skb); + if (!checksum_valid) goto csum_copy_err; } - if (skb_csum_unnecessary(skb)) + if (checksum_valid || skb_csum_unnecessary(skb)) err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), msg, copied); else { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9da3287a3923..00775ee27d86 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -402,6 +402,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); + bool checksum_valid = false; int is_udp4; bool slow; @@ -433,11 +434,12 @@ try_again: */ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if
(udp_lib_checksum_complete(skb)) + checksum_valid = !udp_lib_checksum_complete(skb); + if (!checksum_valid) goto csum_copy_err; } - if (skb_csum_unnecessary(skb)) + if (checksum_valid || skb_csum_unnecessary(skb)) err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), msg, copied); else { -- cgit v1.2.3 From ef456144da8ef507c8cf504284b6042e9201a05c Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 4 Jan 2016 17:41:45 -0500 Subject: soreuseport: define reuseport groups struct sock_reuseport is an optional shared structure referenced by each socket belonging to a reuseport group. When a socket is bound to an address/port not yet in use and the reuseport flag has been set, the structure will be allocated and attached to the newly bound socket. When subsequent calls to bind are made for the same address/port, the shared structure will be updated to include the new socket and the newly bound socket will reference the group structure. Usually, when an incoming packet was destined for a reuseport group, all sockets in the same group needed to be considered before a dispatching decision was made. With this structure, an appropriate socket can be found after looking up just one socket in the group. This shared structure will also allow for more complicated decisions to be made when selecting a socket (eg a BPF filter). This work is based off a similar implementation written by Ying Cai for implementing policy-based reuseport selection. Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 2 + include/net/sock_reuseport.h | 20 +++++ net/core/Makefile | 2 +- net/core/sock_reuseport.c | 173 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 include/net/sock_reuseport.h create mode 100644 net/core/sock_reuseport.c (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 3794cdde837a..e830c1006935 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -318,6 +318,7 @@ struct cg_proto; * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) * @sk_backlog_rcv: callback to process the backlog * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 + * @sk_reuseport_cb: reuseport group container */ struct sock { /* @@ -453,6 +454,7 @@ struct sock { int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); + struct sock_reuseport __rcu *sk_reuseport_cb; }; #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h new file mode 100644 index 000000000000..67d1eb8fd7af --- /dev/null +++ b/include/net/sock_reuseport.h @@ -0,0 +1,20 @@ +#ifndef _SOCK_REUSEPORT_H +#define _SOCK_REUSEPORT_H + +#include +#include + +struct sock_reuseport { + struct rcu_head rcu; + + u16 max_socks; /* length of socks */ + u16 num_socks; /* elements in socks */ + struct sock *socks[0]; /* array of sock pointers */ +}; + +extern int reuseport_alloc(struct sock *sk); +extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2); +extern void reuseport_detach_sock(struct sock *sk); +extern struct sock *reuseport_select_sock(struct sock *sk, u32 hash); + +#endif /* _SOCK_REUSEPORT_H */ diff --git a/net/core/Makefile b/net/core/Makefile index 086b01fbe1bd..0b835de04de3 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o 
dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o dev_ioctl.o tso.o + sock_diag.o dev_ioctl.o tso.o sock_reuseport.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c new file mode 100644 index 000000000000..963c8d5f3027 --- /dev/null +++ b/net/core/sock_reuseport.c @@ -0,0 +1,173 @@ +/* + * To speed up listener socket lookup, create an array to store all sockets + * listening on the same port. This allows a decision to be made after finding + * the first socket. + */ + +#include +#include + +#define INIT_SOCKS 128 + +static DEFINE_SPINLOCK(reuseport_lock); + +static struct sock_reuseport *__reuseport_alloc(u16 max_socks) +{ + size_t size = sizeof(struct sock_reuseport) + + sizeof(struct sock *) * max_socks; + struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC); + + if (!reuse) + return NULL; + + reuse->max_socks = max_socks; + + return reuse; +} + +int reuseport_alloc(struct sock *sk) +{ + struct sock_reuseport *reuse; + + /* bh lock used since this function call may precede hlist lock in + * soft irq of receive path or setsockopt from process context + */ + spin_lock_bh(&reuseport_lock); + WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)), + "multiple allocations for the same socket"); + reuse = __reuseport_alloc(INIT_SOCKS); + if (!reuse) { + spin_unlock_bh(&reuseport_lock); + return -ENOMEM; + } + + reuse->socks[0] = sk; + reuse->num_socks = 1; + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + + spin_unlock_bh(&reuseport_lock); + + return 0; +} +EXPORT_SYMBOL(reuseport_alloc); + +static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) +{ + struct sock_reuseport *more_reuse; + u32 more_socks_size, i; + + more_socks_size = reuse->max_socks * 2U; + if (more_socks_size > U16_MAX) + return NULL; + + more_reuse = __reuseport_alloc(more_socks_size); + if (!more_reuse) + return 
NULL; + + more_reuse->max_socks = more_socks_size; + more_reuse->num_socks = reuse->num_socks; + + memcpy(more_reuse->socks, reuse->socks, + reuse->num_socks * sizeof(struct sock *)); + + for (i = 0; i < reuse->num_socks; ++i) + rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, + more_reuse); + + kfree_rcu(reuse, rcu); + return more_reuse; +} + +/** + * reuseport_add_sock - Add a socket to the reuseport group of another. + * @sk: New socket to add to the group. + * @sk2: Socket belonging to the existing reuseport group. + * May return ENOMEM and not add socket to group under memory pressure. + */ +int reuseport_add_sock(struct sock *sk, const struct sock *sk2) +{ + struct sock_reuseport *reuse; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)), + WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)), + "socket already in reuseport group"); + + if (reuse->num_socks == reuse->max_socks) { + reuse = reuseport_grow(reuse); + if (!reuse) { + spin_unlock_bh(&reuseport_lock); + return -ENOMEM; + } + } + + reuse->socks[reuse->num_socks] = sk; + /* paired with smp_rmb() in reuseport_select_sock() */ + smp_wmb(); + reuse->num_socks++; + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + + spin_unlock_bh(&reuseport_lock); + + return 0; +} +EXPORT_SYMBOL(reuseport_add_sock); + +void reuseport_detach_sock(struct sock *sk) +{ + struct sock_reuseport *reuse; + int i; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + rcu_assign_pointer(sk->sk_reuseport_cb, NULL); + + for (i = 0; i < reuse->num_socks; i++) { + if (reuse->socks[i] == sk) { + reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; + reuse->num_socks--; + if (reuse->num_socks == 0) + kfree_rcu(reuse, rcu); + break; + } + } + spin_unlock_bh(&reuseport_lock); +} +EXPORT_SYMBOL(reuseport_detach_sock); + +/** + * 
reuseport_select_sock - Select a socket from an SO_REUSEPORT group. + * @sk: First socket in the group. + * @hash: Use this hash to select. + * Returns a socket that should receive the packet (or NULL on error). + */ +struct sock *reuseport_select_sock(struct sock *sk, u32 hash) +{ + struct sock_reuseport *reuse; + struct sock *sk2 = NULL; + u16 socks; + + rcu_read_lock(); + reuse = rcu_dereference(sk->sk_reuseport_cb); + + /* if memory allocation failed or add call is not yet complete */ + if (!reuse) + goto out; + + socks = READ_ONCE(reuse->num_socks); + if (likely(socks)) { + /* paired with smp_wmb() in reuseport_add_sock() */ + smp_rmb(); + + sk2 = reuse->socks[reciprocal_scale(hash, socks)]; + } + +out: + rcu_read_unlock(); + return sk2; +} +EXPORT_SYMBOL(reuseport_select_sock); -- cgit v1.2.3 From e32ea7e747271a0abcd37e265005e97cc81d9df5 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 4 Jan 2016 17:41:46 -0500 Subject: soreuseport: fast reuseport UDP socket selection Include a struct sock_reuseport instance when a UDP socket binds to a specific address for the first time with the reuseport flag set. When selecting a socket for an incoming UDP packet, use the information available in sock_reuseport if present. This required adding an additional field to the UDP source address equality function to differentiate between exact and wildcard matches. The original use case allowed wildcard matches when checking for existing port uses during bind. The new use case of adding a socket to a reuseport group requires exact address matching. Performance test (using a machine with 2 CPU sockets and a total of 48 cores): Create reuseport groups of varying size. Use one socket from this group per user thread (pinning each thread to a different core) calling recvmmsg in a tight loop. Record number of messages received per second while saturating a 10G link. 
10 sockets: 18% increase (~2.8M -> 3.3M pkts/s) 20 sockets: 14% increase (~2.9M -> 3.3M pkts/s) 40 sockets: 13% increase (~3.0M -> 3.4M pkts/s) This work is based off a similar implementation written by Ying Cai for implementing policy-based reuseport selection. Signed-off-by: Craig Gallek Signed-off-by: David S. Miller --- include/net/addrconf.h | 3 +- include/net/udp.h | 2 +- net/ipv4/udp.c | 119 +++++++++++++++++++++++++++++++-------- net/ipv6/inet6_connection_sock.c | 4 +- net/ipv6/udp.c | 48 +++++++++++++--- 5 files changed, 141 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 78003dfb8539..47f52d3cd8df 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -87,7 +87,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2); +int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); diff --git a/include/net/udp.h b/include/net/udp.h index 6d4ed18e1427..3b5d7f93bc23 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -191,7 +191,7 @@ static inline void udp_lib_close(struct sock *sk, long timeout) } int udp_lib_get_port(struct sock *sk, unsigned short snum, - int (*)(const struct sock *, const struct sock *), + int (*)(const struct sock *, const struct sock *, bool), unsigned int hash2_nulladdr); u32 udp_flow_hashrnd(void); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ac14ae44390d..762b01f55707 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -113,6 +113,7 @@ #include #include #include "udp_impl.h" +#include struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ 
-137,7 +138,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2), + const struct sock *sk2, + bool match_wildcard), unsigned int log) { struct sock *sk2; @@ -152,8 +154,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (!sk2->sk_reuseport || !sk->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || !uid_eq(uid, sock_i_uid(sk2))) && - saddr_comp(sk, sk2)) { + saddr_comp(sk, sk2, true)) { if (!bitmap) return 1; __set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap); @@ -170,7 +173,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, struct udp_hslot *hslot2, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2)) + const struct sock *sk2, + bool match_wildcard)) { struct sock *sk2; struct hlist_nulls_node *node; @@ -186,8 +190,9 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (!sk2->sk_reuseport || !sk->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || !uid_eq(uid, sock_i_uid(sk2))) && - saddr_comp(sk, sk2)) { + saddr_comp(sk, sk2, true)) { res = 1; break; } @@ -196,6 +201,35 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, return res; } +static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot, + int (*saddr_same)(const struct sock *sk1, + const struct sock *sk2, + bool match_wildcard)) +{ + struct net *net = sock_net(sk); + struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); + struct sock *sk2; + + sk_nulls_for_each(sk2, node, &hslot->head) { + if (net_eq(sock_net(sk2), net) && + sk2 != sk && + sk2->sk_family == sk->sk_family && + ipv6_only_sock(sk2) == ipv6_only_sock(sk) && + (udp_sk(sk2)->udp_port_hash == 
udp_sk(sk)->udp_port_hash) && + (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && + (*saddr_same)(sk, sk2, false)) { + return reuseport_add_sock(sk, sk2); + } + } + + /* Initial allocation may have already happened via setsockopt */ + if (!rcu_access_pointer(sk->sk_reuseport_cb)) + return reuseport_alloc(sk); + return 0; +} + /** * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 * @@ -207,7 +241,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, */ int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2), + const struct sock *sk2, + bool match_wildcard), unsigned int hash2_nulladdr) { struct udp_hslot *hslot, *hslot2; @@ -290,6 +325,14 @@ found: udp_sk(sk)->udp_port_hash = snum; udp_sk(sk)->udp_portaddr_hash ^= snum; if (sk_unhashed(sk)) { + if (sk->sk_reuseport && + udp_reuseport_add_sock(sk, hslot, saddr_comp)) { + inet_sk(sk)->inet_num = 0; + udp_sk(sk)->udp_port_hash = 0; + udp_sk(sk)->udp_portaddr_hash ^= snum; + goto fail_unlock; + } + sk_nulls_add_node_rcu(sk, &hslot->head); hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -309,13 +352,22 @@ fail: } EXPORT_SYMBOL(udp_lib_get_port); -static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses + * match_wildcard == false: addresses must be exactly the same, i.e. 
+ * 0.0.0.0 only equals to 0.0.0.0 + */ +static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2, + bool match_wildcard) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); - return (!ipv6_only_sock(sk2) && - (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || - inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); + if (!ipv6_only_sock(sk2)) { + if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr) + return 1; + if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr) + return match_wildcard; + } + return 0; } static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, @@ -459,8 +511,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -478,6 +536,7 @@ begin: if (get_nulls_value(node) != slot2) goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, @@ -540,8 +599,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -560,6 +625,7 @@ begin: goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, @@ -587,7 +653,8 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); + return __udp4_lib_lookup(net, saddr, 
sport, daddr, dport, dif, + &udp_table); } EXPORT_SYMBOL_GPL(udp4_lib_lookup); @@ -1398,6 +1465,8 @@ void udp_lib_unhash(struct sock *sk) hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock_bh(&hslot->lock); + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); if (sk_nulls_del_node_init_rcu(sk)) { hslot->count--; inet_sk(sk)->inet_num = 0; @@ -1425,22 +1494,28 @@ void udp_lib_rehash(struct sock *sk, u16 newhash) hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); udp_sk(sk)->udp_portaddr_hash = newhash; - if (hslot2 != nhslot2) { + + if (hslot2 != nhslot2 || + rcu_access_pointer(sk->sk_reuseport_cb)) { hslot = udp_hashslot(udptable, sock_net(sk), udp_sk(sk)->udp_port_hash); /* we must lock primary chain too */ spin_lock_bh(&hslot->lock); - - spin_lock(&hslot2->lock); - hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); - hslot2->count--; - spin_unlock(&hslot2->lock); - - spin_lock(&nhslot2->lock); - hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, - &nhslot2->head); - nhslot2->count++; - spin_unlock(&nhslot2->lock); + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); + + if (hslot2 != nhslot2) { + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &nhslot2->head); + nhslot2->count++; + spin_unlock(&nhslot2->lock); + } spin_unlock_bh(&hslot->lock); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index a7ca2cde2ecb..36c3f0155010 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -51,12 +51,12 @@ int inet6_csk_bind_conflict(const struct sock *sk, (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid((struct sock *)sk2))))) { - if (ipv6_rcv_saddr_equal(sk, sk2)) + if 
(ipv6_rcv_saddr_equal(sk, sk2, true)) break; } if (!relax && reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN && - ipv6_rcv_saddr_equal(sk, sk2)) + ipv6_rcv_saddr_equal(sk, sk2, true)) break; } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 00775ee27d86..6204b8992de4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,14 @@ static u32 udp6_ehashfn(const struct net *net, udp_ipv6_hash_secret + net_hash_mix(net)); } -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) +/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 + * only, and any IPv4 addresses if not IPv6 only + * match_wildcard == false: addresses must be exactly the same, i.e. + * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, + * and 0.0.0.0 equals to 0.0.0.0 only + */ +int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) { const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); int sk2_ipv6only = inet_v6_ipv6only(sk2); @@ -84,16 +92,24 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) int addr_type2 = sk2_rcv_saddr6 ? 
ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; /* if both are mapped, treat as IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) - return (!sk2_ipv6only && - (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr || - sk->sk_rcv_saddr == sk2->sk_rcv_saddr)); + if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { + if (!sk2_ipv6only) { + if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) + return 1; + if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) + return match_wildcard; + } + return 0; + } + + if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) + return 1; - if (addr_type2 == IPV6_ADDR_ANY && + if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) return 1; - if (addr_type == IPV6_ADDR_ANY && + if (addr_type == IPV6_ADDR_ANY && match_wildcard && !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) return 1; @@ -253,8 +269,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -273,6 +295,7 @@ begin: goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, @@ -332,8 +355,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -352,6 +381,7 @@ begin: goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, hnum, saddr, sport, @@ -549,8 +579,8 @@ void __udp6_lib_err(struct sk_buff 
*skb, struct inet6_skb_parm *opt, int err; struct net *net = dev_net(skb->dev); - sk = __udp6_lib_lookup(net, daddr, uh->dest, - saddr, uh->source, inet6_iif(skb), udptable); + sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, + inet6_iif(skb), udptable); if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); -- cgit v1.2.3 From 538950a1b7527a0a52ccd9337e3fcd304f027f13 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 4 Jan 2016 17:41:47 -0500 Subject: soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF Expose socket options for setting a classic or extended BPF program for use when selecting sockets in an SO_REUSEPORT group. These options can be used on the first socket to belong to a group before bind or on any socket in the group after bind. This change includes refactoring of the existing sk_filter code to allow reuse of the existing BPF filter validation checks. Signed-off-by: Craig Gallek Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- arch/alpha/include/uapi/asm/socket.h | 3 + arch/avr32/include/uapi/asm/socket.h | 3 + arch/frv/include/uapi/asm/socket.h | 3 + arch/ia64/include/uapi/asm/socket.h | 3 + arch/m32r/include/uapi/asm/socket.h | 3 + arch/mips/include/uapi/asm/socket.h | 3 + arch/mn10300/include/uapi/asm/socket.h | 3 + arch/parisc/include/uapi/asm/socket.h | 3 + arch/powerpc/include/uapi/asm/socket.h | 3 + arch/s390/include/uapi/asm/socket.h | 3 + arch/sparc/include/uapi/asm/socket.h | 3 + arch/xtensa/include/uapi/asm/socket.h | 3 + include/linux/filter.h | 2 + include/net/sock_reuseport.h | 10 ++- include/net/udp.h | 5 +- include/uapi/asm-generic/socket.h | 3 + net/core/filter.c | 121 +++++++++++++++++++++++++++------ net/core/sock.c | 29 ++++++++ net/core/sock_reuseport.c | 88 ++++++++++++++++++++++-- net/ipv4/udp.c | 14 ++-- net/ipv4/udp_diag.c | 4 +- net/ipv6/udp.c | 14 ++-- 22 files changed, 282 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 9a20821b111c..c5fb9e6bc3a5 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -92,4 +92,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 2b65ed6b277c..9de0796240a0 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 4823ad125578..f02e4849ae83 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -85,5 +85,8 @@ 
#define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 59be3d87f86d..bce29166de1b 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -94,4 +94,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 7bc4cb273856..14aa4a6bccf1 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index dec3c850f36b..5910fe294e93 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -103,4 +103,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index cab7d6d50051..58b1aa01ab9f 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index a5cd40cd8ee1..f9cf1223422c 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h 
@@ -84,4 +84,7 @@ #define SO_ATTACH_BPF 0x402B #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 0x402C +#define SO_ATTACH_REUSEPORT_EBPF 0x402D + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index c046666038f8..dd54f28ecdec 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -92,4 +92,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 296942d56e6a..d02e89d14fef 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -91,4 +91,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index e6a16c40be5f..d270ee91968e 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -81,6 +81,9 @@ #define SO_ATTACH_BPF 0x0034 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 0x0035 +#define SO_ATTACH_REUSEPORT_EBPF 0x0036 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 4120af086160..fd3b96d1153f 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -96,4 +96,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _XTENSA_SOCKET_H */ diff --git 
a/include/linux/filter.h b/include/linux/filter.h index 4165e9ac9e36..294c3cdf07b3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -447,6 +447,8 @@ void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); +int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 67d1eb8fd7af..7dda3d7adba8 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -1,6 +1,8 @@ #ifndef _SOCK_REUSEPORT_H #define _SOCK_REUSEPORT_H +#include +#include #include #include @@ -9,12 +11,18 @@ struct sock_reuseport { u16 max_socks; /* length of socks */ u16 num_socks; /* elements in socks */ + struct bpf_prog __rcu *prog; /* optional BPF sock selector */ struct sock *socks[0]; /* array of sock pointers */ }; extern int reuseport_alloc(struct sock *sk); extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2); extern void reuseport_detach_sock(struct sock *sk); -extern struct sock *reuseport_select_sock(struct sock *sk, u32 hash); +extern struct sock *reuseport_select_sock(struct sock *sk, + u32 hash, + struct sk_buff *skb, + int hdr_len); +extern struct bpf_prog *reuseport_attach_prog(struct sock *sk, + struct bpf_prog *prog); #endif /* _SOCK_REUSEPORT_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 3b5d7f93bc23..2842541e28e7 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -258,7 +258,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, - struct udp_table *tbl); + struct 
udp_table *tbl, struct sk_buff *skb); struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, @@ -266,7 +266,8 @@ struct sock *udp6_lib_lookup(struct net *net, struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *tbl); + int dif, struct udp_table *tbl, + struct sk_buff *skb); /* * SNMP statistics for UDP and UDP-Lite diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 5c15c2a5c123..fb8a41668382 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -87,4 +87,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/filter.c b/net/core/filter.c index c770196ae8d5..35e6fed28709 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -50,6 +50,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1167,17 +1168,32 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return 0; } -/** - * sk_attach_filter - attach a socket filter - * @fprog: the filter program - * @sk: the socket to use - * - * Attach the user's filter code. We first run some sanity checks on - * it to make sure it does not explode on us later. If an error - * occurs or there is insufficient memory for the filter a negative - * errno code is returned. On success the return is zero. 
- */ -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk) +{ + struct bpf_prog *old_prog; + int err; + + if (bpf_prog_size(prog->len) > sysctl_optmem_max) + return -ENOMEM; + + if (sk_unhashed(sk)) { + err = reuseport_alloc(sk); + if (err) + return err; + } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) { + /* The socket wasn't bound with SO_REUSEPORT */ + return -EINVAL; + } + + old_prog = reuseport_attach_prog(sk, prog); + if (old_prog) + bpf_prog_destroy(old_prog); + + return 0; +} + +static +struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) { unsigned int fsize = bpf_classic_proglen(fprog); unsigned int bpf_fsize = bpf_prog_size(fprog->len); @@ -1185,19 +1201,19 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) - return -EPERM; + return ERR_PTR(-EPERM); /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) - return -EINVAL; + return ERR_PTR(-EINVAL); prog = bpf_prog_alloc(bpf_fsize, 0); if (!prog) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (copy_from_user(prog->insns, fprog->filter, fsize)) { __bpf_prog_free(prog); - return -EFAULT; + return ERR_PTR(-EFAULT); } prog->len = fprog->len; @@ -1205,13 +1221,30 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) err = bpf_prog_store_orig_filter(prog, fprog); if (err) { __bpf_prog_free(prog); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - prog = bpf_prepare_filter(prog, NULL); + return bpf_prepare_filter(prog, NULL); +} + +/** + * sk_attach_filter - attach a socket filter + * @fprog: the filter program + * @sk: the socket to use + * + * Attach the user's filter code. We first run some sanity checks on + * it to make sure it does not explode on us later. 
If an error + * occurs or there is insufficient memory for the filter a negative + * errno code is returned. On success the return is zero. + */ +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + struct bpf_prog *prog = __get_filter(fprog, sk); + int err; + if (IS_ERR(prog)) return PTR_ERR(prog); @@ -1225,23 +1258,50 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } EXPORT_SYMBOL_GPL(sk_attach_filter); -int sk_attach_bpf(u32 ufd, struct sock *sk) +int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct bpf_prog *prog; + struct bpf_prog *prog = __get_filter(fprog, sk); int err; + if (IS_ERR(prog)) + return PTR_ERR(prog); + + err = __reuseport_attach_prog(prog, sk); + if (err < 0) { + __bpf_prog_release(prog); + return err; + } + + return 0; +} + +static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog; + if (sock_flag(sk, SOCK_FILTER_LOCKED)) - return -EPERM; + return ERR_PTR(-EPERM); prog = bpf_prog_get(ufd); if (IS_ERR(prog)) - return PTR_ERR(prog); + return prog; if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); - return -EINVAL; + return ERR_PTR(-EINVAL); } + return prog; +} + +int sk_attach_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog = __get_bpf(ufd, sk); + int err; + + if (IS_ERR(prog)) + return PTR_ERR(prog); + err = __sk_attach_prog(prog, sk); if (err < 0) { bpf_prog_put(prog); @@ -1251,6 +1311,23 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } +int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog = __get_bpf(ufd, sk); + int err; + + if (IS_ERR(prog)) + return PTR_ERR(prog); + + err = __reuseport_attach_prog(prog, sk); + if (err < 0) { + bpf_prog_put(prog); + return err; + } + + return 0; +} + #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) #define BPF_LDST_LEN 16U diff --git a/net/core/sock.c b/net/core/sock.c index 565bab7baca9..51270238e269 100644 --- a/net/core/sock.c +++ 
b/net/core/sock.c @@ -134,6 +134,7 @@ #include #include +#include #include @@ -932,6 +933,32 @@ set_rcvbuf: } break; + case SO_ATTACH_REUSEPORT_CBPF: + ret = -EINVAL; + if (optlen == sizeof(struct sock_fprog)) { + struct sock_fprog fprog; + + ret = -EFAULT; + if (copy_from_user(&fprog, optval, sizeof(fprog))) + break; + + ret = sk_reuseport_attach_filter(&fprog, sk); + } + break; + + case SO_ATTACH_REUSEPORT_EBPF: + ret = -EINVAL; + if (optlen == sizeof(u32)) { + u32 ufd; + + ret = -EFAULT; + if (copy_from_user(&ufd, optval, sizeof(ufd))) + break; + + ret = sk_reuseport_attach_bpf(ufd, sk); + } + break; + case SO_DETACH_FILTER: ret = sk_detach_filter(sk); break; @@ -1443,6 +1470,8 @@ void sk_destruct(struct sock *sk) sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); } + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 963c8d5f3027..ae0969c0fc2e 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -1,10 +1,12 @@ /* * To speed up listener socket lookup, create an array to store all sockets * listening on the same port. This allows a decision to be made after finding - * the first socket. + * the first socket. An optional BPF program can also be configured for + * selecting the socket index from the array of available sockets. 
*/ #include +#include #include #define INIT_SOCKS 128 @@ -22,6 +24,7 @@ static struct sock_reuseport *__reuseport_alloc(u16 max_socks) reuse->max_socks = max_socks; + RCU_INIT_POINTER(reuse->prog, NULL); return reuse; } @@ -67,6 +70,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) more_reuse->max_socks = more_socks_size; more_reuse->num_socks = reuse->num_socks; + more_reuse->prog = reuse->prog; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); @@ -75,6 +79,10 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, more_reuse); + /* Note: we use kfree_rcu here instead of reuseport_free_rcu so + * that reuse and more_reuse can temporarily share a reference + * to prog. + */ kfree_rcu(reuse, rcu); return more_reuse; } @@ -116,6 +124,16 @@ int reuseport_add_sock(struct sock *sk, const struct sock *sk2) } EXPORT_SYMBOL(reuseport_add_sock); +static void reuseport_free_rcu(struct rcu_head *head) +{ + struct sock_reuseport *reuse; + + reuse = container_of(head, struct sock_reuseport, rcu); + if (reuse->prog) + bpf_prog_destroy(reuse->prog); + kfree(reuse); +} + void reuseport_detach_sock(struct sock *sk) { struct sock_reuseport *reuse; @@ -131,7 +149,7 @@ void reuseport_detach_sock(struct sock *sk) reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; reuse->num_socks--; if (reuse->num_socks == 0) - kfree_rcu(reuse, rcu); + call_rcu(&reuse->rcu, reuseport_free_rcu); break; } } @@ -139,15 +157,53 @@ void reuseport_detach_sock(struct sock *sk) } EXPORT_SYMBOL(reuseport_detach_sock); +static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks, + struct bpf_prog *prog, struct sk_buff *skb, + int hdr_len) +{ + struct sk_buff *nskb = NULL; + u32 index; + + if (skb_shared(skb)) { + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return NULL; + skb = nskb; + } + + /* temporarily advance data past protocol header */ + if 
(!pskb_pull(skb, hdr_len)) { + consume_skb(nskb); + return NULL; + } + index = bpf_prog_run_save_cb(prog, skb); + __skb_push(skb, hdr_len); + + consume_skb(nskb); + + if (index >= socks) + return NULL; + + return reuse->socks[index]; +} + /** * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. * @sk: First socket in the group. - * @hash: Use this hash to select. + * @hash: When no BPF filter is available, use this hash to select. + * @skb: skb to run through BPF filter. + * @hdr_len: BPF filter expects skb data pointer at payload data. If + * the skb does not yet point at the payload, this parameter represents + * how far the pointer needs to advance to reach the payload. * Returns a socket that should receive the packet (or NULL on error). */ -struct sock *reuseport_select_sock(struct sock *sk, u32 hash) +struct sock *reuseport_select_sock(struct sock *sk, + u32 hash, + struct sk_buff *skb, + int hdr_len) { struct sock_reuseport *reuse; + struct bpf_prog *prog; struct sock *sk2 = NULL; u16 socks; @@ -158,12 +214,16 @@ struct sock *reuseport_select_sock(struct sock *sk, u32 hash) if (!reuse) goto out; + prog = rcu_dereference(reuse->prog); socks = READ_ONCE(reuse->num_socks); if (likely(socks)) { /* paired with smp_wmb() in reuseport_add_sock() */ smp_rmb(); - sk2 = reuse->socks[reciprocal_scale(hash, socks)]; + if (prog && skb) + sk2 = run_bpf(reuse, socks, prog, skb, hdr_len); + else + sk2 = reuse->socks[reciprocal_scale(hash, socks)]; } out: @@ -171,3 +231,21 @@ out: return sk2; } EXPORT_SYMBOL(reuseport_select_sock); + +struct bpf_prog * +reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog) +{ + struct sock_reuseport *reuse; + struct bpf_prog *old_prog; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + old_prog = rcu_dereference_protected(reuse->prog, + lockdep_is_held(&reuseport_lock)); + rcu_assign_pointer(reuse->prog, prog); + 
spin_unlock_bh(&reuseport_lock); + + return old_prog; +} +EXPORT_SYMBOL(reuseport_attach_prog); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 762b01f55707..835378365f25 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -514,7 +514,7 @@ begin: struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, NULL, 0); if (sk2) { result = sk2; goto found; @@ -553,7 +553,7 @@ found: */ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, - int dif, struct udp_table *udptable) + int dif, struct udp_table *udptable, struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -602,7 +602,8 @@ begin: struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -647,14 +648,14 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), - udptable); + udptable, skb); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, - &udp_table); + &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp4_lib_lookup); @@ -702,7 +703,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) struct net *net = dev_net(skb->dev); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, udptable); + iph->saddr, uh->source, skb->dev->ifindex, udptable, + NULL); if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; /* No socket for error */ diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 6116604bf6e8..df1966f3b6ec 100644 --- a/net/ipv4/udp_diag.c +++ 
b/net/ipv4/udp_diag.c @@ -44,7 +44,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_if, tbl); + req->id.idiag_if, tbl, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) sk = __udp6_lib_lookup(net, @@ -52,7 +52,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_sport, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, - req->id.idiag_if, tbl); + req->id.idiag_if, tbl, NULL); #endif else goto out_nosk; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6204b8992de4..56fcb55fda31 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -272,7 +272,7 @@ begin: struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, NULL, 0); if (sk2) { result = sk2; goto found; @@ -310,7 +310,8 @@ found: struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *udptable) + int dif, struct udp_table *udptable, + struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -358,7 +359,8 @@ begin: struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -407,13 +409,13 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, return sk; return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - udptable); + udptable, skb); } struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { - return __udp6_lib_lookup(net, saddr, sport, daddr, dport, 
dif, &udp_table); + return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp6_lib_lookup); @@ -580,7 +582,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), udptable); + inet6_iif(skb), udptable, skb); if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); -- cgit v1.2.3 From 0d3b7f64c84d53658daf28e2f9772e38acb9340d Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Jan 2016 13:19:31 +0200 Subject: Bluetooth: Change eir_has_data_type() to more generic eir_get_data() To make the EIR parsing helper more general purpose, make it return the found data and its length rather than just saying whether the data was present or not. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 30 ++++++++++++++++++++---------- net/bluetooth/hci_event.c | 6 +++--- net/bluetooth/mgmt.c | 3 ++- 3 files changed, 25 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c95e0326c41a..372e2a7c4ada 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1283,31 +1283,41 @@ static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, mutex_unlock(&hci_cb_list_lock); } -static inline bool eir_has_data_type(u8 *data, size_t data_len, u8 type) +static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type, + size_t *data_len) { size_t parsed = 0; - if (data_len < 2) - return false; + if (eir_len < 2) + return NULL; - while (parsed < data_len - 1) { - u8 field_len = data[0]; + while (parsed < eir_len - 1) { + u8 field_len = eir[0]; if (field_len == 0) break; parsed += field_len + 1; - if (parsed > data_len) + if (parsed > eir_len) break; - if (data[1] == type) - return true; + if (eir[1] != type) { + 
eir += field_len + 1; + continue; + } + + /* Zero length data */ + if (field_len == 1) + return NULL; - data += field_len + 1; + if (data_len) + *data_len = field_len - 1; + + return &eir[2]; } - return false; + return NULL; } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7554da5b7a8f..c162af5d16bf 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3833,9 +3833,9 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, data.ssp_mode = 0x01; if (hci_dev_test_flag(hdev, HCI_MGMT)) - name_known = eir_has_data_type(info->data, - sizeof(info->data), - EIR_NAME_COMPLETE); + name_known = eir_get_data(info->data, + sizeof(info->data), + EIR_NAME_COMPLETE, NULL); else name_known = true; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 621f6fdd0dd1..3297a4ecc05e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7266,7 +7266,8 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, /* Copy EIR or advertising data into event */ memcpy(ev->eir, eir, eir_len); - if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV)) + if (dev_class && !eir_get_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, + NULL)) eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, dev_class, 3); -- cgit v1.2.3 From 78b781ca0d35191ebf8d8cad8beec810270f0f2e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Jan 2016 13:19:32 +0200 Subject: Bluetooth: Add support for Start Limited Discovery command This patch implements the mgmt Start Limited Discovery command. Most of existing Start Discovery code is reused since the only difference is the presence of a 'limited' flag as part of the discovery state. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 2 ++ net/bluetooth/hci_request.c | 11 ++++++--- net/bluetooth/mgmt.c | 53 +++++++++++++++++++++++++++++++++------- 4 files changed, 55 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 372e2a7c4ada..d4f82edb5cff 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -77,6 +77,7 @@ struct discovery_state { u8 last_adv_data_len; bool report_invalid_rssi; bool result_filtering; + bool limited; s8 rssi; u16 uuid_count; u8 (*uuids)[16]; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index af17774c9416..ea73e0826aa7 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -584,6 +584,8 @@ struct mgmt_rp_get_adv_size_info { __u8 max_scan_rsp_len; } __packed; +#define MGMT_OP_START_LIMITED_DISCOVERY 0x0041 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 9997c31ef987..41b5f3813f02 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1737,8 +1737,8 @@ static int le_scan_disable(struct hci_request *req, unsigned long opt) static int bredr_inquiry(struct hci_request *req, unsigned long opt) { u8 length = opt; - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; + const u8 giac[3] = { 0x33, 0x8b, 0x9e }; + const u8 liac[3] = { 0x00, 0x8b, 0x9e }; struct hci_cp_inquiry cp; BT_DBG("%s", req->hdev->name); @@ -1748,7 +1748,12 @@ static int bredr_inquiry(struct hci_request *req, unsigned long opt) hci_dev_unlock(req->hdev); memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); + + if (req->hdev->discovery.limited) + memcpy(&cp.lap, liac, sizeof(cp.lap)); + else + memcpy(&cp.lap, giac, sizeof(cp.lap)); + 
cp.length = length; hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3297a4ecc05e..5a5089cb6570 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -103,6 +103,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_ADD_ADVERTISING, MGMT_OP_REMOVE_ADVERTISING, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_OP_START_LIMITED_DISCOVERY, }; static const u16 mgmt_events[] = { @@ -3283,6 +3284,9 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) if (!cmd) cmd = pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev); + if (!cmd) + cmd = pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev); + if (cmd) { cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); @@ -3318,8 +3322,8 @@ static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, return true; } -static int start_discovery(struct sock *sk, struct hci_dev *hdev, - void *data, u16 len) +static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, + u16 op, void *data, u16 len) { struct mgmt_cp_start_discovery *cp = data; struct mgmt_pending_cmd *cmd; @@ -3331,7 +3335,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_NOT_POWERED, &cp->type, sizeof(cp->type)); goto failed; @@ -3339,15 +3343,14 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, if (hdev->discovery.state != DISCOVERY_STOPPED || hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - MGMT_STATUS_BUSY, &cp->type, - sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_BUSY, + &cp->type, sizeof(cp->type)); goto failed; } if (!discovery_type_is_valid(hdev, cp->type, &status)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - status, &cp->type, 
sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, op, status, + &cp->type, sizeof(cp->type)); goto failed; } @@ -3358,8 +3361,12 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.type = cp->type; hdev->discovery.report_invalid_rssi = false; + if (op == MGMT_OP_START_LIMITED_DISCOVERY) + hdev->discovery.limited = true; + else + hdev->discovery.limited = false; - cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); + cmd = mgmt_pending_add(sk, op, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -3376,6 +3383,21 @@ failed: return err; } +static int start_discovery(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + return start_discovery_internal(sk, hdev, MGMT_OP_START_DISCOVERY, + data, len); +} + +static int start_limited_discovery(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + return start_discovery_internal(sk, hdev, + MGMT_OP_START_LIMITED_DISCOVERY, + data, len); +} + static int service_discovery_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { @@ -6313,6 +6335,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { HCI_MGMT_VAR_LEN }, { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, + { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -7237,6 +7260,18 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, return; } + if (hdev->discovery.limited) { + /* Check for limited discoverable bit */ + if (dev_class) { + if (!(dev_class[1] & 0x20)) + return; + } else { + u8 *flags = eir_get_data(eir, eir_len, EIR_FLAGS, NULL); + if (!flags || !(flags[0] & LE_AD_LIMITED)) + return; + } + } + /* Make sure that the buffer is big enough. The 5 extra bytes * are for the potential CoD field. 
*/ -- cgit v1.2.3 From d6c0256a60e685214cc8cc2b886809f11efc0084 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:46 +0800 Subject: sctp: add the rhashtable apis for sctp global transport hashtable transport hashtable will replace the association hashtable to do the lookup for transport, and then get association by t->asoc, rhashtable apis will be used because it is resizable, scalable and using rcu. lport + rport + paddr will be the base hashkey to locate the chain, with net to protect one netns from another, then plus the laddr to compare to get the target. this patch will provide the lookup functions: - sctp_epaddr_lookup_transport - sctp_addrs_lookup_transport hash/unhash functions: - sctp_hash_transport - sctp_unhash_transport init/destroy functions: - sctp_transport_hashtable_init - sctp_transport_hashtable_destroy Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 11 ++++ include/net/sctp/structs.h | 5 ++ net/sctp/input.c | 131 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+) (limited to 'net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ce13cf20f625..7bbdfbab2efa 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -143,6 +143,17 @@ void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_transport *t); void sctp_backlog_migrate(struct sctp_association *assoc, struct sock *oldsk, struct sock *newsk); +int sctp_transport_hashtable_init(void); +void sctp_transport_hashtable_destroy(void); +void sctp_hash_transport(struct sctp_transport *t); +void sctp_unhash_transport(struct sctp_transport *t); +struct sctp_transport *sctp_addrs_lookup_transport( + struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr); +struct sctp_transport *sctp_epaddr_lookup_transport( + const struct sctp_endpoint *ep, + const union sctp_addr *paddr); /* * sctp/proc.c diff --git
a/include/net/sctp/structs.h b/include/net/sctp/structs.h index eea9bdeecba2..4ab87d08e766 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -48,6 +48,7 @@ #define __sctp_structs_h__ #include +#include #include /* linux/in.h needs this!! */ #include /* We get struct sockaddr_in. */ #include /* We get struct in6_addr */ @@ -123,6 +124,8 @@ extern struct sctp_globals { struct sctp_hashbucket *assoc_hashtable; /* This is the sctp port control hash. */ struct sctp_bind_hashbucket *port_hashtable; + /* This is the hash of all transports. */ + struct rhashtable transport_hashtable; /* Sizes of above hashtables. */ int ep_hashsize; @@ -147,6 +150,7 @@ extern struct sctp_globals { #define sctp_assoc_hashtable (sctp_globals.assoc_hashtable) #define sctp_port_hashsize (sctp_globals.port_hashsize) #define sctp_port_hashtable (sctp_globals.port_hashtable) +#define sctp_transport_hashtable (sctp_globals.transport_hashtable) #define sctp_checksum_disable (sctp_globals.checksum_disable) /* SCTP Socket type: UDP or TCP style. */ @@ -753,6 +757,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet) struct sctp_transport { /* A list of transports. */ struct list_head transports; + struct rhash_head node; /* Reference counting. 
*/ atomic_t refcnt; diff --git a/net/sctp/input.c b/net/sctp/input.c index b6493b3f11a9..bac8278b176b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -782,6 +782,137 @@ hit: return ep; } +/* rhashtable for transport */ +struct sctp_hash_cmp_arg { + const union sctp_addr *laddr; + const union sctp_addr *paddr; + const struct net *net; +}; + +static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct sctp_hash_cmp_arg *x = arg->key; + const struct sctp_transport *t = ptr; + struct sctp_association *asoc = t->asoc; + const struct net *net = x->net; + + if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port)) + return 1; + if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr)) + return 1; + if (!net_eq(sock_net(asoc->base.sk), net)) + return 1; + if (!sctp_bind_addr_match(&asoc->base.bind_addr, + x->laddr, sctp_sk(asoc->base.sk))) + return 1; + + return 0; +} + +static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct sctp_transport *t = data; + const union sctp_addr *paddr = &t->ipaddr; + const struct net *net = sock_net(t->asoc->base.sk); + u16 lport = htons(t->asoc->base.bind_addr.port); + u32 addr; + + if (paddr->sa.sa_family == AF_INET6) + addr = jhash(&paddr->v6.sin6_addr, 16, seed); + else + addr = paddr->v4.sin_addr.s_addr; + + return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + (__force __u32)lport, net_hash_mix(net), seed); +} + +static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed) +{ + const struct sctp_hash_cmp_arg *x = data; + const union sctp_addr *paddr = x->paddr; + const struct net *net = x->net; + u16 lport = x->laddr->v4.sin_port; + u32 addr; + + if (paddr->sa.sa_family == AF_INET6) + addr = jhash(&paddr->v6.sin6_addr, 16, seed); + else + addr = paddr->v4.sin_addr.s_addr; + + return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + (__force __u32)lport, net_hash_mix(net), seed); +} + +static const struct rhashtable_params 
sctp_hash_params = { + .head_offset = offsetof(struct sctp_transport, node), + .hashfn = sctp_hash_key, + .obj_hashfn = sctp_hash_obj, + .obj_cmpfn = sctp_hash_cmp, + .automatic_shrinking = true, +}; + +int sctp_transport_hashtable_init(void) +{ + return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params); +} + +void sctp_transport_hashtable_destroy(void) +{ + rhashtable_destroy(&sctp_transport_hashtable); +} + +void sctp_hash_transport(struct sctp_transport *t) +{ + struct sctp_sockaddr_entry *addr; + struct sctp_hash_cmp_arg arg; + + addr = list_entry(t->asoc->base.bind_addr.address_list.next, + struct sctp_sockaddr_entry, list); + arg.laddr = &addr->a; + arg.paddr = &t->ipaddr; + arg.net = sock_net(t->asoc->base.sk); + +reinsert: + if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg, + &t->node, sctp_hash_params) == -EBUSY) + goto reinsert; +} + +void sctp_unhash_transport(struct sctp_transport *t) +{ + rhashtable_remove_fast(&sctp_transport_hashtable, &t->node, + sctp_hash_params); +} + +struct sctp_transport *sctp_addrs_lookup_transport( + struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr) +{ + struct sctp_hash_cmp_arg arg = { + .laddr = laddr, + .paddr = paddr, + .net = net, + }; + + return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg, + sctp_hash_params); +} + +struct sctp_transport *sctp_epaddr_lookup_transport( + const struct sctp_endpoint *ep, + const union sctp_addr *paddr) +{ + struct sctp_sockaddr_entry *addr; + struct net *net = sock_net(ep->base.sk); + + addr = list_entry(ep->base.bind_addr.address_list.next, + struct sctp_sockaddr_entry, list); + + return sctp_addrs_lookup_transport(net, &addr->a, paddr); +} + /* Insert association into the hash table. 
*/ static void __sctp_hash_established(struct sctp_association *asoc) { -- cgit v1.2.3 From 4f0087812648b7611157ae22954acfaed820d24e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:47 +0800 Subject: sctp: apply rhashtable api to send/recv path apply lookup apis to two functions, for __sctp_endpoint_lookup_assoc and __sctp_lookup_association, it's invoked in the protection of sock lock, it will be safe, but sctp_lookup_association need to call rcu_read_lock() and to detect the t->dead to protect it. Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/associola.c | 5 +++++ net/sctp/endpointola.c | 35 ++++++++--------------------------- net/sctp/input.c | 39 ++++++++++----------------------------- net/sctp/protocol.c | 6 ++++++ 4 files changed, 29 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 559afd0ee7de..2bf8ec92dde4 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -383,6 +383,7 @@ void sctp_association_free(struct sctp_association *asoc) list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); list_del_rcu(pos); + sctp_unhash_transport(transport); sctp_transport_free(transport); } @@ -500,6 +501,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, /* Remove this peer from the list. */ list_del_rcu(&peer->transports); + /* Remove this peer from the transport hashtable */ + sctp_unhash_transport(peer); /* Get the first transport of asoc. */ pos = asoc->peer.transport_addr_list.next; @@ -699,6 +702,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Attach the remote transport to our asoc. 
*/ list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list); asoc->peer.transport_count++; + /* Add this peer into the transport hashtable */ + sctp_hash_transport(peer); /* If we do not yet have a primary path, set one. */ if (!asoc->peer.primary_path) { diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 9da76ba4d10f..8838bf492a12 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -314,8 +314,8 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, } /* Find the association that goes with this chunk. - * We do a linear search of the associations for this endpoint. - * We return the matching transport address too. + * We lookup the transport from hashtable at first, then get association + * through t->assoc. */ static struct sctp_association *__sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, @@ -323,12 +323,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( struct sctp_transport **transport) { struct sctp_association *asoc = NULL; - struct sctp_association *tmp; - struct sctp_transport *t = NULL; - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; - int hash; - int rport; + struct sctp_transport *t; *transport = NULL; @@ -337,26 +332,12 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( */ if (!ep->base.bind_addr.port) goto out; + t = sctp_epaddr_lookup_transport(ep, paddr); + if (!t || t->asoc->temp) + goto out; - rport = ntohs(paddr->v4.sin_port); - - hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port, - rport); - head = &sctp_assoc_hashtable[hash]; - read_lock(&head->lock); - sctp_for_each_hentry(epb, &head->chain) { - tmp = sctp_assoc(epb); - if (tmp->ep != ep || rport != tmp->peer.port) - continue; - - t = sctp_assoc_lookup_paddr(tmp, paddr); - if (t) { - asoc = tmp; - *transport = t; - break; - } - } - read_unlock(&head->lock); + *transport = t; + asoc = t->asoc; out: return asoc; } diff --git a/net/sctp/input.c 
b/net/sctp/input.c index bac8278b176b..6f075d835764 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -981,38 +981,19 @@ static struct sctp_association *__sctp_lookup_association( const union sctp_addr *peer, struct sctp_transport **pt) { - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; - struct sctp_association *asoc; - struct sctp_transport *transport; - int hash; + struct sctp_transport *t; - /* Optimize here for direct hit, only listening connections can - * have wildcards anyways. - */ - hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port), - ntohs(peer->v4.sin_port)); - head = &sctp_assoc_hashtable[hash]; - read_lock(&head->lock); - sctp_for_each_hentry(epb, &head->chain) { - asoc = sctp_assoc(epb); - transport = sctp_assoc_is_match(asoc, net, local, peer); - if (transport) - goto hit; - } + t = sctp_addrs_lookup_transport(net, local, peer); + if (!t || t->dead || t->asoc->temp) + return NULL; - read_unlock(&head->lock); + sctp_association_hold(t->asoc); + *pt = t; - return NULL; - -hit: - *pt = transport; - sctp_association_hold(asoc); - read_unlock(&head->lock); - return asoc; + return t->asoc; } -/* Look up an association. BH-safe. */ +/* Look up an association. 
protected by RCU read lock */ static struct sctp_association *sctp_lookup_association(struct net *net, const union sctp_addr *laddr, @@ -1021,9 +1002,9 @@ struct sctp_association *sctp_lookup_association(struct net *net, { struct sctp_association *asoc; - local_bh_disable(); + rcu_read_lock(); asoc = __sctp_lookup_association(net, laddr, paddr, transportp); - local_bh_enable(); + rcu_read_unlock(); return asoc; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 010aced44b6b..631cfb380535 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1467,6 +1467,9 @@ static __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); } + if (sctp_transport_hashtable_init()) + goto err_thash_alloc; + pr_info("Hash tables configured (established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); @@ -1521,6 +1524,8 @@ err_register_defaults: get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); err_bhash_alloc: + sctp_transport_hashtable_destroy(); +err_thash_alloc: kfree(sctp_ep_hashtable); err_ehash_alloc: free_pages((unsigned long)sctp_assoc_hashtable, @@ -1567,6 +1572,7 @@ static __exit void sctp_exit(void) free_pages((unsigned long)sctp_port_hashtable, get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); + sctp_transport_hashtable_destroy(); percpu_counter_destroy(&sctp_sockets_allocated); -- cgit v1.2.3 From 39f66a7dce3213fb0a0c6256929c816df27c7548 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:48 +0800 Subject: sctp: apply rhashtable api to sctp procfs Traverse the transport rhashtable, and get each association only once through the condition assoc->peer.primary_path != transport. Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- net/sctp/proc.c | 316 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 173 insertions(+), 143 deletions(-) (limited to 'net') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 0697eda5aed8..dfa7eeccb537 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -281,88 +281,136 @@ void sctp_eps_proc_exit(struct net *net) remove_proc_entry("eps", net->sctp.proc_net_sctp); } +struct sctp_ht_iter { + struct seq_net_private p; + struct rhashtable_iter hti; +}; -static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) +static struct sctp_transport *sctp_transport_get_next(struct seq_file *seq) { - if (*pos >= sctp_assoc_hashsize) - return NULL; + struct sctp_ht_iter *iter = seq->private; + struct sctp_transport *t; - if (*pos < 0) - *pos = 0; + t = rhashtable_walk_next(&iter->hti); + for (; t; t = rhashtable_walk_next(&iter->hti)) { + if (IS_ERR(t)) { + if (PTR_ERR(t) == -EAGAIN) + continue; + break; + } - if (*pos == 0) - seq_printf(seq, " ASSOC SOCK STY SST ST HBKT " - "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " - "RPORT LADDRS <-> RADDRS " - "HBINT INS OUTS MAXRT T1X T2X RTXC " - "wmema wmemq sndbuf rcvbuf\n"); + if (net_eq(sock_net(t->asoc->base.sk), seq_file_net(seq)) && + t->asoc->peer.primary_path == t) + break; + } - return (void *)pos; + return t; } -static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) +static struct sctp_transport *sctp_transport_get_idx(struct seq_file *seq, + loff_t pos) +{ + void *obj; + + while (pos && (obj = sctp_transport_get_next(seq)) && !IS_ERR(obj)) + pos--; + + return obj; +} + +static int sctp_transport_walk_start(struct seq_file *seq) { + struct sctp_ht_iter *iter = seq->private; + int err; + + err = rhashtable_walk_init(&sctp_transport_hashtable, &iter->hti); + if (err) + return err; + + err = rhashtable_walk_start(&iter->hti); + + return err == -EAGAIN ? 
0 : err; } +static void sctp_transport_walk_stop(struct seq_file *seq) +{ + struct sctp_ht_iter *iter = seq->private; + + rhashtable_walk_stop(&iter->hti); + rhashtable_walk_exit(&iter->hti); +} + +static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) +{ + int err = sctp_transport_walk_start(seq); + + if (err) + return ERR_PTR(err); + + return *pos ? sctp_transport_get_idx(seq, *pos) : SEQ_START_TOKEN; +} + +static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) +{ + sctp_transport_walk_stop(seq); +} static void *sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - if (++*pos >= sctp_assoc_hashsize) - return NULL; + ++*pos; - return pos; + return sctp_transport_get_next(seq); } /* Display sctp associations (/proc/net/sctp/assocs). */ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) { - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; + struct sctp_transport *transport; struct sctp_association *assoc; + struct sctp_ep_common *epb; struct sock *sk; - int hash = *(loff_t *)v; - - if (hash >= sctp_assoc_hashsize) - return -ENOMEM; - head = &sctp_assoc_hashtable[hash]; - local_bh_disable(); - read_lock(&head->lock); - sctp_for_each_hentry(epb, &head->chain) { - assoc = sctp_assoc(epb); - sk = epb->sk; - if (!net_eq(sock_net(sk), seq_file_net(seq))) - continue; - seq_printf(seq, - "%8pK %8pK %-3d %-3d %-2d %-4d " - "%4d %8d %8d %7u %5lu %-5d %5d ", - assoc, sk, sctp_sk(sk)->type, sk->sk_state, - assoc->state, hash, - assoc->assoc_id, - assoc->sndbuf_used, - atomic_read(&assoc->rmem_alloc), - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), - sock_i_ino(sk), - epb->bind_addr.port, - assoc->peer.port); - seq_printf(seq, " "); - sctp_seq_dump_local_addrs(seq, epb); - seq_printf(seq, "<-> "); - sctp_seq_dump_remote_addrs(seq, assoc); - seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " - "%8d %8d %8d %8d", - assoc->hbinterval, assoc->c.sinit_max_instreams, - assoc->c.sinit_num_ostreams, assoc->max_retrans, - 
assoc->init_retries, assoc->shutdown_retries, - assoc->rtx_data_chunks, - atomic_read(&sk->sk_wmem_alloc), - sk->sk_wmem_queued, - sk->sk_sndbuf, - sk->sk_rcvbuf); - seq_printf(seq, "\n"); + if (v == SEQ_START_TOKEN) { + seq_printf(seq, " ASSOC SOCK STY SST ST HBKT " + "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " + "RPORT LADDRS <-> RADDRS " + "HBINT INS OUTS MAXRT T1X T2X RTXC " + "wmema wmemq sndbuf rcvbuf\n"); + return 0; } - read_unlock(&head->lock); - local_bh_enable(); + + transport = (struct sctp_transport *)v; + assoc = transport->asoc; + epb = &assoc->base; + sk = epb->sk; + + seq_printf(seq, + "%8pK %8pK %-3d %-3d %-2d %-4d " + "%4d %8d %8d %7u %5lu %-5d %5d ", + assoc, sk, sctp_sk(sk)->type, sk->sk_state, + assoc->state, 0, + assoc->assoc_id, + assoc->sndbuf_used, + atomic_read(&assoc->rmem_alloc), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk), + epb->bind_addr.port, + assoc->peer.port); + seq_printf(seq, " "); + sctp_seq_dump_local_addrs(seq, epb); + seq_printf(seq, "<-> "); + sctp_seq_dump_remote_addrs(seq, assoc); + seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " + "%8d %8d %8d %8d", + assoc->hbinterval, assoc->c.sinit_max_instreams, + assoc->c.sinit_num_ostreams, assoc->max_retrans, + assoc->init_retries, assoc->shutdown_retries, + assoc->rtx_data_chunks, + atomic_read(&sk->sk_wmem_alloc), + sk->sk_wmem_queued, + sk->sk_sndbuf, + sk->sk_rcvbuf); + seq_printf(seq, "\n"); return 0; } @@ -378,7 +426,7 @@ static const struct seq_operations sctp_assoc_ops = { static int sctp_assocs_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &sctp_assoc_ops, - sizeof(struct seq_net_private)); + sizeof(struct sctp_ht_iter)); } static const struct file_operations sctp_assocs_seq_fops = { @@ -409,112 +457,94 @@ void sctp_assocs_proc_exit(struct net *net) static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) { - if (*pos >= sctp_assoc_hashsize) - return NULL; - - if (*pos < 0) - *pos = 0; + 
int err = sctp_transport_walk_start(seq); - if (*pos == 0) - seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX " - "REM_ADDR_RTX START STATE\n"); + if (err) + return ERR_PTR(err); - return (void *)pos; + return *pos ? sctp_transport_get_idx(seq, *pos) : SEQ_START_TOKEN; } static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - if (++*pos >= sctp_assoc_hashsize) - return NULL; + ++*pos; - return pos; + return sctp_transport_get_next(seq); } static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) { + sctp_transport_walk_stop(seq); } static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; struct sctp_association *assoc; struct sctp_transport *tsp; - int hash = *(loff_t *)v; - if (hash >= sctp_assoc_hashsize) - return -ENOMEM; + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX " + "REM_ADDR_RTX START STATE\n"); + return 0; + } - head = &sctp_assoc_hashtable[hash]; - local_bh_disable(); - read_lock(&head->lock); - rcu_read_lock(); - sctp_for_each_hentry(epb, &head->chain) { - if (!net_eq(sock_net(epb->sk), seq_file_net(seq))) + tsp = (struct sctp_transport *)v; + assoc = tsp->asoc; + + list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list, + transports) { + if (tsp->dead) continue; - assoc = sctp_assoc(epb); - list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list, - transports) { - if (tsp->dead) - continue; + /* + * The remote address (ADDR) + */ + tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr); + seq_printf(seq, " "); + /* + * The association ID (ASSOC_ID) + */ + seq_printf(seq, "%d ", tsp->asoc->assoc_id); + + /* + * If the Heartbeat is active (HB_ACT) + * Note: 1 = Active, 0 = Inactive + */ + seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer)); + + /* + * Retransmit time out (RTO) + */ + seq_printf(seq, "%lu ", tsp->rto); + + /* + * Maximum path retransmit count (PATH_MAX_RTX) + */ + 
seq_printf(seq, "%d ", tsp->pathmaxrxt); + + /* + * remote address retransmit count (REM_ADDR_RTX) + * Note: We don't have a way to tally this at the moment + * so lets just leave it as zero for the moment + */ + seq_puts(seq, "0 "); + + /* + * remote address start time (START). This is also not + * currently implemented, but we can record it with a + * jiffies marker in a subsequent patch + */ + seq_puts(seq, "0 "); + + /* + * The current state of this destination. I.e. + * SCTP_ACTIVE, SCTP_INACTIVE, ... + */ + seq_printf(seq, "%d", tsp->state); - /* - * The remote address (ADDR) - */ - tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr); - seq_printf(seq, " "); - - /* - * The association ID (ASSOC_ID) - */ - seq_printf(seq, "%d ", tsp->asoc->assoc_id); - - /* - * If the Heartbeat is active (HB_ACT) - * Note: 1 = Active, 0 = Inactive - */ - seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer)); - - /* - * Retransmit time out (RTO) - */ - seq_printf(seq, "%lu ", tsp->rto); - - /* - * Maximum path retransmit count (PATH_MAX_RTX) - */ - seq_printf(seq, "%d ", tsp->pathmaxrxt); - - /* - * remote address retransmit count (REM_ADDR_RTX) - * Note: We don't have a way to tally this at the moment - * so lets just leave it as zero for the moment - */ - seq_puts(seq, "0 "); - - /* - * remote address start time (START). This is also not - * currently implemented, but we can record it with a - * jiffies marker in a subsequent patch - */ - seq_puts(seq, "0 "); - - /* - * The current state of this destination. I.e. - * SCTP_ACTIVE, SCTP_INACTIVE, ... 
- */ - seq_printf(seq, "%d", tsp->state); - - seq_printf(seq, "\n"); - } + seq_printf(seq, "\n"); } - rcu_read_unlock(); - read_unlock(&head->lock); - local_bh_enable(); - return 0; - } static const struct seq_operations sctp_remaddr_ops = { @@ -533,7 +563,7 @@ void sctp_remaddr_proc_exit(struct net *net) static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &sctp_remaddr_ops, - sizeof(struct seq_net_private)); + sizeof(struct sctp_ht_iter)); } static const struct file_operations sctp_remaddr_seq_fops = { -- cgit v1.2.3 From b5eff7128366c4a7a9b502097a968ec9cae2bea2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:49 +0800 Subject: sctp: drop the old assoc hashtable of sctp transport hashtable will replace the association hashtable, so association hashtable is not used in sctp any more, so drop the codes about that. Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 21 ---------------- include/net/sctp/structs.h | 5 ---- net/sctp/input.c | 61 ---------------------------------------------- net/sctp/protocol.c | 30 ++--------------------- net/sctp/sm_sideeffect.c | 2 -- net/sctp/socket.c | 6 +---- 6 files changed, 3 insertions(+), 122 deletions(-) (limited to 'net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 7bbdfbab2efa..835aa2ed9870 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -126,8 +126,6 @@ int sctp_primitive_ASCONF(struct net *, struct sctp_association *, void *arg); */ int sctp_rcv(struct sk_buff *skb); void sctp_v4_err(struct sk_buff *skb, u32 info); -void sctp_hash_established(struct sctp_association *); -void sctp_unhash_established(struct sctp_association *); void sctp_hash_endpoint(struct sctp_endpoint *); void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, @@ -530,25 +528,6 @@ static 
inline int sctp_ep_hashfn(struct net *net, __u16 lport) return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1); } -/* This is the hash function for the association hash table. */ -static inline int sctp_assoc_hashfn(struct net *net, __u16 lport, __u16 rport) -{ - int h = (lport << 16) + rport + net_hash_mix(net); - h ^= h>>8; - return h & (sctp_assoc_hashsize - 1); -} - -/* This is the hash function for the association hash table. This is - * not used yet, but could be used as a better hash function when - * we have a vtag. - */ -static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) -{ - int h = (lport << 16) + rport; - h ^= vtag; - return h & (sctp_assoc_hashsize - 1); -} - #define sctp_for_each_hentry(epb, head) \ hlist_for_each_entry(epb, head, node) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 4ab87d08e766..20e72129be1c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -120,8 +120,6 @@ extern struct sctp_globals { /* This is the hash of all endpoints. */ struct sctp_hashbucket *ep_hashtable; - /* This is the hash of all associations. */ - struct sctp_hashbucket *assoc_hashtable; /* This is the sctp port control hash. */ struct sctp_bind_hashbucket *port_hashtable; /* This is the hash of all transports. */ @@ -129,7 +127,6 @@ extern struct sctp_globals { /* Sizes of above hashtables. */ int ep_hashsize; - int assoc_hashsize; int port_hashsize; /* Default initialization values to be applied to new associations. 
*/ @@ -146,8 +143,6 @@ extern struct sctp_globals { #define sctp_address_families (sctp_globals.address_families) #define sctp_ep_hashsize (sctp_globals.ep_hashsize) #define sctp_ep_hashtable (sctp_globals.ep_hashtable) -#define sctp_assoc_hashsize (sctp_globals.assoc_hashsize) -#define sctp_assoc_hashtable (sctp_globals.assoc_hashtable) #define sctp_port_hashsize (sctp_globals.port_hashsize) #define sctp_port_hashtable (sctp_globals.port_hashtable) #define sctp_transport_hashtable (sctp_globals.transport_hashtable) diff --git a/net/sctp/input.c b/net/sctp/input.c index 6f075d835764..d9a6e66c5c8a 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -913,67 +913,6 @@ struct sctp_transport *sctp_epaddr_lookup_transport( return sctp_addrs_lookup_transport(net, &addr->a, paddr); } -/* Insert association into the hash table. */ -static void __sctp_hash_established(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - struct sctp_ep_common *epb; - struct sctp_hashbucket *head; - - epb = &asoc->base; - - /* Calculate which chain this entry will belong to. */ - epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, - asoc->peer.port); - - head = &sctp_assoc_hashtable[epb->hashent]; - - write_lock(&head->lock); - hlist_add_head(&epb->node, &head->chain); - write_unlock(&head->lock); -} - -/* Add an association to the hash. Local BH-safe. */ -void sctp_hash_established(struct sctp_association *asoc) -{ - if (asoc->temp) - return; - - local_bh_disable(); - __sctp_hash_established(asoc); - local_bh_enable(); -} - -/* Remove association from the hash table. 
*/ -static void __sctp_unhash_established(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; - - epb = &asoc->base; - - epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, - asoc->peer.port); - - head = &sctp_assoc_hashtable[epb->hashent]; - - write_lock(&head->lock); - hlist_del_init(&epb->node); - write_unlock(&head->lock); -} - -/* Remove association from the hash table. Local BH-safe. */ -void sctp_unhash_established(struct sctp_association *asoc) -{ - if (asoc->temp) - return; - - local_bh_disable(); - __sctp_unhash_established(asoc); - local_bh_enable(); -} - /* Look up an association. */ static struct sctp_association *__sctp_lookup_association( struct net *net, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 631cfb380535..ab0d538a74ed 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1416,24 +1416,6 @@ static __init int sctp_init(void) for (order = 0; (1UL << order) < goal; order++) ; - do { - sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE / - sizeof(struct sctp_hashbucket); - if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0) - continue; - sctp_assoc_hashtable = (struct sctp_hashbucket *) - __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order); - } while (!sctp_assoc_hashtable && --order > 0); - if (!sctp_assoc_hashtable) { - pr_err("Failed association hash alloc\n"); - status = -ENOMEM; - goto err_ahash_alloc; - } - for (i = 0; i < sctp_assoc_hashsize; i++) { - rwlock_init(&sctp_assoc_hashtable[i].lock); - INIT_HLIST_HEAD(&sctp_assoc_hashtable[i].chain); - } - /* Allocate and initialize the endpoint hash table. 
*/ sctp_ep_hashsize = 64; sctp_ep_hashtable = @@ -1470,8 +1452,7 @@ static __init int sctp_init(void) if (sctp_transport_hashtable_init()) goto err_thash_alloc; - pr_info("Hash tables configured (established %d bind %d)\n", - sctp_assoc_hashsize, sctp_port_hashsize); + pr_info("Hash tables configured (bind %d)\n", sctp_port_hashsize); sctp_sysctl_register(); @@ -1528,10 +1509,6 @@ err_bhash_alloc: err_thash_alloc: kfree(sctp_ep_hashtable); err_ehash_alloc: - free_pages((unsigned long)sctp_assoc_hashtable, - get_order(sctp_assoc_hashsize * - sizeof(struct sctp_hashbucket))); -err_ahash_alloc: percpu_counter_destroy(&sctp_sockets_allocated); err_percpu_counter_init: kmem_cache_destroy(sctp_chunk_cachep); @@ -1565,13 +1542,10 @@ static __exit void sctp_exit(void) sctp_sysctl_unregister(); - free_pages((unsigned long)sctp_assoc_hashtable, - get_order(sctp_assoc_hashsize * - sizeof(struct sctp_hashbucket))); - kfree(sctp_ep_hashtable); free_pages((unsigned long)sctp_port_hashtable, get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); + kfree(sctp_ep_hashtable); sctp_transport_hashtable_destroy(); percpu_counter_destroy(&sctp_sockets_allocated); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 05cd16400e0b..4f170ad38ff4 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -866,7 +866,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; - sctp_unhash_established(asoc); sctp_association_free(asoc); } @@ -1269,7 +1268,6 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, asoc = cmd->obj.asoc; BUG_ON(asoc->peer.primary_path == NULL); sctp_endpoint_add_asoc(ep, asoc); - sctp_hash_established(asoc); break; case SCTP_CMD_UPDATE_ASSOC: diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b5f4811cea82..9bb80ec4c08f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1228,7 +1228,6 @@ out_free: * To the hash table, try to unhash it, just 
in case, its a noop * if it wasn't hashed so we're safe */ - sctp_unhash_established(asoc); sctp_association_free(asoc); } return err; @@ -1504,7 +1503,6 @@ static void sctp_close(struct sock *sk, long timeout) * ABORT or SHUTDOWN based on the linger options. */ if (sctp_state(asoc, CLOSED)) { - sctp_unhash_established(asoc); sctp_association_free(asoc); continue; } @@ -1986,10 +1984,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) goto out_unlock; out_free: - if (new_asoc) { - sctp_unhash_established(asoc); + if (new_asoc) sctp_association_free(asoc); - } out_unlock: release_sock(sk); -- cgit v1.2.3 From c79c0666915418f9c0f01a6d0e93179416fb0c9e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:50 +0800 Subject: sctp: remove the local_bh_disable/enable in sctp_endpoint_lookup_assoc sctp_endpoint_lookup_assoc is called under the protection of the sock lock, so there is no need to call local_bh_disable/enable in this function. Remove them. Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/endpointola.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'net') diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8838bf492a12..52838eaa1582 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -317,7 +317,7 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, * We lookup the transport from hashtable at first, then get association * through t->assoc. */ -static struct sctp_association *__sctp_endpoint_lookup_assoc( +struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, const union sctp_addr *paddr, struct sctp_transport **transport) @@ -342,21 +342,6 @@ out: return asoc; } -/* Lookup association on an endpoint based on a peer address. BH-safe. 
*/ -struct sctp_association *sctp_endpoint_lookup_assoc( - const struct sctp_endpoint *ep, - const union sctp_addr *paddr, - struct sctp_transport **transport) -{ - struct sctp_association *asoc; - - local_bh_disable(); - asoc = __sctp_endpoint_lookup_assoc(ep, paddr, transport); - local_bh_enable(); - - return asoc; -} - /* Look for any peeled off association from the endpoint that matches the * given peer address. */ -- cgit v1.2.3 From a72a5e2d34ec2921c0d9a7545093087e4cb90d0a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jan 2016 22:17:55 +0100 Subject: inet: kill unused skb_free op The only user was removed in commit 029f7f3b8701cc7a ("netfilter: ipv6: nf_defrag: avoid/free clone operations"). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 1 - net/ieee802154/6lowpan/reassembly.c | 1 - net/ipv4/inet_fragment.c | 10 +--------- net/ipv4/ip_fragment.c | 1 - net/ipv6/reassembly.c | 1 - 5 files changed, 1 insertion(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index ac42bbb37b2d..12aac0fd6ee7 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -99,7 +99,6 @@ struct inet_frags { void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); - void (*skb_free)(struct sk_buff *); void (*frag_expire)(unsigned long data); struct kmem_cache *frags_cachep; const char *frags_cache_name; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 6b437e8760d3..30d875dff6b5 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -624,7 +624,6 @@ int __init lowpan_net_frag_init(void) lowpan_frags.hashfn = lowpan_hashfn; lowpan_frags.constructor = lowpan_frag_init; lowpan_frags.destructor = NULL; - lowpan_frags.skb_free = NULL; lowpan_frags.qsize = sizeof(struct frag_queue); lowpan_frags.match = lowpan_frag_match; 
lowpan_frags.frag_expire = lowpan_frag_expire; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index fe144dae7372..3a88b0c73797 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -285,14 +285,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) } EXPORT_SYMBOL(inet_frag_kill); -static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, - struct sk_buff *skb) -{ - if (f->skb_free) - f->skb_free(skb); - kfree_skb(skb); -} - void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) { struct sk_buff *fp; @@ -309,7 +301,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) struct sk_buff *xp = fp->next; sum_truesize += fp->truesize; - frag_kfree_skb(nf, f, fp); + kfree_skb(fp); fp = xp; } sum = sum_truesize + f->qsize; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1fe55ae81781..3f00810b7288 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -891,7 +891,6 @@ void __init ipfrag_init(void) ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init; ip4_frags.destructor = ip4_frag_free; - ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 45f5ae51de65..18f3498a6c80 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -755,7 +755,6 @@ int __init ipv6_frag_init(void) ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = NULL; - ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; -- cgit v1.2.3 From 1134158ba3d656b8dbc79a23d482129a531ba0ae Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 5 Jan 2016 15:08:07 -0500 Subject: soreuseport: pass skb to secondary UDP socket lookup This socket-lookup path did not pass along 
the skb in question in my original BPF-based socket selection patch. The skb in the udpN_lib_lookup2 path can be used for BPF-based socket selection just like it is in the 'traditional' udpN_lib_lookup path. udpN_lib_lookup2 kicks in when there are greater than 10 sockets in the same hlist slot. Coincidentally, I chose 10 sockets per reuseport group in my functional test, so the lookup2 path was not excersised. This adds an additional set of tests with 20 sockets. Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF") Fixes: 3ca8e4029969 ("soreuseport: BPF selection functional test") Suggested-by: Eric Dumazet Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 10 +++--- net/ipv6/udp.c | 10 +++--- tools/testing/selftests/net/reuseport_bpf.c | 47 +++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 835378365f25..3a66731e3af6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -493,7 +493,8 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2) + struct udp_hslot *hslot2, unsigned int slot2, + struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -514,7 +515,8 @@ begin: struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash, NULL, 0); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -573,7 +575,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2); + hslot2, slot2, skb); if (!result) { hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; @@ -583,7 
+585,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, htonl(INADDR_ANY), hnum, dif, - hslot2, slot2); + hslot2, slot2, skb); } rcu_read_unlock(); return result; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 56fcb55fda31..5d2c2afffe7b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -251,7 +251,8 @@ static inline int compute_score2(struct sock *sk, struct net *net, static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2) + struct udp_hslot *hslot2, unsigned int slot2, + struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -272,7 +273,8 @@ begin: struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash, NULL, 0); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -331,7 +333,7 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2); + hslot2, slot2, skb); if (!result) { hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; @@ -341,7 +343,7 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, &in6addr_any, hnum, dif, - hslot2, slot2); + hslot2, slot2, skb); } rcu_read_unlock(); return result; diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 74ff09988958..bec1b5dd2530 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -123,6 +123,8 @@ static void attach_ebpf(int fd, uint16_t mod) if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd, sizeof(bpf_fd))) error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF"); + + close(bpf_fd); } static void 
attach_cbpf(int fd, uint16_t mod) @@ -396,6 +398,9 @@ static void test_filter_without_bind(void) int main(void) { fprintf(stderr, "---- IPv4 UDP ----\n"); + /* NOTE: UDP socket lookups traverse a different code path when there + * are > 10 sockets in a group. Run the bpf test through both paths. + */ test_reuseport_ebpf((struct test_params) { .recv_family = AF_INET, .send_family = AF_INET, @@ -403,6 +408,13 @@ int main(void) .recv_socks = 10, .recv_port = 8000, .send_port_min = 9000}); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8000, + .send_port_min = 9000}); test_reuseport_cbpf((struct test_params) { .recv_family = AF_INET, .send_family = AF_INET, @@ -410,6 +422,13 @@ int main(void) .recv_socks = 10, .recv_port = 8001, .send_port_min = 9020}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8001, + .send_port_min = 9020}); test_extra_filter((struct test_params) { .recv_family = AF_INET, .protocol = SOCK_DGRAM, @@ -427,6 +446,13 @@ int main(void) .recv_socks = 10, .recv_port = 8003, .send_port_min = 9040}); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8003, + .send_port_min = 9040}); test_reuseport_cbpf((struct test_params) { .recv_family = AF_INET6, .send_family = AF_INET6, @@ -434,6 +460,13 @@ int main(void) .recv_socks = 10, .recv_port = 8004, .send_port_min = 9060}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET6, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8004, + .send_port_min = 9060}); test_extra_filter((struct test_params) { .recv_family = AF_INET6, .protocol = SOCK_DGRAM, @@ -444,6 +477,13 @@ int main(void) .recv_port = 8009}); fprintf(stderr, "---- IPv6 
UDP w/ mapped IPv4 ----\n"); + test_reuseport_ebpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8006, + .send_port_min = 9080}); test_reuseport_ebpf((struct test_params) { .recv_family = AF_INET6, .send_family = AF_INET, @@ -458,6 +498,13 @@ int main(void) .recv_socks = 10, .recv_port = 8007, .send_port_min = 9100}); + test_reuseport_cbpf((struct test_params) { + .recv_family = AF_INET6, + .send_family = AF_INET, + .protocol = SOCK_DGRAM, + .recv_socks = 20, + .recv_port = 8007, + .send_port_min = 9100}); test_filter_without_bind(); -- cgit v1.2.3 From 00ce3a15d811978fcb204a1a3f5f8c059096fa5e Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 5 Jan 2016 10:57:13 -0500 Subject: soreuseport: change consume_skb to kfree_skb in error case Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF") Suggested-by: Daniel Borkmann Signed-off-by: Craig Gallek Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/sock_reuseport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index ae0969c0fc2e..1df98c557440 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -173,7 +173,7 @@ static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks, /* temporarily advance data past protocol header */ if (!pskb_pull(skb, hdr_len)) { - consume_skb(nskb); + kfree_skb(nskb); return NULL; } index = bpf_prog_run_save_cb(prog, skb); -- cgit v1.2.3 From 787b306cf3296bdce5c8559206b237c1ae107484 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Jan 2016 14:38:40 +0100 Subject: Bluetooth: avoid rebuilding hci_sock all the time Instead, allow using string formatting with send_monitor_note() and access init_utsname(). 
Signed-off-by: Johannes Berg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sock.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 41f579ba447b..1298d723c0e0 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -25,9 +25,8 @@ /* Bluetooth HCI sockets. */ #include +#include #include -#include -#include #include #include @@ -385,17 +384,26 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return skb; } -static void send_monitor_note(struct sock *sk, const char *text) +static void __printf(2, 3) +send_monitor_note(struct sock *sk, const char *fmt, ...) { - size_t len = strlen(text); + size_t len; struct hci_mon_hdr *hdr; struct sk_buff *skb; + va_list args; + + va_start(args, fmt); + len = vsnprintf(NULL, 0, fmt, args); + va_end(args); skb = bt_skb_alloc(len + 1, GFP_ATOMIC); if (!skb) return; - strcpy(skb_put(skb, len + 1), text); + va_start(args, fmt); + vsprintf(skb_put(skb, len), fmt, args); + *skb_put(skb, 1) = 0; + va_end(args); __net_timestamp(skb); @@ -897,10 +905,11 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, */ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); - send_monitor_note(sk, "Linux version " UTS_RELEASE - " (" UTS_MACHINE ")"); - send_monitor_note(sk, "Bluetooth subsystem version " - BT_SUBSYS_VERSION); + send_monitor_note(sk, "Linux version %s (%s)", + init_utsname()->release, + init_utsname()->machine); + send_monitor_note(sk, "Bluetooth subsystem version %s", + BT_SUBSYS_VERSION); send_monitor_replay(sk); atomic_inc(&monitor_promisc); -- cgit v1.2.3 From 08474cc1e6ea71237cab7e4a651a623c9dea1084 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 6 Jan 2016 13:01:04 +0100 Subject: bridge: Propagate vlan add failure to user Disallow adding interfaces to a bridge when vlan filtering operation failed. Send the failure code to the user. 
Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_if.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 8d1d4a22c50d..c367b3e1b5ac 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -511,8 +511,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_fdb_insert(br, p, dev->dev_addr, 0)) netdev_err(dev, "failed insert local address bridge forwarding table\n"); - if (nbp_vlan_init(p)) + err = nbp_vlan_init(p); + if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); + goto err6; + } spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); @@ -533,6 +536,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; +err6: + list_del_rcu(&p->list); + br_fdb_delete_by_port(br, p, 0, 1); + nbp_update_port_count(br); + netdev_upper_dev_unlink(dev, br->dev); + err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); -- cgit v1.2.3 From 6b72a770202a0ad843312436dd50ed4690d7cc65 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 6 Jan 2016 13:01:06 +0100 Subject: bridge: add vlan filtering change notification Notifying hardware about bridge vlan-aware changes. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/bridge/br_vlan.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 66c4549efbbb..190fb3372ab5 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -626,9 +626,21 @@ void br_recalculate_fwd_mask(struct net_bridge *br) int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { + struct switchdev_attr attr = { + .orig_dev = br->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.vlan_filtering = val, + }; + int err; + if (br->vlan_enabled == val) return 0; + err = switchdev_port_attr_set(br->dev, &attr); + if (err && err != -EOPNOTSUPP) + return err; + br->vlan_enabled = val; br_manage_promisc(br); recalculate_group_addr(br); @@ -639,13 +651,15 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { + int err; + if (!rtnl_trylock()) return restart_syscall(); - __br_vlan_filter_toggle(br, val); + err = __br_vlan_filter_toggle(br, val); rtnl_unlock(); - return 0; + return err; } int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) -- cgit v1.2.3 From 404cdbf0894a0707dd19179d2e21a3ab37f33f54 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 6 Jan 2016 13:01:07 +0100 Subject: bridge: add vlan filtering change for new bridged device Notifying hardware about newly bridged port vlan-aware changes. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/bridge/br_vlan.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 190fb3372ab5..85e43af4af7a 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -907,6 +907,12 @@ err_rhtbl: int nbp_vlan_init(struct net_bridge_port *p) { + struct switchdev_attr attr = { + .orig_dev = p->br->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.vlan_filtering = p->br->vlan_enabled, + }; struct net_bridge_vlan_group *vg; int ret = -ENOMEM; @@ -914,6 +920,10 @@ int nbp_vlan_init(struct net_bridge_port *p) if (!vg) goto out; + ret = switchdev_port_attr_set(p->dev, &attr); + if (ret && ret != -EOPNOTSUPP) + goto err_vlan_enabled; + ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params); if (ret) goto err_rhtbl; @@ -933,6 +943,7 @@ err_vlan_add: RCU_INIT_POINTER(p->vlgrp, NULL); synchronize_rcu(); rhashtable_destroy(&vg->vlan_hash); +err_vlan_enabled: err_rhtbl: kfree(vg); -- cgit v1.2.3 From c7f5d105495a38ed09e70d825f75d9d7d5407264 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 5 Nov 2015 11:34:57 -0500 Subject: net: Add eth_platform_get_mac_address() helper. A repeating pattern in drivers has become to use OF node information and, if not found, platform specific host information to extract the ethernet address for a given device. Currently this is done with a call to of_get_mac_address() and then some ifdef'd stuff for SPARC. Consolidate this into a portable routine, and provide the arch_get_platform_mac_address() weak function hook for all architectures to implement if they want. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/idprom.c | 7 +++++++ include/linux/etherdevice.h | 3 +++ net/ethernet/eth.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'net') diff --git a/arch/sparc/kernel/idprom.c b/arch/sparc/kernel/idprom.c index 6bd75012109d..f95dd11b75ea 100644 --- a/arch/sparc/kernel/idprom.c +++ b/arch/sparc/kernel/idprom.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,12 @@ static void __init display_system_type(unsigned char machtype) { } #endif + +unsigned char *arch_get_platform_mac_address(void) +{ + return idprom->id_ethaddr; +} + /* Calculate the IDPROM checksum (xor of the data bytes). */ static unsigned char __init calc_idprom_cksum(struct idprom *idprom) { diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index eb049c622208..37ff4a6faa9a 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -29,6 +29,9 @@ #include #ifdef __KERNEL__ +struct device; +int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); +unsigned char *arch_get_platform_get_mac_address(void); u32 eth_get_headlen(void *data, unsigned int max_len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9e63f252a89e..103871784e50 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -52,6 +52,8 @@ #include #include #include +#include +#include #include #include #include @@ -485,3 +487,32 @@ static int __init eth_offload_init(void) } fs_initcall(eth_offload_init); + +unsigned char * __weak arch_get_platform_mac_address(void) +{ + return NULL; +} + +int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) +{ + const unsigned char *addr; + struct device_node *dp; + + if (dev_is_pci(dev)) + dp = pci_device_to_OF_node(to_pci_dev(dev)); + else + dp = dev->of_node; + + addr = NULL; + if (dp) + addr = of_get_mac_address(dp); + if 
(!addr) + addr = arch_get_platform_mac_address(); + + if (!addr) + return -ENODEV; + + ether_addr_copy(mac_addr, addr); + return 0; +} +EXPORT_SYMBOL(eth_platform_get_mac_address); -- cgit v1.2.3 From 13bbdd370f67aef3351ad7bbc2fb624e3c23f905 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 29 Nov 2015 01:48:57 +0100 Subject: batman-adv: Fix invalid read while copying bat_iv.bcast_own batadv_iv_ogm_orig_del_if removes a part of the bcast_own which previously belonged to the now removed interface. This is done by copying all data which comes before the removed interface and then appending all the data which comes after the removed interface. The address calculation for the position of the data which comes after the removed interface assumed that bat_iv.bcast_own is a pointer to a single byte datatype. But it is a pointer to unsigned long and thus the calculated position was off by a factor of sizeof(unsigned long). Fixes: 83a8342678a0 ("more basic routing code added (forwarding packets / bitarray added)") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 912d9c36fb1c..aa94b4ec766a 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -185,7 +185,8 @@ unlock: static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, int max_if_num, int del_if_num) { - int chunk_size, ret = -ENOMEM, if_offset; + int ret = -ENOMEM; + size_t chunk_size, if_offset; void *data_ptr = NULL; spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); @@ -203,8 +204,9 @@ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size); /* copy second part */ + if_offset = (del_if_num + 1) * chunk_size; memcpy((char *)data_ptr + del_if_num * chunk_size, -
orig_node->bat_iv.bcast_own + ((del_if_num + 1) * chunk_size), + (uint8_t *)orig_node->bat_iv.bcast_own + if_offset, (max_if_num - del_if_num) * chunk_size); free_bcast_own: -- cgit v1.2.3 From 2220943a21e26d97d7fd8f83c004b947326b469d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:13 +0100 Subject: phy: Centralise print about attached phy Many Ethernet drivers contain the same netdev_info() print statement about the attached phy. Move it into the phy device code. Additionally add a varargs function which can be used to append additional information. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/adi/bfin_mac.c | 6 ++---- drivers/net/ethernet/agere/et131x.c | 4 +--- drivers/net/ethernet/amd/au1000_eth.c | 4 +--- drivers/net/ethernet/broadcom/b44.c | 3 +-- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 3 +-- drivers/net/ethernet/broadcom/sb1250-mac.c | 7 +++---- drivers/net/ethernet/broadcom/tg3.c | 10 +++------- drivers/net/ethernet/cadence/macb.c | 3 +-- drivers/net/ethernet/dnet.c | 4 +--- drivers/net/ethernet/freescale/fec_main.c | 4 +--- drivers/net/ethernet/lantiq_etop.c | 4 +--- drivers/net/ethernet/nxp/lpc_eth.c | 5 ++--- drivers/net/ethernet/rdc/r6040.c | 4 +--- drivers/net/ethernet/renesas/ravb_main.c | 3 +-- drivers/net/ethernet/renesas/sh_eth.c | 3 +-- drivers/net/ethernet/smsc/smsc911x.c | 4 +--- drivers/net/ethernet/smsc/smsc9420.c | 9 ++------- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 12 +----------- drivers/net/ethernet/ti/cpsw.c | 4 ++-- drivers/net/ethernet/ti/davinci_emac.c | 5 +---- drivers/net/ethernet/toshiba/tc35815.c | 6 ++---- drivers/net/phy/phy_device.c | 27 +++++++++++++++++++++++++++ drivers/staging/netlogic/xlr_net.c | 3 +-- include/linux/phy.h | 4 ++++ net/dsa/slave.c | 5 ++--- 25 files changed, 64 insertions(+), 82 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index 5f8a5182b8dc..ed5c78cb7239 100644 
--- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -444,10 +444,8 @@ static int mii_probe(struct net_device *dev, int phy_mode) lp->old_duplex = -1; lp->phydev = phydev; - pr_info("attached PHY driver [%s] " - "(mii_bus:phy_addr=%s, irq=%d, mdc_clk=%dHz(mdc_div=%d)@sclk=%dMHz)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq, - MDC_CLK, mdc_div, sclk/1000000); + phy_attached_print(phydev, "mdc_clk=%dHz(mdc_div=%d)@sclk=%dMHz)\n", + MDC_CLK, mdc_div, sclk / 1000000); return 0; } diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 80b706f0fc97..825da3af806a 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3289,9 +3289,7 @@ static int et131x_mii_probe(struct net_device *netdev) phydev->autoneg = AUTONEG_ENABLE; adapter->phydev = phydev; - dev_info(&adapter->pdev->dev, - "attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", - phydev->drv->name, phydev_name(phydev)); + phy_attached_info(phydev); return 0; } diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 8a8d6f2a0f6f..114618d357d5 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -583,9 +583,7 @@ static int au1000_mii_probe(struct net_device *dev) aup->old_duplex = -1; aup->phy_dev = phydev; - netdev_info(dev, "attached PHY driver [%s] " - "(mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); return 0; } diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 928a2210e788..e7d9308d6760 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2315,8 +2315,7 @@ static int b44_register_phy_one(struct b44 *bp) bp->old_link = 0; bp->phy_addr = phydev->addr; - dev_info(sdev->dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", - phydev->drv->name, 
phydev_name(phydev)); + phy_attached_info(phydev); return 0; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index a54bafad3538..55f31faa09e6 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -908,8 +908,7 @@ static int bcm_enet_open(struct net_device *dev) else phydev->advertising &= ~SUPPORTED_Pause; - dev_info(kdev, "attached PHY at address %d [%s]\n", - phydev->addr, phydev->drv->name); + phy_attached_info(phydev); priv->old_link = 0; priv->old_duplex = -1; diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index f557a2aaec23..2470c6084c67 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2388,11 +2388,10 @@ static int sbmac_mii_probe(struct net_device *dev) SUPPORTED_MII | SUPPORTED_Pause | SUPPORTED_Asym_Pause; - phy_dev->advertising = phy_dev->supported; - pr_info("%s: attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - dev->name, phy_dev->drv->name, - dev_name(&phy_dev->dev), phy_dev->irq); + phy_attached_info(phydev); + + phy_dev->advertising = phy_dev->supported; sc->phy_dev = phy_dev; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 69d84d67f09a..07c067590caa 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -2128,6 +2128,8 @@ static int tg3_phy_init(struct tg3 *tp) phydev->advertising = phydev->supported; + phy_attached_info(phydev); + return 0; } @@ -17898,13 +17900,7 @@ static int tg3_init_one(struct pci_dev *pdev, tg3_bus_string(tp, str), dev->dev_addr); - if (tp->phy_flags & TG3_PHYFLG_IS_CONNECTED) { - struct phy_device *phydev; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; - netdev_info(dev, - "attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", - phydev->drv->name, phydev_name(phydev)); - } else { + if (!(tp->phy_flags & 
TG3_PHYFLG_IS_CONNECTED)) { char *ethtype; if (tp->phy_flags & TG3_PHYFLG_10_100_ONLY) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 001d60c5521c..98df33b7a395 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2950,8 +2950,7 @@ static int macb_probe(struct platform_device *pdev) dev->base_addr, dev->irq, dev->dev_addr); phydev = bp->phy_dev; - netdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); return 0; diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 0ec367521354..6557460cf028 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -886,9 +886,7 @@ static int dnet_probe(struct platform_device *pdev) (bp->capabilities & DNET_HAS_GIGABIT) ? "" : "no ", (bp->capabilities & DNET_HAS_DMA) ? "" : "no "); phydev = bp->phy_dev; - dev_info(&pdev->dev, "attached PHY driver [%s] " - "(mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); return 0; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d2328fc5da57..ceabe21b3b2c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1972,9 +1972,7 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - netdev_info(ndev, "Freescale FEC PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - fep->phy_dev->drv->name, dev_name(&fep->phy_dev->dev), - fep->phy_dev->irq); + phy_attached_info(phy_dev); return 0; } diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 274a3cec84c2..86238a5eaddf 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -408,9 +408,7 @@ ltq_etop_mdio_probe(struct net_device *dev) 
phydev->advertising = phydev->supported; priv->phydev = phydev; - pr_info("%s: attached PHY [%s] (phy_addr=%s, irq=%d)\n", - dev->name, phydev->drv->name, - phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); return 0; } diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 5801aa197697..024bc3675573 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -816,9 +816,8 @@ static int lpc_mii_probe(struct net_device *ndev) pldat->duplex = -1; pldat->phy_dev = phydev; - netdev_info(ndev, - "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); + return 0; } diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 86a0887811c7..174dea787caf 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1061,9 +1061,7 @@ static int r6040_mii_probe(struct net_device *dev) lp->old_link = 0; lp->old_duplex = -1; - dev_info(&lp->pdev->dev, "attached PHY driver [%s] " - "(mii_bus:phy_addr=%s)\n", - phydev->drv->name, phydev_name(phydev)); + phy_attached_info(phydev); return 0; } diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 2f6c974e4a6d..9e20f37a3b6f 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -927,8 +927,7 @@ static int ravb_phy_init(struct net_device *ndev) /* 10BASE is not supported */ phydev->supported &= ~PHY_10BT_FEATURES; - netdev_info(ndev, "attached PHY %d (IRQ %d) to driver %s\n", - phydev->addr, phydev->irq, phydev_name(phydev)); + phy_attached_info(phydev); priv->phydev = phydev; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index e14d28474b70..94581be64d65 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1826,8 +1826,7 @@ 
static int sh_eth_phy_init(struct net_device *ndev) return PTR_ERR(phydev); } - netdev_info(ndev, "attached PHY %d (IRQ %d) to driver %s\n", - phydev->addr, phydev->irq, phydev_name(phydev)); + phy_attached_info(phydev); mdp->phydev = phydev; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 067346d3209d..139b99b04099 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1031,9 +1031,7 @@ static int smsc911x_mii_probe(struct net_device *dev) return ret; } - netdev_info(dev, - "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); + phy_attached_info(phydev); /* mask with MAC supported features */ phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause | diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index a02ed6b63064..fa8893a804f7 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -1163,10 +1163,6 @@ static int smsc9420_mii_probe(struct net_device *dev) return -ENODEV; } - phydev = pd->mii_bus->phy_map[1]; - netif_info(pd, probe, pd->dev, "PHY addr %d, phy_id 0x%08X\n", - phydev->addr, phydev->phy_id); - phydev = phy_connect(dev, phydev_name(phydev), smsc9420_phy_adjust_link, PHY_INTERFACE_MODE_MII); @@ -1175,14 +1171,13 @@ static int smsc9420_mii_probe(struct net_device *dev) return PTR_ERR(phydev); } - netdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - phydev->drv->name, phydev_name(phydev), phydev->irq); - /* mask with MAC supported features */ phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause); phydev->advertising = phydev->supported; + phy_attached_info(phydev); + pd->phy_dev = phydev; pd->last_duplex = -1; pd->last_carrier = -1; diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 9066d7a8483c..b25ee370254a 
100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -972,9 +972,7 @@ static int dwceqos_mii_probe(struct net_device *ndev) } if (netif_msg_probe(lp)) - netdev_dbg(lp->ndev, - "phydev %p, phydev->phy_id 0xa%x, phydev->addr 0x%x\n", - phydev, phydev->phy_id, phydev->addr); + phy_attached_info(phydev); phydev->supported &= PHY_GBIT_FEATURES; @@ -983,14 +981,6 @@ static int dwceqos_mii_probe(struct net_device *ndev) lp->duplex = DUPLEX_UNKNOWN; lp->phy_dev = phydev; - if (netif_msg_probe(lp)) { - netdev_dbg(lp->ndev, "phy_addr 0x%x, phy_id 0x%08x\n", - lp->phy_dev->addr, lp->phy_dev->phy_id); - - netdev_dbg(lp->ndev, "attach [%s] phy driver\n", - lp->phy_dev->drv->name); - } - return 0; } diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 34ce7dce8c9d..49544c0fa6a7 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1159,8 +1159,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->data->phy_id, slave->slave_num); slave->phy = NULL; } else { - dev_info(priv->dev, "phy found : id is : 0x%x\n", - slave->phy->phy_id); + phy_attached_info(slave->phy); + phy_start(slave->phy); /* Configure GMII_SEL register */ diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 5a40b0256327..5d9abedd6b75 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1644,10 +1644,7 @@ static int emac_dev_open(struct net_device *ndev) priv->speed = 0; priv->duplex = ~0; - dev_info(emac_dev, "attached PHY driver [%s] " - "(mii_bus:phy_addr=%s, id=%x)\n", - priv->phydev->drv->name, phydev_name(priv->phydev), - priv->phydev->phy_id); + phy_attached_info(priv->phydev); } if (!priv->phydev) { diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 8df6072ac78d..8fd5e0ba718c 100644 --- 
a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -638,10 +638,8 @@ static int tc_mii_probe(struct net_device *dev) printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name); return PTR_ERR(phydev); } - printk(KERN_INFO "%s: attached PHY driver [%s] " - "(mii_bus:phy_addr=%s, id=%x)\n", - dev->name, phydev->drv->name, phydev_name(phydev), - phydev->phy_id); + + phy_attached_info(phydev); /* mask with MAC supported features */ phydev->supported &= PHY_BASIC_FEATURES; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0f179709a289..68fe5738daef 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -607,6 +607,33 @@ int phy_init_hw(struct phy_device *phydev) } EXPORT_SYMBOL(phy_init_hw); +void phy_attached_info(struct phy_device *phydev) +{ + phy_attached_print(phydev, NULL); +} +EXPORT_SYMBOL(phy_attached_info); + +#define ATTACHED_FMT "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)" +void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) 
+{ + if (!fmt) { + dev_info(&phydev->dev, ATTACHED_FMT "\n", + phydev->drv->name, phydev_name(phydev), + phydev->irq); + } else { + va_list ap; + + dev_info(&phydev->dev, ATTACHED_FMT, + phydev->drv->name, phydev_name(phydev), + phydev->irq); + + va_start(ap, fmt); + vprintk(fmt, ap); + va_end(ap); + } +} +EXPORT_SYMBOL(phy_attached_print); + /** * phy_attach_direct - attach a network device to a given PHY device pointer * @dev: network device to attach diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c index b939c4b5f229..cbc25b7e70a2 100644 --- a/drivers/staging/netlogic/xlr_net.c +++ b/drivers/staging/netlogic/xlr_net.c @@ -854,8 +854,7 @@ static int xlr_mii_probe(struct xlr_net_priv *priv) | ADVERTISED_MII); phydev->advertising = phydev->supported; - pr_info("attached PHY driver [%s] (mii_bus:phy_addr=%s\n", - phydev->drv->name, phydev_name(phydev)); + phy_attached_info(phydev); return 0; } diff --git a/include/linux/phy.h b/include/linux/phy.h index 08198ce98773..ecbf6382ba29 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -16,6 +16,7 @@ #ifndef __PHY_H #define __PHY_H +#include #include #include #include @@ -785,6 +786,9 @@ static inline const char *phydev_name(const struct phy_device *phydev) return dev_name(&phydev->dev); } +void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) 
+ __printf(2, 3); +void phy_attached_info(struct phy_device *phydev); int genphy_config_init(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1e9e9424a33d..5f45e68b52dc 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1080,11 +1080,10 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->port, ret); return ret; } - } else { - netdev_info(slave_dev, "attached PHY at address %d [%s]\n", - p->phy->addr, p->phy->drv->name); } + phy_attached_info(p->phy); + return 0; } -- cgit v1.2.3 From e5a03bfd873c29eb786655ef2e95e53ed242b404 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:16 +0100 Subject: phy: Add an mdio_device structure Not all devices attached to an MDIO bus are phys. So add an mdio_device structure to represent the generic parts of an mdio device, and place this structure into the phy_device. Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/ethernet/agere/et131x.c | 30 ++++++------ drivers/net/ethernet/altera/altera_tse_main.c | 2 +- drivers/net/ethernet/broadcom/b44.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +- drivers/net/ethernet/broadcom/sb1250-mac.c | 4 +- drivers/net/ethernet/freescale/fman/fman_dtsec.c | 6 +-- drivers/net/ethernet/freescale/fman/fman_memac.c | 6 +-- drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 2 +- drivers/net/ethernet/freescale/gianfar.c | 4 +- drivers/net/ethernet/freescale/ucc_geth.c | 4 +- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- drivers/net/ethernet/marvell/mvneta.c | 2 +- drivers/net/ethernet/smsc/smsc911x.c | 11 +++-- drivers/net/ethernet/smsc/smsc9420.c | 3 +- drivers/net/ethernet/ti/cpsw.c | 3 +- drivers/net/ethernet/ti/davinci_mdio.c | 2 +- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 2 +- drivers/net/phy/at803x.c | 2 +- drivers/net/phy/bcm87xx.c | 4 +- drivers/net/phy/dp83640.c | 22 +++++---- drivers/net/phy/dp83867.c | 4 +- drivers/net/phy/fixed_phy.c | 10 ++-- drivers/net/phy/icplus.c | 18 +++---- drivers/net/phy/marvell.c | 7 +-- drivers/net/phy/mdio_bus.c | 12 ++--- drivers/net/phy/micrel.c | 12 ++--- drivers/net/phy/microchip.c | 4 +- drivers/net/phy/phy.c | 25 +++++----- drivers/net/phy/phy_device.c | 62 ++++++++++++------------ drivers/net/phy/smsc.c | 2 +- drivers/of/of_mdio.c | 6 +-- include/linux/mdio.h | 9 ++++ include/linux/phy.h | 26 +++++----- net/dsa/dsa.c | 2 +- 35 files changed, 165 insertions(+), 151 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index f29d45eea1d9..3f3bcbea15bd 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -1235,7 +1235,7 @@ static int et131x_mii_read(struct et131x_adapter *adapter, u8 reg, u16 *value) if (!phydev) return -EIO; - return et131x_phy_mii_read(adapter, phydev->addr, reg, value); + return 
et131x_phy_mii_read(adapter, phydev->mdio.addr, reg, value); } static int et131x_mii_write(struct et131x_adapter *adapter, u8 addr, u8 reg, @@ -1462,7 +1462,7 @@ static void et1310_phy_power_switch(struct et131x_adapter *adapter, bool down) data &= ~BMCR_PDOWN; if (down) data |= BMCR_PDOWN; - et131x_mii_write(adapter, phydev->addr, MII_BMCR, data); + et131x_mii_write(adapter, phydev->mdio.addr, MII_BMCR, data); } /* et131x_xcvr_init - Init the phy if we are setting it into force mode */ @@ -1490,7 +1490,7 @@ static void et131x_xcvr_init(struct et131x_adapter *adapter) else lcr2 |= (LED_VAL_LINKON << LED_TXRX_SHIFT); - et131x_mii_write(adapter, phydev->addr, PHY_LED_2, lcr2); + et131x_mii_write(adapter, phydev->mdio.addr, PHY_LED_2, lcr2); } } @@ -3192,14 +3192,14 @@ static void et131x_adjust_link(struct net_device *netdev) et131x_mii_read(adapter, PHY_MPHY_CONTROL_REG, &register18); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, PHY_MPHY_CONTROL_REG, register18 | 0x4); - et131x_mii_write(adapter, phydev->addr, PHY_INDEX_REG, - register18 | 0x8402); - et131x_mii_write(adapter, phydev->addr, PHY_DATA_REG, - register18 | 511); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, + PHY_INDEX_REG, register18 | 0x8402); + et131x_mii_write(adapter, phydev->mdio.addr, + PHY_DATA_REG, register18 | 511); + et131x_mii_write(adapter, phydev->mdio.addr, PHY_MPHY_CONTROL_REG, register18); } @@ -3212,8 +3212,8 @@ static void et131x_adjust_link(struct net_device *netdev) et131x_mii_read(adapter, PHY_CONFIG, &reg); reg &= ~ET_PHY_CONFIG_TX_FIFO_DEPTH; reg |= ET_PHY_CONFIG_FIFO_DEPTH_32; - et131x_mii_write(adapter, phydev->addr, PHY_CONFIG, - reg); + et131x_mii_write(adapter, phydev->mdio.addr, + PHY_CONFIG, reg); } et131x_set_rx_dma_timer(adapter); @@ -3226,14 +3226,14 @@ static void et131x_adjust_link(struct net_device *netdev) et131x_mii_read(adapter, PHY_MPHY_CONTROL_REG, &register18); -
et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, PHY_MPHY_CONTROL_REG, register18 | 0x4); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, PHY_INDEX_REG, register18 | 0x8402); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, PHY_DATA_REG, register18 | 511); - et131x_mii_write(adapter, phydev->addr, + et131x_mii_write(adapter, phydev->mdio.addr, PHY_MPHY_CONTROL_REG, register18); } diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 10d51e8aefe0..17472851674f 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -844,7 +844,7 @@ static int init_phy(struct net_device *dev) } netdev_dbg(dev, "attached to PHY %d UID 0x%08x Link = %d\n", - phydev->addr, phydev->phy_id, phydev->link); + phydev->mdio.addr, phydev->phy_id, phydev->link); priv->phydev = phydev; return 0; diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 4d08bc02c7a8..843a4a5864fc 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2305,7 +2305,7 @@ static int b44_register_phy_one(struct b44 *bp) bp->phydev = phydev; bp->old_link = 0; - bp->phy_addr = phydev->addr; + bp->phy_addr = phydev->mdio.addr; phy_attached_info(phydev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 4523acd8c1c2..633b59db813a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -401,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev) * Ethernet MAC ISRs */ if (priv->internal_phy) - priv->mii_bus->irq[phydev->addr] = PHY_IGNORE_INTERRUPT; + priv->mii_bus->irq[phydev->mdio.addr] = PHY_IGNORE_INTERRUPT; return 0; } diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c 
b/drivers/net/ethernet/broadcom/sb1250-mac.c index 68a363708d27..768c18da510c 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2366,8 +2366,8 @@ static int sbmac_mii_probe(struct net_device *dev) return -ENXIO; } - phy_dev = phy_connect(dev, dev_name(&phy_dev->dev), &sbmac_mii_poll, - PHY_INTERFACE_MODE_GMII); + phy_dev = phy_connect(dev, dev_name(&phy_dev->mdio.dev), + &sbmac_mii_poll, PHY_INTERFACE_MODE_GMII); if (IS_ERR(phy_dev)) { printk(KERN_ERR "%s: could not attach to PHY\n", dev->name); return PTR_ERR(phy_dev); diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index 587f9b40cfaa..6b1261c0b1c2 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -1295,7 +1295,7 @@ int dtsec_init(struct fman_mac *dtsec) err = init(dtsec->regs, dtsec_drv_param, dtsec->phy_if, dtsec->max_speed, (u8 *)eth_addr, dtsec->exceptions, - dtsec->tbiphy->addr); + dtsec->tbiphy->mdio.addr); if (err) { free_init_resources(dtsec); pr_err("DTSEC version doesn't support this i/f mode\n"); @@ -1434,11 +1434,11 @@ struct fman_mac *dtsec_config(struct fman_mac_params *params) dtsec->tbiphy = of_phy_find_device(params->internal_phy_node); if (!dtsec->tbiphy) { pr_err("of_phy_find_device (TBI PHY) failed\n"); - put_device(&dtsec->tbiphy->dev); + put_device(&dtsec->tbiphy->mdio.dev); goto err_dtsec_drv_param; } - put_device(&dtsec->tbiphy->dev); + put_device(&dtsec->tbiphy->mdio.dev); /* Save FMan revision */ fman_get_revision(dtsec->fm, &dtsec->fm_rev_info); diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 58bb72071c14..45e98fd8b79e 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -1054,15 +1054,15 @@ int memac_init(struct fman_mac *memac) * register address space and 
access each one of 4 * ports inside QSGMII. */ - phy_addr = memac->pcsphy->addr; + phy_addr = memac->pcsphy->mdio.addr; qsmgii_phy_addr = (u8)((phy_addr << 2) | i); - memac->pcsphy->addr = qsmgii_phy_addr; + memac->pcsphy->mdio.addr = qsmgii_phy_addr; if (memac->basex_if) setup_sgmii_internal_phy_base_x(memac); else setup_sgmii_internal_phy(memac, fixed_link); - memac->pcsphy->addr = phy_addr; + memac->pcsphy->mdio.addr = phy_addr; } } diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 016743e355de..c158d409f6af 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -254,7 +254,7 @@ static void restart(struct net_device *dev) int r; u32 addrhi, addrlo; - struct mii_bus* mii = fep->phydev->bus; + struct mii_bus *mii = fep->phydev->mdio.bus; struct fec_info* fec_inf = mii->priv; r = whack_reset(fep->fec.fecp); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 4e394f75261e..2aa7b401cc3b 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1834,7 +1834,7 @@ static void gfar_configure_serdes(struct net_device *dev) * several seconds for it to come back. 
*/ if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) { - put_device(&tbiphy->dev); + put_device(&tbiphy->mdio.dev); return; } @@ -1849,7 +1849,7 @@ static void gfar_configure_serdes(struct net_device *dev) BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX | BMCR_SPEED1000); - put_device(&tbiphy->dev); + put_device(&tbiphy->mdio.dev); } static int __gfar_is_rx_idle(struct gfar_private *priv) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 650f7888e32b..0e7f24ec3239 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1385,7 +1385,7 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth) value &= ~0x1000; /* Turn off autonegotiation */ phy_write(tbiphy, ENET_TBI_MII_CR, value); - put_device(&tbiphy->dev); + put_device(&tbiphy->mdio.dev); } init_check_frame_length_mode(ug_info->lengthCheckRx, &ug_regs->maccfg2); @@ -1705,7 +1705,7 @@ static void uec_configure_serdes(struct net_device *dev) * several seconds for it to come back. 
*/ if (phy_read(tbiphy, ENET_TBI_MII_SR) & TBISR_LSTATUS) { - put_device(&tbiphy->dev); + put_device(&tbiphy->mdio.dev); return; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 4eddbeb19307..3df22840fcd1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -71,7 +71,7 @@ static void hns_get_mdix_mode(struct net_device *net_dev, struct hns_nic_priv *priv = netdev_priv(net_dev); struct phy_device *phy_dev = priv->phy; - if (!phy_dev || !phy_dev->bus) { + if (!phy_dev || !phy_dev->mdio.bus) { cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; return; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 4eba2ed53052..a0c03834a2f7 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -3133,7 +3133,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) if (!mp->phy) err = -ENODEV; else - phy_addr_set(mp, mp->phy->addr); + phy_addr_set(mp, mp->phy->mdio.addr); } else if (pd->phy_addr != MV643XX_ETH_PHY_NONE) { mp->phy = phy_scan(mp, pd->phy_addr); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 15b1f6bbd92d..fabc8df40392 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3714,7 +3714,7 @@ static int mvneta_probe(struct platform_device *pdev) mvneta_fixed_link_update(pp, phy); - put_device(&phy->dev); + put_device(&phy->mdio.dev); } return 0; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index c74e78dd989a..8af25563f627 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -864,8 +864,8 @@ static int smsc911x_phy_loopbacktest(struct net_device *dev) for (i = 0; i < 10; i++) { /* Set PHY to 10/FD, no ANEG, and 
loopback mode */ - smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, - BMCR_LOOPBACK | BMCR_FULLDPLX); + smsc911x_mii_write(phy_dev->mdio.bus, phy_dev->mdio.addr, + MII_BMCR, BMCR_LOOPBACK | BMCR_FULLDPLX); /* Enable MAC tx/rx, FD */ spin_lock_irqsave(&pdata->mac_lock, flags); @@ -893,7 +893,7 @@ static int smsc911x_phy_loopbacktest(struct net_device *dev) spin_unlock_irqrestore(&pdata->mac_lock, flags); /* Cancel PHY loopback mode */ - smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, 0); + smsc911x_mii_write(phy_dev->mdio.bus, phy_dev->mdio.addr, MII_BMCR, 0); smsc911x_reg_write(pdata, TX_CFG, 0); smsc911x_reg_write(pdata, RX_CFG, 0); @@ -1021,7 +1021,7 @@ static int smsc911x_mii_probe(struct net_device *dev) } SMSC_TRACE(pdata, probe, "PHY: addr %d, phy_id 0x%08X", - phydev->addr, phydev->phy_id); + phydev->mdio.addr, phydev->phy_id); ret = phy_connect_direct(dev, phydev, &smsc911x_phy_adjust_link, pdata->config.phy_interface); @@ -1988,7 +1988,8 @@ smsc911x_ethtool_getregs(struct net_device *dev, struct ethtool_regs *regs, } for (i = 0; i <= 31; i++) - data[j++] = smsc911x_mii_read(phy_dev->bus, phy_dev->addr, i); + data[j++] = smsc911x_mii_read(phy_dev->mdio.bus, + phy_dev->mdio.addr, i); } static void smsc911x_eeprom_enable_access(struct smsc911x_data *pdata) diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index 59bf4c353d50..53355c323f54 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -315,7 +315,8 @@ smsc9420_ethtool_getregs(struct net_device *dev, struct ethtool_regs *regs, return; for (i = 0; i <= 31; i++) - data[j++] = smsc9420_mii_read(phy_dev->bus, phy_dev->addr, i); + data[j++] = smsc9420_mii_read(phy_dev->mdio.bus, + phy_dev->mdio.addr, i); } static void smsc9420_eeprom_enable_access(struct smsc9420_pdata *pd) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 49544c0fa6a7..42fdfd4d9d4f 100644 --- 
a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2050,7 +2050,8 @@ static int cpsw_probe_dt(struct cpsw_priv *priv, if (!phy_dev) return -ENODEV; snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), - PHY_ID_FMT, phy_dev->bus->id, phy_dev->addr); + PHY_ID_FMT, phy_dev->mdio.bus->id, + phy_dev->mdio.addr); } else if (parp) { u32 phyid; struct device_node *mdio_node; diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 88e8e6055b9f..78299c1592c1 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -396,7 +396,7 @@ static int davinci_mdio_probe(struct platform_device *pdev) phy = data->bus->phy_map[addr]; if (phy) { dev_info(dev, "phy[%d]: device %s, driver %s\n", - phy->addr, phydev_name(phy), + phy->mdio.addr, phydev_name(phy), phy->drv ? phy->drv->name : "unknown"); } } diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index d1a0167dff84..e324b3092380 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -827,7 +827,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) dev_info(dev, "MDIO of the phy is not registered yet\n"); else - put_device(&phydev->dev); + put_device(&phydev->mdio.dev); return 0; } diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 62361f8af375..b76ac09a554f 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -190,7 +190,7 @@ static int at803x_resume(struct phy_device *phydev) static int at803x_probe(struct phy_device *phydev) { - struct device *dev = &phydev->dev; + struct device *dev = &phydev->mdio.dev; struct at803x_priv *priv; struct gpio_desc *gpiod_reset; diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index 71b491c7bf96..e536e30d1643 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -40,10 +40,10 
@@ static int bcm87xx_of_reg_init(struct phy_device *phydev) const __be32 *paddr_end; int len, ret; - if (!phydev->dev.of_node) + if (!phydev->mdio.dev.of_node) return 0; - paddr = of_get_property(phydev->dev.of_node, + paddr = of_get_property(phydev->mdio.dev.of_node, "broadcom,c45-reg-init", &len); if (!paddr) return 0; diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 47b711739ba9..39da6fc6a85e 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -220,9 +220,10 @@ static void rx_timestamp_work(struct work_struct *work); #define BROADCAST_ADDR 31 -static inline int broadcast_write(struct mii_bus *bus, u32 regnum, u16 val) +static inline int broadcast_write(struct phy_device *phydev, u32 regnum, + u16 val) { - return mdiobus_write(bus, BROADCAST_ADDR, regnum, val); + return mdiobus_write(phydev->mdio.bus, BROADCAST_ADDR, regnum, val); } /* Caller must hold extreg_lock. */ @@ -232,7 +233,7 @@ static int ext_read(struct phy_device *phydev, int page, u32 regnum) int val; if (dp83640->clock->page != page) { - broadcast_write(phydev->bus, PAGESEL, page); + broadcast_write(phydev, PAGESEL, page); dp83640->clock->page = page; } val = phy_read(phydev, regnum); @@ -247,11 +248,11 @@ static void ext_write(int broadcast, struct phy_device *phydev, struct dp83640_private *dp83640 = phydev->priv; if (dp83640->clock->page != page) { - broadcast_write(phydev->bus, PAGESEL, page); + broadcast_write(phydev, PAGESEL, page); dp83640->clock->page = page; } if (broadcast) - broadcast_write(phydev->bus, regnum, val); + broadcast_write(phydev, regnum, val); else phy_write(phydev, regnum, val); } @@ -1039,7 +1040,7 @@ static int choose_this_phy(struct dp83640_clock *clock, if (chosen_phy == -1 && !clock->chosen) return 1; - if (chosen_phy == phydev->addr) + if (chosen_phy == phydev->mdio.addr) return 1; return 0; @@ -1103,10 +1104,10 @@ static int dp83640_probe(struct phy_device *phydev) struct dp83640_private *dp83640; int err = -ENOMEM, i; - 
if (phydev->addr == BROADCAST_ADDR) + if (phydev->mdio.addr == BROADCAST_ADDR) return 0; - clock = dp83640_clock_get_bus(phydev->bus); + clock = dp83640_clock_get_bus(phydev->mdio.bus); if (!clock) goto no_clock; @@ -1132,7 +1133,8 @@ static int dp83640_probe(struct phy_device *phydev) if (choose_this_phy(clock, phydev)) { clock->chosen = dp83640; - clock->ptp_clock = ptp_clock_register(&clock->caps, &phydev->dev); + clock->ptp_clock = ptp_clock_register(&clock->caps, + &phydev->mdio.dev); if (IS_ERR(clock->ptp_clock)) { err = PTR_ERR(clock->ptp_clock); goto no_register; @@ -1158,7 +1160,7 @@ static void dp83640_remove(struct phy_device *phydev) struct list_head *this, *next; struct dp83640_private *tmp, *dp83640 = phydev->priv; - if (phydev->addr == BROADCAST_ADDR) + if (phydev->mdio.addr == BROADCAST_ADDR) return; enable_status_frames(phydev, false); diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index e4c0b0c0af02..74e4521bd2d3 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -103,7 +103,7 @@ static int dp83867_config_intr(struct phy_device *phydev) static int dp83867_of_init(struct phy_device *phydev) { struct dp83867_private *dp83867 = phydev->priv; - struct device *dev = &phydev->dev; + struct device *dev = &phydev->mdio.dev; struct device_node *of_node = dev->of_node; int ret; @@ -137,7 +137,7 @@ static int dp83867_config_init(struct phy_device *phydev) u16 val, delay; if (!phydev->priv) { - dp83867 = devm_kzalloc(&phydev->dev, sizeof(*dp83867), + dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867), GFP_KERNEL); if (!dp83867) return -ENOMEM; diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 0a1cde6803b0..ab9c473d75ea 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -197,11 +197,11 @@ int fixed_phy_set_link_update(struct phy_device *phydev, struct fixed_mdio_bus *fmb = &platform_fmb; struct fixed_phy *fp; - if (!phydev || !phydev->bus) + if (!phydev || 
!phydev->mdio.bus) return -EINVAL; list_for_each_entry(fp, &fmb->phys, node) { - if (fp->addr == phydev->addr) { + if (fp->addr == phydev->mdio.addr) { fp->link_update = link_update; fp->phydev = phydev; return 0; @@ -219,11 +219,11 @@ int fixed_phy_update_state(struct phy_device *phydev, struct fixed_mdio_bus *fmb = &platform_fmb; struct fixed_phy *fp; - if (!phydev || phydev->bus != fmb->mii_bus) + if (!phydev || phydev->mdio.bus != fmb->mii_bus) return -EINVAL; list_for_each_entry(fp, &fmb->phys, node) { - if (fp->addr == phydev->addr) { + if (fp->addr == phydev->mdio.addr) { #define _UPD(x) if (changed->x) \ fp->status.x = status->x _UPD(link); @@ -344,7 +344,7 @@ struct phy_device *fixed_phy_register(unsigned int irq, } of_node_get(np); - phy->dev.of_node = np; + phy->mdio.dev.of_node = np; phy->is_pseudo_fixed_link = true; switch (status->speed) { diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index 0dbc445a5fa0..c12170d07b62 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -53,43 +53,43 @@ static int ip175c_config_init(struct phy_device *phydev) if (full_reset_performed == 0) { /* master reset */ - err = mdiobus_write(phydev->bus, 30, 0, 0x175c); + err = mdiobus_write(phydev->mdio.bus, 30, 0, 0x175c); if (err < 0) return err; /* ensure no bus delays overlap reset period */ - err = mdiobus_read(phydev->bus, 30, 0); + err = mdiobus_read(phydev->mdio.bus, 30, 0); /* data sheet specifies reset period is 2 msec */ mdelay(2); /* enable IP175C mode */ - err = mdiobus_write(phydev->bus, 29, 31, 0x175c); + err = mdiobus_write(phydev->mdio.bus, 29, 31, 0x175c); if (err < 0) return err; /* Set MII0 speed and duplex (in PHY mode) */ - err = mdiobus_write(phydev->bus, 29, 22, 0x420); + err = mdiobus_write(phydev->mdio.bus, 29, 22, 0x420); if (err < 0) return err; /* reset switch ports */ for (i = 0; i < 5; i++) { - err = mdiobus_write(phydev->bus, i, + err = mdiobus_write(phydev->mdio.bus, i, MII_BMCR, BMCR_RESET); if (err < 0) 
return err; } for (i = 0; i < 5; i++) - err = mdiobus_read(phydev->bus, i, MII_BMCR); + err = mdiobus_read(phydev->mdio.bus, i, MII_BMCR); mdelay(2); full_reset_performed = 1; } - if (phydev->addr != 4) { + if (phydev->mdio.addr != 4) { phydev->state = PHY_RUNNING; phydev->speed = SPEED_100; phydev->duplex = DUPLEX_FULL; @@ -184,7 +184,7 @@ static int ip101a_g_config_init(struct phy_device *phydev) static int ip175c_read_status(struct phy_device *phydev) { - if (phydev->addr == 4) /* WAN port */ + if (phydev->mdio.addr == 4) /* WAN port */ genphy_read_status(phydev); else /* Don't need to read status for switch ports */ @@ -195,7 +195,7 @@ static int ip175c_read_status(struct phy_device *phydev) static int ip175c_config_aneg(struct phy_device *phydev) { - if (phydev->addr == 4) /* WAN port */ + if (phydev->mdio.addr == 4) /* WAN port */ genphy_config_aneg(phydev); return 0; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 50b5eac75854..f96c93c9819a 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -300,10 +300,11 @@ static int marvell_of_reg_init(struct phy_device *phydev) const __be32 *paddr; int len, i, saved_page, current_page, page_changed, ret; - if (!phydev->dev.of_node) + if (!phydev->mdio.dev.of_node) return 0; - paddr = of_get_property(phydev->dev.of_node, "marvell,reg-init", &len); + paddr = of_get_property(phydev->mdio.dev.of_node, + "marvell,reg-init", &len); if (!paddr || len < (4 * sizeof(*paddr))) return 0; @@ -1060,7 +1061,7 @@ static int marvell_probe(struct phy_device *phydev) { struct marvell_priv *priv; - priv = devm_kzalloc(&phydev->dev, sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 05381d0f559c..e5b1ccde835b 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -200,16 +200,16 @@ EXPORT_SYMBOL(of_mdio_find_bus); * the phy. 
This allows auto-probed pyh devices to be supplied with information * passed in via DT. */ -static void of_mdiobus_link_phydev(struct mii_bus *mdio, +static void of_mdiobus_link_phydev(struct mii_bus *bus, struct phy_device *phydev) { - struct device *dev = &phydev->dev; + struct device *dev = &phydev->mdio.dev; struct device_node *child; - if (dev->of_node || !mdio->dev.of_node) + if (dev->of_node || !bus->dev.of_node) return; - for_each_available_child_of_node(mdio->dev.of_node, child) { + for_each_available_child_of_node(bus->dev.of_node, child) { int addr; int ret; @@ -227,7 +227,7 @@ static void of_mdiobus_link_phydev(struct mii_bus *mdio, continue; } - if (addr == phydev->addr) { + if (addr == phydev->mdio.addr) { dev->of_node = child; return; } @@ -522,7 +522,7 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv) static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { - struct device_driver *drv = phydev->dev.driver; + struct device_driver *drv = phydev->mdio.dev.driver; struct phy_driver *phydrv = to_phy_driver(drv); struct net_device *netdev = phydev->attached_dev; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index bf72365e90bc..b51505be1fa9 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -350,7 +350,7 @@ static int ksz9021_load_values_from_of(struct phy_device *phydev, static int ksz9021_config_init(struct phy_device *phydev) { - const struct device *dev = &phydev->dev; + const struct device *dev = &phydev->mdio.dev; const struct device_node *of_node = dev->of_node; const struct device *dev_walker; @@ -358,7 +358,7 @@ static int ksz9021_config_init(struct phy_device *phydev) * properties in the MAC node. Walk up the tree of devices to * find a device with an OF node. 
*/ - dev_walker = &phydev->dev; + dev_walker = &phydev->mdio.dev; do { of_node = dev_walker->of_node; dev_walker = dev_walker->parent; @@ -471,7 +471,7 @@ static int ksz9031_center_flp_timing(struct phy_device *phydev) static int ksz9031_config_init(struct phy_device *phydev) { - const struct device *dev = &phydev->dev; + const struct device *dev = &phydev->mdio.dev; const struct device_node *of_node = dev->of_node; static const char *clk_skews[2] = {"rxc-skew-ps", "txc-skew-ps"}; static const char *rx_data_skews[4] = { @@ -630,12 +630,12 @@ static void kszphy_get_stats(struct phy_device *phydev, static int kszphy_probe(struct phy_device *phydev) { const struct kszphy_type *type = phydev->drv->driver_data; - const struct device_node *np = phydev->dev.of_node; + const struct device_node *np = phydev->mdio.dev.of_node; struct kszphy_priv *priv; struct clk *clk; int ret; - priv = devm_kzalloc(&phydev->dev, sizeof(*priv), GFP_KERNEL); + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; @@ -658,7 +658,7 @@ static int kszphy_probe(struct phy_device *phydev) priv->led_mode = -1; } - clk = devm_clk_get(&phydev->dev, "rmii-ref"); + clk = devm_clk_get(&phydev->mdio.dev, "rmii-ref"); /* NOTE: clk may be NULL if building without CONFIG_HAVE_CLK */ if (!IS_ERR_OR_NULL(clk)) { unsigned long rate = clk_get_rate(clk); diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 99df5bc47424..5e34b49be0b3 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -68,7 +68,7 @@ int lan88xx_suspend(struct phy_device *phydev) static int lan88xx_probe(struct phy_device *phydev) { - struct device *dev = &phydev->dev; + struct device *dev = &phydev->mdio.dev; struct lan88xx_priv *priv; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -89,7 +89,7 @@ static int lan88xx_probe(struct phy_device *phydev) static void lan88xx_remove(struct phy_device *phydev) { - struct device *dev = &phydev->dev; + struct 
device *dev = &phydev->mdio.dev; struct lan88xx_priv *priv = phydev->priv; if (priv) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 56c8dd8c0c85..8763bb20988a 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -319,7 +319,7 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) { u32 speed = ethtool_cmd_speed(cmd); - if (cmd->phy_address != phydev->addr) + if (cmd->phy_address != phydev->mdio.addr) return -EINVAL; /* We make sure that we don't pass unsupported values in to the PHY */ @@ -375,7 +375,7 @@ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) cmd->port = PORT_BNC; else cmd->port = PORT_MII; - cmd->phy_address = phydev->addr; + cmd->phy_address = phydev->mdio.addr; cmd->transceiver = phy_is_internal(phydev) ? XCVR_INTERNAL : XCVR_EXTERNAL; cmd->autoneg = phydev->autoneg; @@ -403,16 +403,17 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGMIIPHY: - mii_data->phy_id = phydev->addr; + mii_data->phy_id = phydev->mdio.addr; /* fall through */ case SIOCGMIIREG: - mii_data->val_out = mdiobus_read(phydev->bus, mii_data->phy_id, + mii_data->val_out = mdiobus_read(phydev->mdio.bus, + mii_data->phy_id, mii_data->reg_num); return 0; case SIOCSMIIREG: - if (mii_data->phy_id == phydev->addr) { + if (mii_data->phy_id == phydev->mdio.addr) { switch (mii_data->reg_num) { case MII_BMCR: if ((val & (BMCR_RESET | BMCR_ANENABLE)) == 0) { @@ -445,10 +446,10 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) } } - mdiobus_write(phydev->bus, mii_data->phy_id, + mdiobus_write(phydev->mdio.bus, mii_data->phy_id, mii_data->reg_num, val); - if (mii_data->phy_id == phydev->addr && + if (mii_data->phy_id == phydev->mdio.addr && mii_data->reg_num == MII_BMCR && val & BMCR_RESET) return phy_init_hw(phydev); @@ -643,7 +644,7 @@ int phy_start_interrupts(struct phy_device *phydev) if (request_irq(phydev->irq, phy_interrupt, 0, 
"phy_interrupt", phydev) < 0) { pr_warn("%s: Can't get IRQ %d (PHY)\n", - phydev->bus->name, phydev->irq); + phydev->mdio.bus->name, phydev->irq); phydev->irq = PHY_POLL; return 0; } @@ -1041,11 +1042,11 @@ static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad) { struct phy_driver *phydrv = phydev->drv; - int addr = phydev->addr; + int addr = phydev->mdio.addr; int value = -1; if (!phydrv->read_mmd_indirect) { - struct mii_bus *bus = phydev->bus; + struct mii_bus *bus = phydev->mdio.bus; mutex_lock(&bus->mdio_lock); mmd_phy_indirect(bus, prtad, devad, addr); @@ -1079,10 +1080,10 @@ void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, int devad, u32 data) { struct phy_driver *phydrv = phydev->drv; - int addr = phydev->addr; + int addr = phydev->mdio.addr; if (!phydrv->write_mmd_indirect) { - struct mii_bus *bus = phydev->bus; + struct mii_bus *bus = phydev->mdio.bus; mutex_lock(&bus->mdio_lock); mmd_phy_indirect(bus, prtad, devad, addr); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 68fe5738daef..01e5d52dc37c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -43,7 +43,7 @@ MODULE_LICENSE("GPL"); void phy_device_free(struct phy_device *phydev) { - put_device(&phydev->dev); + put_device(&phydev->mdio.dev); } EXPORT_SYMBOL(phy_device_free); @@ -65,7 +65,7 @@ static DEFINE_MUTEX(phy_fixup_lock); /** * phy_register_fixup - creates a new phy_fixup and adds it to the list - * @bus_id: A string which matches phydev->dev.bus_id (or PHY_ANY_ID) + * @bus_id: A string which matches phydev->mdio.dev.bus_id (or PHY_ANY_ID) * @phy_uid: Used to match against phydev->phy_id (the UID of the PHY) * It can also be PHY_ANY_UID * @phy_uid_mask: Applied to phydev->phy_id and fixup->phy_uid before @@ -153,13 +153,19 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, struct 
phy_c45_device_ids *c45_ids) { struct phy_device *dev; + struct mdio_device *mdiodev; /* We allocate the device, and initialize the default values */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); - dev->dev.release = phy_device_release; + mdiodev = &dev->mdio; + mdiodev->dev.release = phy_device_release; + mdiodev->dev.parent = &bus->dev; + mdiodev->dev.bus = &mdio_bus_type; + mdiodev->bus = bus; + mdiodev->addr = addr; dev->speed = 0; dev->duplex = -1; @@ -171,15 +177,11 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, dev->autoneg = AUTONEG_ENABLE; dev->is_c45 = is_c45; - dev->addr = addr; dev->phy_id = phy_id; if (c45_ids) dev->c45_ids = *c45_ids; - dev->bus = bus; - dev->dev.parent = &bus->dev; - dev->dev.bus = &mdio_bus_type; dev->irq = bus->irq ? bus->irq[addr] : PHY_POLL; - dev_set_name(&dev->dev, PHY_ID_FMT, bus->id, addr); + dev_set_name(&mdiodev->dev, PHY_ID_FMT, bus->id, addr); dev->state = PHY_DOWN; @@ -199,7 +201,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, */ request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, MDIO_ID_ARGS(phy_id)); - device_initialize(&dev->dev); + device_initialize(&mdiodev->dev); return dev; } @@ -382,27 +384,27 @@ int phy_device_register(struct phy_device *phydev) int err; /* Don't register a phy if one is already registered at this address */ - if (phydev->bus->phy_map[phydev->addr]) + if (phydev->mdio.bus->phy_map[phydev->mdio.addr]) return -EINVAL; - phydev->bus->phy_map[phydev->addr] = phydev; + phydev->mdio.bus->phy_map[phydev->mdio.addr] = phydev; /* Run all of the fixups for this PHY */ err = phy_scan_fixups(phydev); if (err) { - pr_err("PHY %d failed to initialize\n", phydev->addr); + pr_err("PHY %d failed to initialize\n", phydev->mdio.addr); goto out; } - err = device_add(&phydev->dev); + err = device_add(&phydev->mdio.dev); if (err) { - pr_err("PHY %d failed to add\n", phydev->addr); + pr_err("PHY %d failed to add\n", 
phydev->mdio.addr); goto out; } return 0; out: - phydev->bus->phy_map[phydev->addr] = NULL; + phydev->mdio.bus->phy_map[phydev->mdio.addr] = NULL; return err; } EXPORT_SYMBOL(phy_device_register); @@ -417,10 +419,10 @@ EXPORT_SYMBOL(phy_device_register); */ void phy_device_remove(struct phy_device *phydev) { - struct mii_bus *bus = phydev->bus; - int addr = phydev->addr; + struct mii_bus *bus = phydev->mdio.bus; + int addr = phydev->mdio.addr; - device_del(&phydev->dev); + device_del(&phydev->mdio.dev); bus->phy_map[addr] = NULL; } EXPORT_SYMBOL(phy_device_remove); @@ -617,13 +619,13 @@ EXPORT_SYMBOL(phy_attached_info); void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) { if (!fmt) { - dev_info(&phydev->dev, ATTACHED_FMT "\n", + dev_info(&phydev->mdio.dev, ATTACHED_FMT "\n", phydev->drv->name, phydev_name(phydev), phydev->irq); } else { va_list ap; - dev_info(&phydev->dev, ATTACHED_FMT, + dev_info(&phydev->mdio.dev, ATTACHED_FMT, phydev->drv->name, phydev_name(phydev), phydev->irq); @@ -652,8 +654,8 @@ EXPORT_SYMBOL(phy_attached_print); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface) { - struct mii_bus *bus = phydev->bus; - struct device *d = &phydev->dev; + struct mii_bus *bus = phydev->mdio.bus; + struct device *d = &phydev->mdio.dev; int err; if (!try_module_get(bus->owner)) { @@ -771,8 +773,8 @@ void phy_detach(struct phy_device *phydev) * real driver could be loaded */ for (i = 0; i < ARRAY_SIZE(genphy_driver); i++) { - if (phydev->dev.driver == &genphy_driver[i].driver) { - device_release_driver(&phydev->dev); + if (phydev->mdio.dev.driver == &genphy_driver[i].driver) { + device_release_driver(&phydev->mdio.dev); break; } } @@ -781,16 +783,16 @@ void phy_detach(struct phy_device *phydev) * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. 
*/ - bus = phydev->bus; + bus = phydev->mdio.bus; - put_device(&phydev->dev); + put_device(&phydev->mdio.dev); module_put(bus->owner); } EXPORT_SYMBOL(phy_detach); int phy_suspend(struct phy_device *phydev) { - struct phy_driver *phydrv = to_phy_driver(phydev->dev.driver); + struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; int ret = 0; @@ -813,7 +815,7 @@ EXPORT_SYMBOL(phy_suspend); int phy_resume(struct phy_device *phydev) { - struct phy_driver *phydrv = to_phy_driver(phydev->dev.driver); + struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); int ret = 0; if (phydrv->resume) @@ -1330,7 +1332,7 @@ EXPORT_SYMBOL(phy_set_max_speed); static void of_set_phy_supported(struct phy_device *phydev) { - struct device_node *node = phydev->dev.of_node; + struct device_node *node = phydev->mdio.dev.of_node; u32 max_speed; if (!IS_ENABLED(CONFIG_OF_MDIO)) @@ -1354,7 +1356,7 @@ static void of_set_phy_supported(struct phy_device *phydev) static int phy_probe(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); - struct device_driver *drv = phydev->dev.driver; + struct device_driver *drv = phydev->mdio.dev.driver; struct phy_driver *phydrv = to_phy_driver(drv); int err = 0; diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index dc2da8770918..18c981b95910 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -44,7 +44,7 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev) static int smsc_phy_config_init(struct phy_device *phydev) { int __maybe_unused len; - struct device *dev __maybe_unused = &phydev->dev; + struct device *dev __maybe_unused = &phydev->mdio.dev; struct device_node *of_node __maybe_unused = dev->of_node; int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); int enable_energy = 1; diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 2f88ff4654da..bc9d76329435 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c 
@@ -75,7 +75,7 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *chi /* Associate the OF node with the device structure so it * can be looked up later */ of_node_get(child); - phy->dev.of_node = child; + phy->mdio.dev.of_node = child; /* All data is now stored in the phy struct; * register it */ @@ -233,7 +233,7 @@ struct phy_device *of_phy_connect(struct net_device *dev, ret = phy_connect_direct(dev, phy, hndlr, iface); /* refcount is held by phy_connect_direct() on success */ - put_device(&phy->dev); + put_device(&phy->mdio.dev); return ret ? NULL : phy; } @@ -263,7 +263,7 @@ struct phy_device *of_phy_attach(struct net_device *dev, ret = phy_attach_direct(dev, phy, flags, iface); /* refcount is held by phy_attach_direct() on success */ - put_device(&phy->dev); + put_device(&phy->mdio.dev); return ret ? NULL : phy; } diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 0d073c23c10d..94f9f1491cde 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -13,6 +13,15 @@ struct mii_bus; +struct mdio_device { + struct device dev; + + struct mii_bus *bus; + /* Bus address of the MDIO device (0-31) */ + int addr; +}; +#define to_mdio_device(d) container_of(d, struct mdio_device, dev) + static inline bool mdio_phy_id_is_c45(int phy_id) { return (phy_id & MDIO_PHY_ID_C45) && !(phy_id & ~MDIO_PHY_ID_C45_MASK); diff --git a/include/linux/phy.h b/include/linux/phy.h index a5473c9e19de..239a0c2bc49d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -358,14 +358,12 @@ struct phy_c45_device_ids { * handling, as well as handling shifts in PHY hardware state */ struct phy_device { + struct mdio_device mdio; + /* Information about the PHY type */ /* And management functions */ struct phy_driver *drv; - struct mii_bus *bus; - - struct device dev; - u32 phy_id; struct phy_c45_device_ids c45_ids; @@ -381,9 +379,6 @@ struct phy_device { phy_interface_t interface; - /* Bus address of the PHY (0-31) */ - int addr; - /* * forced 
speed & duplex (no autoneg) * partner speed & duplex & pause (autoneg) @@ -432,7 +427,8 @@ struct phy_device { void (*adjust_link)(struct net_device *dev); }; -#define to_phy_device(d) container_of(d, struct phy_device, dev) +#define to_phy_device(d) container_of(to_mdio_device(d), \ + struct phy_device, mdio) /* struct phy_driver: Driver structure for a particular PHY type * @@ -622,7 +618,7 @@ static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) if (!phydev->is_c45) return -EOPNOTSUPP; - return mdiobus_read(phydev->bus, phydev->addr, + return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff)); } @@ -648,7 +644,7 @@ int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad); */ static inline int phy_read(struct phy_device *phydev, u32 regnum) { - return mdiobus_read(phydev->bus, phydev->addr, regnum); + return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, regnum); } /** @@ -663,7 +659,7 @@ static inline int phy_read(struct phy_device *phydev, u32 regnum) */ static inline int phy_write(struct phy_device *phydev, u32 regnum, u16 val) { - return mdiobus_write(phydev->bus, phydev->addr, regnum, val); + return mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, regnum, val); } /** @@ -726,7 +722,7 @@ static inline int phy_write_mmd(struct phy_device *phydev, int devad, regnum = MII_ADDR_C45 | ((devad & 0x1f) << 16) | (regnum & 0xffff); - return mdiobus_write(phydev->bus, phydev->addr, regnum, val); + return mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, regnum, val); } /** @@ -776,14 +772,14 @@ static inline int phy_read_status(struct phy_device *phydev) } #define phydev_err(_phydev, format, args...) \ - dev_err(&_phydev->dev, format, ##args) + dev_err(&_phydev->mdio.dev, format, ##args) #define phydev_dbg(_phydev, format, args...) 
\ - dev_dbg(&_phydev->dev, format, ##args) + dev_dbg(&_phydev->mdio.dev, format, ##args); static inline const char *phydev_name(const struct phy_device *phydev) { - return dev_name(&phydev->dev); + return dev_name(&phydev->mdio.dev); } void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 208d1b257194..fa4daba8db55 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -439,7 +439,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (of_phy_is_fixed_link(port_dn)) { phydev = of_phy_find_device(port_dn); if (phydev) { - int addr = phydev->addr; + int addr = phydev->mdio.addr; phy_device_free(phydev); of_node_put(port_dn); -- cgit v1.2.3 From 7f854420fbfe9d49afe2ffb1df052cfe8e215541 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:18 +0100 Subject: phy: Add API for {un}registering an mdio device to a bus. Rather than have drivers directly manipulate the mii_bus structure, provide an API for registering and unregistering devices on an MDIO bus, and performing lookups. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/au1000_eth.c | 9 +++-- drivers/net/ethernet/broadcom/b44.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 30 +++++++-------- drivers/net/ethernet/ethoc.c | 4 +- drivers/net/ethernet/faraday/ftgmac100.c | 2 +- drivers/net/ethernet/freescale/fec_main.c | 7 +--- drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c | 2 +- drivers/net/ethernet/smsc/smsc9420.c | 3 +- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +- drivers/net/ethernet/ti/davinci_mdio.c | 2 +- drivers/net/phy/mdio_bus.c | 46 ++++++++++++++++++++++- drivers/net/phy/phy_device.c | 21 +++++------ drivers/of/of_mdio.c | 2 +- include/linux/mdio.h | 8 ++++ include/linux/phy.h | 2 +- net/dsa/slave.c | 3 +- 17 files changed, 98 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 982b581d3484..c8640418fc37 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -502,7 +502,7 @@ static int au1000_mii_probe(struct net_device *dev) BUG_ON(aup->mac_id < 0 || aup->mac_id > 1); if (aup->phy_addr) - phydev = aup->mii_bus->phy_map[aup->phy_addr]; + phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); else netdev_info(dev, "using PHY-less setup\n"); return 0; @@ -512,8 +512,8 @@ static int au1000_mii_probe(struct net_device *dev) * on the current MAC's MII bus */ for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) - if (aup->mii_bus->phy_map[phy_addr]) { - phydev = aup->mii_bus->phy_map[phy_addr]; + if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { + phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); if (!aup->phy_search_highest_addr) /* break out with first one found */ break; @@ -531,7 +531,8 @@ static int au1000_mii_probe(struct net_device *dev) */ for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { struct phy_device *const tmp_phydev = - aup->mii_bus->phy_map[phy_addr]; + 
mdiobus_get_phy(aup->mii_bus, + phy_addr); if (aup->mac_id == 1) break; diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 843a4a5864fc..74f0a37c4eb6 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2272,7 +2272,7 @@ static int b44_register_phy_one(struct b44 *bp) goto err_out_mdiobus; } - if (!bp->mii_bus->phy_map[bp->phy_addr] && + if (!mdiobus_is_registered_device(bp->mii_bus, bp->phy_addr) && (sprom->boardflags_lo & (B44_BOARDFLAG_ROBO | B44_BOARDFLAG_ADM))) { dev_info(sdev->dev, diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 633b59db813a..0d775964b060 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -573,7 +573,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) } if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR) - phydev = mdio->phy_map[pd->phy_address]; + phydev = mdiobus_get_phy(mdio, pd->phy_address); else phydev = phy_find_first(mdio); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 04e7d0d0e5b1..9293675df7ba 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -1406,7 +1406,7 @@ static void tg3_mdio_config_5785(struct tg3 *tp) u32 val; struct phy_device *phydev; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); switch (phydev->drv->phy_id & phydev->drv->phy_id_mask) { case PHY_ID_BCM50610: case PHY_ID_BCM50610M: @@ -1554,7 +1554,7 @@ static int tg3_mdio_init(struct tg3 *tp) return i; } - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); if (!phydev || !phydev->drv) { dev_warn(&tp->pdev->dev, "No PHY devices\n"); @@ -1964,7 +1964,7 @@ static void tg3_setup_flow_control(struct tg3 *tp, u32 lcladv, u32 rmtadv) u32 old_tx_mode = 
tp->tx_mode; if (tg3_flag(tp, USE_PHYLIB)) - autoneg = tp->mdio_bus->phy_map[tp->phy_addr]->autoneg; + autoneg = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr)->autoneg; else autoneg = tp->link_config.autoneg; @@ -2000,7 +2000,7 @@ static void tg3_adjust_link(struct net_device *dev) u8 oldflowctrl, linkmesg = 0; u32 mac_mode, lcl_adv, rmt_adv; struct tg3 *tp = netdev_priv(dev); - struct phy_device *phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + struct phy_device *phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); spin_lock_bh(&tp->lock); @@ -2089,7 +2089,7 @@ static int tg3_phy_init(struct tg3 *tp) /* Bring the PHY back to a known state. */ tg3_bmcr_reset(tp); - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); /* Attach the MAC to the PHY. */ phydev = phy_connect(tp->dev, phydev_name(phydev), @@ -2116,7 +2116,7 @@ static int tg3_phy_init(struct tg3 *tp) SUPPORTED_Asym_Pause); break; default: - phy_disconnect(tp->mdio_bus->phy_map[tp->phy_addr]); + phy_disconnect(mdiobus_get_phy(tp->mdio_bus, tp->phy_addr)); return -EINVAL; } @@ -2136,7 +2136,7 @@ static void tg3_phy_start(struct tg3 *tp) if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); if (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) { tp->phy_flags &= ~TG3_PHYFLG_IS_LOW_POWER; @@ -2156,13 +2156,13 @@ static void tg3_phy_stop(struct tg3 *tp) if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return; - phy_stop(tp->mdio_bus->phy_map[tp->phy_addr]); + phy_stop(mdiobus_get_phy(tp->mdio_bus, tp->phy_addr)); } static void tg3_phy_fini(struct tg3 *tp) { if (tp->phy_flags & TG3_PHYFLG_IS_CONNECTED) { - phy_disconnect(tp->mdio_bus->phy_map[tp->phy_addr]); + phy_disconnect(mdiobus_get_phy(tp->mdio_bus, tp->phy_addr)); tp->phy_flags &= ~TG3_PHYFLG_IS_CONNECTED; } } @@ -4046,7 +4046,7 @@ static int tg3_power_down_prepare(struct tg3 *tp) struct phy_device *phydev; u32 
phyid, advertising; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); tp->phy_flags |= TG3_PHYFLG_IS_LOW_POWER; @@ -12074,7 +12074,7 @@ static int tg3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); return phy_ethtool_gset(phydev, cmd); } @@ -12141,7 +12141,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); return phy_ethtool_sset(phydev, cmd); } @@ -12296,7 +12296,7 @@ static int tg3_nway_reset(struct net_device *dev) if (tg3_flag(tp, USE_PHYLIB)) { if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; - r = phy_start_aneg(tp->mdio_bus->phy_map[tp->phy_addr]); + r = phy_start_aneg(mdiobus_get_phy(tp->mdio_bus, tp->phy_addr)); } else { u32 bmcr; @@ -12414,7 +12414,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam u32 newadv; struct phy_device *phydev; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); if (!(phydev->supported & SUPPORTED_Pause) || (!(phydev->supported & SUPPORTED_Asym_Pause) && @@ -13924,7 +13924,7 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; - phydev = tp->mdio_bus->phy_map[tp->phy_addr]; + phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); return phy_mii_ioctl(phydev, ifr, cmd); } diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index c028b299ab3f..62fa136554ac 100644 --- a/drivers/net/ethernet/ethoc.c +++ 
b/drivers/net/ethernet/ethoc.c @@ -678,7 +678,7 @@ static int ethoc_mdio_probe(struct net_device *dev) int err; if (priv->phy_id != -1) - phy = priv->mdio->phy_map[priv->phy_id]; + phy = mdiobus_get_phy(priv->mdio, priv->phy_id); else phy = phy_find_first(priv->mdio); @@ -766,7 +766,7 @@ static int ethoc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (mdio->phy_id >= PHY_MAX_ADDR) return -ERANGE; - phy = priv->mdio->phy_map[mdio->phy_id]; + phy = mdiobus_get_phy(priv->mdio, mdio->phy_id); if (!phy) return -ENODEV; } else { diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 8f3f2cf0dcbf..bb116ad646f6 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -839,7 +839,7 @@ static int ftgmac100_mii_probe(struct ftgmac100 *priv) /* search for connect PHY device */ for (i = 0; i < PHY_MAX_ADDR; i++) { - struct phy_device *tmp = priv->mii_bus->phy_map[i]; + struct phy_device *tmp = mdiobus_get_phy(priv->mii_bus, i); if (tmp) { phydev = tmp; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index da255fb4f1d5..502da6f48f95 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -1926,11 +1927,7 @@ static int fec_enet_mii_probe(struct net_device *ndev) } else { /* check for attached phy */ for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { - if ((fep->mii_bus->phy_mask & (1 << phy_id))) - continue; - if (fep->mii_bus->phy_map[phy_id] == NULL) - continue; - if (fep->mii_bus->phy_map[phy_id]->phy_id == 0) + if (!mdiobus_is_registered_device(fep->mii_bus, phy_id)) continue; if (dev_id--) continue; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c index 5b13b8c11bef..467ff7033606 100644 --- 
a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c @@ -180,7 +180,7 @@ int sxgbe_mdio_register(struct net_device *ndev) } for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { - struct phy_device *phy = mdio_bus->phy_map[phy_addr]; + struct phy_device *phy = mdiobus_get_phy(mdio_bus, phy_addr); if (phy) { char irq_num[4]; diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index 53355c323f54..8594b9e8b28b 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -1158,7 +1158,8 @@ static int smsc9420_mii_probe(struct net_device *dev) BUG_ON(pd->phy_dev); /* Device only supports internal PHY at address 1 */ - if (!pd->mii_bus->phy_map[1]) { + phydev = mdiobus_get_phy(pd->mii_bus, 1); + if (!phydev) { netdev_err(dev, "no PHY found at address 1\n"); return -ENODEV; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index f0990eb9460f..bff28595b427 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -252,7 +252,7 @@ int stmmac_mdio_register(struct net_device *ndev) found = 0; for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - struct phy_device *phydev = new_bus->phy_map[addr]; + struct phy_device *phydev = mdiobus_get_phy(new_bus, addr); if (phydev) { int act = 0; char irq_num[4]; diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 78299c1592c1..4e7c9b9b042a 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -393,7 +393,7 @@ static int davinci_mdio_probe(struct platform_device *pdev) /* scan and dump the bus */ for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - phy = data->bus->phy_map[addr]; + phy = mdiobus_get_phy(data->bus, addr); if (phy) { dev_info(dev, "phy[%d]: device %s, driver %s\n", phy->mdio.addr, 
phydev_name(phy), diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index e5b1ccde835b..f28f89e109ba 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -38,6 +38,48 @@ #include +int mdiobus_register_device(struct mdio_device *mdiodev) +{ + if (mdiodev->bus->mdio_map[mdiodev->addr]) + return -EBUSY; + + mdiodev->bus->mdio_map[mdiodev->addr] = mdiodev; + + return 0; +} +EXPORT_SYMBOL(mdiobus_register_device); + +int mdiobus_unregister_device(struct mdio_device *mdiodev) +{ + if (mdiodev->bus->mdio_map[mdiodev->addr] != mdiodev) + return -EINVAL; + + mdiodev->bus->mdio_map[mdiodev->addr] = NULL; + + return 0; +} +EXPORT_SYMBOL(mdiobus_unregister_device); + +struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr) +{ + struct mdio_device *mdiodev = bus->mdio_map[addr]; + + if (!mdiodev) + return NULL; + + if (!(mdiodev->flags & MDIO_DEVICE_FLAG_PHY)) + return NULL; + + return container_of(mdiodev, struct phy_device, mdio); +} +EXPORT_SYMBOL(mdiobus_get_phy); + +bool mdiobus_is_registered_device(struct mii_bus *bus, int addr) +{ + return bus->mdio_map[addr]; +} +EXPORT_SYMBOL(mdiobus_is_registered_device); + /** * mdiobus_alloc_size - allocate a mii_bus structure * @size: extra amount of memory to allocate for private storage. 
@@ -299,7 +341,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) error: while (--i >= 0) { - struct phy_device *phydev = bus->phy_map[i]; + struct phy_device *phydev = mdiobus_get_phy(bus, i); if (phydev) { phy_device_remove(phydev); phy_device_free(phydev); @@ -318,7 +360,7 @@ void mdiobus_unregister(struct mii_bus *bus) bus->state = MDIOBUS_UNREGISTERED; for (i = 0; i < PHY_MAX_ADDR; i++) { - struct phy_device *phydev = bus->phy_map[i]; + struct phy_device *phydev = mdiobus_get_phy(bus, i); if (phydev) { phy_device_remove(phydev); phy_device_free(phydev); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 01e5d52dc37c..e0d5dbb96700 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -166,6 +166,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, mdiodev->dev.bus = &mdio_bus_type; mdiodev->bus = bus; mdiodev->addr = addr; + mdiodev->flags = MDIO_DEVICE_FLAG_PHY; dev->speed = 0; dev->duplex = -1; @@ -383,10 +384,9 @@ int phy_device_register(struct phy_device *phydev) { int err; - /* Don't register a phy if one is already registered at this address */ - if (phydev->mdio.bus->phy_map[phydev->mdio.addr]) - return -EINVAL; - phydev->mdio.bus->phy_map[phydev->mdio.addr] = phydev; + err = mdiobus_register_device(&phydev->mdio); + if (err) + return err; /* Run all of the fixups for this PHY */ err = phy_scan_fixups(phydev); @@ -404,7 +404,7 @@ int phy_device_register(struct phy_device *phydev) return 0; out: - phydev->mdio.bus->phy_map[phydev->mdio.addr] = NULL; + mdiobus_unregister_device(&phydev->mdio); return err; } EXPORT_SYMBOL(phy_device_register); @@ -419,11 +419,8 @@ EXPORT_SYMBOL(phy_device_register); */ void phy_device_remove(struct phy_device *phydev) { - struct mii_bus *bus = phydev->mdio.bus; - int addr = phydev->mdio.addr; - device_del(&phydev->mdio.dev); - bus->phy_map[addr] = NULL; + mdiobus_unregister_device(&phydev->mdio); } 
EXPORT_SYMBOL(phy_device_remove); @@ -433,11 +430,13 @@ EXPORT_SYMBOL(phy_device_remove); */ struct phy_device *phy_find_first(struct mii_bus *bus) { + struct phy_device *phydev; int addr; for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - if (bus->phy_map[addr]) - return bus->phy_map[addr]; + phydev = mdiobus_get_phy(bus, addr); + if (phydev) + return phydev; } return NULL; } diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index c0292051392e..6febe2df76f9 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -193,7 +193,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) for (addr = 0; addr < PHY_MAX_ADDR; addr++) { /* skip already registered PHYs */ - if (mdio->phy_map[addr]) + if (mdiobus_is_registered_device(mdio, addr)) continue; /* be noisy to encourage people to set reg property */ diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 94f9f1491cde..8cd9579e18ea 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -19,9 +19,12 @@ struct mdio_device { struct mii_bus *bus; /* Bus address of the MDIO device (0-31) */ int addr; + int flags; }; #define to_mdio_device(d) container_of(d, struct mdio_device, dev) +#define MDIO_DEVICE_FLAG_PHY 1 + static inline bool mdio_phy_id_is_c45(int phy_id) { return (phy_id & MDIO_PHY_ID_C45) && !(phy_id & ~MDIO_PHY_ID_C45_MASK); @@ -188,4 +191,9 @@ int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int mdiobus_register_device(struct mdio_device *mdiodev); +int mdiobus_unregister_device(struct mdio_device *mdiodev); +bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); +struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr); + #endif /* __LINUX_MDIO_H__ */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 239a0c2bc49d..2d7beef20825 100644 --- a/include/linux/phy.h +++ 
b/include/linux/phy.h @@ -180,7 +180,7 @@ struct mii_bus { struct device dev; /* list of all PHYs on bus */ - struct phy_device *phy_map[PHY_MAX_ADDR]; + struct mdio_device *mdio_map[PHY_MAX_ADDR]; /* PHY addresses to be ignored when probing */ u32 phy_mask; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 5f45e68b52dc..2771713714f1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -997,7 +998,7 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, { struct dsa_switch *ds = p->parent; - p->phy = ds->slave_mii_bus->phy_map[addr]; + p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr); if (!p->phy) { netdev_err(slave_dev, "no phy at %d\n", addr); return -ENODEV; -- cgit v1.2.3 From 0071f56e46dadb88dc3ad1f8d9cf9c3ae014735d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:20 +0100 Subject: dsa: Register netdev before phy When the phy is connected, an info message is printed. If the netdev it is attached to has not been registered yet, the name 'uninitialised' appears in the output. By registering the netdev first, then connecting the phy, we can avoid this. Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2771713714f1..40b9ca72aae3 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1189,13 +1189,6 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, p->old_link = -1; p->old_duplex = -1; - ret = dsa_slave_phy_setup(p, slave_dev); - if (ret) { - netdev_err(master, "error %d setting up slave phy\n", ret); - free_netdev(slave_dev); - return ret; - } - ds->ports[port] = slave_dev; ret = register_netdev(slave_dev); if (ret) { @@ -1209,6 +1202,13 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, netif_carrier_off(slave_dev); + ret = dsa_slave_phy_setup(p, slave_dev); + if (ret) { + netdev_err(master, "error %d setting up slave phy\n", ret); + free_netdev(slave_dev); + return ret; + } + return 0; } -- cgit v1.2.3 From 60d2c7f9ab1cac8b2d44307b660eb7813091dbf0 Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Tue, 5 Jan 2016 09:28:05 +0900 Subject: netfilter: nfnetlink_queue: validate dependencies to avoid breaking atomicity Check that dependencies are fulfilled before updating the queue instance, otherwise we can leave things in intermediate state on errors in nfqnl_recv_config(). 
Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 72 ++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3d1f16cf5cd0..fe360f7dd146 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1113,6 +1113,7 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, struct nfqnl_instance *queue; struct nfqnl_msg_config_cmd *cmd = NULL; struct nfnl_queue_net *q = nfnl_queue_pernet(net); + __u32 flags = 0, mask = 0; int ret = 0; if (nfqa[NFQA_CFG_CMD]) { @@ -1125,6 +1126,29 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, } } + /* Check if we support these flags in first place, dependencies should + * be there too not to break atomicity. + */ + if (nfqa[NFQA_CFG_FLAGS]) { + if (!nfqa[NFQA_CFG_MASK]) { + /* A mask is needed to specify which flags are being + * changed. 
+ */ + return -EINVAL; + } + + flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS])); + mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK])); + + if (flags >= NFQA_CFG_F_MAX) + return -EOPNOTSUPP; + +#if !IS_ENABLED(CONFIG_NETWORK_SECMARK) + if (flags & mask & NFQA_CFG_F_SECCTX) + return -EOPNOTSUPP; +#endif + } + rcu_read_lock(); queue = instance_lookup(q, queue_num); if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { @@ -1162,60 +1186,28 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, } } + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + if (nfqa[NFQA_CFG_PARAMS]) { - struct nfqnl_msg_config_params *params; + struct nfqnl_msg_config_params *params = + nla_data(nfqa[NFQA_CFG_PARAMS]); - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - params = nla_data(nfqa[NFQA_CFG_PARAMS]); nfqnl_set_mode(queue, params->copy_mode, ntohl(params->copy_range)); } if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { - __be32 *queue_maxlen; + __be32 *queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); spin_lock_bh(&queue->lock); queue->queue_maxlen = ntohl(*queue_maxlen); spin_unlock_bh(&queue->lock); } if (nfqa[NFQA_CFG_FLAGS]) { - __u32 flags, mask; - - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - - if (!nfqa[NFQA_CFG_MASK]) { - /* A mask is needed to specify which flags are being - * changed. 
- */ - ret = -EINVAL; - goto err_out_unlock; - } - - flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS])); - mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK])); - - if (flags >= NFQA_CFG_F_MAX) { - ret = -EOPNOTSUPP; - goto err_out_unlock; - } -#if !IS_ENABLED(CONFIG_NETWORK_SECMARK) - if (flags & mask & NFQA_CFG_F_SECCTX) { - ret = -EOPNOTSUPP; - goto err_out_unlock; - } -#endif spin_lock_bh(&queue->lock); queue->flags &= ~mask; queue->flags |= flags & mask; -- cgit v1.2.3 From 17bc6b4884340b045e779be38ba9f574256866a2 Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Tue, 5 Jan 2016 09:29:54 +0900 Subject: netfilter: nfnetlink_queue: don't handle options after unbind This patch stops processing after destroying a queue instance. Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index fe360f7dd146..57951ce621b1 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1176,7 +1176,7 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, goto err_out_unlock; } instance_destroy(q, queue); - break; + goto err_out_unlock; case NFQNL_CFG_CMD_PF_BIND: case NFQNL_CFG_CMD_PF_UNBIND: break; -- cgit v1.2.3 From 21c3c971d1eb5d5598ddb1eda2fc3e4d2c992182 Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Tue, 5 Jan 2016 09:31:40 +0900 Subject: netfilter: nfnetlink_queue: just returns error for unknown command This patch stops processing options for unknown command. 
Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 57951ce621b1..c1f6df4cfe88 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1182,7 +1182,7 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, break; default: ret = -ENOTSUPP; - break; + goto err_out_unlock; } } -- cgit v1.2.3 From 71b2e5f5ca3b163b90e487a96fd0cabbaf16792b Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Tue, 5 Jan 2016 09:32:59 +0900 Subject: netfilter: nfnetlink_queue: autoload nf_conntrack_netlink module NFQA_CFG_F_CONNTRACK config flag This patch enables to load nf_conntrack_netlink module if NFQA_CFG_F_CONNTRACK config flag is specified. Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index c1f6df4cfe88..1d3936587ace 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1147,6 +1147,17 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, if (flags & mask & NFQA_CFG_F_SECCTX) return -EOPNOTSUPP; #endif + if ((flags & mask & NFQA_CFG_F_CONNTRACK) && + !rcu_access_pointer(nfnl_ct_hook)) { +#ifdef CONFIG_MODULES + nfnl_unlock(NFNL_SUBSYS_QUEUE); + request_module("ip_conntrack_netlink"); + nfnl_lock(NFNL_SUBSYS_QUEUE); + if (rcu_access_pointer(nfnl_ct_hook)) + return -EAGAIN; +#endif + return -EOPNOTSUPP; + } } rcu_read_lock(); -- cgit v1.2.3 From eb075954e9fde114f57adc39a9ea6d379c13f81e Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Tue, 5 Jan 2016 09:34:34 +0900 Subject: netfilter: nfnetlink_log: just returns error for unknown command This patch stops processing 
options for unknown command. Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6a57f10a4e0b..8ca932057c13 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -888,7 +888,7 @@ static int nfulnl_recv_config(struct net *net, struct sock *ctnl, goto out_put; default: ret = -ENOTSUPP; - break; + goto out_put; } } else if (!inst) { ret = -ENODEV; -- cgit v1.2.3 From e6d8ecac9e68265aee9be711c5bd29406129666f Mon Sep 17 00:00:00 2001 From: Carlos Falgueras García Date: Tue, 5 Jan 2016 14:03:32 +0100 Subject: netfilter: nf_tables: Add new attributes into nft_set to store user data. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User data is stored after the 'nft_set_ops' private data, in the 'data[]' flexible array. The field 'udata' points to the user data and 'udlen' stores its length. Add new attribute NFTA_SET_USERDATA. 
Signed-off-by: Carlos Falgueras García Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 21 ++++++++++++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0191fbb33a2f..f6b1daf2e698 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -291,6 +291,8 @@ void nft_unregister_set(struct nft_set_ops *ops); * @timeout: default timeout value in msecs * @gc_int: garbage collection interval in msecs * @policy: set parameterization (see enum nft_set_policies) + * @udlen: user data length + * @udata: user data * @ops: set ops * @pnet: network namespace * @flags: set flags @@ -310,6 +312,8 @@ struct nft_set { u64 timeout; u32 gc_int; u16 policy; + u16 udlen; + unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; possible_net_t pnet; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 731288a039f6..03c28a402c63 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -291,6 +291,7 @@ enum nft_set_desc_attributes { * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32) * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) + * @NFTA_SET_USERDATA: user data (NLA_BINARY) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -306,6 +307,7 @@ enum nft_set_attributes { NFTA_SET_ID, NFTA_SET_TIMEOUT, NFTA_SET_GC_INTERVAL, + NFTA_SET_USERDATA, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f5c397158e29..2011977cd79d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2323,6 +2323,8 @@ 
static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_ID] = { .type = NLA_U32 }, [NFTA_SET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, + [NFTA_SET_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2482,6 +2484,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; } + if (nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata)) + goto nla_put_failure; + desc = nla_nest_start(skb, NFTA_SET_DESC); if (desc == NULL) goto nla_put_failure; @@ -2691,6 +2696,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, u64 timeout; u32 ktype, dtype, flags, policy, gc_int; struct nft_set_desc desc; + unsigned char *udata; + u16 udlen; int err; if (nla[NFTA_SET_TABLE] == NULL || @@ -2803,12 +2810,16 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (IS_ERR(ops)) return PTR_ERR(ops); + udlen = 0; + if (nla[NFTA_SET_USERDATA]) + udlen = nla_len(nla[NFTA_SET_USERDATA]); + size = 0; if (ops->privsize != NULL) size = ops->privsize(nla); err = -ENOMEM; - set = kzalloc(sizeof(*set) + size, GFP_KERNEL); + set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); if (set == NULL) goto err1; @@ -2817,6 +2828,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (err < 0) goto err2; + udata = NULL; + if (udlen) { + udata = set->data + size; + nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); + } + INIT_LIST_HEAD(&set->bindings); write_pnet(&set->pnet, net); set->ops = ops; @@ -2827,6 +2844,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, set->flags = flags; set->size = desc.size; set->policy = policy; + set->udlen = udlen; + set->udata = udata; set->timeout = timeout; set->gc_int = gc_int; -- cgit v1.2.3 From ce1e7989d989e36ee3b032d46aab28b7d5e30428 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 8 Jan 
2016 10:29:12 +0100 Subject: netfilter: nft_byteorder: provide 64bit le/be conversion Needed to convert the (64bit) conntrack counters to BE ordering. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_byteorder.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index fde5145f2e36..383c17138399 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -8,6 +8,7 @@ * Development of this code funded by Astaro AG (http://www.astaro.com/) */ +#include #include #include #include @@ -39,6 +40,27 @@ static void nft_byteorder_eval(const struct nft_expr *expr, d = (void *)dst; switch (priv->size) { + case 8: { + u64 src64; + + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 8; i++) { + src64 = get_unaligned_be64(&src[i]); + src64 = be64_to_cpu((__force __be64)src64); + put_unaligned_be64(src64, &dst[i]); + } + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 8; i++) { + src64 = get_unaligned_be64(&src[i]); + src64 = (__force u64)cpu_to_be64(src64); + put_unaligned_be64(src64, &dst[i]); + } + break; + } + break; + } case 4: switch (priv->op) { case NFT_BYTEORDER_NTOH: @@ -101,6 +123,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, switch (priv->size) { case 2: case 4: + case 8: break; default: return -EINVAL; -- cgit v1.2.3 From 48f66c905a976bf0ff092fc24f08d9addd82a245 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Jan 2016 21:34:24 +0100 Subject: netfilter: nft_ct: add byte/packet counter support If the accounting extension isn't present, we'll return a counter value of 0. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_ct.c | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 03c28a402c63..be41ffc128b8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -757,6 +757,8 @@ enum nft_ct_keys { NFT_CT_PROTO_SRC, NFT_CT_PROTO_DST, NFT_CT_LABELS, + NFT_CT_PKTS, + NFT_CT_BYTES, }; /** diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 8cbca3432f90..6f74109a7ed3 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,18 @@ struct nft_ct { }; }; +static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c, + enum nft_ct_keys k, + enum ip_conntrack_dir d) +{ + if (d < IP_CT_DIR_MAX) + return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) : + atomic64_read(&c[d].packets); + + return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) + + nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY); +} + static void nft_ct_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -114,6 +127,17 @@ static void nft_ct_get_eval(const struct nft_expr *expr, NF_CT_LABELS_MAX_SIZE - size); return; } + case NFT_CT_BYTES: /* fallthrough */ + case NFT_CT_PKTS: { + const struct nf_conn_acct *acct = nf_conn_acct_find(ct); + u64 count = 0; + + if (acct) + count = nft_ct_get_eval_counter(acct->counter, + priv->key, priv->dir); + memcpy(dest, &count, sizeof(count)); + return; + } #endif default: break; @@ -291,6 +315,13 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, return -EINVAL; len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all); break; + case NFT_CT_BYTES: + case NFT_CT_PKTS: + /* no direction? 
return sum of original + reply */ + if (tb[NFTA_CT_DIRECTION] == NULL) + priv->dir = IP_CT_DIR_MAX; + len = sizeof(u64); + break; default: return -EOPNOTSUPP; } @@ -373,6 +404,13 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) case NFT_CT_PROTO_DST: if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) goto nla_put_failure; + break; + case NFT_CT_BYTES: + case NFT_CT_PKTS: + if (priv->dir < IP_CT_DIR_MAX && + nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + break; default: break; } -- cgit v1.2.3 From 30d3d83a7dcef7a26c5c48e548bdd6a76754cbcd Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 6 Jan 2016 17:22:45 -0500 Subject: ipv4: fix endianness warnings in ip_tunnel_core.c Eliminate endianness mismatch warnings (reported by sparse) in this file by using appropriate nla_put_*()/nla_get_*() calls. Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index eb52ce950c27..859d415c0b2d 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -251,7 +251,7 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info = lwt_tun_info(new_state); if (tb[LWTUNNEL_IP_ID]) - tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP_ID]); + tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]); if (tb[LWTUNNEL_IP_DST]) tun_info->key.u.ipv4.dst = nla_get_be32(tb[LWTUNNEL_IP_DST]); @@ -266,7 +266,7 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); if (tb[LWTUNNEL_IP_FLAGS]) - tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP_FLAGS]); + tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP_FLAGS]); tun_info->mode = IP_TUNNEL_INFO_TX; tun_info->options_len = 0; @@ -281,12 +281,12 @@ static int 
ip_tun_fill_encap_info(struct sk_buff *skb, { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); - if (nla_put_u64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) || + if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) || nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || - nla_put_u16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) + nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) return -ENOMEM; return 0; @@ -346,7 +346,7 @@ static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info = lwt_tun_info(new_state); if (tb[LWTUNNEL_IP6_ID]) - tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP6_ID]); + tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP6_ID]); if (tb[LWTUNNEL_IP6_DST]) tun_info->key.u.ipv6.dst = nla_get_in6_addr(tb[LWTUNNEL_IP6_DST]); @@ -361,7 +361,7 @@ static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]); if (tb[LWTUNNEL_IP6_FLAGS]) - tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP6_FLAGS]); + tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]); tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6; tun_info->options_len = 0; @@ -376,12 +376,12 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); - if (nla_put_u64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) || + if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) || nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) || nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) || - nla_put_u16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) + nla_put_be16(skb, 
LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) return -ENOMEM; return 0; -- cgit v1.2.3 From ad64b8be71e3a37ea43745aa69817c4bcd489987 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 6 Jan 2016 17:22:47 -0500 Subject: ipv4: eliminate lock count warnings in ping.c Add lock release/acquire annotations to ping_seq_start() and ping_seq_stop() to satisfy sparse. Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- net/ipv4/ping.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index e89094ab5ddb..c117b21b937d 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1063,6 +1063,7 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) } void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) + __acquires(ping_table.lock) { struct ping_iter_state *state = seq->private; state->bucket = 0; @@ -1094,6 +1095,7 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) + __releases(ping_table.lock) { read_unlock_bh(&ping_table.lock); } -- cgit v1.2.3 From 4a4d045eb2c174472b68f366108bf76f1802f803 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 23 Nov 2015 20:30:15 +0100 Subject: batman-adv: Start new development cycle Signed-off-by: Simon Wunderlich Signed-off-by: Antonio Quartulli --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index ebd8af0a1eb0..da9f16c6829b 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2015.2" +#define BATADV_SOURCE_VERSION "2016.0" #endif /* B.A.T.M.A.N. 
parameters */ -- cgit v1.2.3 From 008a374487070a391c12aa39288fd8511f822cab Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 3 Nov 2015 19:20:34 +0100 Subject: batman-adv: Fix lockdep annotation of batadv_tvlv_container_remove The function handles tvlv containers and not tvlv handlers. Thus the lockdep_assert_held has to check for the container_list lock. Fixes: 2c72d655b044 ("batman-adv: Annotate deleting functions with external lock via lockdep") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5dbcb2e2b497..95fd418e9567 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -747,7 +747,7 @@ static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv, struct batadv_tvlv_container *tvlv) { - lockdep_assert_held(&bat_priv->tvlv.handler_list_lock); + lockdep_assert_held(&bat_priv->tvlv.container_list_lock); if (!tvlv) return; -- cgit v1.2.3 From 143d157c9ecfa09ed777bf33635eb27fabce3e0a Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Sun, 9 Aug 2015 23:56:50 +0800 Subject: batman-adv: remove leftovers of unused BATADV_PRIMARIES_FIRST_HOP flag Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 3 --- net/batman-adv/packet.h | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 5677169c1b98..246702486228 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -361,7 +361,6 @@ batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; - batadv_ogm_packet->flags = 
BATADV_PRIMARIES_FIRST_HOP; batadv_ogm_packet->ttl = BATADV_TTL; } @@ -842,8 +841,6 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, "Forwarding packet: tq: %i, ttl: %i\n", batadv_ogm_packet->tq, batadv_ogm_packet->ttl); - /* switch of primaries first hop flag when forwarding */ - batadv_ogm_packet->flags &= ~BATADV_PRIMARIES_FIRST_HOP; if (is_single_hop_neigh) batadv_ogm_packet->flags |= BATADV_DIRECTLINK; else diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 11f996b39fef..0558e3237e0e 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -72,8 +72,7 @@ enum batadv_subtype { * enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets * @BATADV_NOT_BEST_NEXT_HOP: flag is set when ogm packet is forwarded and was * previously received from someone else than the best neighbor. - * @BATADV_PRIMARIES_FIRST_HOP: flag is set when the primary interface address - * is used, and the packet travels its first hop. + * @BATADV_PRIMARIES_FIRST_HOP: flag unused. * @BATADV_DIRECTLINK: flag is for the first hop or if rebroadcasted from a * one hop neighbor on the interface where it was originally received. */ -- cgit v1.2.3 From d68081a24081f9a1910a41778a8411d924255471 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 9 Nov 2015 16:20:52 +0100 Subject: batman-adv: purge bridge loop avoidance when it's disabled When bridge loop avoidance is disabled through sysfs, the internal data structures are not disabled, but only BLA operations are disabled. To be sure that they are removed, purge the data immediately. That is especially useful if a firmware's network state is changed, and the BLA wait periods should restart on the new network. 
Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 20 ++++++++++++++++++++ net/batman-adv/bridge_loop_avoidance.h | 2 ++ net/batman-adv/sysfs.c | 4 +++- 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 99dcae316ec8..d5d71ac96c8a 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1168,6 +1168,26 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, } } +/** + * batadv_bla_status_update - purge bla interfaces if necessary + * @net_dev: the soft interface net device + */ +void batadv_bla_status_update(struct net_device *net_dev) +{ + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hard_iface *primary_if; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + return; + + /* this function already purges everything when bla is disabled, + * so just call that one. 
+ */ + batadv_bla_update_orig_address(bat_priv, primary_if, primary_if); + batadv_hardif_free_ref(primary_if); +} + /* periodic work to do: * * purge structures when they are too old * * send announcements diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 025152b34282..7ea199b8b5ab 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -22,6 +22,7 @@ #include +struct net_device; struct seq_file; struct sk_buff; @@ -42,6 +43,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, struct batadv_hard_iface *oldif); +void batadv_bla_status_update(struct net_device *net_dev); int batadv_bla_init(struct batadv_priv *bat_priv); void batadv_bla_free(struct batadv_priv *bat_priv); diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 9de3c8804ff4..48e2aaddda49 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -40,6 +40,7 @@ #include "distributed-arp-table.h" #include "gateway_client.h" #include "gateway_common.h" +#include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "network-coding.h" #include "packet.h" @@ -549,7 +550,8 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj, BATADV_ATTR_SIF_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); #ifdef CONFIG_BATMAN_ADV_BLA -BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, NULL); +BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, + batadv_bla_status_update); #endif #ifdef CONFIG_BATMAN_ADV_DAT BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, -- cgit v1.2.3 From e1544f3c87778ab4af9689d571570d6abfd2f6c2 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 9 Nov 2015 16:20:53 +0100 Subject: batman-adv: increase BLA wait periods to 6 If networks take a long time to 
come up, e.g. due to lossy links, then the bridge loop avoidance wait time to suppress broadcasts may not wait long enough and detect a backbone before the mesh is brought up. Increasing the wait period further to 60 seconds makes this scenario less likely. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index da9f16c6829b..9dbd9107e7e1 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -109,7 +109,7 @@ #define BATADV_MAX_AGGREGATION_MS 100 #define BATADV_BLA_PERIOD_LENGTH 10000 /* 10 seconds */ -#define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 3) +#define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 6) #define BATADV_BLA_CLAIM_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 10) #define BATADV_BLA_WAIT_PERIODS 3 -- cgit v1.2.3 From 9e728e84389ba8317d1444bdf256e34ad467f3da Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 17 Nov 2015 14:11:26 +0100 Subject: batman-adv: only call post function if something changed Currently, the post function is also called on errors or if there were no changes, which is redundant for the functions currently using these facilities. 
Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/sysfs.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 48e2aaddda49..fe87777fda8a 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -242,10 +242,13 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \ static int batadv_store_bool_attr(char *buff, size_t count, struct net_device *net_dev, - const char *attr_name, atomic_t *attr) + const char *attr_name, atomic_t *attr, + bool *changed) { int enabled = -1; + *changed = false; + if (buff[count - 1] == '\n') buff[count - 1] = '\0'; @@ -272,6 +275,8 @@ static int batadv_store_bool_attr(char *buff, size_t count, atomic_read(attr) == 1 ? "enabled" : "disabled", enabled == 1 ? "enabled" : "disabled"); + *changed = true; + atomic_set(attr, (unsigned int)enabled); return count; } @@ -282,11 +287,12 @@ __batadv_store_bool_attr(char *buff, size_t count, struct attribute *attr, atomic_t *attr_store, struct net_device *net_dev) { + bool changed; int ret; ret = batadv_store_bool_attr(buff, count, net_dev, attr->name, - attr_store); - if (post_func && ret) + attr_store, &changed); + if (post_func && changed) post_func(net_dev); return ret; -- cgit v1.2.3 From d737ccbed3e62dd45d631cf69183de005144d05b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 13 Sep 2015 09:44:45 +0200 Subject: batman-adv: Add function to convert string to batadv throughput The code to convert the throughput information from a string to the batman-adv internal (100Kibit/s) representation is duplicated in batadv_parse_gw_bandwidth. Move this functionality to its own function batadv_parse_throughput to reduce the code complexity. 
Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/gateway_common.c | 117 +++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 0cb5e6b6f6d4..b51bface8bdd 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -31,27 +31,23 @@ #include "packet.h" /** - * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download - * and upload bandwidth information + * batadv_parse_throughput - parse supplied string buffer to extract throughput + * information * @net_dev: the soft interface net device * @buff: string buffer to parse - * @down: pointer holding the returned download bandwidth information - * @up: pointer holding the returned upload bandwidth information + * @description: text shown when throughput string cannot be parsed + * @throughput: pointer holding the returned throughput information * * Returns false on parse error and true otherwise. 
*/ -static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, - u32 *down, u32 *up) +static bool batadv_parse_throughput(struct net_device *net_dev, char *buff, + const char *description, u32 *throughput) { enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT; - char *slash_ptr, *tmp_ptr; - u64 ldown, lup; + u64 lthroughput; + char *tmp_ptr; int ret; - slash_ptr = strchr(buff, '/'); - if (slash_ptr) - *slash_ptr = 0; - if (strlen(buff) > 4) { tmp_ptr = buff + strlen(buff) - 4; @@ -63,90 +59,75 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, *tmp_ptr = '\0'; } - ret = kstrtou64(buff, 10, &ldown); + ret = kstrtou64(buff, 10, &lthroughput); if (ret) { batadv_err(net_dev, - "Download speed of gateway mode invalid: %s\n", - buff); + "Invalid throughput speed for %s: %s\n", + description, buff); return false; } switch (bw_unit_type) { case BATADV_BW_UNIT_MBIT: /* prevent overflow */ - if (U64_MAX / 10 < ldown) { + if (U64_MAX / 10 < lthroughput) { batadv_err(net_dev, - "Download speed of gateway mode too large: %s\n", - buff); + "Throughput speed for %s too large: %s\n", + description, buff); return false; } - ldown *= 10; + lthroughput *= 10; break; case BATADV_BW_UNIT_KBIT: default: - ldown = div_u64(ldown, 100); + lthroughput = div_u64(lthroughput, 100); break; } - if (U32_MAX < ldown) { + if (lthroughput > U32_MAX) { batadv_err(net_dev, - "Download speed of gateway mode too large: %s\n", - buff); + "Throughput speed for %s too large: %s\n", + description, buff); return false; } - *down = ldown; - - /* we also got some upload info */ - if (slash_ptr) { - bw_unit_type = BATADV_BW_UNIT_KBIT; - - if (strlen(slash_ptr + 1) > 4) { - tmp_ptr = slash_ptr + 1 - 4 + strlen(slash_ptr + 1); + *throughput = lthroughput; - if (strncasecmp(tmp_ptr, "mbit", 4) == 0) - bw_unit_type = BATADV_BW_UNIT_MBIT; + return true; +} - if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) || - (bw_unit_type == BATADV_BW_UNIT_MBIT)) - *tmp_ptr = 
'\0'; - } +/** + * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download + * and upload bandwidth information + * @net_dev: the soft interface net device + * @buff: string buffer to parse + * @down: pointer holding the returned download bandwidth information + * @up: pointer holding the returned upload bandwidth information + * + * Return: false on parse error and true otherwise. + */ +static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, + u32 *down, u32 *up) +{ + char *slash_ptr; + bool ret; - ret = kstrtou64(slash_ptr + 1, 10, &lup); - if (ret) { - batadv_err(net_dev, - "Upload speed of gateway mode invalid: %s\n", - slash_ptr + 1); - return false; - } + slash_ptr = strchr(buff, '/'); + if (slash_ptr) + *slash_ptr = 0; - switch (bw_unit_type) { - case BATADV_BW_UNIT_MBIT: - /* prevent overflow */ - if (U64_MAX / 10 < lup) { - batadv_err(net_dev, - "Upload speed of gateway mode too large: %s\n", - slash_ptr + 1); - return false; - } - - lup *= 10; - break; - case BATADV_BW_UNIT_KBIT: - default: - lup = div_u64(lup, 100); - break; - } + ret = batadv_parse_throughput(net_dev, buff, "download gateway speed", + down); + if (!ret) + return false; - if (U32_MAX < lup) { - batadv_err(net_dev, - "Upload speed of gateway mode too large: %s\n", - slash_ptr + 1); + /* we also got some upload info */ + if (slash_ptr) { + ret = batadv_parse_throughput(net_dev, slash_ptr + 1, + "upload gateway speed", up); + if (!ret) return false; - } - - *up = lup; } return true; -- cgit v1.2.3 From c799443ee13ef37221732839f1cca6f11c798b7a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 15 Nov 2015 08:04:43 +0100 Subject: batman-adv: Delete unnecessary checks before the function call "kfree_skb" The kfree_skb() function tests whether its argument is NULL and then returns immediately. Thus the test around the calls is not needed. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 2 +- net/batman-adv/network-coding.c | 4 +--- net/batman-adv/send.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 95fd418e9567..5b678f3471fc 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1184,7 +1184,7 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, ret = true; out: - if (skb && !ret) + if (!ret) kfree_skb(skb); if (orig_node) batadv_orig_node_free_ref(orig_node); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index f5276be2c77c..c98b0ab85449 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -244,9 +244,7 @@ static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path) */ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) { - if (nc_packet->skb) - kfree_skb(nc_packet->skb); - + kfree_skb(nc_packet->skb); batadv_nc_path_free_ref(nc_packet->nc_path); kfree(nc_packet); } diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index f664324805eb..782fa33ec296 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -407,8 +407,7 @@ void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) { - if (forw_packet->skb) - kfree_skb(forw_packet->skb); + kfree_skb(forw_packet->skb); if (forw_packet->if_incoming) batadv_hardif_free_ref(forw_packet->if_incoming); if (forw_packet->if_outgoing) -- cgit v1.2.3 From 8bbb7cb2324d6a5fb7ccdc4ab0099dc18b91b690 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 15 Nov 2015 09:00:42 +0100 Subject: batman-adv: Less checks in batadv_tvlv_unicast_send() * Let us return directly if a call of the batadv_orig_hash_find() function returned a null pointer. 
* Omit the initialisation for the variable "skb" at the beginning. * Replace an assignment by a call of the kfree_skb() function and delete the affected variable "ret" then. Signed-off-by: Markus Elfring Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5b678f3471fc..4b5d61fbadb1 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1143,15 +1143,14 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; struct batadv_tvlv_hdr *tvlv_hdr; struct batadv_orig_node *orig_node; - struct sk_buff *skb = NULL; + struct sk_buff *skb; unsigned char *tvlv_buff; unsigned int tvlv_len; ssize_t hdr_len = sizeof(*unicast_tvlv_packet); - bool ret = false; orig_node = batadv_orig_hash_find(bat_priv, dst); if (!orig_node) - goto out; + return; tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len; @@ -1180,14 +1179,10 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, tvlv_buff += sizeof(*tvlv_hdr); memcpy(tvlv_buff, tvlv_value, tvlv_value_len); - if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) - ret = true; - -out: - if (!ret) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) kfree_skb(skb); - if (orig_node) - batadv_orig_node_free_ref(orig_node); +out: + batadv_orig_node_free_ref(orig_node); } /** -- cgit v1.2.3 From f75a33aeed0776f52da05276c2ef98e16d680a6b Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 3 Nov 2015 19:20:34 +0100 Subject: batman-adv: Delete an unnecessary check before the function call "batadv_softif_vlan_free_ref" The batadv_softif_vlan_free_ref() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index ec67deff1621..5852fda9f175 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3356,8 +3356,7 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, ret = true; out: - if (vlan) - batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); if (tt_global_entry) batadv_tt_global_entry_free_ref(tt_global_entry); if (tt_local_entry) -- cgit v1.2.3 From e087f34f28d8597f7c82f079337939367ba96537 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 3 Nov 2015 19:20:34 +0100 Subject: batman-adv: Split a condition check Let us split a check for a condition at the beginning of the batadv_is_ap_isolated() function so that a direct return can be performed in this function if the variable "vlan" contained a null pointer. 
Signed-off-by: Markus Elfring Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5852fda9f175..a22080c53401 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3339,7 +3339,10 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, bool ret = false; vlan = batadv_softif_vlan_get(bat_priv, vid); - if (!vlan || !atomic_read(&vlan->ap_isolation)) + if (!vlan) + return false; + + if (!atomic_read(&vlan->ap_isolation)) goto out; tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid); -- cgit v1.2.3 From 426fc6c8119820164dd44e99862dda85159eef93 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 6 Sep 2015 21:38:45 +0200 Subject: batman-adv: Fix kernel-doc parsing of main structs kernel-doc is not able to skip an #ifdef between the kernel documentation block and the start of the struct. 
Moving the #ifdef before the kernel doc block avoids this problem Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/types.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 7c386dbb75f0..876ac336c61e 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -423,13 +423,14 @@ struct batadv_neigh_ifinfo { struct rcu_head rcu; }; +#ifdef CONFIG_BATMAN_ADV_BLA + /** * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression * @orig[ETH_ALEN]: mac address of orig node orginating the broadcast * @crc: crc32 checksum of broadcast payload * @entrytime: time when the broadcast packet was received */ -#ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bcast_duplist_entry { u8 orig[ETH_ALEN]; __be32 crc; @@ -571,6 +572,8 @@ struct batadv_priv_tt { struct delayed_work work; }; +#ifdef CONFIG_BATMAN_ADV_BLA + /** * struct batadv_priv_bla - per mesh interface bridge loope avoidance data * @num_requests; number of bla requests in flight @@ -583,7 +586,6 @@ struct batadv_priv_tt { * @claim_dest: local claim data (e.g. 
claim group) * @work: work queue callback item for cleanups & bla announcements */ -#ifdef CONFIG_BATMAN_ADV_BLA struct batadv_priv_bla { atomic_t num_requests; struct batadv_hashtable *claim_hash; @@ -597,6 +599,8 @@ struct batadv_priv_bla { }; #endif +#ifdef CONFIG_BATMAN_ADV_DEBUG + /** * struct batadv_priv_debug_log - debug logging data * @log_buff: buffer holding the logs (ring bufer) @@ -605,7 +609,6 @@ struct batadv_priv_bla { * @lock: lock protecting log_buff, log_start & log_end * @queue_wait: log reader's wait queue */ -#ifdef CONFIG_BATMAN_ADV_DEBUG struct batadv_priv_debug_log { char log_buff[BATADV_LOG_BUF_LEN]; unsigned long log_start; @@ -647,13 +650,14 @@ struct batadv_priv_tvlv { spinlock_t handler_list_lock; /* protects handler_list */ }; +#ifdef CONFIG_BATMAN_ADV_DAT + /** * struct batadv_priv_dat - per mesh interface DAT private data * @addr: node DAT address * @hash: hashtable representing the local ARP cache * @work: work queue callback item for cache purging */ -#ifdef CONFIG_BATMAN_ADV_DAT struct batadv_priv_dat { batadv_dat_addr_t addr; struct batadv_hashtable *hash; @@ -893,6 +897,8 @@ struct batadv_socket_packet { u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE]; }; +#ifdef CONFIG_BATMAN_ADV_BLA + /** * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN * @orig: originator address of backbone node (mac address of primary iface) @@ -910,7 +916,6 @@ struct batadv_socket_packet { * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ -#ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bla_backbone_gw { u8 orig[ETH_ALEN]; unsigned short vid; -- cgit v1.2.3 From 006a199d5d1d4e1666b0d8b4f51b5a978ddc6aab Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 6 Sep 2015 21:38:46 +0200 Subject: batman-adv: Fix kerneldoc member names in for main structs Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/types.h | 12 
++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 876ac336c61e..d93501edb987 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -223,12 +223,12 @@ struct batadv_orig_bat_iv { * @orig: originator ethernet address * @ifinfo_list: list for routers per outgoing interface * @last_bonding_candidate: pointer to last ifinfo of last used router - * @batadv_dat_addr_t: address of the orig node in the distributed hash + * @dat_addr: address of the orig node in the distributed hash * @last_seen: time when last packet from this node was received * @bcast_seqno_reset: time when the broadcast seqno window was reset * @mcast_handler_lock: synchronizes mcast-capability and -flag changes * @mcast_flags: multicast flags announced by the orig node - * @mcast_want_all_unsnoop_node: a list node for the + * @mcast_want_all_unsnoopables_node: a list node for the * mcast.want_all_unsnoopables list * @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4 list * @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6 list @@ -427,7 +427,7 @@ struct batadv_neigh_ifinfo { /** * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression - * @orig[ETH_ALEN]: mac address of orig node orginating the broadcast + * @orig: mac address of orig node orginating the broadcast * @crc: crc32 checksum of broadcast payload * @entrytime: time when the broadcast packet was received */ @@ -576,7 +576,7 @@ struct batadv_priv_tt { /** * struct batadv_priv_bla - per mesh interface bridge loope avoidance data - * @num_requests; number of bla requests in flight + * @num_requests: number of bla requests in flight * @claim_hash: hash table containing mesh nodes this host has claimed * @backbone_hash: hash table containing all detected backbone gateways * @bcast_duplist: recently received broadcast packets array (for broadcast @@ -799,7 +799,7 @@ struct 
batadv_softif_vlan { * @dat: distributed arp table data * @mcast: multicast data * @network_coding: bool indicating whether network coding is enabled - * @batadv_priv_nc: network coding data + * @nc: network coding data */ struct batadv_priv { atomic_t mesh_state; @@ -934,7 +934,7 @@ struct batadv_bla_backbone_gw { * struct batadv_bla_claim - claimed non-mesh client structure * @addr: mac address of claimed non-mesh client * @vid: vlan id this client was detected on - * @batadv_bla_backbone_gw: pointer to backbone gw claiming this client + * @backbone_gw: pointer to backbone gw claiming this client * @lasttime: last time we heard of claim (locals only) * @hash_entry: hlist node for batadv_priv_bla::claim_hash * @refcount: number of contexts the object is used -- cgit v1.2.3 From 8a3719a184cfd122eba9212c8d4a2fab5c9fb628 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 6 Sep 2015 21:38:47 +0200 Subject: batman-adv: Remove kerneldoc for missing struct members Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/types.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index d93501edb987..1a67a1a402f6 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -371,9 +371,7 @@ struct batadv_hardif_neigh_node { * @ifinfo_lock: lock protecting private ifinfo members and list * @if_incoming: pointer to incoming hard interface * @last_seen: when last packet via this neighbor was received - * @last_ttl: last received ttl from this neigh node * @rcu: struct used for freeing in an RCU-safe manner - * @bat_iv: B.A.T.M.A.N. 
IV private structure */ struct batadv_neigh_node { struct hlist_node list; @@ -1257,8 +1255,6 @@ struct batadv_dat_candidate { * struct batadv_tvlv_container - container for tvlv appended to OGMs * @list: hlist node for batadv_priv_tvlv::container_list * @tvlv_hdr: tvlv header information needed to construct the tvlv - * @value_len: length of the buffer following this struct which contains - * the actual tvlv payload * @refcount: number of contexts the object is used */ struct batadv_tvlv_container { -- cgit v1.2.3 From ed21d170e878b6b067a3216040b7b935c8007196 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 6 Sep 2015 21:38:48 +0200 Subject: batman-adv: Add kerneldoc for batadv_neigh_node::refcount Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 1a67a1a402f6..3437b667a2cd 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -371,6 +371,7 @@ struct batadv_hardif_neigh_node { * @ifinfo_lock: lock protecting private ifinfo members and list * @if_incoming: pointer to incoming hard interface * @last_seen: when last packet via this neighbor was received + * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_neigh_node { -- cgit v1.2.3 From 4d41e12593a9a6c4aaf113d44c8c619067b2b0aa Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Sun, 10 Jan 2016 21:06:22 +0100 Subject: switchdev: Adding MDB entry offload Define HW multicast entry: MAC and VID. Using a MAC address simplifies support for both IPV4 and IPv6. Signed-off-by: Elad Raz Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 11 +++++++++++ net/switchdev/switchdev.c | 2 ++ 2 files changed, 13 insertions(+) (limited to 'net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 603ae2f88dbb..d451122e8404 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -68,6 +68,7 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_ID_PORT_VLAN, SWITCHDEV_OBJ_ID_IPV4_FIB, SWITCHDEV_OBJ_ID_PORT_FDB, + SWITCHDEV_OBJ_ID_PORT_MDB, }; struct switchdev_obj { @@ -113,6 +114,16 @@ struct switchdev_obj_port_fdb { #define SWITCHDEV_OBJ_PORT_FDB(obj) \ container_of(obj, struct switchdev_obj_port_fdb, obj) +/* SWITCHDEV_OBJ_ID_PORT_MDB */ +struct switchdev_obj_port_mdb { + struct switchdev_obj obj; + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + +#define SWITCHDEV_OBJ_PORT_MDB(obj) \ + container_of(obj, struct switchdev_obj_port_mdb, obj) + void switchdev_trans_item_enqueue(struct switchdev_trans *trans, void *data, void (*destructor)(void const *), struct switchdev_trans_item *tritem); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index df790d3385a2..ebc661d3b6e3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -345,6 +345,8 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) return sizeof(struct switchdev_obj_ipv4_fib); case SWITCHDEV_OBJ_ID_PORT_FDB: return sizeof(struct switchdev_obj_port_fdb); + case SWITCHDEV_OBJ_ID_PORT_MDB: + return sizeof(struct switchdev_obj_port_mdb); default: BUG(); } -- cgit v1.2.3 From f1fecb1d10ecc2f94d19e67827b9f678b36bfc61 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Sun, 10 Jan 2016 21:06:23 +0100 Subject: bridge: Reflect MDB entries to hardware Offload MDB changes per port to hardware Signed-off-by: Elad Raz Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_mdb.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index cd8deea2d074..30e105f57f0d 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -7,6 +7,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include #include @@ -210,10 +211,32 @@ static inline size_t rtnl_mdb_nlmsg_size(void) static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, int type) { + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = SWITCHDEV_OBJ_ID_PORT_MDB, + .flags = SWITCHDEV_F_DEFER, + }, + .vid = entry->vid, + }; + struct net_device *port_dev; struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; + port_dev = __dev_get_by_index(net, entry->ifindex); + if (entry->addr.proto == htons(ETH_P_IP)) + ip_eth_mc_map(entry->addr.u.ip4, mdb.addr); +#if IS_ENABLED(CONFIG_IPV6) + else + ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr); +#endif + + mdb.obj.orig_dev = port_dev; + if (port_dev && type == RTM_NEWMDB) + switchdev_port_obj_add(port_dev, &mdb.obj); + else if (port_dev && type == RTM_DELMDB) + switchdev_port_obj_del(port_dev, &mdb.obj); + skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); if (!skb) goto errout; -- cgit v1.2.3 From 787d7ac308ff2279e4b2ea393ad4d990de486ef2 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 7 Jan 2016 14:28:39 +0100 Subject: udp: restrict offloads to one namespace udp tunnel offloads tend to aggregate datagrams based on inner headers. gro engine gets notified by tunnel implementations about possible offloads. The match is solely based on the port number. Imagine a tunnel bound to port 53, the offloading will look into all DNS packets and tries to aggregate them based on the inner data found within. This could lead to data corruption and malformed DNS packets. 
While this patch minimizes the problem and helps an administrator to find the issue by querying ip tunnel/fou, a better way would be to match on the specific destination ip address so if a user space socket is bound to the same address it will conflict. Cc: Tom Herbert Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- drivers/net/vxlan.c | 2 +- include/net/protocol.h | 2 +- net/ipv4/fou.c | 2 +- net/ipv4/udp_offload.c | 10 +++++++--- 5 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 58efdec12f30..db96b0cbb8ba 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -376,7 +376,7 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs) int err; if (sa_family == AF_INET) { - err = udp_add_offload(&gs->udp_offloads); + err = udp_add_offload(sock_net(sk), &gs->udp_offloads); if (err) pr_warn("geneve: udp_add_offload failed with status %d\n", err); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 405a7b6cca25..e1e147f2d6ce 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -621,7 +621,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs) int err; if (sa_family == AF_INET) { - err = udp_add_offload(&vs->udp_offloads); + err = udp_add_offload(net, &vs->udp_offloads); if (err) pr_warn("vxlan: udp_add_offload failed with status %d\n", err); } diff --git a/include/net/protocol.h b/include/net/protocol.h index d6fcc1fcdb5b..da689f5432de 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -107,7 +107,7 @@ int inet_del_offload(const struct net_offload *prot, unsigned char num); void inet_register_protosw(struct inet_protosw *p); void inet_unregister_protosw(struct inet_protosw *p); -int udp_add_offload(struct udp_offload *prot); +int udp_add_offload(struct net *net, struct udp_offload *prot); void udp_del_offload(struct udp_offload *prot); #if IS_ENABLED(CONFIG_IPV6) 
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index bd903fe0f750..976f0dcf6991 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -498,7 +498,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_allocation = GFP_ATOMIC; if (cfg->udp_config.family == AF_INET) { - err = udp_add_offload(&fou->udp_offloads); + err = udp_add_offload(net, &fou->udp_offloads); if (err) goto error; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f9386160cbee..5d396b96ae8b 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,6 +21,7 @@ static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; struct udp_offload_priv { struct udp_offload *offload; + possible_net_t net; struct rcu_head rcu; struct udp_offload_priv __rcu *next; }; @@ -241,13 +242,14 @@ out: return segs; } -int udp_add_offload(struct udp_offload *uo) +int udp_add_offload(struct net *net, struct udp_offload *uo) { struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC); if (!new_offload) return -ENOMEM; + write_pnet(&new_offload->net, net); new_offload->offload = uo; spin_lock(&udp_offload_lock); @@ -311,7 +313,8 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { - if (uo_priv->offload->port == uh->dest && + if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && + uo_priv->offload->port == uh->dest && uo_priv->offload->callbacks.gro_receive) goto unflush; } @@ -389,7 +392,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) uo_priv = rcu_dereference(udp_offload_base); for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { - if (uo_priv->offload->port == uh->dest && + if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && + uo_priv->offload->port == uh->dest && uo_priv->offload->callbacks.gro_complete) break; } -- cgit v1.2.3 From 
13b287e8d1cad951634389f85b8c9b816bd3bb1e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:43 +0200 Subject: ipv4: Namespaceify tcp_keepalive_time sysctl knob Different net namespaces might have different requirements as to the keepalive time of tcp sockets. This might be required in cases where different firewall rules are in place which require tcp timeout sockets to be increased/decreased independently of the host. Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 5 +++-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_timer.c | 1 - 5 files changed, 14 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d75be32650ba..9e9bbebaebd1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -94,6 +94,8 @@ struct netns_ipv4 { int sysctl_tcp_probe_threshold; u32 sysctl_tcp_probe_interval; + int sysctl_tcp_keepalive_time; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/include/net/tcp.h b/include/net/tcp.h index f33fecf4e282..cb4d4cf25744 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_keepalive_time; extern int sysctl_tcp_keepalive_probes; extern int sysctl_tcp_keepalive_intvl; extern int sysctl_tcp_syn_retries; @@ -1230,7 +1229,9 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) static inline int keepalive_time_when(const struct tcp_sock *tp) { - return tp->keepalive_time ? : sysctl_tcp_keepalive_time; + struct net *net = sock_net((struct sock *)tp); + + return tp->keepalive_time ? 
: net->ipv4.sysctl_tcp_keepalive_time; } static inline int keepalive_probes(const struct tcp_sock *tp) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 41ff1f87dfd7..1886cc842871 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -336,13 +336,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_keepalive_time", - .data = &sysctl_tcp_keepalive_time, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { .procname = "tcp_keepalive_probes", .data = &sysctl_tcp_keepalive_probes, @@ -961,6 +954,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_keepalive_time", + .data = &init_net.ipv4.sysctl_tcp_keepalive_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc4f72686705..6e14ff9a8580 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2385,6 +2385,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; + net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; + return 0; fail: tcp_sk_exit(net); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 193ba1fa8a9a..166f27b43cc0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME; int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; -- cgit v1.2.3 From 9bd6861bd4326e3afd3f14a9ec8a723771fb20bb Mon Sep 17 00:00:00 2001 
From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:44 +0200 Subject: ipv4: Namespecify tcp_keepalive_probes sysctl knob This is required to have full tcp keepalive mechanism namespace support. Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 5 +++-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_timer.c | 1 - 5 files changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9e9bbebaebd1..6e26ea2d0374 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -95,6 +95,7 @@ struct netns_ipv4 { u32 sysctl_tcp_probe_interval; int sysctl_tcp_keepalive_time; + int sysctl_tcp_keepalive_probes; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index cb4d4cf25744..0646521400bf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_keepalive_probes; extern int sysctl_tcp_keepalive_intvl; extern int sysctl_tcp_syn_retries; extern int sysctl_tcp_synack_retries; @@ -1236,7 +1235,9 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) static inline int keepalive_probes(const struct tcp_sock *tp) { - return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; + struct net *net = sock_net((struct sock *)tp); + + return tp->keepalive_probes ? 
: net->ipv4.sysctl_tcp_keepalive_probes; } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1886cc842871..e99fbb77dba7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -336,13 +336,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_keepalive_probes", - .data = &sysctl_tcp_keepalive_probes, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_keepalive_intvl", .data = &sysctl_tcp_keepalive_intvl, @@ -961,6 +954,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "tcp_keepalive_probes", + .data = &init_net.ipv4.sysctl_tcp_keepalive_probes, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6e14ff9a8580..ed98de85871e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2386,6 +2386,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; + net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; return 0; fail: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 166f27b43cc0..0ccb120d591a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; -- cgit v1.2.3 From b840d15d39128d08ed4486085e5507d2617b9ae1 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: 
Thu, 7 Jan 2016 16:38:45 +0200 Subject: ipv4: Namespecify the tcp_keepalive_intvl sysctl knob This is the final part required to namespaceify the tcp keep alive mechanism. Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 5 +++-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_timer.c | 1 - 5 files changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6e26ea2d0374..2b7907a35568 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -96,6 +96,7 @@ struct netns_ipv4 { int sysctl_tcp_keepalive_time; int sysctl_tcp_keepalive_probes; + int sysctl_tcp_keepalive_intvl; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0646521400bf..a80255f4ca33 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_keepalive_intvl; extern int sysctl_tcp_syn_retries; extern int sysctl_tcp_synack_retries; extern int sysctl_tcp_retries1; @@ -1223,7 +1222,9 @@ void tcp_enter_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { - return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; + struct net *net = sock_net((struct sock *)tp); + + return tp->keepalive_intvl ? 
: net->ipv4.sysctl_tcp_keepalive_intvl; } static inline int keepalive_time_when(const struct tcp_sock *tp) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e99fbb77dba7..46ce410703b1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -336,13 +336,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_keepalive_intvl", - .data = &sysctl_tcp_keepalive_intvl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { .procname = "tcp_retries1", .data = &sysctl_tcp_retries1, @@ -961,6 +954,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_keepalive_intvl", + .data = &init_net.ipv4.sysctl_tcp_keepalive_intvl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ed98de85871e..65947c1f4733 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2387,6 +2387,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; + net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; return 0; fail: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0ccb120d591a..a4730a28b220 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; -- cgit v1.2.3 From fdc5432a7b44ab7de17141beec19d946b9344e91 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 7 Jan 2016 15:50:22 +0100 
Subject: net, sched: add skb_at_tc_ingress helper Add a skb_at_tc_ingress() as this will be needed elsewhere as well and can hide the ugly ifdef. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/sch_generic.h | 9 +++++++++ net/sched/cls_bpf.c | 6 +----- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index b2a8e6338576..636a362a0e03 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -407,6 +407,15 @@ bool tcf_destroy(struct tcf_proto *tp, bool force); void tcf_destroy_chain(struct tcf_proto __rcu **fl); int skb_do_redirect(struct sk_buff *); +static inline bool skb_at_tc_ingress(const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ACT + return G_TC_AT(skb->tc_verd) & AT_INGRESS; +#else + return false; +#endif +} + /* Reset all TX qdiscs greater then index of a device. */ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5faaa5425f7b..b3c8bb4aeef5 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -79,12 +79,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_bpf_head *head = rcu_dereference_bh(tp->root); + bool at_ingress = skb_at_tc_ingress(skb); struct cls_bpf_prog *prog; -#ifdef CONFIG_NET_CLS_ACT - bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; -#else - bool at_ingress = false; -#endif int ret = -1; if (unlikely(!skb_mac_header_was_set(skb))) -- cgit v1.2.3 From f8ffad69c9f8b8dfb0b633425d4ef4d2493ba61a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 7 Jan 2016 15:50:23 +0100 Subject: bpf: add skb_postpush_rcsum and fix dev_forward_skb occasions Add a small helper skb_postpush_rcsum() and fix up redirect locations that need CHECKSUM_COMPLETE fixups on ingress. 
dev_forward_skb() expects a proper csum that covers also Ethernet header, f.e. since 2c26d34bbcc0 ("net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding"), we also do skb_postpull_rcsum() after pulling Ethernet header off via eth_type_trans(). When using eBPF in a netns setup f.e. with vxlan in collect metadata mode, I can trigger the following csum issue with an IPv6 setup: [ 505.144065] dummy1: hw csum failure [...] [ 505.144108] Call Trace: [ 505.144112] [] dump_stack+0x44/0x5c [ 505.144134] [] netdev_rx_csum_fault+0x3a/0x40 [ 505.144142] [] __skb_checksum_complete+0xcf/0xe0 [ 505.144149] [] nf_ip6_checksum+0xb2/0x120 [ 505.144161] [] icmpv6_error+0x17e/0x328 [nf_conntrack_ipv6] [ 505.144170] [] ? ip6t_do_table+0x2fa/0x645 [ip6_tables] [ 505.144177] [] ? ipv6_get_l4proto+0x65/0xd0 [nf_conntrack_ipv6] [ 505.144189] [] nf_conntrack_in+0xc2/0x5a0 [nf_conntrack] [ 505.144196] [] ipv6_conntrack_in+0x1c/0x20 [nf_conntrack_ipv6] [ 505.144204] [] nf_iterate+0x5d/0x70 [ 505.144210] [] nf_hook_slow+0x66/0xc0 [ 505.144218] [] ipv6_rcv+0x3f2/0x4f0 [ 505.144225] [] ? ip6_make_skb+0x1b0/0x1b0 [ 505.144232] [] __netif_receive_skb_core+0x36b/0x9a0 [ 505.144239] [] ? __netif_receive_skb+0x18/0x60 [ 505.144245] [] __netif_receive_skb+0x18/0x60 [ 505.144252] [] process_backlog+0x9f/0x140 [ 505.144259] [] net_rx_action+0x145/0x320 [...] What happens is that on ingress, we push Ethernet header back in, either from cls_bpf or right before skb_do_redirect(), but without updating csum. The "hw csum failure" can be fixed by using the new skb_postpush_rcsum() helper for the dev_forward_skb() case to correct the csum diff again. Thanks to Hannes Frederic Sowa for the csum_partial() idea! Fixes: 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 17 +++++++++++++++++ net/core/filter.c | 17 +++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6b6bd42d6134..07f9ccd28654 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2805,6 +2805,23 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +static inline void skb_postpush_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + /* For performing the reverse operation to skb_postpull_rcsum(), + * we can instead of ... + * + * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); + * + * ... just use this equivalent version here to save a few + * instructions. Feeding csum of 0 in csum_partial() and later + * on adding skb->csum is equivalent to feed skb->csum in the + * first place. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_partial(start, len, skb->csum); +} + /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim diff --git a/net/core/filter.c b/net/core/filter.c index 35e6fed28709..0db92b5e2cbf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1368,8 +1368,9 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) /* skb_store_bits cannot return -EFAULT here */ skb_store_bits(skb, offset, ptr, len); - if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); + if (BPF_RECOMPUTE_CSUM(flags)) + skb_postpush_rcsum(skb, ptr, len); + return 0; } @@ -1525,8 +1526,12 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!skb2)) return -ENOMEM; - if (BPF_IS_REDIRECT_INGRESS(flags)) + if (BPF_IS_REDIRECT_INGRESS(flags)) { + if (skb_at_tc_ingress(skb2)) + skb_postpush_rcsum(skb2, skb_mac_header(skb2), + skb2->mac_len); 
return dev_forward_skb(dev, skb2); + } skb2->dev = dev; skb_sender_cpu_clear(skb2); @@ -1569,8 +1574,12 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - if (BPF_IS_REDIRECT_INGRESS(ri->flags)) + if (BPF_IS_REDIRECT_INGRESS(ri->flags)) { + if (skb_at_tc_ingress(skb)) + skb_postpush_rcsum(skb, skb_mac_header(skb), + skb->mac_len); return dev_forward_skb(dev, skb); + } skb->dev = dev; skb_sender_cpu_clear(skb); -- cgit v1.2.3 From 320f1a4a175e7cd5d3f006f92b4d4d3e2cbb7bb5 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 7 Jan 2016 14:52:43 -0500 Subject: net: sctp: prevent writes to cookie_hmac_alg from accessing invalid memory proc_dostring() needs an initialized destination string, while the one provided in proc_sctp_do_hmac_alg() contains stack garbage. Thus, writing to cookie_hmac_alg would strlen() that garbage and end up accessing invalid memory. Fixes: 3c68198e7 ("sctp: Make hmac algorithm selection for cookie generation dynamic") Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- net/sctp/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 26d50c565f54..3e0fc5127225 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -320,7 +320,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, struct ctl_table tbl; bool changed = false; char *none = "none"; - char tmp[8]; + char tmp[8] = {0}; int ret; memset(&tbl, 0, sizeof(struct ctl_table)); -- cgit v1.2.3 From 1f211a1b929c804100e138c5d3d656992cfd5622 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 7 Jan 2016 22:29:47 +0100 Subject: net, sched: add clsact qdisc This work adds a generalization of the ingress qdisc as a qdisc holding only classifiers. The clsact qdisc works on ingress, but also on egress. 
In both cases, its execution happens without taking the qdisc lock, and the main difference for the egress part compared to the prior version of [1] is that this can be applied with _any_ underlying real egress qdisc (also classless ones). Besides solving the use-case of [1], that is, allowing for more programmability on assigning skb->priority for the mqprio case that is supported by most popular 10G+ NICs, it also opens up a lot more flexibility for other tc applications. The main work on classification can already be done at clsact egress time if the use-case allows and state stored for later retrieval f.e. again in skb->priority with major/minors (which is checked by most classful qdiscs before consulting tc_classify()) and/or in other skb fields like skb->tc_index for some light-weight post-processing to get to the eventual classid in case of a classful qdisc. Another use case is that the clsact egress part allows to have a central egress counterpart to the ingress classifiers, so that classifiers can easily share state (e.g. in cls_bpf via eBPF maps) for ingress and egress. Currently, default setups like mq + pfifo_fast would require for this to use, for example, prio qdisc instead (to get a tc_classify() run) and to duplicate the egress classifier for each queue. With clsact, it allows for leaving the setup as is, it can additionally assign skb->priority to put the skb in one of pfifo_fast's bands and it can share state with maps. Moreover, we can access the skb's dst entry (f.e. to retrieve tclassid) w/o the need to perform a skb_dst_force() to hold on to it any longer. In lwt case, we can also use this facility to set up dst metadata via cls_bpf (bpf_skb_set_tunnel_key()) without needing a real egress qdisc just for that (case of IFF_NO_QUEUE devices, for example). The realization can be done without any changes to the scheduler core framework.
All it takes is that we have two a-priori defined minors/child classes, where we can mux between ingress and egress classifier list (dev->ingress_cl_list and dev->egress_cl_list, latter stored close to dev->_tx to avoid extra cacheline miss for moderate loads). The egress part is modelled somewhat similarly to handle_ing() and patched to a noop in case the functionality is not used. Both handlers are now called sch_handle_ingress() and sch_handle_egress(), code sharing among the two doesn't seem practical as there are various minor differences in both paths, so that making them conditional in a single handler would rather slow things down. Full compatibility to ingress qdisc is provided as well. Since both piggyback on TC_H_CLSACT, only one of them (ingress/clsact) can exist per netdevice, and thus ingress qdisc specific behaviour can be retained for user space. This means, either a user does 'tc qdisc add dev foo ingress' and configures ingress qdisc as usual, or the 'tc qdisc add dev foo clsact' alternative, where both, ingress and egress classifier can be configured as in the below example. ingress qdisc supports attaching classifier to any minor number whereas clsact has two fixed minors for muxing between the lists, therefore to not break user space setups, they are better done as two separate qdiscs. I decided to extend the sch_ingress module with clsact functionality so that commonly used code can be reused, the module is being aliased with sch_clsact so that it can be auto-loaded properly. An alternative would have been to add a flag when initializing ingress to alter its behaviour plus aliasing to a different name (as it's more than just ingress). However, the first would end up, based on the flag, choosing the new/old behaviour by calling different function implementations to handle each anyway, the latter would require to register ingress qdisc once again under different alias.
So, this really begs to provide a minimal, cleaner approach to have Qdisc_ops and Qdisc_class_ops by its own that share callbacks used by both. Example, adding qdisc: # tc qdisc add dev foo clsact # tc qdisc show dev foo qdisc mq 0: root qdisc pfifo_fast 0: parent :1 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :3 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :4 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc clsact ffff: parent ffff:fff1 Adding filters (deleting, etc works analogous by specifying ingress/egress): # tc filter add dev foo ingress bpf da obj bar.o sec ingress # tc filter add dev foo egress bpf da obj bar.o sec egress # tc filter show dev foo ingress filter protocol all pref 49152 bpf filter protocol all pref 49152 bpf handle 0x1 bar.o:[ingress] direct-action # tc filter show dev foo egress filter protocol all pref 49152 bpf filter protocol all pref 49152 bpf handle 0x1 bar.o:[egress] direct-action A 'tc filter show dev foo' or 'tc filter show dev foo parent ffff:' will show an empty list for clsact. Either using the parent names (ingress/egress) or specifying the full major/minor will then show the related filter lists. Prior work on a mqprio prequeue() facility [1] was done mainly by John Fastabend. [1] http://patchwork.ozlabs.org/patch/512949/ Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +- include/linux/rtnetlink.h | 5 +++ include/uapi/linux/pkt_sched.h | 4 ++ net/Kconfig | 3 ++ net/core/dev.c | 82 +++++++++++++++++++++++++++++++++++---- net/sched/Kconfig | 14 +++++-- net/sched/cls_bpf.c | 2 +- net/sched/sch_ingress.c | 88 +++++++++++++++++++++++++++++++++++++++++- 8 files changed, 186 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8d8e5ca951b4..2285596e7045 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1739,7 +1739,9 @@ struct net_device { #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps; #endif - +#ifdef CONFIG_NET_CLS_ACT + struct tcf_proto __rcu *egress_cl_list; +#endif #ifdef CONFIG_NET_SWITCHDEV u32 offload_fwd_mark; #endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 4be5048b1fbe..c006cc900c44 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -84,6 +84,11 @@ void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); #endif +#ifdef CONFIG_NET_EGRESS +void net_inc_egress_queue(void); +void net_dec_egress_queue(void); +#endif + extern void rtnetlink_init(void); extern void __rtnl_unlock(void); diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 8d2530daca9f..8cb18b44968e 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -72,6 +72,10 @@ struct tc_estimator { #define TC_H_UNSPEC (0U) #define TC_H_ROOT (0xFFFFFFFFU) #define TC_H_INGRESS (0xFFFFFFF1U) +#define TC_H_CLSACT TC_H_INGRESS + +#define TC_H_MIN_INGRESS 0xFFF2U +#define TC_H_MIN_EGRESS 0xFFF3U /* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ enum tc_link_layer { diff --git a/net/Kconfig b/net/Kconfig index 11f8c22af34d..174354618f8a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -48,6 +48,9 @@ config COMPAT_NETLINK_MESSAGES config NET_INGRESS bool +config NET_EGRESS + bool + menu "Networking 
options" source "net/packet/Kconfig" diff --git a/net/core/dev.c b/net/core/dev.c index 914b4a24c654..0ca95d5d7af0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1676,6 +1676,22 @@ void net_dec_ingress_queue(void) EXPORT_SYMBOL_GPL(net_dec_ingress_queue); #endif +#ifdef CONFIG_NET_EGRESS +static struct static_key egress_needed __read_mostly; + +void net_inc_egress_queue(void) +{ + static_key_slow_inc(&egress_needed); +} +EXPORT_SYMBOL_GPL(net_inc_egress_queue); + +void net_dec_egress_queue(void) +{ + static_key_slow_dec(&egress_needed); +} +EXPORT_SYMBOL_GPL(net_dec_egress_queue); +#endif + static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL /* We are not allowed to call static_key_slow_dec() from irq context @@ -3007,7 +3023,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, bool contended; int rc; - qdisc_pkt_len_init(skb); qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a @@ -3100,6 +3115,49 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dev_loopback_xmit); +#ifdef CONFIG_NET_EGRESS +static struct sk_buff * +sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) +{ + struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list); + struct tcf_result cl_res; + + if (!cl) + return skb; + + /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set + * earlier by the caller. + */ + qdisc_bstats_cpu_update(cl->q, skb); + + switch (tc_classify(skb, cl, &cl_res, false)) { + case TC_ACT_OK: + case TC_ACT_RECLASSIFY: + skb->tc_index = TC_H_MIN(cl_res.classid); + break; + case TC_ACT_SHOT: + qdisc_qstats_cpu_drop(cl->q); + *ret = NET_XMIT_DROP; + goto drop; + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *ret = NET_XMIT_SUCCESS; +drop: + kfree_skb(skb); + return NULL; + case TC_ACT_REDIRECT: + /* No need to push/pop skb's mac_header here on egress! 
*/ + skb_do_redirect(skb); + *ret = NET_XMIT_SUCCESS; + return NULL; + default: + break; + } + + return skb; +} +#endif /* CONFIG_NET_EGRESS */ + static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_XPS @@ -3226,6 +3284,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) skb_update_prio(skb); + qdisc_pkt_len_init(skb); +#ifdef CONFIG_NET_CLS_ACT + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); +# ifdef CONFIG_NET_EGRESS + if (static_key_false(&egress_needed)) { + skb = sch_handle_egress(skb, &rc, dev); + if (!skb) + goto out; + } +# endif +#endif /* If device/qdisc don't need skb->dst, release it right now while * its hot in this cpu cache. */ @@ -3247,9 +3316,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); -#endif trace_net_dev_queue(skb); if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); @@ -3806,9 +3872,9 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev, EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif -static inline struct sk_buff *handle_ing(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, struct net_device *orig_dev) +static inline struct sk_buff * +sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev) { #ifdef CONFIG_NET_CLS_ACT struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); @@ -4002,7 +4068,7 @@ another_round: skip_taps: #ifdef CONFIG_NET_INGRESS if (static_key_false(&ingress_needed)) { - skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index daa33432b716..82830824fb1f 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -310,15 +310,21 @@ config NET_SCH_PIE If unsure, say 
N. config NET_SCH_INGRESS - tristate "Ingress Qdisc" + tristate "Ingress/classifier-action Qdisc" depends on NET_CLS_ACT select NET_INGRESS + select NET_EGRESS ---help--- - Say Y here if you want to use classifiers for incoming packets. + Say Y here if you want to use classifiers for incoming and/or outgoing + packets. This qdisc doesn't do anything else besides running classifiers, + which can also have actions attached to them. In case of outgoing packets, + classifiers that this qdisc holds are executed in the transmit path + before real enqueuing to an egress qdisc happens. + If unsure, say Y. - To compile this code as a module, choose M here: the - module will be called sch_ingress. + To compile this code as a module, choose M here: the module will be + called sch_ingress with alias of sch_clsact. config NET_SCH_PLUG tristate "Plug network traffic until release (PLUG)" diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index b3c8bb4aeef5..8dc84300ee79 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -291,7 +291,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, prog->bpf_name = name; prog->filter = fp; - if (fp->dst_needed) + if (fp->dst_needed && !(tp->q->flags & TCQ_F_INGRESS)) netif_keep_dst(qdisc_dev(tp->q)); return 0; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index e7c648fa9dc3..10adbc617905 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -1,4 +1,5 @@ -/* net/sched/sch_ingress.c - Ingress qdisc +/* net/sched/sch_ingress.c - Ingress and clsact qdisc + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -98,17 +99,100 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static unsigned long clsact_get(struct Qdisc *sch, u32 classid) +{ + switch (TC_H_MIN(classid)) { + case 
TC_H_MIN(TC_H_MIN_INGRESS): + case TC_H_MIN(TC_H_MIN_EGRESS): + return TC_H_MIN(classid); + default: + return 0; + } +} + +static unsigned long clsact_bind_filter(struct Qdisc *sch, + unsigned long parent, u32 classid) +{ + return clsact_get(sch, classid); +} + +static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch, + unsigned long cl) +{ + struct net_device *dev = qdisc_dev(sch); + + switch (cl) { + case TC_H_MIN(TC_H_MIN_INGRESS): + return &dev->ingress_cl_list; + case TC_H_MIN(TC_H_MIN_EGRESS): + return &dev->egress_cl_list; + default: + return NULL; + } +} + +static int clsact_init(struct Qdisc *sch, struct nlattr *opt) +{ + net_inc_ingress_queue(); + net_inc_egress_queue(); + + sch->flags |= TCQ_F_CPUSTATS; + + return 0; +} + +static void clsact_destroy(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + + tcf_destroy_chain(&dev->ingress_cl_list); + tcf_destroy_chain(&dev->egress_cl_list); + + net_dec_ingress_queue(); + net_dec_egress_queue(); +} + +static const struct Qdisc_class_ops clsact_class_ops = { + .leaf = ingress_leaf, + .get = clsact_get, + .put = ingress_put, + .walk = ingress_walk, + .tcf_chain = clsact_find_tcf, + .bind_tcf = clsact_bind_filter, + .unbind_tcf = ingress_put, +}; + +static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { + .cl_ops = &clsact_class_ops, + .id = "clsact", + .init = clsact_init, + .destroy = clsact_destroy, + .dump = ingress_dump, + .owner = THIS_MODULE, +}; + static int __init ingress_module_init(void) { - return register_qdisc(&ingress_qdisc_ops); + int ret; + + ret = register_qdisc(&ingress_qdisc_ops); + if (!ret) { + ret = register_qdisc(&clsact_qdisc_ops); + if (ret) + unregister_qdisc(&ingress_qdisc_ops); + } + + return ret; } static void __exit ingress_module_exit(void) { unregister_qdisc(&ingress_qdisc_ops); + unregister_qdisc(&clsact_qdisc_ops); } module_init(ingress_module_init); module_exit(ingress_module_exit); +MODULE_ALIAS("sch_clsact"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 
From 3d171f3907329d4b1ce31d5ec9c852c5f0269578 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Fri, 8 Jan 2016 13:47:23 +0100 Subject: ipv6: always add flag an address that failed DAD with DADFAILED The userspace needs to know why is the address being removed so that it can perhaps obtain a new address. Without the DADFAILED flag it's impossible to distinguish removal of a temporary and tentative address due to DAD failure from other reasons (device removed, manual address removal). Signed-off-by: Lubomir Rintel Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8697551b16a8..38eeddedfc21 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1772,12 +1772,13 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { + if (dad_failed) + ifp->flags |= IFA_F_DADFAILED; + if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; - if (dad_failed) - ifp->flags |= IFA_F_DADFAILED; spin_unlock_bh(&ifp->lock); if (dad_failed) ipv6_ifa_notify(0, ifp); -- cgit v1.2.3 From a78cb84c62c427807d917c5aa8797740f00b0bbe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jan 2016 08:37:20 -0800 Subject: net: add scheduling point in recvmmsg/sendmmsg Applications often have to reduce number of datagrams they receive or send per system call to avoid starvation problems. Really the kernel should take care of this by using cond_resched(), so that applications can experiment bigger batch sizes. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index d730ef9dfbf0..91c2de6f5020 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2041,6 +2041,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (err) break; ++datagrams; + cond_resched(); } fput_light(sock->file, fput_needed); @@ -2236,6 +2237,7 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, /* Out of band data, return right away */ if (msg_sys.msg_flags & MSG_OOB) break; + cond_resched(); } out_put: -- cgit v1.2.3 From 3e4006f0b86a5ae5eb0e8215f9a9e1db24506977 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jan 2016 09:35:51 -0800 Subject: ipv6: tcp: add rcu locking in tcp_v6_send_synack() When first SYNACK is sent, we already hold rcu_read_lock(), but this is not true if a SYNACK is retransmitted, as a timer (soft) interrupt does not hold rcu_read_lock() Fixes: 45f6fad84cc30 ("ipv6: add complete rcu protection around np->opt") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/tcp_ipv6.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6b8a8a9091fa..bd100b47c717 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -462,8 +462,10 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + rcu_read_lock(); err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } -- cgit v1.2.3 From 5ea030429fed07ea47e45152202d6ecb24133374 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sat, 9 Jan 2016 16:07:09 -0700 Subject: openvswitch: clean up unused function commit 6b001e682e90 ("openvswitch: Use Geneve device.") The commit above deleted the only call site of ovs_tunnel_route_lookup() and now that function is not used any more. So let's delete the function definition as well. Signed-off-by: Jean Sacren Acked-by: Thomas Graf Signed-off-by: David S. 
Miller --- net/openvswitch/vport.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 8ea3a96980ac..f00bb153ad13 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -204,26 +204,6 @@ int __ovs_vport_ops_register(struct vport_ops *ops); }) void ovs_vport_ops_unregister(struct vport_ops *ops); - -static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, - const struct ip_tunnel_key *key, - u32 mark, - struct flowi4 *fl, - u8 protocol) -{ - struct rtable *rt; - - memset(fl, 0, sizeof(*fl)); - fl->daddr = key->u.ipv4.dst; - fl->saddr = key->u.ipv4.src; - fl->flowi4_tos = RT_TOS(key->tos); - fl->flowi4_mark = mark; - fl->flowi4_proto = protocol; - - rt = ip_route_output_key(net, fl); - return rt; -} - void ovs_vport_send(struct vport *vport, struct sk_buff *skb); #endif /* vport.h */ -- cgit v1.2.3 From 2f7066ada15c865eeab5a3f6c69dcf58d196e349 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sat, 9 Jan 2016 16:07:10 -0700 Subject: openvswitch: fix struct geneve_port member name commit 6b001e682e90 ("openvswitch: Use Geneve device.") The commit above introduced 'port_no' as the name for the member of struct geneve_port. The correct name should be 'dst_port' as described in the kernel doc. Let's fix that member name and all the pertinent instances so that both doc and code would be consistent. Signed-off-by: Jean Sacren Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/openvswitch/vport-geneve.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index e41cd12d9b2d..30ab8e127288 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -34,7 +34,7 @@ static struct vport_ops ovs_geneve_vport_ops; * @dst_port: destination port. 
*/ struct geneve_port { - u16 port_no; + u16 dst_port; }; static inline struct geneve_port *geneve_vport(const struct vport *vport) @@ -47,7 +47,7 @@ static int geneve_get_options(const struct vport *vport, { struct geneve_port *geneve_port = geneve_vport(vport); - if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->port_no)) + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->dst_port)) return -EMSGSIZE; return 0; } @@ -83,7 +83,7 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) return vport; geneve_port = geneve_vport(vport); - geneve_port->port_no = dst_port; + geneve_port->dst_port = dst_port; rtnl_lock(); dev = geneve_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port); -- cgit v1.2.3 From c5420eb12f8e26dd2951c5acc954ca4848f488cb Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sat, 9 Jan 2016 16:07:11 -0700 Subject: openvswitch: update kernel doc for struct vport commit be4ace6e6b1b ("openvswitch: Move dev pointer into vport itself") The commit above added @dev and moved @rcu to the bottom of struct vport, but the change was not reflected in the kernel doc. So let's update the kernel doc as well. Signed-off-by: Jean Sacren Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/openvswitch/vport.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index f00bb153ad13..c10899cb9040 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -70,7 +70,7 @@ struct vport_portids { /** * struct vport - one port within a datapath - * @rcu: RCU callback head for deferred destruction. + * @dev: Pointer to net_device. * @dp: Datapath to which this port belongs. * @upcall_portids: RCU protected 'struct vport_portids'. * @port_no: Index into @dp's @ports array. @@ -78,6 +78,7 @@ struct vport_portids { * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. * @ops: Class structure. 
* @detach_list: list used for detaching vport in net-exit call. + * @rcu: RCU callback head for deferred destruction. */ struct vport { struct net_device *dev; -- cgit v1.2.3 From 712f4aad406bb1ed67f3f98d04c044191f0ff593 Mon Sep 17 00:00:00 2001 From: willy tarreau Date: Sun, 10 Jan 2016 07:54:56 +0100 Subject: unix: properly account for FDs passed over unix sockets It is possible for a process to allocate and accumulate far more FDs than the process' limit by sending them over a unix socket then closing them to keep the process' fd count low. This change addresses this problem by keeping track of the number of FDs in flight per user and preventing non-privileged processes from having more FDs in flight than their configured FD limit. Reported-by: socketpair@gmail.com Reported-by: Tetsuo Handa Mitigates: CVE-2013-4312 (Linux 2.0+) Suggested-by: Linus Torvalds Acked-by: Hannes Frederic Sowa Signed-off-by: Willy Tarreau Signed-off-by: David S. Miller --- include/linux/sched.h | 1 + net/unix/af_unix.c | 24 ++++++++++++++++++++---- net/unix/garbage.c | 13 ++++++++----- 3 files changed, 29 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/linux/sched.h b/include/linux/sched.h index edad7a43edea..fbf25f19b3b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -830,6 +830,7 @@ struct user_struct { unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ #endif unsigned long locked_shm; /* How many pages of mlocked shm ? 
*/ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ #ifdef CONFIG_KEYS struct key *uid_keyring; /* UID specific keyring */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ef05cd9403d4..e3f85bc8b135 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1513,6 +1513,21 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +/* + * The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + #define MAX_RECURSION_LEVEL 4 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1521,6 +1536,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) unsigned char max_level = 0; int unix_sock_count = 0; + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); @@ -1542,10 +1560,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - if (unix_sock_count) { - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - } + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a73a226f2d33..8fcdc2283af5 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); - if 
(atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) BUG_ON(list_empty(&u->link)); } unix_tot_inflight++; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight++; + spin_unlock(&unix_gc_lock); } void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight--; + spin_unlock(&unix_gc_lock); } static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), -- cgit v1.2.3 From 617cfc753049a4e1e161ae5e5e00e92d56be2b90 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Sun, 10 Jan 2016 21:26:57 +0600 Subject: net/rtnetlink: remove unused sz_idx variable The sz_idx variable is defined in the rtnetlink_rcv_msg(), but not used anywhere. Let's remove it. Signed-off-by: Alexander Kuleshov Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index baf49cb2f23d..d735e854f916 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3351,7 +3351,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); rtnl_doit_func doit; - int sz_idx, kind; + int kind; int family; int type; int err; @@ -3367,7 +3367,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return 0; family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; - sz_idx = type>>2; kind = type&3; if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) -- cgit v1.2.3 From 649621e3d54439ae232d726d7beef295d3887a68 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 8 Jan 2016 11:00:54 -0200 Subject: sctp: fix use-after-free in pr_debug statement Dmitry Vyukov reported a use-after-free in the code expanded by the macro debug_post_sfx, which is caused by the use of the asoc pointer after it was freed within sctp_side_effect() scope. This patch fixes it by allowing sctp_side_effect to clear that asoc pointer when the TCB is freed. As Vlad explained, we also have to cover the SCTP_DISPOSITION_ABORT case because it will trigger DELETE_TCB too on that same loop. Also, there were places issuing SCTP_CMD_INIT_FAILED and ASSOC_FAILED but returning SCTP_DISPOSITION_CONSUME, which would fool the scheme above. Fix it by returning SCTP_DISPOSITION_ABORT instead. The macro is already prepared to handle such NULL pointer. Reported-by: Dmitry Vyukov Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/sm_sideeffect.c | 11 ++++++----- net/sctp/sm_statefuns.c | 17 ++++------------- 2 files changed, 10 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 6098d4c42fa9..be23d5c2074f 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -63,7 +63,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, sctp_state_t state, struct sctp_endpoint *ep, - struct sctp_association *asoc, + struct sctp_association **asoc, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, @@ -1123,7 +1123,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, debug_post_sfn(); error = sctp_side_effects(event_type, subtype, state, - ep, asoc, event_arg, status, + ep, &asoc, event_arg, status, &commands, gfp); debug_post_sfx(); @@ -1136,7 +1136,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, sctp_state_t state, struct sctp_endpoint *ep, - struct sctp_association *asoc, + struct sctp_association **asoc, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, @@ -1151,7 +1151,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, * disposition SCTP_DISPOSITION_CONSUME. */ if (0 != (error = sctp_cmd_interpreter(event_type, subtype, state, - ep, asoc, + ep, *asoc, event_arg, status, commands, gfp))) goto bail; @@ -1174,11 +1174,12 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, break; case SCTP_DISPOSITION_DELETE_TCB: + case SCTP_DISPOSITION_ABORT: /* This should now be a command. */ + *asoc = NULL; break; case SCTP_DISPOSITION_CONSUME: - case SCTP_DISPOSITION_ABORT: /* * We should no longer have much work to do here as the * real work has been done as explicit commands above. 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 22c2bf367d7e..f1f08c8f277b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2976,7 +2976,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: - goto consume; + return SCTP_DISPOSITION_ABORT; case SCTP_IERROR_PROTO_VIOLATION: return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); @@ -3043,9 +3043,6 @@ discard_noforce: sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, force); return SCTP_DISPOSITION_DISCARD; -consume: - return SCTP_DISPOSITION_CONSUME; - } /* @@ -3093,7 +3090,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, case SCTP_IERROR_BAD_STREAM: break; case SCTP_IERROR_NO_DATA: - goto consume; + return SCTP_DISPOSITION_ABORT; case SCTP_IERROR_PROTO_VIOLATION: return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); @@ -3119,7 +3116,6 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); } -consume: return SCTP_DISPOSITION_CONSUME; } @@ -4825,9 +4821,6 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( * if necessary to fill gaps. */ struct sctp_chunk *abort = arg; - sctp_disposition_t retval; - - retval = SCTP_DISPOSITION_CONSUME; if (abort) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); @@ -4845,7 +4838,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); - return retval; + return SCTP_DISPOSITION_ABORT; } /* We tried an illegal operation on an association which is closed. 
*/ @@ -4960,12 +4953,10 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( sctp_cmd_seq_t *commands) { struct sctp_chunk *abort = arg; - sctp_disposition_t retval; /* Stop T1-init timer */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); - retval = SCTP_DISPOSITION_CONSUME; if (abort) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); @@ -4985,7 +4976,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, SCTP_PERR(SCTP_ERROR_USER_ABORT)); - return retval; + return SCTP_DISPOSITION_ABORT; } /* -- cgit v1.2.3 From 83d15e70c4d8909d722c0d64747d8fb42e38a48f Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 11 Jan 2016 13:42:43 -0500 Subject: tcp_yeah: don't set ssthresh below 2 For tcp_yeah, use an ssthresh floor of 2, the same floor used by Reno and CUBIC, per RFC 5681 (equation 4). tcp_yeah_ssthresh() was sometimes returning a 0 or negative ssthresh value if the intended reduction is as big or bigger than the current cwnd. Congestion control modules should never return a zero or negative ssthresh. A zero ssthresh generally results in a zero cwnd, causing the connection to stall. A negative ssthresh value will be interpreted as a u32 and will set a target cwnd for PRR near 4 billion. Oleksandr Natalenko reported that a system using tcp_yeah with ECN could see a warning about a prior_cwnd of 0 in tcp_cwnd_reduction(). Testing verified that this was due to tcp_yeah_ssthresh() misbehaving in this way. Reported-by: Oleksandr Natalenko Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_yeah.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 17d35662930d..3e6a472e6b88 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -219,7 +219,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); - return tp->snd_cwnd - reduction; + return max_t(int, tp->snd_cwnd - reduction, 2); } static struct tcp_congestion_ops tcp_yeah __read_mostly = { -- cgit v1.2.3 From 66530bdf85eb1d72a0c399665e09a2c2298501c6 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 10 Jan 2016 11:47:01 -0500 Subject: sched,cls_flower: set key address type when present only when user space passes the addresses should we consider their presence Signed-off-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 57692947ebbe..95b021243233 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -252,23 +252,28 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, sizeof(key->eth.src)); + fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, &mask->basic.n_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.n_proto)); + if (key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) { fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.ip_proto)); } - if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + + if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) { + key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, 
&mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, sizeof(key->ipv4.src)); fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, sizeof(key->ipv4.dst)); - } else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + } else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) { + key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, sizeof(key->ipv6.src)); @@ -276,6 +281,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK, sizeof(key->ipv6.dst)); } + if (key->basic.ip_proto == IPPROTO_TCP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, &mask->tp.src, TCA_FLOWER_UNSPEC, -- cgit v1.2.3 From 781c53bc5d5628065a46c70f02f5a0450f5842f4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Jan 2016 01:16:38 +0100 Subject: bpf: export helper function flags and reject invalid ones Export flags used by eBPF helper functions through UAPI, so they can be used by programs (instead of them redefining all flags each time or just using the hard-coded values). It also gives a better overview what flags are used where and we can further get rid of the extra macros defined in filter.c. Moreover, reject invalid flags. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 16 ++++++++++++++++ net/core/filter.c | 37 +++++++++++++++++++++++-------------- 2 files changed, 39 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8bed7f1176b8..d94797ce9a5a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -273,6 +273,22 @@ enum bpf_func_id { __BPF_FUNC_MAX_ID, }; +/* All flags used by eBPF helper functions, placed here. */ + +/* BPF_FUNC_skb_store_bytes flags. 
*/ +#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) + +/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. + * First 4 bits are for passing the header field size. + */ +#define BPF_F_HDR_FIELD_MASK 0xfULL + +/* BPF_FUNC_l4_csum_replace flags. */ +#define BPF_F_PSEUDO_HDR (1ULL << 4) + +/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ +#define BPF_F_INGRESS (1ULL << 0) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ diff --git a/net/core/filter.c b/net/core/filter.c index 0db92b5e2cbf..7c55cadc0f38 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1328,8 +1328,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) return 0; } -#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) -#define BPF_LDST_LEN 16U +#define BPF_LDST_LEN 16U static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { @@ -1340,6 +1339,9 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) char buf[BPF_LDST_LEN]; void *ptr; + if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM))) + return -EINVAL; + /* bpf verifier guarantees that: * 'from' pointer points to bpf program stack * 'len' bytes of it were initialized @@ -1359,7 +1361,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) if (unlikely(!ptr)) return -EFAULT; - if (BPF_RECOMPUTE_CSUM(flags)) + if (flags & BPF_F_RECOMPUTE_CSUM) skb_postpull_rcsum(skb, ptr, len); memcpy(ptr, from, len); @@ -1368,7 +1370,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) /* skb_store_bits cannot return -EFAULT here */ skb_store_bits(skb, offset, ptr, len); - if (BPF_RECOMPUTE_CSUM(flags)) + if (flags & BPF_F_RECOMPUTE_CSUM) skb_postpush_rcsum(skb, ptr, len); return 0; @@ -1415,15 +1417,14 @@ const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_STACK_SIZE, }; -#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) -#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 
0x10) - static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; int offset = (int) r2; __sum16 sum, *ptr; + if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) + return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; @@ -1435,7 +1436,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (unlikely(!ptr)) return -EFAULT; - switch (BPF_HEADER_FIELD_SIZE(flags)) { + switch (flags & BPF_F_HDR_FIELD_MASK) { case 2: csum_replace2(ptr, from, to); break; @@ -1467,10 +1468,12 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = { static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags); + bool is_pseudo = flags & BPF_F_PSEUDO_HDR; int offset = (int) r2; __sum16 sum, *ptr; + if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) + return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; @@ -1482,7 +1485,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (unlikely(!ptr)) return -EFAULT; - switch (BPF_HEADER_FIELD_SIZE(flags)) { + switch (flags & BPF_F_HDR_FIELD_MASK) { case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); break; @@ -1511,13 +1514,14 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1) - static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; struct net_device *dev; + if (unlikely(flags & ~(BPF_F_INGRESS))) + return -EINVAL; + dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); if (unlikely(!dev)) return -EINVAL; @@ -1526,7 +1530,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!skb2)) return -ENOMEM; - if 
(BPF_IS_REDIRECT_INGRESS(flags)) { + if (flags & BPF_F_INGRESS) { if (skb_at_tc_ingress(skb2)) skb_postpush_rcsum(skb2, skb_mac_header(skb2), skb2->mac_len); @@ -1553,12 +1557,17 @@ struct redirect_info { }; static DEFINE_PER_CPU(struct redirect_info, redirect_info); + static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); + if (unlikely(flags & ~(BPF_F_INGRESS))) + return TC_ACT_SHOT; + ri->ifindex = ifindex; ri->flags = flags; + return TC_ACT_REDIRECT; } @@ -1574,7 +1583,7 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - if (BPF_IS_REDIRECT_INGRESS(ri->flags)) { + if (ri->flags & BPF_F_INGRESS) { if (skb_at_tc_ingress(skb)) skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); -- cgit v1.2.3 From c6c33454072fc9fe961e2b25f22a619e4fa98838 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Jan 2016 01:16:39 +0100 Subject: bpf: support ipv6 for bpf_skb_{set,get}_tunnel_key After IPv6 support has recently been added to metadata dst and related encaps, add support for populating/reading it from an eBPF program. Commit d3aa45ce6b ("bpf: add helpers to access tunnel metadata") started with initial IPv4-only support back then (due to IPv6 metadata support not being available yet). To stay compatible with older programs, we need to test for the passed structure size. Also TOS and TTL support from the ip_tunnel_info key has been added. Tested with vxlan devs in collect meta data mode with IPv4, IPv6 and in compat mode over different network namespaces. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 10 ++++++- net/core/filter.c | 69 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 71 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d94797ce9a5a..aa6f8571de13 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -289,6 +289,9 @@ enum bpf_func_id { /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ #define BPF_F_INGRESS (1ULL << 0) +/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ +#define BPF_F_TUNINFO_IPV6 (1ULL << 0) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -312,7 +315,12 @@ struct __sk_buff { struct bpf_tunnel_key { __u32 tunnel_id; - __u32 remote_ipv4; + union { + __u32 remote_ipv4; + __u32 remote_ipv6[4]; + }; + __u8 tunnel_tos; + __u8 tunnel_ttl; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/net/core/filter.c b/net/core/filter.c index 7c55cadc0f38..77cdfb455e7f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1680,19 +1680,49 @@ bool bpf_helper_changes_skb_data(void *func) return false; } +static unsigned short bpf_tunnel_key_af(u64 flags) +{ + return flags & BPF_F_TUNINFO_IPV6 ? 
AF_INET6 : AF_INET; +} + static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; - struct ip_tunnel_info *info = skb_tunnel_info(skb); + const struct ip_tunnel_info *info = skb_tunnel_info(skb); + u8 compat[sizeof(struct bpf_tunnel_key)]; - if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info)) - return -EINVAL; - if (ip_tunnel_info_af(info) != AF_INET) + if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) return -EINVAL; + if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) + return -EPROTO; + if (unlikely(size != sizeof(struct bpf_tunnel_key))) { + switch (size) { + case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): + /* Fixup deprecated structure layouts here, so we have + * a common path later on. + */ + if (ip_tunnel_info_af(info) != AF_INET) + return -EINVAL; + to = (struct bpf_tunnel_key *)compat; + break; + default: + return -EINVAL; + } + } to->tunnel_id = be64_to_cpu(info->key.tun_id); - to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); + to->tunnel_tos = info->key.tos; + to->tunnel_ttl = info->key.ttl; + + if (flags & BPF_F_TUNINFO_IPV6) + memcpy(to->remote_ipv6, &info->key.u.ipv6.src, + sizeof(to->remote_ipv6)); + else + to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); + + if (unlikely(size != sizeof(struct bpf_tunnel_key))) + memcpy((void *)(long) r2, to, size); return 0; } @@ -1714,10 +1744,25 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) struct sk_buff *skb = (struct sk_buff *) (long) r1; struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2; struct metadata_dst *md = this_cpu_ptr(md_dst); + u8 compat[sizeof(struct bpf_tunnel_key)]; struct ip_tunnel_info *info; - if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags)) + if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6))) return -EINVAL; + if (unlikely(size != sizeof(struct 
bpf_tunnel_key))) { + switch (size) { + case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): + /* Fixup deprecated structure layouts here, so we have + * a common path later on. + */ + memcpy(compat, from, size); + memset(compat + size, 0, sizeof(compat) - size); + from = (struct bpf_tunnel_key *)compat; + break; + default: + return -EINVAL; + } + } skb_dst_drop(skb); dst_hold((struct dst_entry *) md); @@ -1725,9 +1770,19 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) info = &md->u.tun_info; info->mode = IP_TUNNEL_INFO_TX; + info->key.tun_flags = TUNNEL_KEY; info->key.tun_id = cpu_to_be64(from->tunnel_id); - info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); + info->key.tos = from->tunnel_tos; + info->key.ttl = from->tunnel_ttl; + + if (flags & BPF_F_TUNINFO_IPV6) { + info->mode |= IP_TUNNEL_INFO_IPV6; + memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, + sizeof(from->remote_ipv6)); + } else { + info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); + } return 0; } -- cgit v1.2.3 From 3de03596dfeee48bc803c1d1a6daf60a459929f3 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 10 Jan 2016 21:38:44 -0800 Subject: net: pktgen: fix null ptr deref in skb allocation Fix possible null pointer dereference that may occur when calling skb_reserve() on a null skb. Fixes: 879c7220e82 ("net: pktgen: Observe needed_headroom of the device") Signed-off-by: John Fastabend Signed-off-by: David S. 
Miller --- net/core/pktgen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index de8d5cc5eb24..4da4d51a2ccf 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2787,7 +2787,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, } else { skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); } - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + + if (likely(skb)) + skb_reserve(skb, LL_RESERVED_SPACE(dev)); return skb; } -- cgit v1.2.3 From 40ba330227ad00b8c0cdf2f425736ff9549cc423 Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Mon, 11 Jan 2016 07:50:30 +0100 Subject: udp: disallow UFO for sockets with SO_NO_CHECK option Commit acf8dd0a9d0b ("udp: only allow UFO for packets from SOCK_DGRAM sockets") disallows UFO for packets sent from raw sockets. We need to do the same also for SOCK_DGRAM sockets with SO_NO_CHECK options, even if for a bit different reason: while such socket would override the CHECKSUM_PARTIAL set by ip_ufo_append_data(), gso_size is still set and bad offloading flags warning is triggered in __skb_gso_segment(). In the IPv6 case, SO_NO_CHECK option is ignored but we need to disallow UFO for packets sent by sockets with UDP_NO_CHECK6_TX option. Signed-off-by: Michal Kubecek Tested-by: Shannon Nelson Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4233cbe47052..36ac9f3a6451 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -921,7 +921,7 @@ static int __ip_append_data(struct sock *sk, if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && - (sk->sk_type == SOCK_DGRAM)) { + (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e6a7bd15b9b7..6473889f1736 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1353,7 +1353,7 @@ emsgsize: (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && - (sk->sk_type == SOCK_DGRAM)) { + (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, fl6); -- cgit v1.2.3 From 7aaed57c5c2890634cfadf725173c7c68ea4cb4f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jan 2016 08:58:00 -0800 Subject: phonet: properly unshare skbs in phonet_rcv() Ivaylo Dimitrov reported a regression caused by commit 7866a621043f ("dev: add per net_device packet type chains"). skb->dev becomes NULL and we crash in __netif_receive_skb_core(). Before the above commit, different kinds of bugs or corruption could happen without a major crash. But the root cause is that phonet_rcv() can queue skb without checking if skb is shared or not. Many thanks to Ivaylo Dimitrov for his help, diagnosis and tests. Reported-by: Ivaylo Dimitrov Tested-by: Ivaylo Dimitrov Signed-off-by: Eric Dumazet Cc: Remi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/af_phonet.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 10d42f3220ab..f925753668a7 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct sockaddr_pn sa; u16 len; + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + /* check we have at least a full Phonet header */ if (!pskb_pull(skb, sizeof(struct phonethdr))) goto out; -- cgit v1.2.3 From 229394e8e62a4191d592842cf67e80c62a492937 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 12 Jan 2016 20:17:08 +0100 Subject: net: bpf: reject invalid shifts On ARM64, a BUG() is triggered in the eBPF JIT if a filter with a constant shift that can't be encoded in the immediate field of the UBFM/SBFM instructions is passed to the JIT. Since these shift amounts, which are negative or >= regsize, are invalid, reject them in the eBPF verifier and the classic BPF filter checker, for all architectures. Signed-off-by: Rabin Vincent Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 10 ++++++++++ net/core/filter.c | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'net') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a7945d10b378..d1d3e8f57de9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1121,6 +1121,16 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return -EINVAL; } + if ((opcode == BPF_LSH || opcode == BPF_RSH || + opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { + int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 
64 : 32; + + if (insn->imm < 0 || insn->imm >= size) { + verbose("invalid shift %d\n", insn->imm); + return -EINVAL; + } + } + /* pattern match 'bpf_add Rx, imm' instruction */ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && regs[insn->dst_reg].type == FRAME_PTR && diff --git a/net/core/filter.c b/net/core/filter.c index 77cdfb455e7f..94d26201080d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -785,6 +785,11 @@ static int bpf_check_classic(const struct sock_filter *filter, if (ftest->k == 0) return -EINVAL; break; + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + if (ftest->k >= 32) + return -EINVAL; + break; case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: case BPF_ST: -- cgit v1.2.3